# IMPORTS

In [1]:
import pandas as pd
import numpy as np
import os
from decimal import Decimal
import pickle
from pathlib import Path

from sklearn import linear_model
import statsmodels.api as sm

# SETUP

In [2]:
dir_tree_util_path = os.path.join("utils", "dir_tree.py")
# Run the helper script inside this notebook's namespace; later cells read
# PROJECT_DIRS, which is presumably defined by that script (confirm in utils/dir_tree.py).
# Path.read_text() opens and closes the file itself, so no file handle is left dangling.
# NOTE(review): exec() runs whatever the file contains; importing the helper as a
# module would be safer if the project layout allows it.
exec(Path(dir_tree_util_path).read_text())

# INPUTS

In [3]:
# Paths
# Directory the parquet inputs are read from in the loading cells below.
path_dados = PROJECT_DIRS["DADOS_DERIVADOS_DIR"]
# TODO: pick a dedicated output directory; for now `path_outputs` (set in a later
# cell) points at the same DADOS_DERIVADOS_DIR entry.

In [4]:
periodos = [['2010-01','2022-01'], ['2015-09','2022-01']] # [start, end] pairs in YYYY-MM format, both ends inclusive

In [5]:
# Directory where the Excel workbook with the regression results is written.
path_outputs = PROJECT_DIRS["DADOS_DERIVADOS_DIR"]

# CARREGANDO p/MEMORIA OS DADOS

In [6]:
# ANTT traffic data: one table for commercial vehicles and one for passenger cars.
df_veqs_comercial = pd.read_parquet(path_dados / 'df_VEQS_COMERCIAL_mensal_ajustado.parquet')
df_veqs_passeio = pd.read_parquet(path_dados / 'df_VEQS_PASSEIO_mensal_ajustado.parquet')

# Per-concessionaire availability window (columns data_inicial / data_final).
df_periodos = pd.read_parquet(path_dados / 'df_periodos.parquet')

# Label -> traffic table; the labels are reused as part of the Excel sheet names.
dict_veqs = {'veqs_comercial':df_veqs_comercial,
           'veqs_passeio':df_veqs_passeio}

In [7]:
# GDP (PIB) data; the 'Data' column is turned into the index in the treatment cell.
df_PIB = pd.read_parquet(path_dados / 'PIB-Bacen_mensal.parquet')

# TRATANDO OS DADOS

In [8]:
# Index the GDP table by its 'Data' column (parsed as datetimes) and drop rows
# with missing values.
# The guard makes the cell safe to re-run: once 'Data' has been moved into the
# index the column no longer exists, and the unguarded version raised KeyError.
# set_index returns a new frame, so the object loaded above is not mutated in place.
if 'Data' in df_PIB.columns:
    df_PIB = df_PIB.set_index(pd.to_datetime(df_PIB['Data'])).drop(columns=['Data'])
df_PIB = df_PIB.dropna()

# DISPONIBILIDADE DE DADOS

In [27]:
# Show each concessionaire's data_inicial / data_final window; these columns are
# what filter_conc_periodo compares against the requested period.
df_periodos

Unnamed: 0_level_0,data_inicial,data_final
concessionaria,Unnamed: 1_level_1,Unnamed: 2_level_1
HOLDING DO SISTEMA RODOVIARIO RIO - SAO PAULO S.A.,2024-01-01,2024-06-03
VIA BRASIL,2023-02-01,2023-12-01
ECORIOMINAS,2022-10-01,2024-06-02
ECOVIAS DO ARAGUAIA,2022-10-01,2024-06-02
RIOSP,2022-03-01,2023-12-01
VIA COSTEIRA,2022-01-01,2024-04-01
ECOVIAS DO CERRADO,2020-11-14,2024-06-02
VIA SUL,2019-01-01,2024-05-03
MSVIA,2015-09-01,2024-05-03
CRO,2015-09-01,2024-06-03


# FUNÇÕES

In [10]:
def filter_PIB_periodo(df, periodo: list):
    """Keep the rows of ``df`` whose index is a month-end date inside ``periodo``.

    Parameters
    ----------
    df : pandas.DataFrame or pandas.Series
        Data indexed by timestamps. Only rows whose index label equals one of
        the month-end dates generated for the period are kept.
    periodo : list of str
        Two 'YYYY-MM' strings, both inclusive. The earlier one is taken as the
        start and the later one as the end, regardless of their order in the
        list (same convention as the other period filters in this notebook).

    Returns
    -------
    Same type as ``df``
        The filtered rows with any row containing missing values removed.
        ``df`` itself is not modified.
    """
    # 'YYYY-MM' parses to the first day of the month; the zero-offsets snap to
    # the first day of the start month and the last day of the end month.
    start_date = pd.to_datetime(min(periodo)) + pd.offsets.MonthBegin(0)
    end_date = pd.to_datetime(max(periodo)) + pd.offsets.MonthEnd(0)

    # One timestamp per month end. An offset object is used instead of the 'ME'
    # string alias so the call does not depend on the pandas version.
    month_ends = pd.date_range(start=start_date, end=end_date, freq=pd.offsets.MonthEnd())

    return df[df.index.isin(month_ends)].dropna()

In [11]:
def filter_conc_periodo(df_periodos, periodo:list):
    """Return the index labels of ``df_periodos`` whose window covers ``periodo``.

    A row qualifies when its ``data_inicial`` is on or before the first day of
    the earliest month in ``periodo`` and its ``data_final`` is on or after the
    last day of the latest month. Both bounds are compared as ``datetime.date``
    values.
    """
    first_month, last_month = min(periodo), max(periodo)
    period_start = pd.to_datetime(f"{first_month}-01").date()
    period_end = (pd.to_datetime(f"{last_month}-01") + pd.offsets.MonthEnd(0)).date()

    started_in_time = df_periodos['data_inicial'] <= period_start
    still_running = df_periodos['data_final'] >= period_end

    return df_periodos[started_in_time & still_running].index

In [15]:
# Sanity check: the traffic table's column labels are timestamps; show the first one as a date.
df_veqs_comercial.columns[0].date()

datetime.date(2010, 1, 31)

In [41]:
def filter_trafego_periodo(df_trafego, df_periodos, periodo:list):
    """Slice the traffic table to one period and return it transposed.

    Rows are restricted to the concessionaires that ``filter_conc_periodo``
    reports as covering the whole period, and columns to the months whose date
    lies between the first day of the earliest month and the last day of the
    latest month in ``periodo``. The result has one row per month and one
    column per concessionaire.
    """
    # Drop concessionaires that do not have every month of the period.
    concs_validas = filter_conc_periodo(df_periodos, periodo)
    trafego_concs = df_trafego.loc[concs_validas]

    # Keep only the month columns that fall inside the period.
    inicio = pd.to_datetime(f"{min(periodo)}-01").date()
    fim = (pd.to_datetime(f"{max(periodo)}-01") + pd.offsets.MonthEnd(0)).date()
    colunas_periodo = [mes for mes in trafego_concs.columns if inicio <= mes.date() <= fim]

    # Transpose so that months become the row index.
    return trafego_concs[colunas_periodo].T

In [42]:
def calc_perc_change(df_trafego):
    """Return the row-over-row percentage change of every column, in percent.

    The first row is dropped because it has no previous row to compare with.
    A new DataFrame is returned; the argument is left untouched (the previous
    implementation overwrote the caller's columns in place).
    """
    return (df_trafego.pct_change() * 100).iloc[1:, :]

In [262]:
# def create_df_PIB_concs(df_trafego, df_PIB, df_periodos, periodo:list):
    
#     X_PIB = pd.DataFrame(filter_PIB_periodo(df_PIB, periodo))
#     y_veqs = filter_trafego_periodo(df_trafego, df_periodos, periodo)
    
#     df_PIB_concs = X_PIB.join(y_veqs)  
        
#     return df_PIB_concs

In [55]:
def regressao(df_y, df_periodos, conc, df_X, periodo, fit_intercept=True):
    """Fit an OLS model of one concessionaire's traffic change on ``df_X``.

    Parameters
    ----------
    df_y : pandas.DataFrame
        Traffic table passed to ``filter_trafego_periodo``.
    df_periodos : pandas.DataFrame
        Availability table passed to ``filter_trafego_periodo``.
    conc
        Column label (concessionaire) to model.
    df_X : pandas.DataFrame or pandas.Series
        Explanatory data, filtered to the period with ``filter_PIB_periodo``
        and otherwise used as is.
    periodo : list of str
        ['YYYY-MM', 'YYYY-MM'] period, inclusive.
    fit_intercept : bool, default True
        When true, a constant column is added to the regressors.

    Returns
    -------
    The fitted statsmodels OLS results object.
    """
    # Dependent variable: month-over-month % change of the concessionaire's traffic.
    y_train = filter_trafego_periodo(df_y, df_periodos, periodo)[conc]
    y_train = calc_perc_change(pd.DataFrame(y_train))

    # Fix: filter the df_X argument; the previous version ignored it and read
    # the notebook-level df_PIB instead.
    X_train = pd.DataFrame(filter_PIB_periodo(df_X, periodo))

    # The regressors have one extra month (the first one, which has no % change
    # on the traffic side), so align them to the dependent variable's index.
    X_train = X_train.loc[y_train.index, :]

    if fit_intercept:
        X_train = sm.add_constant(X_train)

    return sm.OLS(y_train, X_train).fit()

In [69]:
def make_models(df_veqs, df_periodos, df_X, periodo, fit_intercept=True):
    """Fit one regression per concessionaire and collect summary statistics.

    Parameters
    ----------
    df_veqs : pandas.DataFrame
        Traffic table (see ``filter_trafego_periodo``).
    df_periodos : pandas.DataFrame
        Availability table used to pick the concessionaires covering ``periodo``.
    df_X : pandas.DataFrame
        Explanatory data forwarded to ``regressao``.
    periodo : list of str
        ['YYYY-MM', 'YYYY-MM'] period, inclusive.
    fit_intercept : bool, default True
        Forwarded to ``regressao``.

    Returns
    -------
    pandas.DataFrame
        One row per concessionaire with columns ``R2``, then ``<param>-coef``
        and ``<param>-p-valor`` for every fitted parameter, then ``n`` (number
        of observations used in the fit). With a single regressor and no
        intercept the columns are the same as before. With an intercept the
        regressor's columns are now included as well; previously only the
        constant's were reported.
    """
    # The set of concessionaires does not depend on the loop variable, so it is
    # computed once.
    concs = filter_trafego_periodo(df_veqs, df_periodos, periodo).columns

    linhas = []
    for conc in concs:
        model = regressao(df_veqs, df_periodos, conc, df_X, periodo, fit_intercept=fit_intercept)
        linha = {'R2': model.rsquared}
        for nome in model.params.index:
            linha[f'{nome}-coef'] = model.params[nome]
            linha[f'{nome}-p-valor'] = model.pvalues[nome]
        linha['n'] = model.nobs
        linhas.append(linha)

    # Build the frame in one go instead of growing it cell by cell with .loc.
    return pd.DataFrame(linhas, index=concs)

# REGRESSÃO

## Período 2010-01 a 2022-01

In [70]:
# Use the first configured period (periodos[0]) rather than repeating the literal,
# so this table always matches what the Excel export cell writes.
# NOTE(review): the variable name says 11_23 but the period fitted is 2010-01 to
# 2022-01; the name is kept because later cells reference it.
model_11_23 = make_models(df_veqs_comercial, df_periodos, df_PIB, periodos[0], fit_intercept=False)
model_11_23

Unnamed: 0_level_0,R2,PIB_mensal_real_%-coef,PIB_mensal_real_%-p-valor,n
concessionaria,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
NOVADUTRA,0.067417,0.478604,0.001612,144.0
TRANSBRASILIANA,0.033456,0.26655,0.027659,144.0
RODOVIA DO AÇO,0.090588,0.708618,0.000235,144.0
AUTOPISTA FERNÃO DIAS,0.071743,0.525864,0.001125,144.0
AUTOPISTA FLUMINENSE,0.046036,0.371503,0.009555,144.0
CRT,0.121483,0.578572,1.7e-05,144.0
CONCER,0.063499,0.43504,0.002231,144.0
AUTOPISTA REGIS BITTENCOURT,0.050485,0.431861,0.006587,144.0
AUTOPISTA PLANALTO SUL,0.073894,0.542131,0.000941,144.0
AUTOPISTA LITORAL SUL,0.032304,0.35899,0.030527,144.0


In [71]:
# Mean R² across the concessionaires in this table.
model_11_23['R2'].mean()

np.float64(0.0631262914110482)

In [72]:
# Mean of the per-concessionaire p-values.
# NOTE(review): an arithmetic mean of p-values is only a rough summary, not a p-value itself.
model_11_23['PIB_mensal_real_%-p-valor'].mean()

np.float64(0.008393286981317997)

In [73]:
# Mean regression coefficient across the concessionaires in this table.
model_11_23['PIB_mensal_real_%-coef'].mean()

np.float64(0.5019885481245611)

## Período 2015-09 a 2022-01

In [74]:
# Use the second configured period (periodos[1]) rather than repeating the literal,
# so this table always matches what the Excel export cell writes.
# NOTE(review): the variable name says 17_23 but the period fitted is 2015-09 to
# 2022-01; the name is kept because later cells reference it.
model_17_23 = make_models(df_veqs_comercial, df_periodos, df_PIB, periodos[1], fit_intercept=False)
model_17_23

Unnamed: 0_level_0,R2,PIB_mensal_real_%-coef,PIB_mensal_real_%-p-valor,n
concessionaria,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
MSVIA,0.039159,0.697116,0.0845,76.0
CRO,0.021176,0.427447,0.206667,76.0
VIA 040,0.060703,0.513133,0.030771,76.0
ECOPONTE,0.001938,0.302719,0.703825,76.0
CONCEBRA,0.06197,0.518853,0.029022,76.0
ECO050,0.02691,1.015849,0.153981,76.0
ECO101 CONCESSIONARIA DE RODOVIAS S/A,0.038257,0.390932,0.08824,76.0
VIA BAHIA,0.07884,0.668692,0.013378,76.0
NOVADUTRA,0.105666,0.76254,0.00392,76.0
TRANSBRASILIANA,0.042704,0.384366,0.071356,76.0


In [75]:
# Mean R² across the concessionaires in this table.
model_17_23['R2'].mean()

np.float64(0.05627839177528739)

In [78]:
# Mean of the per-concessionaire p-values.
# NOTE(review): an arithmetic mean of p-values is only a rough summary, not a p-value itself.
model_17_23['PIB_mensal_real_%-p-valor'].mean()

np.float64(0.09370676845288983)

In [80]:
# Mean regression coefficient across the concessionaires in this table.
model_17_23['PIB_mensal_real_%-coef'].mean()

np.float64(0.5969748377139612)

# Salvando para excel

In [82]:
# Write one worksheet per (period, vehicle class) combination.
# Excel limits worksheet names to 31 characters. The previous pattern
# '<start>-<end>-<key>_mensal' produced 35-37 characters for the configured keys,
# so the '_mensal' suffix (already part of the workbook file name) is dropped
# and the length is checked explicitly.
EXCEL_MAX_SHEET_NAME_LEN = 31

with pd.ExcelWriter(path_outputs / 'resultado_modelos_PIB_mensal.xlsx') as writer:
    for p in periodos:
        for key, veq in dict_veqs.items():
            sheet_name = f'{p[0]}-{p[1]}-{key}'
            if len(sheet_name) > EXCEL_MAX_SHEET_NAME_LEN:
                raise ValueError(
                    f"Sheet name {sheet_name!r} has {len(sheet_name)} characters; "
                    f"Excel allows at most {EXCEL_MAX_SHEET_NAME_LEN}."
                )
            df_resultados = make_models(veq, df_periodos, df_PIB, p, fit_intercept=False)
            df_resultados.to_excel(writer, sheet_name=sheet_name)

