# IMPORTS

In [165]:
import pandas as pd
import numpy as np
import os
from decimal import Decimal
import pickle
from pathlib import Path

from sklearn import linear_model
import statsmodels.api as sm

# SETUP

In [62]:
dir_tree_util_path = os.path.join("utils", "dir_tree.py")
# Run the helper script in this notebook's namespace. Names it defines become
# notebook globals (PROJECT_DIRS, used in the cells below, presumably comes from it — confirm).
# The file is opened in a `with` block so the handle is closed even if exec() raises.
# NOTE(review): exec() runs arbitrary code from that file; only use with a trusted, versioned script.
with open(dir_tree_util_path) as dir_tree_file:
    exec(dir_tree_file.read())

# INPUTS

In [63]:
# Paths
# PROJECT_DIRS is not defined in this notebook; presumably it is created by the
# exec'd utils/dir_tree.py in the SETUP cell — confirm.
# path_dados is combined with file names via the `/` operator below, so it is
# expected to be a pathlib.Path-like object.
path_dados = PROJECT_DIRS["DADOS_DERIVADOS_DIR"]
# path_output = TODO

In [402]:
# [first_year, last_year] windows for which model results are exported to Excel
# (one sheet per window and traffic table).
periodos = [[2011, 2023], [2017, 2023]]

In [397]:
# Directory where the Excel results file is written; currently the same
# directory the input parquet files are read from.
path_outputs = PROJECT_DIRS["DADOS_DERIVADOS_DIR"]

# CARREGANDO OS DADOS PARA A MEMÓRIA

In [400]:
# ANTT traffic data: one table for commercial vehicles and one for passenger vehicles
# (per the file names; exact unit/meaning of "VEQS" is not shown here — confirm).
df_veqs_comercial = pd.read_parquet(path_dados / 'df_VEQS_COMERCIAL.parquet')
df_veqs_passeio = pd.read_parquet(path_dados / 'df_VEQS_PASSEIO.parquet')

# Per-concessionaire date ranges; the filter functions below read its
# 'data_inicial' and 'data_final' columns and its index.
df_periodos = pd.read_parquet(path_dados / 'df_periodos.parquet')

# Traffic tables keyed by a short label; the label is used as the suffix of the
# Excel sheet names in the export cell.
dict_veqs = {'veqs_comercial':df_veqs_comercial,
           'veqs_passeio':df_veqs_passeio}

In [339]:
# GDP (PIB) data; the file name suggests the source is Bacen — confirm.
df_PIB = pd.read_parquet(path_dados / 'PIB-Bacen.parquet')

# TRATANDO OS DADOS

In [341]:
# GDP (PIB): rename the only column to 'PIB_real'. This name ends up as the prefix of
# the coefficient / p-value columns produced by make_models. The assignment raises
# if the frame does not have exactly one column.
df_PIB.columns = ['PIB_real']

# FUNÇÕES

In [134]:
def filter_PIB_periodo(df, periodo:list):
    """Select the rows of ``df`` for every year in ``periodo`` (inclusive).

    Parameters
    ----------
    df : pandas.DataFrame or pandas.Series
        Data indexed by year label; every year between ``min(periodo)`` and
        ``max(periodo)`` must be present in the index.
    periodo : list
        Two-element ``[first_year, last_year]`` list.

    Returns
    -------
    Same type as ``df``
        A new object restricted to the requested years, in ascending year
        order, with the row labelled ``periodo[0]`` overwritten with 1.
        The input ``df`` is not modified.

    Raises
    ------
    KeyError
        If any year of the range is missing from the index.
    """
    ano_min = min(periodo)
    ano_max = max(periodo)
    anos = list(range(ano_min, ano_max + 1))

    # Explicit copy: the assignment below must never write through to the caller's data.
    df_filtrado = df.loc[anos].copy()

    # NOTE(review): the first year's value is replaced by the constant 1. The reason
    # is not documented in this notebook (placeholder / base value?) — confirm that
    # this is intended before interpreting the regression results.
    df_filtrado.loc[periodo[0]] = 1

    return df_filtrado

In [242]:
def filter_conc_periodo(df_periodos, periodo:list):
    """Return the index labels of the rows whose date range covers the whole period.

    A row qualifies when ``data_inicial`` is on or before 1 January of
    ``periodo[0]`` and ``data_final`` is on or after 31 December of
    ``periodo[1]``.

    Parameters
    ----------
    df_periodos : pandas.DataFrame
        Must contain the columns ``'data_inicial'`` and ``'data_final'``.
        They are compared against ``datetime.date`` objects, so they are
        assumed to hold ``datetime.date`` values — confirm against the parquet schema.
    periodo : list
        Two-element ``[first_year, last_year]`` list.

    Returns
    -------
    pandas.Index
        Index labels of ``df_periodos`` for the qualifying rows.
    """
    start_date = pd.to_datetime(f'{periodo[0]}-01-01').date()
    end_date = pd.to_datetime(f'{periodo[1]}-12-31').date()

    cobre_periodo = (df_periodos['data_inicial'] <= start_date) & \
                    (df_periodos['data_final'] >= end_date)

    return df_periodos.loc[cobre_periodo].index

In [278]:
def filter_trafego_periodo(df_trafego, df_periodos, periodo:list):
    """Restrict a traffic table to the concessionaires and years of ``periodo``.

    Rows are selected with the index returned by ``filter_conc_periodo``.
    Columns are compared numerically with the years, so they are expected to
    be year numbers; those earlier than ``periodo[0] - 1`` or later than
    ``periodo[1]`` are dropped. The extra leading year is kept, presumably so
    a percentage change can later be computed for ``periodo[0]`` (see
    ``calc_perc_change``).

    Returns the transposed table: one row per year, one column per concessionaire.
    """
    concs_no_periodo = filter_conc_periodo(df_periodos, periodo)
    trafego_concs = df_trafego.loc[concs_no_periodo]

    primeiro_ano = periodo[0] - 1
    ultimo_ano = periodo[1]
    anos_fora = [
        ano
        for ano in trafego_concs.columns
        if ano < primeiro_ano or ano > ultimo_ano
    ]

    return trafego_concs.drop(columns=anos_fora).T

In [269]:
def calc_perc_change(df_trafego, periodo):
    """Convert each column to period-over-period percentage change (in %).

    Parameters
    ----------
    df_trafego : pandas.DataFrame
        One row per year (index) and one column per series. It is not
        modified; earlier versions overwrote its columns in place, which made
        the call non-idempotent.
    periodo : list
        ``[first_year, last_year]``; only ``periodo[0]`` is used here.

    Returns
    -------
    pandas.DataFrame
        Percentage changes for the rows from label ``periodo[0]`` onwards.
        Earlier rows are only used as the base of the first change and are
        dropped from the result.
    """
    variacao_pct = df_trafego.pct_change() * 100
    # Label-based slice: keeps every row from periodo[0] to the end of the index.
    return variacao_pct.loc[periodo[0]:, :]

In [288]:
# Ad-hoc check: display the commercial-traffic table filtered for the 2010-2023 window
# (rows = years, columns = concessionaires).
filter_trafego_periodo(df_veqs_comercial, df_periodos, [2010,2023])

concessionaria,TRANSBRASILIANA,RODOVIA DO AÇO,AUTOPISTA FERNÃO DIAS,AUTOPISTA FLUMINENSE,CONCER,AUTOPISTA REGIS BITTENCOURT,AUTOPISTA PLANALTO SUL,AUTOPISTA LITORAL SUL,ECOSUL
year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2010,15514995.0,12887198.0,92215435.0,23879336.0,13109979.0,114884661.0,19554781.0,69931704.0,14426940.0
2011,16821634.0,12595032.0,111335578.0,26387101.0,14804816.0,122893798.0,20394485.0,74189265.0,16168263.0
2012,18435974.0,11550846.0,115803681.0,27188502.0,15723249.0,123630694.0,19941264.0,77030428.0,15672205.0
2013,18752871.0,12922325.0,117936734.0,26658829.0,16066640.0,126658897.0,20865630.0,78950113.0,19186163.0
2014,18614344.0,12850352.0,114170690.0,25867004.0,15291939.0,123633793.0,21843099.0,98370521.0,19934187.0
2015,16416821.0,11591431.0,100205946.0,23157283.0,12496258.0,109082995.0,18344124.0,85115093.0,19358373.0
2016,15616264.0,10991740.0,95414669.0,20074706.0,10926452.0,102808795.0,17293967.0,79373266.0,18450658.0
2017,14865436.0,11594869.0,99033693.0,18511070.0,10202246.0,107596803.0,17118560.0,74168422.0,18671702.0
2018,14473693.0,11896078.0,100147329.0,18344987.0,10505250.0,110472028.0,18010664.0,75195173.0,18907997.0
2019,15114298.0,11944904.0,120213584.0,19389701.0,11027058.0,115068594.0,18899658.0,79392131.0,18599182.0


In [262]:
def create_df_PIB_concs(df_trafego, df_PIB, df_periodos, periodo:list):
    """Join the filtered PIB data with the filtered traffic table for ``periodo``.

    The PIB frame (from ``filter_PIB_periodo``) is the left side of the join, so
    the result keeps its year index; the traffic columns (from
    ``filter_trafego_periodo``) are matched to it by index label.
    """
    pib_periodo = pd.DataFrame(filter_PIB_periodo(df_PIB, periodo))
    trafego_periodo = filter_trafego_periodo(df_trafego, df_periodos, periodo)
    return pib_periodo.join(trafego_periodo)

In [295]:
def regressao(df_y, df_periodos, conc, df_X, periodo, fit_intercept=True):
    """Fit an OLS of one concessionaire's traffic growth on the explanatory data.

    Parameters
    ----------
    df_y : pandas.DataFrame
        Traffic table accepted by ``filter_trafego_periodo``.
    df_periodos : pandas.DataFrame
        Concessionaire date ranges accepted by ``filter_conc_periodo``.
    conc : hashable
        Column label (concessionaire) of the filtered traffic table to model.
    df_X : pandas.DataFrame or pandas.Series
        Explanatory data indexed by year, filtered with ``filter_PIB_periodo``.
        Earlier versions ignored this argument and always read the notebook
        global ``df_PIB``.
    periodo : list
        ``[first_year, last_year]`` window.
    fit_intercept : bool, default True
        When True a constant column is added to the regressors.

    Returns
    -------
    Fitted statsmodels OLS results object.
    """
    # Dependent variable: yearly percentage change of the chosen concessionaire.
    y_conc = filter_trafego_periodo(df_y, df_periodos, periodo)[conc]
    y_train = calc_perc_change(pd.DataFrame(y_conc), periodo)

    # Use the regressor that was passed in, not a notebook-level global.
    X_train = pd.DataFrame(filter_PIB_periodo(df_X, periodo))

    if fit_intercept:
        X_train = sm.add_constant(X_train)

    # NOTE(review): y_train and X_train are expected to cover the same years in the
    # same order; no explicit alignment or NaN handling is done here — confirm.
    model = sm.OLS(y_train, X_train).fit()

    return model

In [384]:
def make_models(df_veqs, df_periodos, df_X, periodo, fit_intercept=True):
    """Fit one regression per concessionaire and collect summary statistics.

    Parameters
    ----------
    df_veqs : pandas.DataFrame
        Traffic table accepted by ``filter_trafego_periodo``.
    df_periodos : pandas.DataFrame
        Concessionaire date ranges accepted by ``filter_conc_periodo``.
    df_X : pandas.DataFrame or pandas.Series
        Explanatory data forwarded to ``regressao``.
    periodo : list
        ``[first_year, last_year]`` window.
    fit_intercept : bool, default True
        Forwarded to ``regressao``.

    Returns
    -------
    pandas.DataFrame
        One row per concessionaire with the column ``'R2'`` followed, for every
        model parameter, by ``'<param>-coef'`` and ``'<param>-p-valor'``.
        With ``fit_intercept=False`` this is exactly ``R2``, ``<X>-coef``,
        ``<X>-p-valor``. With ``fit_intercept=True`` the constant's columns come
        first and the regressor's columns follow. Earlier versions reported only
        the first parameter, i.e. only the constant.
    """
    concs = filter_trafego_periodo(df_veqs, df_periodos, periodo).columns

    # Collect one record per concessionaire and build the frame once at the end.
    registros = []
    for conc in concs:
        model = regressao(df_veqs, df_periodos, conc, df_X, periodo, fit_intercept=fit_intercept)
        registro = {'R2': model.rsquared}
        for nome, coef, p_valor in zip(model.params.index, model.params.values, model.pvalues.values):
            registro[f'{nome}-coef'] = coef
            registro[f'{nome}-p-valor'] = p_valor
        registros.append(registro)

    return pd.DataFrame(registros, index=concs)

# REGRESSÃO

## Período 2011-2023

In [389]:
# Fit one no-intercept OLS per concessionaire for the 2011-2023 window
# (commercial traffic against df_PIB) and display the per-concessionaire summary.
model_11_23 = make_models(df_veqs_comercial, df_periodos, df_PIB, [2011, 2023], fit_intercept=False)
model_11_23

Unnamed: 0_level_0,R2,PIB_real-coef,PIB_real-p-valor
concessionaria,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
TRANSBRASILIANA,0.187843,0.922658,0.121587
RODOVIA DO AÇO,0.416907,1.411764,0.012624
AUTOPISTA FERNÃO DIAS,0.090191,1.233776,0.296831
AUTOPISTA FLUMINENSE,0.324131,1.742415,0.033583
CONCER,0.415267,1.948134,0.012857
AUTOPISTA REGIS BITTENCOURT,0.478093,1.483594,0.006161
AUTOPISTA PLANALTO SUL,0.545963,2.012901,0.002536
AUTOPISTA LITORAL SUL,0.25649,1.78103,0.064601
ECOSUL,0.2377,2.047181,0.076993


In [391]:
# Mean R² across the concessionaires fitted for 2011-2023.
model_11_23['R2'].mean()

np.float64(0.3280651550138982)

In [392]:
# Mean p-value of the PIB_real coefficient across the 2011-2023 models.
model_11_23['PIB_real-p-valor'].mean()

np.float64(0.06975263319666872)

In [395]:
# Mean PIB_real coefficient across the 2011-2023 models.
model_11_23['PIB_real-coef'].mean()

np.float64(1.6203835338934016)

## Período 2017-2023

In [390]:
# Fit one no-intercept OLS per concessionaire for the 2017-2023 window
# (commercial traffic against df_PIB) and display the per-concessionaire summary.
model_17_23 = make_models(df_veqs_comercial, df_periodos, df_PIB, [2017, 2023], fit_intercept=False)
model_17_23

Unnamed: 0_level_0,R2,PIB_real-coef,PIB_real-p-valor
concessionaria,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
MSVIA,0.287888,1.696313,0.170373
CRO,0.448201,1.309613,0.069372
VIA 040,0.342956,1.257074,0.127178
ECOPONTE,0.181272,5.1404,0.292926
CONCEBRA,0.047675,0.353209,0.603427
ECO050,0.080677,0.752466,0.495381
ECO101 CONCESSIONARIA DE RODOVIAS S/A,0.336337,1.268124,0.131821
VIA BAHIA,0.09974,0.376899,0.44604
TRANSBRASILIANA,6.9e-05,-0.011017,0.984413
RODOVIA DO AÇO,0.610695,1.112483,0.022001


In [393]:
# Mean R² across the concessionaires fitted for 2017-2023.
model_17_23['R2'].mean()

np.float64(0.253334267863935)

In [394]:
# Mean p-value of the PIB_real coefficient across the 2017-2023 models.
model_17_23['PIB_real-p-valor'].mean()

np.float64(0.30519121532145)

In [396]:
# Mean PIB_real coefficient across the 2017-2023 models.
model_17_23['PIB_real-coef'].mean()

np.float64(1.2744039613345777)

# Salvando para Excel

In [403]:
# Write one sheet per (period, traffic table) pair into a single workbook.
# Sheet names follow '<first_year>-<last_year>-<traffic key>'.
with pd.ExcelWriter(path_outputs / 'resultado_modelos_PIB.xlsx') as writer:
    for janela in periodos:
        for nome_veq, df_veq in dict_veqs.items():
            # Models are fitted without intercept, as in the cells above.
            df_resultados = make_models(
                df_veq, df_periodos, df_PIB, janela, fit_intercept=False
            )
            df_resultados.to_excel(
                writer,
                sheet_name=f'{janela[0]}-{janela[1]}-{nome_veq}',
            )