# In [None]:
import numpy as np
import seaborn as sns
import pandas as pd
import sklearn as sk
import sys
import matplotlib.pyplot as plt
from sklearn.datasets import load_boston
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import RANSACRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
import statsmodels.api as sm
import statsmodels.formula.api as smf
from statsmodels.compat import lzip
import statsmodels.stats.api as sms
import scipy.stats as stats


# 1. ------------------------- Load the data from CSV files with pandas

# Data set the regression below is estimated on.
filePath = (
    "C:/Users/Armando/Documents/Econometria_Aplicada/ModelosCenace/PronosticosEnergia_02-11-2020"
    "/EnergiaInyectada_Retiro_DB.csv"
)

# Input values (temperature / GDP projections) the fitted model is applied to.
inputPath = (
    "C:/Users/Armando/Documents/Econometria_Aplicada/ModelosCenace/PronosticosEnergia_02-11-2020"
    "/INSUMOS_PREDICCIONES.csv"
)

# Column names of interest; not referenced again in the visible part of the script.
encabezados = [
    'PERIODO',
    'GEN_TCA_SA',
    'REP_TCA_SA',
    'TEMP_TCA',
    'Y_TCA_SA',
]

inputDf = pd.read_csv(inputPath)
df = pd.read_csv(filePath)

# Show floats with a thousands separator and exactly 3 decimals when printing.
pd.options.display.float_format = '{:,.3f}'.format


# 2. ------------------------------------------------------ Model estimation

# a) Using scikit-learn's LinearRegression
X = df[['TEMP_TCA', 'Y_TCA_SA']]
# Alternative dependent variable: df['GEN_TCA_SA']
Y = df['REP_TCA_SA']
y_name = Y.name         # column label of the dependent variable
print(y_name)

reg = LinearRegression()
model_f = reg.fit(X, Y)

intercepto = model_f.intercept_
# Split the two fitted slopes (one per column of X, in column order).
temp_tca, y_tca = model_f.coef_

for estimate in (intercepto, temp_tca, y_tca):
    print(estimate)

# b) Using the statsmodels formula API on the same specification
ols_formula = y_name + '~ TEMP_TCA + Y_TCA_SA'
form_lr = smf.ols(formula=ols_formula, data=df)
model = form_lr.fit()
modelFSummary = model.summary()

print(modelFSummary)

# Residuals of the statsmodels fit; the diagnostic tests below use them.
residuals = model.resid


# 3. ------------------------------------------- Specification tests

# Normality of the residuals: Jarque-Bera test
jarqueBeraTest = stats.jarque_bera(residuals)
print("Jarque Bera Test:", jarqueBeraTest, sep="\n")

# Autocorrelation

# Ljung-Box and Box-Pierce tests for lags 1..lags
lags = 12

# Depending on the statsmodels version, acorr_ljungbox returns either a tuple
# of four arrays (older releases) or a DataFrame with the columns
# lb_stat / lb_pvalue / bp_stat / bp_pvalue (newer releases). Unpacking the
# DataFrame directly would bind the column *names*, not the statistics, so
# both shapes are handled explicitly.
ljungBoxResult = sm.stats.diagnostic.acorr_ljungbox(
    residuals, lags=lags, boxpierce=True
)
if isinstance(ljungBoxResult, pd.DataFrame):
    jb = ljungBoxResult['lb_stat'].to_numpy()
    jbpvalue = ljungBoxResult['lb_pvalue'].to_numpy()
    bp = ljungBoxResult['bp_stat'].to_numpy()
    bppvalue = ljungBoxResult['bp_pvalue'].to_numpy()
else:
    jb, jbpvalue, bp, bppvalue = ljungBoxResult

# Column labels are kept exactly as before so downstream lookups keep working.
JB_test = pd.DataFrame({'Jung-Box': jb,
                        'Jung-Box P-value': jbpvalue,
                        'Box-Pierce': bp,
                        'Box-Pierce P-value': bppvalue},
                       index=range(1, lags + 1))
print(JB_test)

# Breusch-Godfrey test, run once for every lag order from 1 up to `lags`
bgx, bgxpv, bgf, bgfpv = (np.zeros(lags) for _ in range(4))

for order in range(1, lags + 1):
    chi2_stat, chi2_pvalue, f_stat, f_pvalue = \
        sm.stats.diagnostic.acorr_breusch_godfrey(model, order)
    slot = order - 1        # arrays are 0-based, lag orders are 1-based
    bgx[slot] = chi2_stat
    bgxpv[slot] = chi2_pvalue
    bgf[slot] = f_stat
    bgfpv[slot] = f_pvalue

bg_columns = {
    'Chi-Sq': bgx,
    'Prob > Chi-Sq': bgxpv,
    'F': bgf,
    'Prob > F': bgfpv,
}
BG_test = pd.DataFrame(bg_columns, index=range(1, lags + 1))
print(BG_test)



# Heteroscedasticity: Breusch-Pagan test on the OLS residuals

# Labels for the four values returned by het_breuschpagan, in order.
names = [
    'Lagrange multiplier statistic',
    'p-value',
    'f-value',
    'f p-value',
]
exog_matrix = model.model.exog      # regressors of the fitted statsmodels model
test = sms.het_breuschpagan(residuals, exog_matrix)

# Pair every label with its value.
bptest = list(zip(names, test))

print(bptest)



# 4. ---------------------------------------------------- Projections
outColumns = ['ANIO', 'MES', 'ESCENARIO', 'PROYECCION']
inputData = inputDf[['PROYECCION_TEMP', 'PROYECCION_PIB']]

# Apply the fitted linear equation to every input row at once:
#   intercept + b_temp * projected temperature + b_gdp * projected GDP
# The terms are added in the same order as the previous row-by-row loop, so
# the projected values are the same.
proyeccion = (
    intercepto
    + temp_tca * inputData['PROYECCION_TEMP']
    + y_tca * inputData['PROYECCION_PIB']
)

# Build the result in a single step. DataFrame.append was removed in pandas
# 2.0 and copied the whole frame on every iteration, so it is not used here.
# 'Y' (name of the dependent variable) is not listed in outColumns but was
# always added to the output, so it is kept as the last column.
outDF = pd.DataFrame({
    'ANIO': inputDf['ANIO'],
    'MES': inputDf['MES'],
    'ESCENARIO': inputDf['ESCENARIO'],
    'PROYECCION': proyeccion,
    'Y': y_name,
})
# Fix the column order and renumber the rows 0..n-1 (what ignore_index=True did).
outDF = outDF[outColumns + ['Y']].reset_index(drop=True)

print(outDF)