# Modelo AR para radiación solar en Quito

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

Se importan los datos.

In [None]:
# Load the Quito 2017-2018 CSV using its first column (parsed as dates) as the
# index, then conform the frame to an hourly frequency.
df = pd.read_csv(
    './drive/MyDrive/DatosInvestigacion/quito_2017_2018.csv',
    index_col=0,
    parse_dates=True,
).asfreq('h')

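Se limita el horario a las horas comprendidas entre las 08:00 y las 17:00 (ambas incluidas), de modo que cada día aporta 10 observaciones.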

In [None]:
# Keep only the rows whose time of day lies between 08:00 and 17:00.
df = df.between_time(start_time='08:00:00', end_time='17:00:00')

In [None]:
# Number of leading rows used for training: 80 % of the sample, truncated to an int.
len_train = int(0.8 * len(df))
len_train

4120

In [None]:
# Chronological split: the first len_train rows train the model, the remainder is held out.
df_train, df_test = df.iloc[:len_train], df.iloc[len_train:]

In [None]:
# Preview the first rows of the training split.
df_train.head()

Unnamed: 0_level_0,RS,VEL,TMP,HUM
Date_time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2017-02-01 08:00:00,259.93,0.41,13.9,73.92
2017-02-01 09:00:00,482.96,1.02,15.77,62.89
2017-02-01 10:00:00,608.28,1.65,17.13,53.35
2017-02-01 11:00:00,789.95,2.51,17.75,47.3
2017-02-01 12:00:00,975.22,2.79,18.63,46.45


## Creación del modelo

In [None]:
from statsmodels.tsa.ar_model import AR

  import pandas.util.testing as tm


In [None]:
# Silence every warning for the rest of the session.
# NOTE(review): this blanket filter hides all warning categories, including
# deprecation notices from the libraries used below; consider narrowing it.
import warnings
warnings.filterwarnings("ignore")

In [None]:
# Fit an autoregressive model to the training RS series: up to 24 lags are
# considered and the order is selected with the BIC criterion.
# NOTE(review): newer statsmodels releases replace `AR` with `AutoReg`; confirm
# that the installed version still provides `AR`.
model = AR(df_train['RS'])
ARfit = model.fit(maxlag=24, ic='bic')
print(f'Lag: {ARfit.k_ar}', f'Coefficients:\n{ARfit.params}', sep='\n')

Lag: 21
Coefficients:
const     90.196831
L1.RS      0.605269
L2.RS     -0.051036
L3.RS      0.008912
L4.RS     -0.057670
L5.RS      0.024059
L6.RS     -0.014997
L7.RS     -0.019632
L8.RS      0.053366
L9.RS      0.079305
L10.RS     0.216600
L11.RS    -0.066506
L12.RS    -0.007087
L13.RS    -0.067058
L14.RS    -0.001084
L15.RS     0.022258
L16.RS    -0.034241
L17.RS    -0.038318
L18.RS     0.040290
L19.RS     0.028187
L20.RS     0.208899
L21.RS    -0.123142
dtype: float64


## Evaluación del modelo

In [None]:
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import r2_score

In [None]:
# Fitted parameters: the constant first, followed by one coefficient per lag.
coef = ARfit.params

Función para calcular predicciones a partir de los coeficientes.

In [None]:
def get_pred(coef,data):
  """Compute a one-step-ahead AR prediction from fitted coefficients.

  Args:
    coef: Sequence (list, array or pandas Series) whose first entry is the
      constant and whose remaining entries are the coefficients of lags
      1, 2, ... in that order.
    data: Past observations in chronological order (oldest first); the last
      entry is paired with the lag-1 coefficient. If more observations than
      lag coefficients are supplied, only the most recent ones are used. If
      fewer are supplied, only the leading lag coefficients contribute.

  Returns:
    The constant plus the sum of each lag coefficient times its observation.
  """
  # Work on plain arrays so that indexing is always positional: integer keys
  # on a label-indexed pandas Series would otherwise be looked up as labels.
  coef = np.asarray(coef, dtype=float)
  data = np.asarray(data, dtype=float)

  prediction = coef[0] # constant term
  # Walk the history from newest to oldest so that it lines up with lags
  # 1, 2, ...; terms are accumulated in that same order.
  for weight, value in zip(coef[1:], data[::-1]):
    prediction += weight * value
  return prediction

In [None]:
# One-step prediction for the first instant after the training sample, fed with
# the last training observations in chronological order. The window length is
# derived from the coefficients (one value per lag; coef also holds the
# constant, hence the -1) instead of being hard-coded.
get_pred(coef, df_train['RS'].iloc[-(len(coef) - 1):].to_numpy())

309.6566943762309

In [None]:
# Out-of-sample forecast from the fitted model for the two steps right after the
# training sample; the first value is expected to match the manual get_pred result.
ARfit.predict(start=len(df_train),end=len(df_train)+1)

4120    309.656694
4121    591.339808
dtype: float64

El valor real (258.22) es más bajo que el predicho (309.66).

In [None]:
# Observed RS value at the first test instant, for comparison with the prediction.
df_test['RS'].iloc[0]

258.22

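Se evaluará todo el conjunto de pruebas. Las primeras 21 observaciones se usan únicamente como historial de retrasos, por lo que se obtienen 1009 predicciones a un paso.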

In [None]:
# Container for the one-step predictions over the test set.
test_pred = []
# Number of past observations each prediction needs: one per lag coefficient
# (coef also holds the constant, hence the -1). Deriving it from the fitted
# coefficients keeps it in sync with the order selected by the model.
retrasos = len(coef) - 1

In [None]:
# Rolling one-step-ahead evaluation: every test instant from index `retrasos`
# onwards is predicted from the `retrasos` observed values that precede it.
# The list is rebuilt from scratch so that re-running this cell does not
# append duplicates. Plain NumPy windows are passed to get_pred so that it
# indexes them by position.
rs_test_values = df_test['RS'].to_numpy()
test_pred = [
  get_pred(coef, rs_test_values[i - retrasos:i])
  for i in range(retrasos, len(rs_test_values))
]

In [None]:
# Turn the predictions into a one-dimensional NumPy array.
test_pred = np.array(test_pred).flatten()

In [None]:
# Number of predictions produced.
test_pred.shape

(1009,)

In [None]:
# Size of the full test split (rows, columns).
df_test.shape

(1030, 4)

In [None]:
# Observed RS values aligned with the predictions: the first `retrasos` test rows
# are skipped because they only serve as history.
real_data = df_test['RS'].iloc[retrasos:]

In [None]:
# Should match the number of predictions.
real_data.shape

(1009,)

Resultados

In [None]:
# Error metrics of the one-step predictions against the observed RS values:
# mean squared error, mean absolute error and coefficient of determination.
MSE, MAE, r2 = (
  metric(real_data, test_pred)
  for metric in (mean_squared_error, mean_absolute_error, r2_score)
)

RMSE

In [None]:
# Root mean squared error: the square root of the MSE computed above.
np.sqrt(MSE)

154.58046358508113

MAE

In [None]:
# Mean absolute error of the test predictions.
MAE

122.63536915695636

$R^2$

In [None]:
# Coefficient of determination of the test predictions.
r2

0.6998071034426272