In [50]:
import itertools
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from statsmodels.tsa.arima.model import ARIMA


In [51]:
window = 5

file_path = './in/copper_log_returns_{}d_final.csv'.format(window)
df = pd.read_csv(file_path)
df

Unnamed: 0,Date,LMCADS03,LMCADY,DXY,SPX,BCOM,MXWD,XAU,XAG,LMCADY_acu_5d_log,LMCADY_std_5d_log
0,2003-01-02,0.019048,0.019834,0.010739,0.032661,0.025337,0.023209,-0.005763,0.007918,0.040542,0.007181
1,2003-01-03,0.013121,0.013478,-0.004965,-0.000484,0.014288,0.003782,0.014915,0.014833,0.015400,0.010511
2,2003-01-06,0.010804,0.010262,-0.004891,0.022226,-0.016118,0.018549,0.000000,0.001533,0.011601,0.009986
3,2003-01-07,-0.004616,-0.004479,0.005867,-0.006566,-0.002697,-0.008853,-0.011453,-0.020109,0.027062,0.009745
4,2003-01-08,0.009822,0.009771,-0.006848,-0.014186,0.004093,-0.014657,0.018206,0.010363,0.023340,0.009467
...,...,...,...,...,...,...,...,...,...,...,...
5545,2024-04-08,0.008751,0.007897,-0.001535,-0.000375,0.000216,0.001968,0.003975,0.013549,0.023212,0.010161
5546,2024-04-09,0.000637,0.000161,0.000096,0.001444,-0.001109,0.001528,0.005861,0.010767,0.007737,0.013632
5547,2024-04-10,-0.004630,-0.004594,0.010506,-0.009502,0.000649,-0.008312,-0.007997,-0.007269,0.026616,0.014112
5548,2024-04-11,-0.003420,-0.003312,0.000285,0.007417,-0.005083,0.002714,0.016352,0.017577,0.046886,0.013921


In [52]:
df['Date'] = pd.to_datetime(df['Date'])

df = df.sort_values(by='Date')

df = df.reset_index(drop=True)

targ = 'LMCADY_acu_{}d_log'.format(window)

time_series = df[targ]

In [53]:
LMCADY_train, LMCADY_test = train_test_split(time_series, test_size=0.005, shuffle=False)

In [54]:
# def find_best_arima(time_series, max_q=20, best_p = 100):

#     best_aic = float('inf')
#     best_params = None

#     for q in range(max_q + 1):
#         try:
#             model_arima = ARIMA(time_series, order=(best_p, 0, q))
#             result_arima = model_arima.fit()

#             aic = result_arima.aic
#             print('ARIMA{} - AIC:{}'.format((best_p, 0, q), aic))

#             if aic < best_aic:
#                 best_aic = aic
#                 best_params = (best_p, q)

#         except Exception as e:
#             continue

#     return best_params, best_aic

In [55]:
# if window == 5:
#     best_p = 58 
# elif window == 10:
#     best_p = 59
# elif window == 20:
#     best_p = 100


# # best_params, best_aic = find_best_arima(time_series,best_p=best_p)
# # best_params, best_aic

# # ((0, 5), -29830.06289218084)

# best_params = (best_p, 10)
# best_aic = -29830.06289218084

In [56]:
def evaluate_arima_model(train_series, test_series, p, q):

    if not isinstance(train_series.index, pd.DatetimeIndex):
        train_series.index = pd.RangeIndex(start=0, stop=len(train_series))

    predictions = []

    current_train = train_series.copy()

    for actual in test_series:
        model_arima = ARIMA(current_train, order=(p, 0, q))
        model_fit = model_arima.fit()

        prediction = model_fit.predict(start=len(current_train), end=len(current_train))
        # print('Prediction: {:.6f}'.format(prediction.iloc[0]))

        predictions.append(prediction.iloc[0])

        current_train = pd.concat([current_train, pd.Series([actual], index=[len(current_train)])])

    return pd.Series(predictions, index=test_series.index)

In [57]:
def calculate_rmse(predictions, actual):
    # las predicciones para n se comparan con los valores reales en n+5 
    # por lo que se deben desplazar 5 posiciones
    actual_shift = actual.shift(-window)

    # recortar los valores nulos generados por el desplazamiento
    actual_shift_drop = actual_shift.dropna()
    # recortar las ultimas 5 predicciones
    predictions_shift = predictions[:-window]

    # Asegurarse de que las series sean de igual longitud
    if len(predictions_shift) != len(actual_shift_drop):
        raise ValueError("Las predicciones y los valores reales deben tener la misma longitud.")

    # Calcular el RMSE
    mse = mean_squared_error(actual_shift_drop, predictions_shift)
    rmse = np.sqrt(mse)
    return rmse

In [58]:
# p, q = best_params
p_range = range(5, 25, 5)
q_range = range(5, 25, 5)

best_rmse = float('inf')
best_config = None
best_predictions = None

i = 0
for p, q in itertools.product(p_range, q_range):
    print("Configuracion",i, 'p:', p, 'q:', q)
    LMCADY_predictions = evaluate_arima_model(LMCADY_train, LMCADY_test, p, q)

    LMCADY_predictions = LMCADY_predictions[:-window]

    LMCADY_rmse = calculate_rmse(LMCADY_predictions, LMCADY_test.shift(-window).dropna())
    print('ARMA> RMSE:', LMCADY_rmse)
    i+=1

    if LMCADY_rmse < best_rmse:
        best_rmse = LMCADY_rmse
        best_config = (p, q)
        best_predictions = LMCADY_predictions
    

# print('p:', p, 'q:', q)
# LMCADY_predictions = evaluate_arima_model(LMCADY_train, LMCADY_test, p, q)
# LMCADY_rmse = calculate_rmse(LMCADY_predictions, LMCADY_test)

Configuracion 0 p: 5 q: 5




ARMA> RMSE: 0.037572105957372856
Configuracion 1 p: 5 q: 10




ARMA> RMSE: 0.036087162588738585
Configuracion 2 p: 5 q: 15




ARMA> RMSE: 0.03649418703292585
Configuracion 3 p: 5 q: 20




ARMA> RMSE: 0.03717994160819218
Configuracion 4 p: 10 q: 5


  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA parameters found.'
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA parameters found.'
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA parameters found.'
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA parameters found.'
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA parameters found.'
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA parameters found.'
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA parameters found.'
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA parameters found.'
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA pa

ARMA> RMSE: 0.037417192731479386
Configuracion 5 p: 10 q: 10




ARMA> RMSE: 0.037210392222150813
Configuracion 6 p: 10 q: 15


  warn('Non-stationary starting autoregressive parameters'
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-stationary starting autoregressive parameter

ARMA> RMSE: 0.035605250065212944
Configuracion 7 p: 10 q: 20




ARMA> RMSE: 0.03719850094840261
Configuracion 8 p: 15 q: 5




ARMA> RMSE: 0.0363735581465364
Configuracion 9 p: 15 q: 10




ARMA> RMSE: 0.0371649008613075
Configuracion 10 p: 15 q: 15


  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA parameters found.'
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA parameters found.'
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA parameters found.'
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA parameters found.'
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA parameters found.'
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA parameters found.'
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA parameters found.'
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA parameters found.'
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA pa

ARMA> RMSE: 0.03675664182203372
Configuracion 11 p: 15 q: 20




ARMA> RMSE: 0.037298985785995394
Configuracion 12 p: 20 q: 5




ARMA> RMSE: 0.03687556067720189
Configuracion 13 p: 20 q: 10


  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA parameters found.'
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA parameters found.'
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA parameters found.'
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA parameters found.'
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA parameters found.'
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA parameters found.'
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA parameters found.'
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA parameters found.'
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA pa

ARMA> RMSE: 0.03646541249666799
Configuracion 14 p: 20 q: 15


  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA parameters found.'
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA parameters found.'
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA parameters found.'
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA parameters found.'
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA parameters found.'
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA parameters found.'
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA parameters found.'
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA parameters found.'
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA pa

KeyboardInterrupt: 

In [None]:
std_prueva = LMCADY_test.std()
print("Mejor rendimiento ARMA: p>{}, q>{} - RMSE: {} - STD Prueba: {}".format(best_config[0], best_config[1], best_rmse, std_prueva))

TypeError: 'NoneType' object is not subscriptable

In [None]:
def plot_predictions(test_series, predictions, title, best_lags, markersize=3):
    plt.figure(figsize=(14, 2.5))

    plt.plot(test_series.index, test_series, label="Actual", marker='o', linestyle='-', markersize=markersize)
    plt.plot(test_series.index, predictions, label="Predicted", marker='x', linestyle='--', markersize=markersize)
    plt.axhline(y=0, color='gray', linestyle='--', linewidth=0.7)


    plt.title(f"ARMA Model: {title} - {best_lags} lags")
    plt.legend()
    plt.show()

In [None]:
LMCADY_test.describe()['std']

0.025725924025968183

In [None]:
plot_predictions(LMCADY_test, LMCADY_predictions, "LMCADY {}d".format(window))

NameError: name 'LMCADY_predictions' is not defined