In [21]:
import os
import glob
import pandas as pd
import pmdarima as pm
from pmdarima.model_selection import train_test_split

In [22]:
def determine_arima_params(series, start_p=0, start_q=0, max_p=5, max_q=5, m=1, seasonal=False, stepwise=True, suppress_warnings=True, trace=False):
    """
    Automatically select the (p, d, q) order of an ARIMA model for a time series.

    The search is delegated to ``pm.auto_arima``. It is run only on the training
    portion produced by ``train_test_split(series, train_size=0.8)``; the
    remaining portion is held out and is not used by this function.

    Args:
        series (pd.Series): Time series data.
        start_p (int): Starting value of p for the search.
        start_q (int): Starting value of q for the search.
        max_p (int): Largest value of p to consider.
        max_q (int): Largest value of q to consider.
        m (int): Seasonal period, forwarded to ``auto_arima``. Defaults to 1.
        seasonal (bool): Forwarded to ``auto_arima`` to enable a seasonal search.
            Note that only ``model.order`` is returned, so any seasonal order
            found by the search is not part of the result.
        stepwise (bool): Forwarded to ``auto_arima``; selects the stepwise search.
        suppress_warnings (bool): Forwarded to ``auto_arima`` to silence fit warnings.
        trace (bool): Forwarded to ``auto_arima``; prints search progress when True.

    Returns:
        tuple: The (p, d, q) order of the selected model.

    Raises:
        ValueError: If ``series`` is empty.
    """
    # Fail early with a clear message instead of an error from inside the splitter.
    if len(series) == 0:
        raise ValueError("series is empty; cannot determine ARIMA parameters")

    # Keep only the training portion; the held-out part is intentionally discarded.
    train, _holdout = train_test_split(series, train_size=0.8)

    # Let auto_arima search the candidate orders within the given bounds.
    model = pm.auto_arima(
        train,
        start_p=start_p,
        start_q=start_q,
        max_p=max_p,
        max_q=max_q,
        m=m,
        seasonal=seasonal,
        trace=trace,
        error_action='ignore',
        suppress_warnings=suppress_warnings,
        stepwise=stepwise
    )

    # model.order holds the order of the selected model.
    p, d, q = model.order

    return p, d, q

# Example usage: load the first matching GDAXI_*.csv export and estimate the
# ARIMA order for the midpoint of its <HIGH> and <LOW> columns.
if __name__ == "__main__":
    file_folder = r'/mnt/c/Users/Administrador/Downloads'
    file_pattern = os.path.join(file_folder, 'GDAXI_*.csv')
    # glob returns matches in arbitrary order; sort so the same file is chosen on every run.
    df_file_path = sorted(glob.glob(file_pattern))
    if not df_file_path:
        # Surface a descriptive error instead of an IndexError on df_file_path[0].
        raise FileNotFoundError(f"No files match pattern: {file_pattern}")
    # The export is tab-delimited.
    df = pd.read_csv(df_file_path[0], delimiter='\t')
    series = (df['<HIGH>'] + df['<LOW>']) / 2.0
    p, d, q = determine_arima_params(series, trace=True)
    print(f"Parámetros ARIMA óptimos: p={p}, d={d}, q={q}")


Performing stepwise search to minimize aic
 ARIMA(0,1,0)(0,0,0)[0] intercept   : AIC=524623.331, Time=1.32 sec
 ARIMA(1,1,0)(0,0,0)[0] intercept   : AIC=518463.895, Time=1.65 sec
 ARIMA(0,1,1)(0,0,0)[0] intercept   : AIC=517678.472, Time=7.39 sec
 ARIMA(0,1,0)(0,0,0)[0]             : AIC=524625.375, Time=0.69 sec
 ARIMA(1,1,1)(0,0,0)[0] intercept   : AIC=517665.405, Time=9.26 sec
 ARIMA(2,1,1)(0,0,0)[0] intercept   : AIC=517667.090, Time=13.57 sec
 ARIMA(1,1,2)(0,0,0)[0] intercept   : AIC=517667.089, Time=7.93 sec
 ARIMA(0,1,2)(0,0,0)[0] intercept   : AIC=517665.231, Time=8.18 sec
 ARIMA(0,1,3)(0,0,0)[0] intercept   : AIC=517667.062, Time=12.04 sec
 ARIMA(1,1,3)(0,0,0)[0] intercept   : AIC=517669.038, Time=16.37 sec
 ARIMA(0,1,2)(0,0,0)[0]             : AIC=517665.892, Time=1.78 sec

Best model:  ARIMA(0,1,2)(0,0,0)[0] intercept
Total fit time: 80.205 seconds
Parámetros ARIMA óptimos: p=0, d=1, q=2
