# **ENCONTRAR EL MEJOR ARIMA PARA LA SERIE TRANSFORMADA**

## **Cargar los datos**

In [1]:
import pandas as pd
import numpy as np

# Read the station file; columns are picked by 0-based position below.
data = pd.read_csv('MXN00021035.csv')

# Position 5 holds the YYYYMM stamp and position 6 the measured values
# (precipitation, according to the original author's note).
# NOTE(review): the original comments labelled index 6 as "column 5" and
# index 5 as "column 6" -- confirm the intended columns against the CSV header.
date = data.iloc[:, 5]
pre = data.iloc[:, 6]

# The stamp comes as e.g. 195210; insert a slash ("1952/10") and parse it
# as year/month so it can serve as the index of the series.
date = pd.to_datetime(
    date.astype(str).str.replace(r'(\d{4})(\d{2})', r'\1/\2', regex=True),
    format='%Y/%m',
)
pre = pd.Series(pre.values, index=date)

# Seasonal first difference with lag 12; the first 12 rows become NaN and
# are dropped. This full differenced series is kept as dpre_total.
dpre_total = pre.diff(12).dropna()

# Hold out the last 12 observations for testing; train on everything before.
dpre = dpre_total[:-12]
dpre_test = dpre_total[-12:]

## **Encontrar el mejor SARIMA basado en AIC**

In [2]:
import pmdarima as pm

In [3]:
# Stepwise search for the seasonal ARIMA (period 12) that minimizes AIC on
# the training series; trace=True prints every candidate that is tried.
opciones_busqueda = dict(
    seasonal=True,
    m=12,
    stepwise=True,
    suppress_warnings=True,
    trace=True,
)
auto_sarima_model = pm.auto_arima(dpre, **opciones_busqueda)

Performing stepwise search to minimize aic
 ARIMA(2,0,2)(1,0,1)[12] intercept   : AIC=inf, Time=1.59 sec
 ARIMA(0,0,0)(0,0,0)[12] intercept   : AIC=10443.104, Time=0.01 sec
 ARIMA(1,0,0)(1,0,0)[12] intercept   : AIC=10310.582, Time=0.33 sec
 ARIMA(0,0,1)(0,0,1)[12] intercept   : AIC=10218.699, Time=0.43 sec
 ARIMA(0,0,0)(0,0,0)[12]             : AIC=10441.104, Time=0.00 sec
 ARIMA(0,0,1)(0,0,0)[12] intercept   : AIC=10440.284, Time=0.03 sec
 ARIMA(0,0,1)(1,0,1)[12] intercept   : AIC=10220.678, Time=0.92 sec
 ARIMA(0,0,1)(0,0,2)[12] intercept   : AIC=10220.674, Time=1.87 sec
 ARIMA(0,0,1)(1,0,0)[12] intercept   : AIC=10310.665, Time=0.27 sec
 ARIMA(0,0,1)(1,0,2)[12] intercept   : AIC=inf, Time=1.99 sec
 ARIMA(0,0,0)(0,0,1)[12] intercept   : AIC=10232.767, Time=0.28 sec
 ARIMA(1,0,1)(0,0,1)[12] intercept   : AIC=10219.566, Time=0.65 sec
 ARIMA(0,0,2)(0,0,1)[12] intercept   : AIC=10220.097, Time=0.73 sec
 ARIMA(1,0,0)(0,0,1)[12] intercept   : AIC=10217.831, Time=0.38 sec
 ARIMA(1,0,0)(0,0

## **Poner Lindo el Output**

In [9]:
import re

In [10]:
# Text of a stepwise-search trace (one candidate model per line, followed by
# the "Best model" and total-time footer), stored as a string so the cells
# below can clean it up line by line.
texto = """
Performing stepwise search to minimize aic
 ARIMA(2,0,2)(1,0,1)[12] intercept   : AIC=inf, Time=1.59 sec
 ARIMA(0,0,0)(0,0,0)[12] intercept   : AIC=10443.104, Time=0.01 sec
 ARIMA(1,0,0)(1,0,0)[12] intercept   : AIC=10310.582, Time=0.33 sec
 ARIMA(0,0,1)(0,0,1)[12] intercept   : AIC=10218.699, Time=0.43 sec
 ARIMA(0,0,0)(0,0,0)[12]             : AIC=10441.104, Time=0.00 sec
 ARIMA(0,0,1)(0,0,0)[12] intercept   : AIC=10440.284, Time=0.03 sec
 ARIMA(0,0,1)(1,0,1)[12] intercept   : AIC=10220.678, Time=0.92 sec
 ARIMA(0,0,1)(0,0,2)[12] intercept   : AIC=10220.674, Time=1.87 sec
 ARIMA(0,0,1)(1,0,0)[12] intercept   : AIC=10310.665, Time=0.27 sec
 ARIMA(0,0,1)(1,0,2)[12] intercept   : AIC=inf, Time=1.99 sec
 ARIMA(0,0,0)(0,0,1)[12] intercept   : AIC=10232.767, Time=0.28 sec
 ARIMA(1,0,1)(0,0,1)[12] intercept   : AIC=10219.566, Time=0.65 sec
 ARIMA(0,0,2)(0,0,1)[12] intercept   : AIC=10220.097, Time=0.73 sec
 ARIMA(1,0,0)(0,0,1)[12] intercept   : AIC=10217.831, Time=0.38 sec
 ARIMA(1,0,0)(0,0,0)[12] intercept   : AIC=10440.419, Time=0.02 sec
 ARIMA(1,0,0)(1,0,1)[12] intercept   : AIC=10219.806, Time=0.87 sec
 ARIMA(1,0,0)(0,0,2)[12] intercept   : AIC=10219.801, Time=1.61 sec
 ARIMA(1,0,0)(1,0,2)[12] intercept   : AIC=inf, Time=2.09 sec
 ARIMA(2,0,0)(0,0,1)[12] intercept   : AIC=10219.591, Time=0.72 sec
 ARIMA(2,0,1)(0,0,1)[12] intercept   : AIC=inf, Time=1.09 sec
 ARIMA(1,0,0)(0,0,1)[12]             : AIC=10216.058, Time=0.27 sec
 ARIMA(1,0,0)(0,0,0)[12]             : AIC=10438.419, Time=0.01 sec
 ARIMA(1,0,0)(1,0,1)[12]             : AIC=10218.032, Time=0.26 sec
 ARIMA(1,0,0)(0,0,2)[12]             : AIC=10218.027, Time=0.72 sec
 ARIMA(1,0,0)(1,0,0)[12]             : AIC=10308.603, Time=0.12 sec
 ARIMA(1,0,0)(1,0,2)[12]             : AIC=inf, Time=1.24 sec
 ARIMA(0,0,0)(0,0,1)[12]             : AIC=10231.028, Time=0.05 sec
 ARIMA(2,0,0)(0,0,1)[12]             : AIC=10217.813, Time=0.27 sec
 ARIMA(1,0,1)(0,0,1)[12]             : AIC=10217.788, Time=0.37 sec
 ARIMA(0,0,1)(0,0,1)[12]             : AIC=10216.934, Time=0.21 sec
 ARIMA(2,0,1)(0,0,1)[12]             : AIC=inf, Time=0.50 sec

Best model:  ARIMA(1,0,0)(0,0,1)[12]          
Total fit time: 19.920 seconds
"""

In [11]:
def limpiar_linea(linea: str) -> str:
    """Normalize one line of a stepwise-search trace.

    A candidate line such as
    `` ARIMA(0,0,1)(0,0,1)[12] intercept   : AIC=10218.699, Time=0.43 sec``
    becomes ``ARIMA(0,0,1)(0,0,1)[12], AIC=10218.699``.

    Args:
        linea: A single line of the trace text.

    Returns:
        The cleaned line with surrounding whitespace removed, or an empty
        string when the line reports ``AIC=inf``. Lines that are not
        candidate rows (header, footer) are returned stripped but otherwise
        unchanged.
    """
    # Drop the "intercept" marker together with the whitespace around it.
    linea = re.sub(r'\s*intercept\s*', ' ', linea)
    # Drop the ", Time=x.xx sec" suffix.
    linea = re.sub(r',\s*Time=\d+\.\d+\s*sec', '', linea)
    # Replace the "]<padding>:" separator with "], ". Matching any amount of
    # padding avoids depending on the exact column width of the trace.
    linea = re.sub(r'\]\s*:\s*', '], ', linea)
    # Discard candidates whose fit failed. Only an explicit "AIC=inf" counts,
    # so unrelated text that merely contains "inf" is kept.
    if re.search(r'AIC=inf\b', linea):
        return ''
    return linea.strip()

# Clean every line of the trace text. limpiar_linea returns '' for rows it
# discards, so those rows show up as blank lines in the printed result.
lineas = texto.strip().split('\n')
lineas_limpias = list(map(limpiar_linea, lineas))

# Re-assemble the cleaned lines into a single block of text and show it.
resultado = "\n".join(lineas_limpias)
print(resultado)


Performing stepwise search to minimize aic

ARIMA(0,0,0)(0,0,0)[12], AIC=10443.104
ARIMA(1,0,0)(1,0,0)[12], AIC=10310.582
ARIMA(0,0,1)(0,0,1)[12], AIC=10218.699
ARIMA(0,0,0)(0,0,0)[12], AIC=10441.104
ARIMA(0,0,1)(0,0,0)[12], AIC=10440.284
ARIMA(0,0,1)(1,0,1)[12], AIC=10220.678
ARIMA(0,0,1)(0,0,2)[12], AIC=10220.674
ARIMA(0,0,1)(1,0,0)[12], AIC=10310.665

ARIMA(0,0,0)(0,0,1)[12], AIC=10232.767
ARIMA(1,0,1)(0,0,1)[12], AIC=10219.566
ARIMA(0,0,2)(0,0,1)[12], AIC=10220.097
ARIMA(1,0,0)(0,0,1)[12], AIC=10217.831
ARIMA(1,0,0)(0,0,0)[12], AIC=10440.419
ARIMA(1,0,0)(1,0,1)[12], AIC=10219.806
ARIMA(1,0,0)(0,0,2)[12], AIC=10219.801

ARIMA(2,0,0)(0,0,1)[12], AIC=10219.591

ARIMA(1,0,0)(0,0,1)[12], AIC=10216.058
ARIMA(1,0,0)(0,0,0)[12], AIC=10438.419
ARIMA(1,0,0)(1,0,1)[12], AIC=10218.032
ARIMA(1,0,0)(0,0,2)[12], AIC=10218.027
ARIMA(1,0,0)(1,0,0)[12], AIC=10308.603

ARIMA(0,0,0)(0,0,1)[12], AIC=10231.028
ARIMA(2,0,0)(0,0,1)[12], AIC=10217.813
ARIMA(1,0,1)(0,0,1)[12], AIC=10217.788
ARIMA(0,0,1)(0,0

In [12]:
# Cleaned candidate rows in "MODEL, AIC=value" form, without header, footer
# or AIC=inf rows (appears to be copied from the output printed above).
# Some model labels occur twice with different AIC values -- presumably the
# with-intercept and without-intercept fits, whose marker was stripped.
texto = """
ARIMA(0,0,0)(0,0,0)[12], AIC=10443.104
ARIMA(1,0,0)(1,0,0)[12], AIC=10310.582
ARIMA(0,0,1)(0,0,1)[12], AIC=10218.699
ARIMA(0,0,0)(0,0,0)[12], AIC=10441.104
ARIMA(0,0,1)(0,0,0)[12], AIC=10440.284
ARIMA(0,0,1)(1,0,1)[12], AIC=10220.678
ARIMA(0,0,1)(0,0,2)[12], AIC=10220.674
ARIMA(0,0,1)(1,0,0)[12], AIC=10310.665
ARIMA(0,0,0)(0,0,1)[12], AIC=10232.767
ARIMA(1,0,1)(0,0,1)[12], AIC=10219.566
ARIMA(0,0,2)(0,0,1)[12], AIC=10220.097
ARIMA(1,0,0)(0,0,1)[12], AIC=10217.831
ARIMA(1,0,0)(0,0,0)[12], AIC=10440.419
ARIMA(1,0,0)(1,0,1)[12], AIC=10219.806
ARIMA(1,0,0)(0,0,2)[12], AIC=10219.801
ARIMA(2,0,0)(0,0,1)[12], AIC=10219.591
ARIMA(1,0,0)(0,0,1)[12], AIC=10216.058
ARIMA(1,0,0)(0,0,0)[12], AIC=10438.419
ARIMA(1,0,0)(1,0,1)[12], AIC=10218.032
ARIMA(1,0,0)(0,0,2)[12], AIC=10218.027
ARIMA(1,0,0)(1,0,0)[12], AIC=10308.603
ARIMA(0,0,0)(0,0,1)[12], AIC=10231.028
ARIMA(2,0,0)(0,0,1)[12], AIC=10217.813
ARIMA(1,0,1)(0,0,1)[12], AIC=10217.788
ARIMA(0,0,1)(0,0,1)[12], AIC=10216.934
"""

In [13]:
def extraer_aic(linea: str) -> float:
    """Return the AIC value found in a ``..., AIC=<number>`` line.

    Args:
        linea: A cleaned trace line.

    Returns:
        The number following ``AIC=`` as a float, or ``float('inf')`` when
        the line has no numeric AIC (including ``AIC=inf``), so that such
        lines sort last.
    """
    # An optional sign and optional decimals are accepted: AIC values can be
    # negative or whole numbers, and those must not be mistaken for "no AIC".
    match = re.search(r'AIC=(-?\d+(?:\.\d+)?)', linea)
    return float(match.group(1)) if match else float('inf')

def extraer_modelo(linea):
    """Return the ``ARIMA(p,d,q)(P,D,Q)[m]`` label at the start of *linea*.

    The label must begin at the very first character of the line; if the
    line does not start with such a label, ``None`` is returned.
    """
    encontrado = re.match(r'(ARIMA\([^)]+\)\([^)]+\)\[\d+\])', linea)
    if encontrado is None:
        return None
    return encontrado.group(1)

# Collect the non-empty lines of the text, stripped of surrounding whitespace.
lineas = []
for cruda in texto.strip().split('\n'):
    recortada = cruda.strip()
    if recortada:
        lineas.append(recortada)

# One entry per model label; the FIRST line seen for a label is the one kept.
# NOTE(review): a label can appear more than once with different AIC values,
# so the kept line is not necessarily the lowest-AIC one for that label.
modelos_unicos = {}
for linea in lineas:
    modelo = extraer_modelo(linea)
    if not modelo:
        continue
    modelos_unicos.setdefault(modelo, linea)

# Unique lines, in order of first appearance.
lineas_unicas = list(modelos_unicos.values())

# Ascending AIC (stable sort, so ties keep their original order).
lineas_ordenadas = list(lineas_unicas)
lineas_ordenadas.sort(key=extraer_aic)

# Prefix each line with its 1-based rank.
lineas_indexadas = [
    f"({puesto}) {linea}"
    for puesto, linea in enumerate(lineas_ordenadas, start=1)
]

# Final ranking as a single block of text.
resultado = "\n".join(lineas_indexadas)
print(resultado)


(1) ARIMA(1,0,0)(0,0,1)[12], AIC=10217.831
(2) ARIMA(0,0,1)(0,0,1)[12], AIC=10218.699
(3) ARIMA(1,0,1)(0,0,1)[12], AIC=10219.566
(4) ARIMA(2,0,0)(0,0,1)[12], AIC=10219.591
(5) ARIMA(1,0,0)(0,0,2)[12], AIC=10219.801
(6) ARIMA(1,0,0)(1,0,1)[12], AIC=10219.806
(7) ARIMA(0,0,2)(0,0,1)[12], AIC=10220.097
(8) ARIMA(0,0,1)(0,0,2)[12], AIC=10220.674
(9) ARIMA(0,0,1)(1,0,1)[12], AIC=10220.678
(10) ARIMA(0,0,0)(0,0,1)[12], AIC=10232.767
(11) ARIMA(1,0,0)(1,0,0)[12], AIC=10310.582
(12) ARIMA(0,0,1)(1,0,0)[12], AIC=10310.665
(13) ARIMA(0,0,1)(0,0,0)[12], AIC=10440.284
(14) ARIMA(1,0,0)(0,0,0)[12], AIC=10440.419
(15) ARIMA(0,0,0)(0,0,0)[12], AIC=10443.104


## **RESULTADOS**

(1) ARIMA(1,0,0)(0,0,1)[12], AIC=10217.831  
(2) ARIMA(0,0,1)(0,0,1)[12], AIC=10218.699  
(3) ARIMA(1,0,1)(0,0,1)[12], AIC=10219.566  
(4) ARIMA(2,0,0)(0,0,1)[12], AIC=10219.591  
(5) ARIMA(1,0,0)(0,0,2)[12], AIC=10219.801  
(6) ARIMA(1,0,0)(1,0,1)[12], AIC=10219.806  
(7) ARIMA(0,0,2)(0,0,1)[12], AIC=10220.097  
(8) ARIMA(0,0,1)(0,0,2)[12], AIC=10220.674  
(9) ARIMA(0,0,1)(1,0,1)[12], AIC=10220.678  
(10) ARIMA(0,0,0)(0,0,1)[12], AIC=10232.767  
(11) ARIMA(1,0,0)(1,0,0)[12], AIC=10310.582  
(12) ARIMA(0,0,1)(1,0,0)[12], AIC=10310.665  
(13) ARIMA(0,0,1)(0,0,0)[12], AIC=10440.284  
(14) ARIMA(1,0,0)(0,0,0)[12], AIC=10440.419  
(15) ARIMA(0,0,0)(0,0,0)[12], AIC=10443.104  