# **ENCONTRAR EL MEJOR ARIMA PARA LA SERIE TRANSFORMADA**

## **Cargar los datos**

In [1]:
import pandas as pd
from sklearn.preprocessing import PowerTransformer
import numpy as np

data = pd.read_csv('MXN00021035.csv')

# Columns are picked by 0-based position: 5 is read as the date, 6 as the precipitation.
# NOTE(review): confirm these positions against the CSV header.
# Dates arrive as e.g. 195210; a slash is inserted (1952/10) and the result is parsed as year/month.
date = pd.to_datetime(
    data.iloc[:, 5].astype(str).str.replace(r'(\d{4})(\d{2})', r'\1/\2', regex=True),
    format='%Y/%m',
)

# Precipitation values re-indexed by the parsed dates.
pre = pd.Series(data.iloc[:, 6].values, index=date)

# PowerTransformer expects a 2-D (n_samples, 1) array.
X = pre.values.reshape(-1, 1)

# Yeo-Johnson transform without standardization; the estimated lambda is kept for later use.
# NOTE(review): the transformer is fitted on the full series, i.e. including the
# 12 observations held out below — confirm this is intended.
pt = PowerTransformer(method='yeo-johnson', standardize=False)
ypre_total = pt.fit_transform(X)  # Full transformed series
lambda_est = pt.lambdas_[0]

# Train/test split: the last 12 observations (months) are held out for testing.
ypre = ypre_total[:-12]
ypre_test = ypre_total[-12:]

## **Encontrar el mejor SARIMA basado en AIC**

In [2]:
import pmdarima as pm

In [3]:
# Stepwise search for the best seasonal ARIMA (period m=12) on the training
# series; trace=True prints every candidate model with its AIC.
auto_sarima_model = pm.auto_arima(
    ypre,
    seasonal=True,
    m=12,
    stepwise=True,
    suppress_warnings=True,
    trace=True,
)

Performing stepwise search to minimize aic
 ARIMA(2,0,2)(1,0,1)[12] intercept   : AIC=inf, Time=1.31 sec
 ARIMA(0,0,0)(0,0,0)[12] intercept   : AIC=4561.998, Time=0.00 sec
 ARIMA(1,0,0)(1,0,0)[12] intercept   : AIC=4071.776, Time=0.33 sec
 ARIMA(0,0,1)(0,0,1)[12] intercept   : AIC=4190.035, Time=0.18 sec
 ARIMA(0,0,0)(0,0,0)[12]             : AIC=5416.302, Time=0.00 sec
 ARIMA(1,0,0)(0,0,0)[12] intercept   : AIC=4224.664, Time=0.04 sec
 ARIMA(1,0,0)(2,0,0)[12] intercept   : AIC=4020.549, Time=1.17 sec
 ARIMA(1,0,0)(2,0,1)[12] intercept   : AIC=3985.479, Time=1.48 sec
 ARIMA(1,0,0)(1,0,1)[12] intercept   : AIC=3983.519, Time=0.44 sec
 ARIMA(1,0,0)(0,0,1)[12] intercept   : AIC=4135.332, Time=0.20 sec
 ARIMA(1,0,0)(1,0,2)[12] intercept   : AIC=3985.469, Time=1.32 sec
 ARIMA(1,0,0)(0,0,2)[12] intercept   : AIC=4107.876, Time=0.75 sec
 ARIMA(1,0,0)(2,0,2)[12] intercept   : AIC=inf, Time=2.87 sec
 ARIMA(0,0,0)(1,0,1)[12] intercept   : AIC=4042.135, Time=0.54 sec
 ARIMA(2,0,0)(1,0,1)[12] intercept   : AIC=3979.726, Time=0.45 sec
 [salida truncada]

## **Poner Lindo el Output**

In [4]:
import re

In [5]:
# Raw stepwise-search trace (one candidate model per line, followed by the
# "Best model" and "Total fit time" footer), pasted as a string literal so the
# cleaning code can process it line by line.
texto = """
Performing stepwise search to minimize aic
 ARIMA(2,0,2)(1,0,1)[12] intercept   : AIC=inf, Time=1.31 sec
 ARIMA(0,0,0)(0,0,0)[12] intercept   : AIC=4561.998, Time=0.00 sec
 ARIMA(1,0,0)(1,0,0)[12] intercept   : AIC=4071.776, Time=0.33 sec
 ARIMA(0,0,1)(0,0,1)[12] intercept   : AIC=4190.035, Time=0.18 sec
 ARIMA(0,0,0)(0,0,0)[12]             : AIC=5416.302, Time=0.00 sec
 ARIMA(1,0,0)(0,0,0)[12] intercept   : AIC=4224.664, Time=0.04 sec
 ARIMA(1,0,0)(2,0,0)[12] intercept   : AIC=4020.549, Time=1.17 sec
 ARIMA(1,0,0)(2,0,1)[12] intercept   : AIC=3985.479, Time=1.48 sec
 ARIMA(1,0,0)(1,0,1)[12] intercept   : AIC=3983.519, Time=0.44 sec
 ARIMA(1,0,0)(0,0,1)[12] intercept   : AIC=4135.332, Time=0.20 sec
 ARIMA(1,0,0)(1,0,2)[12] intercept   : AIC=3985.469, Time=1.32 sec
 ARIMA(1,0,0)(0,0,2)[12] intercept   : AIC=4107.876, Time=0.75 sec
 ARIMA(1,0,0)(2,0,2)[12] intercept   : AIC=inf, Time=2.87 sec
 ARIMA(0,0,0)(1,0,1)[12] intercept   : AIC=4042.135, Time=0.54 sec
 ARIMA(2,0,0)(1,0,1)[12] intercept   : AIC=3979.726, Time=0.45 sec
 ARIMA(2,0,0)(0,0,1)[12] intercept   : AIC=4137.331, Time=0.34 sec
 ARIMA(2,0,0)(1,0,0)[12] intercept   : AIC=4068.635, Time=0.37 sec
 ARIMA(2,0,0)(2,0,1)[12] intercept   : AIC=3981.660, Time=1.83 sec
 ARIMA(2,0,0)(1,0,2)[12] intercept   : AIC=3981.646, Time=1.73 sec
 ARIMA(2,0,0)(0,0,0)[12] intercept   : AIC=4217.832, Time=0.08 sec
 ARIMA(2,0,0)(0,0,2)[12] intercept   : AIC=4107.673, Time=1.12 sec
 ARIMA(2,0,0)(2,0,0)[12] intercept   : AIC=4015.043, Time=1.53 sec
 ARIMA(2,0,0)(2,0,2)[12] intercept   : AIC=inf, Time=3.05 sec
 ARIMA(3,0,0)(1,0,1)[12] intercept   : AIC=3976.458, Time=0.96 sec
 ARIMA(3,0,0)(0,0,1)[12] intercept   : AIC=4103.753, Time=0.28 sec
 ARIMA(3,0,0)(1,0,0)[12] intercept   : AIC=4060.689, Time=0.56 sec
 ARIMA(3,0,0)(2,0,1)[12] intercept   : AIC=3978.409, Time=3.41 sec
 ARIMA(3,0,0)(1,0,2)[12] intercept   : AIC=3978.399, Time=2.46 sec
 ARIMA(3,0,0)(0,0,0)[12] intercept   : AIC=4157.272, Time=0.08 sec
 ARIMA(3,0,0)(0,0,2)[12] intercept   : AIC=4082.936, Time=1.37 sec
 ARIMA(3,0,0)(2,0,0)[12] intercept   : AIC=4011.744, Time=2.45 sec
 ARIMA(3,0,0)(2,0,2)[12] intercept   : AIC=inf, Time=4.61 sec
 ARIMA(4,0,0)(1,0,1)[12] intercept   : AIC=3963.079, Time=1.29 sec
 ARIMA(4,0,0)(0,0,1)[12] intercept   : AIC=4051.497, Time=0.52 sec
 ARIMA(4,0,0)(1,0,0)[12] intercept   : AIC=4034.057, Time=0.91 sec
 ARIMA(4,0,0)(2,0,1)[12] intercept   : AIC=3964.900, Time=4.70 sec
 ARIMA(4,0,0)(1,0,2)[12] intercept   : AIC=3965.097, Time=3.39 sec
 ARIMA(4,0,0)(0,0,0)[12] intercept   : AIC=4082.110, Time=0.09 sec
 ARIMA(4,0,0)(0,0,2)[12] intercept   : AIC=4038.271, Time=1.59 sec
 ARIMA(4,0,0)(2,0,0)[12] intercept   : AIC=3998.322, Time=3.01 sec
 ARIMA(4,0,0)(2,0,2)[12] intercept   : AIC=inf, Time=4.49 sec
 ARIMA(5,0,0)(1,0,1)[12] intercept   : AIC=3969.811, Time=1.97 sec
 ARIMA(4,0,1)(1,0,1)[12] intercept   : AIC=3965.197, Time=1.51 sec
 ARIMA(3,0,1)(1,0,1)[12] intercept   : AIC=inf, Time=1.34 sec
 ARIMA(5,0,1)(1,0,1)[12] intercept   : AIC=inf, Time=2.20 sec
 ARIMA(4,0,0)(1,0,1)[12]             : AIC=4001.623, Time=0.70 sec

Best model:  ARIMA(4,0,0)(1,0,1)[12] intercept
Total fit time: 65.080 seconds
"""

In [6]:
def limpiar_linea(linea):
    """Reduce one line of an auto_arima trace to ``MODEL, AIC=value``.

    The optional ``intercept`` label and the ``, Time=... sec`` suffix are
    removed, and the ``:`` separating the model from its criterion is turned
    into a comma regardless of how much padding surrounds it.

    Args:
        linea: A single line of the trace text.

    Returns:
        The cleaned, stripped line, or ``''`` when the line reports an
        infinite criterion (e.g. ``AIC=inf``), i.e. a model that could not
        be fitted. Lines that are not model rows are returned stripped and
        otherwise untouched apart from the removals above.
    """
    # Only reject real "=inf" criterion values; a bare substring test would
    # also discard unrelated lines that merely contain the letters "inf".
    if re.search(r'=\s*inf\b', linea):
        return ''
    # Drop the "intercept" label together with its surrounding padding.
    linea = re.sub(r'\s*intercept\s*', ' ', linea)
    # Drop the ", Time=x.xx sec" suffix.
    linea = re.sub(r',\s*Time=\d+(?:\.\d+)?\s*sec', '', linea)
    # Turn "]<padding>:<padding>" into "], " for any padding width, instead
    # of relying on one exact number of spaces.
    linea = re.sub(r'\]\s*:\s*', '], ', linea)
    return linea.strip()

# Run every line of the pasted trace through the cleaner. Lines the cleaner
# rejects come back as '' and are kept, so they print as blank lines.
lineas = texto.strip().split('\n')
lineas_limpias = list(map(limpiar_linea, lineas))

# Reassemble the cleaned lines into a single block of text and show it.
resultado = "\n".join(lineas_limpias)
print(resultado)


Performing stepwise search to minimize aic

ARIMA(0,0,0)(0,0,0)[12], AIC=4561.998
ARIMA(1,0,0)(1,0,0)[12], AIC=4071.776
ARIMA(0,0,1)(0,0,1)[12], AIC=4190.035
ARIMA(0,0,0)(0,0,0)[12], AIC=5416.302
ARIMA(1,0,0)(0,0,0)[12], AIC=4224.664
ARIMA(1,0,0)(2,0,0)[12], AIC=4020.549
ARIMA(1,0,0)(2,0,1)[12], AIC=3985.479
ARIMA(1,0,0)(1,0,1)[12], AIC=3983.519
ARIMA(1,0,0)(0,0,1)[12], AIC=4135.332
ARIMA(1,0,0)(1,0,2)[12], AIC=3985.469
ARIMA(1,0,0)(0,0,2)[12], AIC=4107.876

ARIMA(0,0,0)(1,0,1)[12], AIC=4042.135
ARIMA(2,0,0)(1,0,1)[12], AIC=3979.726
ARIMA(2,0,0)(0,0,1)[12], AIC=4137.331
ARIMA(2,0,0)(1,0,0)[12], AIC=4068.635
ARIMA(2,0,0)(2,0,1)[12], AIC=3981.660
ARIMA(2,0,0)(1,0,2)[12], AIC=3981.646
ARIMA(2,0,0)(0,0,0)[12], AIC=4217.832
ARIMA(2,0,0)(0,0,2)[12], AIC=4107.673
ARIMA(2,0,0)(2,0,0)[12], AIC=4015.043

ARIMA(3,0,0)(1,0,1)[12], AIC=3976.458
ARIMA(3,0,0)(0,0,1)[12], AIC=4103.753
ARIMA(3,0,0)(1,0,0)[12], AIC=4060.689
ARIMA(3,0,0)(2,0,1)[12], AIC=3978.409
ARIMA(3,0,0)(1,0,2)[12], AIC=3978.399
ARIMA(3,0,0)(0,0,0)[12], AIC=4157.272
[salida truncada]

In [7]:
# Cleaned model lines (header, blank lines and the "Best model"/"Total fit
# time" footer left out), pasted as a string literal; this rebinds `texto`
# and is the input for the de-duplication and AIC ranking.
texto = """
ARIMA(0,0,0)(0,0,0)[12], AIC=4561.998
ARIMA(1,0,0)(1,0,0)[12], AIC=4071.776
ARIMA(0,0,1)(0,0,1)[12], AIC=4190.035
ARIMA(0,0,0)(0,0,0)[12], AIC=5416.302
ARIMA(1,0,0)(0,0,0)[12], AIC=4224.664
ARIMA(1,0,0)(2,0,0)[12], AIC=4020.549
ARIMA(1,0,0)(2,0,1)[12], AIC=3985.479
ARIMA(1,0,0)(1,0,1)[12], AIC=3983.519
ARIMA(1,0,0)(0,0,1)[12], AIC=4135.332
ARIMA(1,0,0)(1,0,2)[12], AIC=3985.469
ARIMA(1,0,0)(0,0,2)[12], AIC=4107.876
ARIMA(0,0,0)(1,0,1)[12], AIC=4042.135
ARIMA(2,0,0)(1,0,1)[12], AIC=3979.726
ARIMA(2,0,0)(0,0,1)[12], AIC=4137.331
ARIMA(2,0,0)(1,0,0)[12], AIC=4068.635
ARIMA(2,0,0)(2,0,1)[12], AIC=3981.660
ARIMA(2,0,0)(1,0,2)[12], AIC=3981.646
ARIMA(2,0,0)(0,0,0)[12], AIC=4217.832
ARIMA(2,0,0)(0,0,2)[12], AIC=4107.673
ARIMA(2,0,0)(2,0,0)[12], AIC=4015.043
ARIMA(3,0,0)(1,0,1)[12], AIC=3976.458
ARIMA(3,0,0)(0,0,1)[12], AIC=4103.753
ARIMA(3,0,0)(1,0,0)[12], AIC=4060.689
ARIMA(3,0,0)(2,0,1)[12], AIC=3978.409
ARIMA(3,0,0)(1,0,2)[12], AIC=3978.399
ARIMA(3,0,0)(0,0,0)[12], AIC=4157.272
ARIMA(3,0,0)(0,0,2)[12], AIC=4082.936
ARIMA(3,0,0)(2,0,0)[12], AIC=4011.744
ARIMA(4,0,0)(1,0,1)[12], AIC=3963.079
ARIMA(4,0,0)(0,0,1)[12], AIC=4051.497
ARIMA(4,0,0)(1,0,0)[12], AIC=4034.057
ARIMA(4,0,0)(2,0,1)[12], AIC=3964.900
ARIMA(4,0,0)(1,0,2)[12], AIC=3965.097
ARIMA(4,0,0)(0,0,0)[12], AIC=4082.110
ARIMA(4,0,0)(0,0,2)[12], AIC=4038.271
ARIMA(4,0,0)(2,0,0)[12], AIC=3998.322
ARIMA(5,0,0)(1,0,1)[12], AIC=3969.811
ARIMA(4,0,1)(1,0,1)[12], AIC=3965.197
ARIMA(4,0,0)(1,0,1)[12], AIC=4001.623
"""

In [8]:
def extraer_aic(linea):
    """Return the AIC value found in *linea* as a float.

    Accepts signed values, integers, decimals and scientific notation, so a
    negative or integer-formatted AIC is parsed instead of being mistaken
    for a missing value.

    Args:
        linea: A text line expected to contain ``AIC=<number>``.

    Returns:
        The parsed AIC, or ``float('inf')`` when the line has no numeric
        ``AIC=`` field (including ``AIC=inf``), so such lines sort last.
    """
    match = re.search(
        r'AIC=\s*([-+]?(?:\d+(?:\.\d*)?|\.\d+)(?:[eE][-+]?\d+)?)',
        linea,
    )
    return float(match.group(1)) if match else float('inf')

def extraer_modelo(linea):
    """Return the ``ARIMA(p,d,q)(P,D,Q)[m]`` specification that starts *linea*.

    Leading whitespace is ignored, so raw trace lines (which begin with a
    space) are recognised as well as already-stripped ones.

    Args:
        linea: A text line expected to begin with a model specification.

    Returns:
        The model specification string, or ``None`` when the line does not
        begin with one (after optional leading whitespace).
    """
    match = re.match(r'\s*(ARIMA\([^)]+\)\([^)]+\)\[\d+\])', linea)
    return match.group(1) if match else None

# Strip every line of the pasted text and discard the ones that end up empty.
lineas = [linea for linea in map(str.strip, texto.strip().split('\n')) if linea]

# De-duplicate by model specification; the first line seen for a model wins.
# NOTE(review): the "intercept" label is no longer present in these lines, so
# fits with and without intercept share one key and only the first is kept.
modelos_unicos = {}
for linea in lineas:
    modelo = extraer_modelo(linea)
    if modelo:
        modelos_unicos.setdefault(modelo, linea)

# One line per distinct model, in order of first appearance.
lineas_unicas = list(modelos_unicos.values())

# Rank from lowest to highest AIC.
lineas_ordenadas = sorted(lineas_unicas, key=extraer_aic)

# Prefix each line with its 1-based rank.
lineas_indexadas = [
    f"({posicion}) {linea_modelo}"
    for posicion, linea_modelo in enumerate(lineas_ordenadas, start=1)
]

# Final ranking as a single block of text.
resultado = "\n".join(lineas_indexadas)
print(resultado)


(1) ARIMA(4,0,0)(1,0,1)[12], AIC=3963.079
(2) ARIMA(4,0,0)(2,0,1)[12], AIC=3964.900
(3) ARIMA(4,0,0)(1,0,2)[12], AIC=3965.097
(4) ARIMA(4,0,1)(1,0,1)[12], AIC=3965.197
(5) ARIMA(5,0,0)(1,0,1)[12], AIC=3969.811
(6) ARIMA(3,0,0)(1,0,1)[12], AIC=3976.458
(7) ARIMA(3,0,0)(1,0,2)[12], AIC=3978.399
(8) ARIMA(3,0,0)(2,0,1)[12], AIC=3978.409
(9) ARIMA(2,0,0)(1,0,1)[12], AIC=3979.726
(10) ARIMA(2,0,0)(1,0,2)[12], AIC=3981.646
(11) ARIMA(2,0,0)(2,0,1)[12], AIC=3981.660
(12) ARIMA(1,0,0)(1,0,1)[12], AIC=3983.519
(13) ARIMA(1,0,0)(1,0,2)[12], AIC=3985.469
(14) ARIMA(1,0,0)(2,0,1)[12], AIC=3985.479
(15) ARIMA(4,0,0)(2,0,0)[12], AIC=3998.322
(16) ARIMA(3,0,0)(2,0,0)[12], AIC=4011.744
(17) ARIMA(2,0,0)(2,0,0)[12], AIC=4015.043
(18) ARIMA(1,0,0)(2,0,0)[12], AIC=4020.549
(19) ARIMA(4,0,0)(1,0,0)[12], AIC=4034.057
(20) ARIMA(4,0,0)(0,0,2)[12], AIC=4038.271
(21) ARIMA(0,0,0)(1,0,1)[12], AIC=4042.135
(22) ARIMA(4,0,0)(0,0,1)[12], AIC=4051.497
(23) ARIMA(3,0,0)(1,0,0)[12], AIC=4060.689
(24) ARIMA(2,0,0)(1,0,0)[12], AIC=4068.635
[salida truncada]

## **RESULTADOS**

(1) ARIMA(4,0,0)(1,0,1)[12], AIC=3963.079  
(2) ARIMA(4,0,0)(2,0,1)[12], AIC=3964.900  
(3) ARIMA(4,0,0)(1,0,2)[12], AIC=3965.097  
(4) ARIMA(4,0,1)(1,0,1)[12], AIC=3965.197  
(5) ARIMA(5,0,0)(1,0,1)[12], AIC=3969.811  
(6) ARIMA(3,0,0)(1,0,1)[12], AIC=3976.458  
(7) ARIMA(3,0,0)(1,0,2)[12], AIC=3978.399  
(8) ARIMA(3,0,0)(2,0,1)[12], AIC=3978.409  
(9) ARIMA(2,0,0)(1,0,1)[12], AIC=3979.726  
(10) ARIMA(2,0,0)(1,0,2)[12], AIC=3981.646  
(11) ARIMA(2,0,0)(2,0,1)[12], AIC=3981.660  
(12) ARIMA(1,0,0)(1,0,1)[12], AIC=3983.519  
(13) ARIMA(1,0,0)(1,0,2)[12], AIC=3985.469  
(14) ARIMA(1,0,0)(2,0,1)[12], AIC=3985.479  
(15) ARIMA(4,0,0)(2,0,0)[12], AIC=3998.322  
(16) ARIMA(3,0,0)(2,0,0)[12], AIC=4011.744  
(17) ARIMA(2,0,0)(2,0,0)[12], AIC=4015.043  
(18) ARIMA(1,0,0)(2,0,0)[12], AIC=4020.549  
(19) ARIMA(4,0,0)(1,0,0)[12], AIC=4034.057  
(20) ARIMA(4,0,0)(0,0,2)[12], AIC=4038.271  
(21) ARIMA(0,0,0)(1,0,1)[12], AIC=4042.135  
(22) ARIMA(4,0,0)(0,0,1)[12], AIC=4051.497  
(23) ARIMA(3,0,0)(1,0,0)[12], AIC=4060.689  
(24) ARIMA(2,0,0)(1,0,0)[12], AIC=4068.635  
(25) ARIMA(1,0,0)(1,0,0)[12], AIC=4071.776  
(26) ARIMA(4,0,0)(0,0,0)[12], AIC=4082.110  
(27) ARIMA(3,0,0)(0,0,2)[12], AIC=4082.936  
(28) ARIMA(3,0,0)(0,0,1)[12], AIC=4103.753  
(29) ARIMA(2,0,0)(0,0,2)[12], AIC=4107.673  
(30) ARIMA(1,0,0)(0,0,2)[12], AIC=4107.876  
(31) ARIMA(1,0,0)(0,0,1)[12], AIC=4135.332  
(32) ARIMA(2,0,0)(0,0,1)[12], AIC=4137.331  
(33) ARIMA(3,0,0)(0,0,0)[12], AIC=4157.272  
(34) ARIMA(0,0,1)(0,0,1)[12], AIC=4190.035  
(35) ARIMA(2,0,0)(0,0,0)[12], AIC=4217.832  
(36) ARIMA(1,0,0)(0,0,0)[12], AIC=4224.664  
(37) ARIMA(0,0,0)(0,0,0)[12], AIC=4561.998  