In [13]:
import warnings
from pathlib import Path

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import statsmodels.api as sm
from sklearn.model_selection import train_test_split


In [14]:
# Dataset location and the column holding the observed values
dataset_name = "co2_emissions_coal"
dataset_folder = "datasets"
value_column = "Emissions [MMT]"

# Read <dataset_folder>/<dataset_name>/<dataset_name>.csv, indexed by "YYYYMM"
df = pd.read_csv(
    Path(dataset_folder, dataset_name, f"{dataset_name}.csv"),
    index_col="YYYYMM",
)

# Pull out the target series and convert its index to datetimes
ts = df[value_column]
ts.index = pd.to_datetime(ts.index)

# Hold out the last 30% of observations as the test set;
# shuffle=False keeps the original (chronological) ordering.
train, test = train_test_split(ts, shuffle=False, test_size=0.3)

In [21]:
def grid_search_arima(train_data, p_values, d_values, q=1, criterion="llf"):
    """Grid-search ARIMA(p, d, q) orders and return the best fitted model.

    Every combination of ``p`` in ``p_values`` and ``d`` in ``d_values`` is
    fitted with a fixed ``q``. Combinations that fail to fit are reported on
    stdout and skipped. The summary of the winning model is printed before it
    is returned.

    Parameters
    ----------
    train_data
        Series passed unchanged to ``sm.tsa.ARIMA`` as the endogenous data.
    p_values : iterable of int
        Candidate autoregressive orders.
    d_values : iterable of int
        Candidate differencing orders (iterated once per ``p``, so it must be
        re-iterable, e.g. a ``range`` or list).
    q : int, default 1
        Moving-average order shared by all candidates.
    criterion : {"llf", "aic", "bic", "hqic"}, default "llf"
        Attribute of the fitted results used for ranking. ``"llf"`` is
        maximised; the information criteria are minimised. The raw
        log-likelihood does not penalise model size, so it tends to favour
        the largest ``p``; pass an information criterion for a penalised
        comparison.

    Returns
    -------
    The fitted results object of the best-scoring candidate.

    Raises
    ------
    ValueError
        If ``criterion`` is not supported, or if no candidate could be fitted
        (including when ``p_values`` or ``d_values`` is empty).
    """
    # +1 -> larger is better, -1 -> smaller is better.
    sign_by_criterion = {"llf": 1.0, "aic": -1.0, "bic": -1.0, "hqic": -1.0}
    if criterion not in sign_by_criterion:
        raise ValueError(
            f"Unknown criterion {criterion!r}; expected one of {sorted(sign_by_criterion)}"
        )
    sign = sign_by_criterion[criterion]

    best_model = None
    best_score = -float("inf")

    # Silence fitting warnings only for the duration of the search; the
    # caller's warning filters are restored when the block exits.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        for p in p_values:
            for d in d_values:
                try:
                    model_fit = sm.tsa.ARIMA(train_data, order=(p, d, q)).fit()
                    score = sign * getattr(model_fit, criterion)
                except Exception as e:
                    # Best-effort search: report the failure and move on.
                    print(f"Failed to fit ARIMA({p}, {d}, {q}): {e}")
                    continue
                # Strict ">" keeps the first of tied candidates and never
                # selects a NaN score (comparisons with NaN are False).
                if score > best_score:
                    best_model, best_score = model_fit, score

    if best_model is None:
        raise ValueError(
            "No ARIMA model could be fitted for the given p_values/d_values."
        )

    # The winner was already fitted inside the loop; no refit is needed.
    print(best_model.summary())

    return best_model

In [22]:
# Search p and d over 0..4 (q keeps its default) on the training series
arima = grid_search_arima(train, p_values=range(5), d_values=range(5))

                               SARIMAX Results                                
Dep. Variable:        Emissions [MMT]   No. Observations:                  366
Model:                 ARIMA(4, 1, 1)   Log Likelihood               -1260.634
Date:                Tue, 11 Feb 2025   AIC                           2533.267
Time:                        18:46:30   BIC                           2556.667
Sample:                    01-01-1973   HQIC                          2542.567
                         - 06-01-2003                                         
Covariance Type:                  opg                                         
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
ar.L1         -0.5904      0.093     -6.321      0.000      -0.773      -0.407
ar.L2         -0.2838      0.040     -7.020      0.000      -0.363      -0.205
ar.L3         -0.5700      0.062     -9.166      0.0