In [16]:
from pmdarima import auto_arima
import pandas as pd
from sklearn.metrics import mean_squared_error, mean_absolute_error
import numpy as np

# Load test and train data
train = pd.read_csv('../data/out/buffalo_train.csv', dtype={'count': float})
valid = pd.read_csv('../data/out/buffalo_test.csv', dtype={'count': float})

# Convert 'week_start' column to datetime in train dataframe
train['week_start'] = pd.to_datetime(train['week_start'])

# Convert 'week_start' column to datetime in valid dataframe
valid['week_start'] = pd.to_datetime(valid['week_start'])

# Define parameter grid
p_values = range(1, 3)  # Example range for p
d_values = range(1, 2)  # Example range for d
q_values = range(1, 3)  # Example range for q
P_values = range(0, 2)  # Example range for P
D_values = range(0, 2)  # Example range for D
Q_values = range(0, 2)  # Example range for Q
m = 12  # Example seasonal period

# Initialize evaluation metrics lists
evaluation_metrics = []

# Iterate over parameter combinations
for p in p_values:
    for d in d_values:
        for q in q_values:
            for P in P_values:
                for D in D_values:
                    for Q in Q_values:
                        # Train SARIMA model
                        try:
                            model = auto_arima(train['count'], start_p=p, start_q=q,
                                                max_p=3, max_q=3, m=m,
                                                start_P=P, seasonal=True,
                                                d=d, D=D, trace=True,
                                                error_action='ignore',  
                                                suppress_warnings=True, 
                                                stepwise=True)
                            
                            # Generate forecast
                            forecast = model.predict(n_periods=len(valid))
                            
                            # Compute evaluation metrics
                            mse = mean_squared_error(valid['count'], forecast)
                            mae = mean_absolute_error(valid['count'], forecast)
                            rmse = np.sqrt(mse)
                            
                            # Store evaluation metrics along with model parameters
                            evaluation_metrics.append({
                                'p': p, 'd': d, 'q': q,
                                'P': P, 'D': D, 'Q': Q,
                                'MSE': mse, 'MAE': mae, 'RMSE': rmse
                            })
                            
                            print("Model (p={}, d={}, q={}, P={}, D={}, Q={}):".format(p, d, q, P, D, Q))
                            print("Mean Squared Error (MSE):", mse)
                            print("Mean Absolute Error (MAE):", mae)
                            print("Root Mean Squared Error (RMSE):", rmse)
                            print()
                            
                        except Exception as e:
                            print("Error occurred for model (p={}, d={}, q={}, P={}, D={}, Q={}):".format(p, d, q, P, D, Q))
                            print(e)
                            print()

# Find the best-performing model based on evaluation metrics
best_model = min(evaluation_metrics, key=lambda x: x['MSE'])  # Change to the metric you prefer
print("Best Model Parameters:", best_model)
'''
In this example, I've created a grid of SARIMA parameters (p, d, q, P, D, Q) and iterated over all possible combinations. 
For each combination, the SARIMA model is trained, and evaluation metrics (MSE, MAE, RMSE) are computed using the validation data. 
Finally, the best-performing model is selected based on the minimum MSE, but you can change it to the metric you prefer.

Please note that training SARIMA models with multiple parameter combinations can be computationally expensive, especially for large datasets or a large grid of parameters. 
You may need to adjust the parameter grid and consider parallelization or distributed computing if necessary.
'''

Performing stepwise search to minimize aic
 ARIMA(1,1,1)(0,0,1)[12] intercept   : AIC=6848.255, Time=0.95 sec
 ARIMA(0,1,0)(0,0,0)[12] intercept   : AIC=6924.081, Time=0.02 sec
 ARIMA(1,1,0)(1,0,0)[12] intercept   : AIC=6870.508, Time=0.09 sec
 ARIMA(0,1,1)(0,0,1)[12] intercept   : AIC=6848.428, Time=0.30 sec
 ARIMA(0,1,0)(0,0,0)[12]             : AIC=6922.288, Time=0.01 sec
 ARIMA(1,1,1)(0,0,0)[12] intercept   : AIC=6847.092, Time=0.18 sec
 ARIMA(1,1,1)(1,0,0)[12] intercept   : AIC=6848.328, Time=0.51 sec
 ARIMA(1,1,1)(1,0,1)[12] intercept   : AIC=6850.767, Time=0.53 sec
 ARIMA(0,1,1)(0,0,0)[12] intercept   : AIC=6847.124, Time=0.09 sec
 ARIMA(1,1,0)(0,0,0)[12] intercept   : AIC=6869.082, Time=0.03 sec
 ARIMA(2,1,1)(0,0,0)[12] intercept   : AIC=6848.716, Time=0.27 sec
 ARIMA(1,1,2)(0,0,0)[12] intercept   : AIC=6848.507, Time=0.17 sec
 ARIMA(0,1,2)(0,0,0)[12] intercept   : AIC=6847.295, Time=0.16 sec
 ARIMA(2,1,0)(0,0,0)[12] intercept   : AIC=6854.480, Time=0.03 sec
 ARIMA(2,1,2)(0,0,0

Non-seasonal ARIMA parameters:

p: 1
d: 1
q: 1
Seasonal ARIMA parameters:

P: 0
D: 0
Q: 0

These parameters define the SARIMA model that yielded the lowest MSE (Mean Squared Error) among the models evaluated. Here's what each parameter means:

p, d, q: Non-seasonal ARIMA parameters specifying the autoregressive order, differencing order, and moving average order, respectively.

P, D, Q: Seasonal ARIMA parameters specifying the seasonal autoregressive order, seasonal differencing order, and seasonal moving average order, respectively.

MSE: Mean Squared Error, which measures the average of the squares of the errors between predicted and actual values. In this case, it's approximately 1830343.178.

MAE: Mean Absolute Error, which measures the average of the absolute errors between predicted and actual values. In this case, it's approximately 1300.873.

RMSE: Root Mean Squared Error, which is the square root of the MSE and measures the average magnitude of the errors in the predicted values. In this case, it's approximately 1352.902.

These parameters and evaluation metrics provide insights into the SARIMA model's performance and can be used for further analysis and comparison with other models.







In [5]:
from pmdarima import auto_arima
import pandas as pd
from sklearn.metrics import mean_squared_error, mean_absolute_error
import numpy as np

# Load test and train data
train = pd.read_csv('../data/out/buffalo_train.csv', dtype={'count': float})
valid = pd.read_csv('../data/out/buffalo_test.csv', dtype={'count': float})

# Convert 'week_start' column to datetime in train dataframe
train['week_start'] = pd.to_datetime(train['week_start'])

# Convert 'week_start' column to datetime in valid dataframe
valid['week_start'] = pd.to_datetime(valid['week_start'])

# Define parameter grid
p_values = [1]  # Include 1 for p
d_values = [1]  # Include 1 for d
q_values = [1]  # Include 1 for q
P_values = [1]  # Include 1 for P
D_values = [1]  # Include 1 for D
Q_values = [1]  # Include 1 for Q
m = 12  # Example seasonal period

# Initialize evaluation metrics lists
evaluation_metrics = []

# Iterate over parameter combinations
for p in p_values:
    for d in d_values:
        for q in q_values:
            for P in P_values:
                for D in D_values:
                    for Q in Q_values:
                        # Train SARIMA model
                        try:
                            model = auto_arima(train['count'], start_p=p, start_q=q,
                                                max_p=3, max_q=3, m=m,
                                                start_P=P, seasonal=True,
                                                d=d, D=D, trace=True,
                                                error_action='ignore',  
                                                suppress_warnings=True, 
                                                stepwise=True)
                            
                            # Generate forecast
                            forecast = model.predict(n_periods=len(valid))
                            
                            # Compute evaluation metrics
                            mse = mean_squared_error(valid['count'], forecast)
                            mae = mean_absolute_error(valid['count'], forecast)
                            rmse = np.sqrt(mse)
                            
                            # Store evaluation metrics along with model parameters
                            evaluation_metrics.append({
                                'p': p, 'd': d, 'q': q,
                                'P': P, 'D': D, 'Q': Q,
                                'MSE': mse, 'MAE': mae, 'RMSE': rmse
                            })
                            
                            print("Model (p={}, d={}, q={}, P={}, D={}, Q={}):".format(p, d, q, P, D, Q))
                            print("Mean Squared Error (MSE):", mse)
                            print("Mean Absolute Error (MAE):", mae)
                            print("Root Mean Squared Error (RMSE):", rmse)
                            print()
                            
                        except Exception as e:
                            print("Error occurred for model (p={}, d={}, q={}, P={}, D={}, Q={}):".format(p, d, q, P, D, Q))
                            print(e)
                            print()

# Find the best-performing model based on evaluation metrics
best_model = min(evaluation_metrics, key=lambda x: x['MSE'])  # Change to the metric you prefer
print("Best Model Parameters:", best_model)


Performing stepwise search to minimize aic
 ARIMA(1,1,1)(1,1,1)[12]             : AIC=inf, Time=1.99 sec
 ARIMA(0,1,0)(0,1,0)[12]             : AIC=7052.582, Time=0.04 sec
 ARIMA(1,1,0)(1,1,0)[12]             : AIC=6867.265, Time=0.38 sec
 ARIMA(0,1,1)(0,1,1)[12]             : AIC=inf, Time=1.46 sec
 ARIMA(1,1,0)(0,1,0)[12]             : AIC=6985.200, Time=0.03 sec
 ARIMA(1,1,0)(2,1,0)[12]             : AIC=6822.705, Time=0.82 sec
 ARIMA(1,1,0)(2,1,1)[12]             : AIC=inf, Time=3.10 sec
 ARIMA(1,1,0)(1,1,1)[12]             : AIC=inf, Time=1.40 sec
 ARIMA(0,1,0)(2,1,0)[12]             : AIC=6883.265, Time=0.26 sec
 ARIMA(2,1,0)(2,1,0)[12]             : AIC=6810.113, Time=0.89 sec
 ARIMA(2,1,0)(1,1,0)[12]             : AIC=6857.513, Time=0.43 sec
 ARIMA(2,1,0)(2,1,1)[12]             : AIC=inf, Time=3.05 sec
 ARIMA(2,1,0)(1,1,1)[12]             : AIC=inf, Time=1.11 sec
 ARIMA(3,1,0)(2,1,0)[12]             : AIC=6811.778, Time=0.92 sec
 ARIMA(2,1,1)(2,1,0)[12]             : AIC=inf, T

In [6]:
# Best Model Parameters: {'p': 1, 'd': 1, 'q': 1, 'P': 1, 'D': 1, 'Q': 1, 'MSE': 6811786.009614338, 'MAE': 2416.1465494569643, 'RMSE': 2609.939847891966}
# Best Model Parameters: {'p': 1, 'd': 1, 'q': 1, 'P': 0, 'D': 0, 'Q': 0, 'MSE': 1830343.1783731617, 'MAE': 1300.8733546952224, 'RMSE': 1352.9017622773508}