In [1]:
# Stationarity Check

def test_stationarity(dataFrame, var, window = 12):
    """Print an ADF stationarity verdict for one column and plot its rolling statistics.

    The column is copied into a local, NaN-free series, so the caller's
    DataFrame is never modified (no helper columns are added to it) and
    missing values produced by differencing do not reach ``adfuller``.

    Parameters
    ----------
    dataFrame : DataFrame holding the series; its index is used as the x-axis.
    var : name of the column to test.
    window : number of observations in the rolling mean / rolling std window.

    Returns
    -------
    None. The p-value and the verdict are printed and a figure is shown.
    """
    # Work on a private series instead of writing rollMean/rollStd into the input.
    series = dataFrame[var].dropna()
    rollMean = series.rolling(window = window).mean()
    rollStd = series.rolling(window = window).std()

    # The second element of the adfuller result is used as the p-value.
    adf_result = adfuller(series)
    p_value = adf_result[1]

    print(f"ADF p-value: {p_value:.4f}")

    # 5% significance level decides between the two messages.
    if p_value <= 0.05:
        print("The time series is stationary (reject H0).")
    else:
        print("The time series is not stationary (fail to reject H0).")

    plt.figure(figsize = (10, 5))
    sns.lineplot(x = series.index, y = series, label = 'Original')
    sns.lineplot(x = rollMean.index, y = rollMean, label = 'Rolling Mean')
    sns.lineplot(x = rollStd.index, y = rollStd, label = 'Rolling Std')
    plt.ylabel(var)
    plt.title("Rolling Statistics")
    plt.legend()
    plt.show()

In [4]:
# Test stationarity of the raw passenger series first.

test_stationarity(airP, 'passengers')

# The raw series is not stationary, so subtract a lagged copy of it and
# re-test: lag 1 first, then lag 2 because the lag-1 difference did not work.
# Each pass overwrites the 'shift' and 'shiftDiff' columns.

for lag in (1, 2):
    airP['shift'] = airP['passengers'].shift(lag)
    airP['shiftDiff'] = airP['passengers'] - airP['shift']
    print(airP.head(20))
    # dropna() hands the test a copy without the leading NaN rows.
    test_stationarity(airP.dropna(), 'shiftDiff')

In [5]:
# Differenced copies of the passenger series for the ACF/PACF plots:
# 'firstDiff' uses a lag of 1 and 'Diff12' a lag of 12.

for diff_column, diff_lag in (('firstDiff', 1), ('Diff12', 12)):
    airP[diff_column] = airP['passengers'].diff(periods = diff_lag)

In [7]:
# PACF of the first-differenced series, limited to 20 lags.
# NaN rows created by differencing are removed before plotting.

pacf_input = airP['firstDiff'].dropna()
plot_pacf(pacf_input, lags = 20)
plt.show()

In [11]:
# ACF of the first-differenced series, limited to 20 lags.
# NaN rows created by differencing are removed before plotting.
acf_input = airP['firstDiff'].dropna()
plot_acf(acf_input, lags = 20)
plt.show()

In [10]:
# Train-test split
# Positional split without shuffling: the first 70% of the rows become the
# training set and the remaining rows are held out as the test set.

train_size = int(len(airP) * 0.7)
train = airP.iloc[:train_size]
test = airP.iloc[train_size:]

In [None]:
# ARIMA MODEL
# Fit an ARIMA with order (1, 2, 1) on the training 'passengers' column, then
# predict the positions from len(train) through the last row of airP.
model_arima = ARIMA(train['passengers'], order=(1,2,1))
model_arima_fit = model_arima.fit()
arima_pred = model_arima_fit.predict(start = len(train), end = len(airP) - 1)

In [None]:
# Store the ARIMA predictions next to the actual values.
# The column starts out all-NaN; only the rows from train_size onward are
# filled, positionally, with the predicted values.

airP['arimaPred'] = np.nan
arima_col_pos = airP.columns.get_loc('arimaPred')
airP.iloc[train_size:, arima_col_pos] = arima_pred.values
print(airP.tail())

In [None]:
# Series used to choose P and Q for SARIMAX: a lag-2 difference of the
# passengers followed by a lag-12 difference of that result.
lag2_diff = airP['passengers'].diff(periods = 2)
airP['diff_combined'] = lag2_diff.diff(periods = 12)

In [None]:
# PACF of the combined-difference series over 50 lags (NaN rows removed first).
seasonal_pacf_input = airP['diff_combined'].dropna()
plot_pacf(seasonal_pacf_input, lags = 50)
plt.title("PACF - Seasonal Differencing")
plt.show()

In [None]:
# ACF of the combined-difference series over 50 lags (NaN rows removed first).
# The title uses the same "<name> - Seasonal Differencing" form as the PACF plot.
plot_acf(airP['diff_combined'].dropna(), lags = 50)
plt.title("ACF - Seasonal Differencing")
plt.show()

In [None]:
# SARIMAX MODEL
# Non-seasonal order (1, 2, 1) plus seasonal order (1, 2, 1) with period 12,
# fitted on the training 'passengers' column; predictions cover the positions
# from len(train) through the last row of airP.
sarimax_order = (1, 2, 1)
sarimax_seasonal_order = (1, 2, 1, 12)
model_sarimax = SARIMAX(
    train['passengers'],
    order = sarimax_order,
    seasonal_order = sarimax_seasonal_order,
)
model_sarimax_fit = model_sarimax.fit()
sarimax_pred = model_sarimax_fit.predict(start = len(train), end = len(airP) - 1)

In [None]:
# Store the SARIMAX predictions next to the actual values.
# The column starts out all-NaN; only the rows from train_size onward are
# filled, positionally, with the predicted values.

airP['sarimaxPred'] = np.nan
sarimax_col_pos = airP.columns.get_loc('sarimaxPred')
airP.iloc[train_size:, sarimax_col_pos] = sarimax_pred.values
print(airP.tail(20))

In [None]:
# Plot the actual series together with both model predictions on one figure.

plt.figure(figsize = (10, 5))
# (column, legend label) pairs, drawn in this order against the shared index.
for plot_column, plot_label in (
    ('passengers', 'Actual Passengers'),
    ('arimaPred', 'ARIMA Prediction'),
    ('sarimaxPred', 'SARIMAX Prediction'),
):
    sns.lineplot(x = airP.index, y = airP[plot_column], label = plot_label)
plt.title('Actual vs Predicted (ARIMA and SARIMAX)')
plt.legend()
plt.show()

In [None]:
# Future Forecast using SARIMAX
# Build an otherwise empty frame whose index, named 'Dates', holds the
# month-start dates from 1961-01-01 through 1962-12-01.

future_index = pd.date_range(start = '1961-01-01', end = '1962-12-01', freq = 'MS')
future_dates = future_index.to_frame(index = False, name = 'Dates').set_index('Dates')

In [None]:
# Forecast over the whole future window: from the first to the last date in
# future_dates (each keyword is passed exactly once).
future_forecast = model_sarimax_fit.predict(start = future_dates.index[0], end = future_dates.index[-1])
print(future_forecast)

In [None]:
# Plot Future Forecast
# Draws the observed passengers and the SARIMAX future forecast on one figure.
# NOTE(review): assumes future_forecast is a pandas Series indexed by date,
# as returned by the SARIMAX predict call — confirm.

plt.figure(figsize = (10, 5))
sns.lineplot(x = airP.index, y = airP['passengers'], label = 'Actual Passengers')
sns.lineplot(x = future_forecast.index, y = future_forecast.values, label = 'SARIMAX Future Forecast')
plt.title('Future Forecast (SARIMAX)')
plt.legend()
plt.show()

In [None]:
# Model Evaluation
# Drop every row that has a missing value in any column, so the metrics below
# are computed only on rows where both the actuals and the predictions exist.
airP = airP.dropna()

In [None]:
# ARIMA Metrics
# MAE, MSE, RMSE (square root of the MSE) and R^2 of the ARIMA predictions
# against the actual passenger counts.

arima_actual = airP['passengers']
arima_forecast = airP['arimaPred']

arima_mae = mean_absolute_error(arima_actual, arima_forecast)
arima_mse = mean_squared_error(arima_actual, arima_forecast)
arima_rmse = math.sqrt(arima_mse)
arima_r2 = r2_score(arima_actual, arima_forecast)

print(f"ARIMA -> MAE: {arima_mae:.2f}, RMSE: {arima_rmse:.2f}, R^2: {arima_r2:.2f}")

In [None]:
# SARIMAX Metrics
# MAE, MSE, RMSE (square root of the MSE) and R^2 of the SARIMAX predictions
# against the actual passenger counts.

sarimax_actual = airP['passengers']
sarimax_forecast = airP['sarimaxPred']

sarimax_mae = mean_absolute_error(sarimax_actual, sarimax_forecast)
sarimax_mse = mean_squared_error(sarimax_actual, sarimax_forecast)
sarimax_rmse = math.sqrt(sarimax_mse)
sarimax_r2 = r2_score(sarimax_actual, sarimax_forecast)

print(f"SARIMAX -> MAE: {sarimax_mae:.2f}, RMSE: {sarimax_rmse:.2f}, R^2: {sarimax_r2:.2f}")

In [None]:
# 1. Perform Time Series Analysis on monthly-cola-production-in-austr.csv dataset.
# 2. Perform Time Series Analysis on gold_price_data.csv dataset.