In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file found.
input_file_paths = (
    os.path.join(folder, name)
    for folder, _, names in os.walk('/kaggle/input')
    for name in names
)
for file_path in input_file_paths:
    print(file_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
pip install pmdarima

In [None]:
import matplotlib.pyplot as plt
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
import seaborn as sns

In [None]:
# Location of the AirPassengers CSV inside the Kaggle input mount.
DATA_PATH = "/kaggle/input/air-passenger-data-for-time-series-analysis/AirPassengers.csv"
# Load the raw table; columns are parsed/typed in later cells.
data = pd.read_csv(DATA_PATH)

In [None]:
# Summarise the freshly loaded frame (columns, dtypes, non-null counts)
# before the 'Month' column is converted to datetimes.
data.info()

In [None]:
# Convert the 'Month' strings (year-month, e.g. matching '%Y-%m') to datetimes
# and write them back over the original column.
month_as_datetime = pd.to_datetime(data['Month'], format='%Y-%m')
data['Month'] = month_as_datetime

In [None]:
# Hold out the last TEST_SIZE rows as the test set; everything before them is
# the training set. Assumes rows are already in chronological order -- confirm.
TEST_SIZE = 30

# Positional split point, clamped at 0 so a frame shorter than TEST_SIZE gives
# an empty training set (the same outcome the previous boolean mask produced).
split_at = max(len(data) - TEST_SIZE, 0)

# .iloc slices by position, so the split does not depend on `data` still having
# its default 0..n-1 index (the earlier `data.index < n` mask broke once the
# index became 'Month'). .copy() makes train/test independent frames that can
# be re-indexed later without touching `data`.
train = data.iloc[:split_at].copy()
test = data.iloc[split_at:].copy()

# Show only the rows right before the split instead of dumping the whole frame.
train.tail()

In [None]:
# Make 'Month' the index of the full, train and test frames.
# Each name is rebound to a new frame instead of mutating in place, and the
# `"Month" in frame.columns` guard makes the cell safe to re-run: once 'Month'
# is already the index the frame is passed through unchanged rather than
# raising KeyError.
data, train, test = (
    frame.set_index("Month") if "Month" in frame.columns else frame
    for frame in (data, train, test)
)

In [None]:
# Preview the first rows now that 'Month' has been moved into the index.
data.head()

In [None]:
# Re-check the frame summary after indexing: 'Month' is now the index rather
# than a regular column.
data.info()

In [None]:
# Training series over time. 'Month' is no longer a column at this point, so
# seaborn is expected to resolve it from the index name.
fig, ax = plt.subplots()
sns.lineplot(data=train, x='Month', y='#Passengers', ax=ax)
plt.show()

In [None]:
# Autocorrelation of the undifferenced training series for lags up to 20.
# plot_acf opens its own new figure unless it is handed an Axes, so the sized
# figure is created first and passed via `ax`; previously the (12, 6) figure
# stayed blank and the ACF was drawn at the default size.
fig, ax = plt.subplots(figsize=(12, 6))
plot_acf(train, lags=20, ax=ax)
ax.set_xlabel('Lag')
ax.set_ylabel('Autocorrelation')
ax.set_title('Autocorrelation Function (ACF)')
plt.show()

In [None]:
# Partial autocorrelation of the undifferenced training series, lags up to 20.
# plot_pacf opens its own new figure unless it is handed an Axes, so the sized
# figure is created first and passed via `ax`; previously the (12, 6) figure
# stayed blank and the PACF was drawn at the default size.
fig, ax = plt.subplots(figsize=(12, 6))
plot_pacf(train, lags=20, ax=ax)
ax.set_xlabel('Lag')
ax.set_ylabel('Partial Autocorrelation')
ax.set_title('Partial Autocorrelation Function (PACF)')
plt.show()

In [None]:
from statsmodels.tsa.stattools import adfuller

# Augmented Dickey-Fuller test on the undifferenced training series.
# Element 1 of the returned tuple is the p-value.
adf_test = adfuller(x=train)
print(f'p-value {adf_test[1]}')

In [None]:
# First difference of the training series plotted over time.
train_diff_1 = train.diff()
fig, ax = plt.subplots()
sns.lineplot(data=train_diff_1, x='Month', y='#Passengers', ax=ax)
plt.show()

In [None]:
# Autocorrelation of the first-differenced training series (the leading NaN
# produced by diff() is dropped), lags up to 20.
# plot_acf opens its own new figure unless it is handed an Axes, so the sized
# figure is created first and passed via `ax`; previously the (12, 6) figure
# stayed blank and the ACF was drawn at the default size.
fig, ax = plt.subplots(figsize=(12, 6))
plot_acf(train.diff().dropna(), lags=20, ax=ax)
ax.set_xlabel('Lag')
ax.set_ylabel('Autocorrelation')
ax.set_title('Autocorrelation Function (ACF)')
plt.show()

In [None]:
# Partial autocorrelation of the first-differenced training series (the
# leading NaN produced by diff() is dropped), lags up to 20.
# plot_pacf opens its own new figure unless it is handed an Axes, so the sized
# figure is created first and passed via `ax`; previously the (12, 6) figure
# stayed blank and the PACF was drawn at the default size.
fig, ax = plt.subplots(figsize=(12, 6))
plot_pacf(train.diff().dropna(), lags=20, ax=ax)
ax.set_xlabel('Lag')
ax.set_ylabel('Partial Autocorrelation')
ax.set_title('Partial Autocorrelation Function (PACF)')
plt.show()

In [None]:
# ADF test on the first-differenced training series.
first_difference = train.diff()
dataf = first_difference.dropna()  # diff() leaves a NaN in the first row
adf_test = adfuller(x=dataf)
print(f'p-value {adf_test[1]}')

In [None]:
# Second difference (difference of the first difference) plotted over time.
train_diff_2 = train.diff().diff()
fig, ax = plt.subplots()
sns.lineplot(data=train_diff_2, x='Month', y='#Passengers', ax=ax)
plt.show()

In [None]:
# ADF test on the twice-differenced training series.
second_difference = train.diff().diff()
dataf = second_difference.dropna()  # two diff() passes leave two leading NaNs
adf_test = adfuller(x=dataf)
print(f'p-value {adf_test[1]}')

In [None]:
# Autocorrelation of the twice-differenced training series (leading NaNs
# dropped), lags up to 20.
# plot_acf opens its own new figure unless it is handed an Axes, so the sized
# figure is created first and passed via `ax`; previously the (12, 6) figure
# stayed blank and the ACF was drawn at the default size.
fig, ax = plt.subplots(figsize=(12, 6))
plot_acf(train.diff().diff().dropna(), lags=20, ax=ax)
ax.set_xlabel('Lag')
ax.set_ylabel('Autocorrelation')
ax.set_title('Autocorrelation Function (ACF)')
plt.show()

In [None]:
# Partial autocorrelation of the twice-differenced training series (leading
# NaNs dropped), lags up to 20.
# plot_pacf opens its own new figure unless it is handed an Axes, so the sized
# figure is created first and passed via `ax`; previously the (12, 6) figure
# stayed blank and the PACF was drawn at the default size.
fig, ax = plt.subplots(figsize=(12, 6))
plot_pacf(train.diff().diff().dropna(), lags=20, ax=ax)
ax.set_xlabel('Lag')
ax.set_ylabel('Partial Autocorrelation')
ax.set_title('Partial Autocorrelation Function (PACF)')
plt.show()

In [None]:
from statsmodels.tsa.arima.model import ARIMA

# Manually specified non-seasonal ARIMA with (p, d, q) = (1, 2, 2), fitted on
# the training series only.
# NOTE(review): the order was presumably read off the ACF/PACF plots of the
# twice-differenced series -- confirm.
arima_order = (1, 2, 2)
model = ARIMA(endog=train, order=arima_order)
model_fit = model.fit()
print(model_fit.summary())

In [None]:
# Residuals of the fitted ARIMA model with the first observation skipped.
# NOTE(review): the model uses d=2, so the second residual may also be a
# start-up artefact; consider skipping two values -- confirm.
residules = model_fit.resid.iloc[1:]

# Left panel: residuals over time. Right panel: kernel density estimate.
fig, (ax_trace, ax_density) = plt.subplots(1, 2)
residules.plot(ax=ax_trace, title='residual')
residules.plot(ax=ax_density, kind='kde', title='density')
plt.show()

In [None]:
# Autocorrelation of the ARIMA residuals, lags up to 20.
# plot_acf opens its own new figure unless it is handed an Axes, so the sized
# figure is created first and passed via `ax`; previously the (12, 6) figure
# stayed blank and the ACF was drawn at the default size.
fig, ax = plt.subplots(figsize=(12, 6))
plot_acf(residules, lags=20, ax=ax)
ax.set_xlabel('Lag')
ax.set_ylabel('Autocorrelation')
ax.set_title('Autocorrelation Function (ACF)')
plt.show()

In [None]:
# Partial autocorrelation of the ARIMA residuals, lags up to 20.
# plot_pacf opens its own new figure unless it is handed an Axes, so the sized
# figure is created first and passed via `ax`; previously the (12, 6) figure
# stayed blank and the PACF was drawn at the default size.
fig, ax = plt.subplots(figsize=(12, 6))
plot_pacf(residules, lags=20, ax=ax)
ax.set_xlabel('Lag')
ax.set_ylabel('Partial Autocorrelation')
ax.set_title('Partial Autocorrelation Function (PACF)')
plt.show()

In [None]:
# Forecast as many steps ahead as there are held-out rows.
forecast_test = model_fit.forecast(steps=len(test))

# Attach the forecast to `data` by position: NaN for every training row,
# followed by the forecast values for the test rows.
train_padding = np.full(len(train), np.nan)
forecast_values = np.asarray(forecast_test, dtype=float)
data['forecast manual'] = np.concatenate([train_padding, forecast_values])
data.plot()

In [None]:
import pmdarima as pm

# Let pmdarima pick the ARIMA order on the training series, with the stepwise
# shortcut turned off and seasonal terms disabled.
auto_predict = pm.auto_arima(
    train,
    seasonal=False,
    stepwise=False,
)
auto_predict.summary()

In [None]:
# Forecast as many periods ahead as there are held-out rows.
forecast_test_auto = auto_predict.predict(n_periods=len(test))

# Attach the forecast to `data` by position: NaN for every training row,
# followed by the forecast values for the test rows.
train_padding = np.full(len(train), np.nan)
forecast_values = np.asarray(forecast_test_auto, dtype=float)
data['forecast_Auto'] = np.concatenate([train_padding, forecast_values])
data.plot()

In [None]:
from sklearn.metrics import mean_absolute_error,mean_absolute_percentage_error,mean_squared_error

# Hold-out errors of the manually specified ARIMA forecast (`forecast_test`).
mae = mean_absolute_error(test, forecast_test)
# scikit-learn returns MAPE as a fraction (0.10 means 10%).
mape = mean_absolute_percentage_error(test, forecast_test)
# Square root of the MSE, i.e. RMSE. This value was previously stored under
# the misleading name `mse`.
rmse = np.sqrt(mean_squared_error(test, forecast_test))

# Label each number so the output is readable on its own.
print(f'MAE : {mae:.3f}')
print(f'MAPE: {mape:.2%}')
print(f'RMSE: {rmse:.3f}')

In [None]:
# Hold-out errors of the auto_arima forecast (`forecast_test_auto`).
mae = mean_absolute_error(test, forecast_test_auto)
# scikit-learn returns MAPE as a fraction (0.10 means 10%).
mape = mean_absolute_percentage_error(test, forecast_test_auto)
# Square root of the MSE, i.e. RMSE. This value was previously stored under
# the misleading name `mse`.
rmse = np.sqrt(mean_squared_error(test, forecast_test_auto))

# Label each number so the output is readable on its own.
print(f'MAE : {mae:.3f}')
print(f'MAPE: {mape:.2%}')
print(f'RMSE: {rmse:.3f}')

In [None]:
from statsmodels.tsa.statespace.sarimax import SARIMAX

In [None]:
# Seasonal ARIMA specification: `order` is the non-seasonal (p, d, q) and
# `seasonal_order` is (P, D, Q, s) with a seasonal period s of 12.
# NOTE(review): these orders are hard-coded; they are meant to be chosen from
# the autocorrelation / partial autocorrelation plots -- confirm the choice.
order, seasonal_order = (4, 1, 0), (4, 1, 0, 12)

# Fit on the training series only.
model = SARIMAX(endog=train, order=order, seasonal_order=seasonal_order)
result = model.fit()

# Forecast one step for every held-out row, then pull out the point forecast
# and its confidence bounds.
forecast_horizon = len(test)
predictions = result.get_forecast(steps=forecast_horizon)
predicted_passengers = predictions.predicted_mean
confidence_intervals = predictions.conf_int()


In [None]:
# Plot the training data, the held-out test data and the SARIMA forecast of
# passenger counts on a single set of axes.
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(train.index, train, label='Training Data')
ax.plot(test.index, test, label='Test Data')
ax.plot(test.index, predicted_passengers, color='green', label='Predicted passengers')

# Shade the band between the two columns returned by conf_int().
band_first = confidence_intervals.iloc[:, 0]
band_second = confidence_intervals.iloc[:, 1]
ax.fill_between(test.index, band_first, band_second, color='purple', alpha=0.3)

ax.set_xlabel('Month')
ax.set_ylabel('passengers')
ax.set_title('passenger data Forecasting using SARIMA')
ax.legend()
plt.show()


In [None]:
# Hold-out errors of the SARIMA forecast (`predicted_passengers`).
mae = mean_absolute_error(test, predicted_passengers)
# scikit-learn returns MAPE as a fraction (0.10 means 10%).
mape = mean_absolute_percentage_error(test, predicted_passengers)
# Square root of the MSE, i.e. RMSE. This value was previously stored under
# the misleading name `mse`.
rmse = np.sqrt(mean_squared_error(test, predicted_passengers))

# Label each number so the output is readable on its own.
print(f'MAE : {mae:.3f}')
print(f'MAPE: {mape:.2%}')
print(f'RMSE: {rmse:.3f}')