In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

sns.set_style("darkgrid")

In [None]:
# Load the raw CSV into a DataFrame. The path is relative, so the file must be
# in the notebook's working directory.
df=pd.read_csv("perrin-freres-monthly-champagne-.csv")

In [None]:
# Replace the file's header names with two short ones. Assigning a two-item
# list requires the frame to have exactly two columns.
df.columns=["Month", "Sales"]
df.head()

In [None]:
df.tail()

In [None]:
# Drop the last two rows (presumably non-data footer rows -- confirm with the
# df.tail() output of the previous cell) and take an explicit copy, so the
# column assignments in later cells write to an independent frame rather than
# to a slice of the original (avoids pandas' SettingWithCopyWarning).
# NOTE: this cell is not idempotent; re-running it removes two more rows, so
# re-run the read_csv cell first.
df=df.iloc[:-2,:].copy()
df.tail()

In [None]:
df.info()

In [None]:
# Convert the Month column to pandas datetimes (format is inferred by
# pd.to_datetime; presumably the raw values are year-month strings -- confirm).
df["Month"]=pd.to_datetime(df["Month"])

In [None]:
df.head()

In [None]:
df.info()

In [None]:
#df=df.set_index("Month")

In [None]:
#df.head()

In [None]:
df.describe()

In [None]:
# Show every row that has at least one missing value; an empty result means
# there are none.
df[df.isna().any(axis=1)]

In [None]:
# Line chart of the Sales column against Month on an 8x4-inch figure.
plt.figure(figsize=(8, 4))
sns.lineplot(data=df, x="Month", y="Sales")

In [None]:
from statsmodels.tsa.stattools import adfuller

In [None]:
adfuller(df["Sales"])

In [None]:
#Ho: It is non stationary
#H1: It is stationary

def adfuller_test(series, significance=0.05):
    """Run the Augmented Dickey-Fuller test on ``series`` and print a summary.

    Null hypothesis (H0): the series is non-stationary.
    Alternative hypothesis (H1): the series is stationary.

    Parameters
    ----------
    series : array-like
        Observations to test; passed unchanged to ``adfuller``.
    significance : float, default 0.05
        H0 is rejected when the p-value is less than or equal to this value.

    Returns
    -------
    None
        The statistics and the verdict are printed, not returned.
    """
    results = adfuller(series)
    labels = ["ADF test stats", "p-value", "# Lags Used", "# Observation used"]
    # zip stops at the shorter input, so only the first four items of the
    # adfuller result are printed.
    for result, label in zip(results, labels):
        print(f"{result} : {label}")

    # results[1] is the entry labelled "p-value" above.
    if results[1] <= significance:
        print("Null Hypothesis rejected. Data is Stationary")
    else:
        print("Null Hypothesis stands. Data is non-stationary")

In [None]:
adfuller_test(df["Sales"])

In [None]:
df.head()

In [None]:
# Seasonal difference: each value minus the value 12 rows earlier. The first
# 12 rows have no earlier counterpart and come out as NaN.
sales = df["Sales"]
df["Seasonal First Diff"] = sales - sales.shift(periods=12)

In [None]:
df.head(14)

In [None]:
# Line chart of the seasonally differenced series against Month.
plt.figure(figsize=(10, 6))
sns.lineplot(data=df, x="Month", y="Seasonal First Diff")

In [None]:
adfuller_test(df["Seasonal First Diff"].dropna())

In [None]:
from statsmodels.graphics.tsaplots import plot_acf,plot_pacf

In [None]:
# Correlograms of the seasonally differenced series with its NaN rows removed:
# ACF in the top panel, PACF in the bottom panel, 40 lags each.
seasonal_diff = df["Seasonal First Diff"].dropna()
fig = plt.figure(figsize=(10, 6))
ax1 = fig.add_subplot(2, 1, 1)
ax2 = fig.add_subplot(2, 1, 2)
plot_acf(seasonal_diff, lags=40, ax=ax1)
fig = plot_pacf(seasonal_diff, lags=40, ax=ax2)

Reading the ACF and PACF plots above, we choose p=1 and q=0 or 1.

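We differenced the series once (a seasonal difference at lag 12), so d=1. Strictly speaking, a seasonal difference corresponds to D=1 in the seasonal order of a seasonal ARIMA model, while d counts non-seasonal differences.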

In [None]:
# The legacy statsmodels.tsa.arima_model.ARIMA class was deprecated in
# statsmodels 0.12 and removed in 0.13. Its replacement lives in
# statsmodels.tsa.arima.model and accepts the same (endog, order=...) call.
from statsmodels.tsa.arima.model import ARIMA

In [None]:
# Fit a non-seasonal ARIMA with order (p, d, q) = (1, 1, 1) to the Sales series.
arima_order = (1, 1, 1)
model = ARIMA(df["Sales"], order=arima_order)
model_fit = model.fit()

In [None]:
# Store the fitted ARIMA model's predictions for positions 90..103 in a new
# column; rows not covered by the returned values are left as NaN.
# NOTE(review): dynamic=True presumably makes predictions after `start` build
# on earlier predictions rather than observed values -- confirm in the
# statsmodels documentation.
df["Forecast"]=model_fit.predict(start=90, end=103, dynamic=True)

In [None]:
# Overlay the ARIMA predictions on the observed series. Both lines are labelled
# so the legend tells them apart (they were previously drawn with no legend).
plt.figure(figsize=(10,6))
sns.lineplot(x="Month", y="Sales", data=df, label="Sales")
sns.lineplot(x="Month", y="Forecast", data=df, label="Forecast")

In [None]:
import statsmodels.api as sm

In [None]:
# Fit a seasonal ARIMA: non-seasonal order (1, 1, 1) plus seasonal order
# (1, 1, 1) with a period of 12.
nonseasonal_order = (1, 1, 1)
seasonal_order_12 = (1, 1, 1, 12)
model = sm.tsa.statespace.SARIMAX(
    df['Sales'],
    order=nonseasonal_order,
    seasonal_order=seasonal_order_12,
)
results = model.fit()

In [None]:
# Overwrite the Forecast column with the SARIMAX predictions for positions
# 90..103; rows not covered by the returned values are left as NaN.
df["Forecast"]=results.predict(start=90,end=103,dynamic=True)

In [None]:
# Overlay the SARIMAX predictions on the observed series. Both lines are
# labelled so the legend tells them apart (they were previously drawn with no
# legend).
plt.figure(figsize=(10,6))
sns.lineplot(x="Month", y="Sales", data=df, label="Sales")
sns.lineplot(x="Month", y="Forecast", data=df, label="Forecast")

In [None]:
from pandas.tseries.offsets import DateOffset

In [None]:
# Month stamps for the 23 months that follow the last row's Month value
# (offsets of 1 through 23 months).
last_month = df["Month"].iloc[-1]
future_dates = [last_month + DateOffset(months=step) for step in range(1, 24)]

In [None]:
future_dates

In [None]:
# Frame of the future months with an empty (NaN) Sales column to be filled by
# the forecast.
future_dates_df = pd.DataFrame({"Month": future_dates, "Sales": np.nan})
future_dates_df.head()

In [None]:
# Append the future rows below the observed data and renumber the index 0..n-1.
future_df = pd.concat([df, future_dates_df], ignore_index=True)

In [None]:
future_df.head()

In [None]:
future_df.tail()

In [None]:
len(df)

In [None]:
len(future_df)

In [None]:
future_df.iloc[[103,104,105,106],:]

In [None]:
# Predict only the appended future rows (positions len(df) .. len(future_df)-1)
# and write the values into just those rows. Assigning the prediction to the
# whole column would replace every observed Sales value with NaN, because the
# prediction only carries index labels for the forecast range.
forecast_start = len(df)
forecast_end = len(future_df) - 1
future_df.loc[forecast_start:forecast_end, "Sales"] = results.predict(
    start=forecast_start, end=forecast_end, dynamic=True
)

In [None]:
future_df.iloc[[103,104,105,106],:]

In [None]:
future_df.tail()

In [None]:
# Keep only the rows appended after the observed data. Deriving the boundary
# from len(df) avoids hard-coded positions that are tied to one dataset length.
forecast_df=future_df.iloc[len(df):,:]

In [None]:
# Observed sales followed by the out-of-sample forecast in red. Both lines are
# labelled so the legend identifies them.
plt.figure(figsize=(10,6))
sns.lineplot(x="Month", y="Sales", data=df, label="Observed")
sns.lineplot(x="Month", y="Sales", data=forecast_df, color="red", label="Forecast")