In [None]:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from statsmodels.tsa.stattools import adfuller
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.tsa.arima_model import ARIMA
from pandas.plotting import register_matplotlib_converters
register_matplotlib_converters()

# Read the daily London weather file, discard the snow-depth column, then
# drop every row that still contains a missing value and renumber the rows.
df = (
    pd.read_csv('./london_weather.csv')
      .drop(columns=['snow_depth'])
      .dropna()
      .reset_index(drop=True)
)

# Parse the YYYYMMDD dates and derive the calendar helper columns.
df['date'] = pd.to_datetime(df['date'], format='%Y%m%d')
df['month'] = df['date'].dt.month
df['year'] = df['date'].dt.year
df['month_year'] = df['date'].dt.to_period('M')

# Collapse to one row per calendar month by averaging every column.
df = df.groupby('month_year').mean()

df

In [None]:
# Find d (the differencing order) with an Augmented Dickey-Fuller test

from statsmodels.tsa.stattools import adfuller
from numpy import log

# Run the test on the monthly mean temperature, ignoring missing months.
result = adfuller(df.mean_temp.dropna())
adf_statistic, p_value = result[0], result[1]
print('ADF Statistic: %f' % adf_statistic)
print('p-value: %f' % p_value)

# Reading the result: a p-value below 0.05 means the series is treated as
# stationary, so no differencing is required and d = 0.


# testing 2

In [None]:
# Hold out the last 20% of the monthly rows as the test set; everything
# before the split point is the training set.
n = int(len(df) * 0.2)
split_at = len(df) - n
train, test = df.iloc[:split_at], df.iloc[split_at:]
test

In [None]:
# Import the library
from pmdarima import auto_arima

# Ignore warnings (note: this filter is global and stays active for every
# later cell in the notebook)
import warnings
warnings.filterwarnings("ignore")


# Stepwise search for a seasonal ARIMA order (12-month season) on the monthly
# mean temperature of the training set, with minimum temperature supplied as
# an exogenous regressor.
#
# pmdarima names the exogenous argument `X`. The previous `xreg=` keyword is
# not part of auto_arima's signature, so it fell through to **fit_args and the
# regressor never reached the model.
stepwise_fit = auto_arima(y=train['mean_temp'],
                          X=train[['min_temp']],
                          start_p = 1, start_q = 1,
                          max_p = 3, max_q = 3, m = 12,
                          start_P = 0, seasonal = True,
                          d = None, D = 0, trace = True,
                          error_action ='ignore',   # we don't want to know if an order does not work
                          suppress_warnings = True,  # we don't want convergence warnings
                          stepwise = True)           # set to stepwise

# To print the summary
stepwise_fit.summary()

In [None]:
# Build a VARMA(1, 1) model that forecasts mean_temp and min_temp jointly

from statsmodels.tsa.arima.model import ARIMA
from statsmodels.tsa.statespace.sarimax import SARIMAX
import statsmodels.api as sm


# VARMAX takes only a (p, q) `order`; it has no `seasonal_order` parameter
# (that belongs to SARIMAX). The keyword used to be passed here, but it was
# discarded with a FutureWarning (hidden by the global warning filter) and is
# slated to become a TypeError, so it is removed. The fitted model is the same
# non-seasonal VARMA(1, 1) as before.
# NOTE(review): if a 12-month seasonal effect is wanted, it would have to be
# supplied another way, e.g. seasonal regressors through `exog` -- confirm intent.
model = sm.tsa.VARMAX(
                endog = train[['mean_temp', 'min_temp']],
                order = (1, 1))

# `result` is the fitted-results object used by the prediction cells below.
result = model.fit(maxiter=2000, disp=False)
result.summary()

# Prediction: 5 years

In [None]:
# Predictions of the fitted model over the test set plus 5 further years
FORECAST_MONTHS = 60  # 5 years of monthly steps past the last observed month

start = len(train)
# `end` is inclusive in predict(): the last of the 60 future steps sits at
# position len(df) + 59. Using len(df) + 60 produced 61 future months, so
# taking the last 60 rows afterwards skipped the first forecast month.
end = len(df) + FORECAST_MONTHS - 1

# Rows [0, len(test)) line up with the test set; the remaining
# FORECAST_MONTHS rows extend past the observed data.
predictions = result.predict(start, end, dynamic=True
                             )

# plot predictions and actual values; explicit labels keep the two lines
# distinguishable in the legend (both series are otherwise named mean_temp)
predictions['mean_temp'].iloc[:len(test)].plot(legend = True, label = 'predicted mean_temp')
test['mean_temp'].plot(legend = True, label = 'actual mean_temp')




In [None]:
# Score the forecast against the held-out test months with MSE and RMSE
from sklearn.metrics import mean_squared_error
from statsmodels.tools.eval_measures import rmse

# Both metrics compare the same pair of series, so build each one once:
# the observed test values and the first len(test) predicted values.
test_actual = test["mean_temp"]
test_forecast = predictions["mean_temp"].iloc[:len(test)]

# Root mean squared error
RMSE = rmse(test_actual, test_forecast)

# Mean squared error
ERROR = mean_squared_error(test_actual, test_forecast)

In [None]:
# Display the mean squared error of the mean_temp predictions on the test set
ERROR

In [None]:
# Display the root mean squared error of the mean_temp predictions on the test set
RMSE

# forecasting 5 years

In [None]:
# Plot the full monthly mean-temperature series together with the last 60
# predicted months. Explicit labels are needed because both series are named
# `mean_temp` and would otherwise get identical legend entries.
df['mean_temp'].plot(legend = True, label = 'observed mean_temp')
predictions['mean_temp'].iloc[-60:].plot(legend = True, label = 'forecast mean_temp')