In [None]:
import pandas as pd
import numpy as np 
import matplotlib.pyplot as plt 
%matplotlib inline 
import time 
import datetime 

In [None]:
# Read the Delhi weather CSV once; `orig_df` is kept untouched as the raw reference copy.
orig_df = pd.read_csv('/mnt/share/datasets/fala/content/delhi_weather_final.csv')

# Work on a copy with friendlier column names. The rename is positional, so it
# assumes the file has exactly these four columns in this order — TODO confirm
# against the CSV header.
df = orig_df.copy()
df.columns = ['Date', 'Temperature', 'Pressure', 'Humidity']

# Report the size, then preview the first five rows as the cell output.
print(df.shape)
df.head(5)

In [None]:
# Summary statistics of the loaded frame, used to eyeball the value ranges.
df.describe()

In [None]:
# Keep only rows whose readings lie inside the accepted ranges. Series.between
# is inclusive on both ends by default, which matches the `>=` / `<=` bounds.
df = df[
    df['Humidity'].between(39, 78)
    & df['Pressure'].between(1002.0, 1014.0)
    & df['Temperature'].between(19.0, 32.0)
]

# Extremes of the filtered data, kept as globals for later reference.
max_temp = df['Temperature'].max()
max_pres = df['Pressure'].max()
max_humid = df['Humidity'].max()

min_temp = df['Temperature'].min()
min_pres = df['Pressure'].min()
min_humid = df['Humidity'].min()

# One line per statistic: maxima first, then minima.
print(
    f'Max Temp : {max_temp}',
    f'Max Pressure : {max_pres}',
    f'Max Humidity : {max_humid}',
    f'Min Temp : {min_temp}',
    f'Min Pressure : {min_pres}',
    f'Min Humidity : {min_humid}',
    sep='\n',
)

In [None]:
# Count missing values per column in the range-filtered frame.
df.isnull().sum()

In [None]:
# (rows, columns) remaining after the range filtering.
df.shape

In [None]:
# Preview the first rows of the filtered frame.
df.head()

In [None]:
# Keep the rows whose 'Date' text contains '14:'; missing dates count as
# non-matches (na=False).
# NOTE(review): this is a plain substring match — presumably it picks the
# 14:00 readings, but confirm the timestamp format cannot contain '14:' elsewhere.
temp_df = df[df['Date'].str.contains('14:', na=False)]
temp_df.shape

In [None]:
# Preview the first rows of the '14:' subset.
temp_df.head()

In [None]:
# Quick look at the selected temperature series on a wide canvas.
plt.figure(figsize=(16, 5))
plt.gca().plot(temp_df['Temperature'])

In [None]:
# Draw the temperature as a band mirrored around zero (y1 above, -y1 below).
x = temp_df['Date'].values
y1 = temp_df['Temperature'].values

fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(16, 5), dpi=120)
ax.fill_between(x, y1=y1, y2=-y1, alpha=0.5, linewidth=2, color='seagreen')
ax.set_ylim(-32, 32)
ax.set_title('Temperature variation at 2 pm', fontsize=16)
# Thin zero line spanning from the smallest to the largest 'Date' value.
ax.hlines(y=0, xmin=temp_df['Date'].min(), xmax=temp_df['Date'].max(), linewidth=.5)
plt.show()

In [None]:
# Inspect the last rows of the subset.
temp_df.tail()

In [None]:
# Replace the existing index with a synthetic DatetimeIndex starting at
# 1996-11-01 14:00, one stamp per row. The number of periods is taken from the
# frame itself (instead of a hard-coded 626) so set_index cannot fail with a
# length mismatch when the filtered subset changes size.
# NOTE(review): the stamps are spaced one hour apart and are not parsed from the
# 'Date' column — confirm this spacing is intended, since later time-series
# steps may rely on the index frequency.
idx = pd.date_range('1996-11-01 14:00:00', periods=len(temp_df), freq='1h')

temp_df = temp_df.set_index(idx)
temp_df.index.name = 'Timestamp'
temp_df.head()

In [None]:
# Scale the temperature column down by a constant factor of 50.
# NOTE(review): the rationale for 50 is not stated — confirm. The column is
# overwritten, so re-running this cell divides by 50 again.
temp_df['Temperature'] = temp_df['Temperature'].div(50)

### Decomposing Time series into its components 

In [None]:
from statsmodels.tsa.seasonal import seasonal_decompose
from dateutil.parser import parse  # not used in this cell

# Decompose the temperature series twice: first with the multiplicative model,
# then with the additive one. The results are kept for the plots that follow.
result_mul, result_add = (
    seasonal_decompose(temp_df['Temperature'], model=kind)
    for kind in ('multiplicative', 'additive')
)


#### Multiplicative Decomposition

In [None]:
# Square default canvas for the decomposition panels.
plt.rcParams['figure.figsize'] = (10, 10)
result_mul.plot().suptitle('Multiplicative Decompose', fontsize=22)

#### Additive Decomposition

In [None]:
# Square default canvas for the decomposition panels.
plt.rcParams['figure.figsize'] = (10, 10)
result_add.plot().suptitle('Additive Decompose', fontsize=22)

### Detrend 

In [None]:
from scipy import signal

plt.rcParams['figure.figsize'] = (9, 5)

# Remove the trend with scipy's detrend (default settings) and plot what is
# left; the result is a plain array, so the x-axis is the sample position.
detrended = signal.detrend(temp_df['Temperature'].values)
plt.title('Temperature Detrend')
plt.plot(detrended)

# Working For ARIMA 

### Finding the differencing value 

In [None]:
from statsmodels.tsa.stattools import adfuller
from numpy import log  # not used in this cell

# Augmented Dickey-Fuller test on the temperature series with missing values
# dropped; the first two entries of the result are reported as the test
# statistic and the p-value.
result = adfuller(temp_df['Temperature'].dropna())
print('ADF Statistic: %f' % result[0], 'p-value: %f' % result[1], sep='\n')

Since the p-value is far below 0.05, we reject the ADF null hypothesis of a unit root: the series is stationary, so we do not actually need to perform any differencing.

### Verifying differencing with graph 

#### no differencing (d = 0)

In [None]:
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf

# Original series (d = 0) followed by its autocorrelation plot.
# A single rcParams update is enough: an earlier (9, 7) figure size was
# overridden before any figure was created.
plt.rcParams.update({'figure.figsize': (6, 3), 'figure.dpi': 120})
plt.plot(temp_df['Temperature'])
plot_acf(temp_df['Temperature'])
# Render the figures explicitly; a bare `plt.plot` reference does nothing.
plt.show()

#### first order differencing (d = 1)

In [None]:
# First-order differenced series (d = 1) and its autocorrelation plot.
plt.rcParams.update({'figure.figsize': (6, 3), 'figure.dpi': 120})
plt.plot(temp_df['Temperature'].diff())
# diff() leaves a leading NaN, which is dropped before computing the ACF.
plot_acf(temp_df['Temperature'].diff().dropna())
# Render the figures; an argument-less plt.plot() would draw nothing and only
# leave an empty list as the cell output.
plt.show()

#### second order differencing (d = 2)

In [None]:
# Second-order differenced series (d = 2) and its autocorrelation plot.
plt.rcParams.update({'figure.figsize': (6, 3), 'figure.dpi': 120})
plt.plot(temp_df['Temperature'].diff().diff())
# Differencing twice leaves leading NaNs, which are dropped before the ACF.
plot_acf(temp_df['Temperature'].diff().diff().dropna())

# Render the figures; an argument-less plt.plot() would draw nothing and only
# leave an empty list as the cell output.
plt.show()

As we can see, the autocorrelation quickly becomes negative in the second graph (a sign of over-differencing), therefore we fix the value of d as 0.

### Finding the order of AR term (p)

#### no differencing 

In [None]:

# Undifferenced series and its partial autocorrelation plot, used to pick the
# AR order p.
plt.rcParams.update({'figure.figsize': (6, 3), 'figure.dpi': 120})
plt.plot(temp_df.Temperature)
plot_pacf(temp_df.Temperature.dropna())
# Render the figures; an argument-less plt.plot() would draw nothing and only
# leave an empty list as the cell output.
plt.show()

As we can see, the first two lags of the PACF are significant, therefore we keep p as 2.

#### first order differencing 

In [None]:
# PACF plot of the 1st differenced series, shown next to the differenced
# series itself for comparison with the undifferenced case.
plt.rcParams.update({'figure.figsize': (6, 3), 'figure.dpi': 120})

plt.plot(temp_df.Temperature.diff())
# diff() leaves a leading NaN, which is dropped before computing the PACF.
plot_pacf(temp_df.Temperature.diff().dropna())
# Render the figures; an argument-less plt.plot() would draw nothing and only
# leave an empty list as the cell output.
plt.show()

### Finding order of MA term 

#### no differencing 

In [None]:
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf

# Small, high-resolution figures for the series / ACF pair.
plt.rcParams['figure.figsize'] = (6, 3)
plt.rcParams['figure.dpi'] = 120

# Undifferenced series and its autocorrelation plot, used to pick the MA order q.
plt.plot(temp_df['Temperature'])
plot_acf(temp_df['Temperature'].dropna())

plt.show()

#### first order differencing

In [None]:
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf

# Small, high-resolution figures for the series / ACF pair.
plt.rcParams['figure.figsize'] = (6, 3)
plt.rcParams['figure.dpi'] = 120

# First-order differenced series and its autocorrelation plot; the leading NaN
# produced by diff() is dropped before computing the ACF.
plt.plot(temp_df['Temperature'].diff())
plot_acf(temp_df['Temperature'].diff().dropna())

plt.show()

We fix the value of q as 2.

In [None]:
from statsmodels.tsa.arima_model import ARIMA

# NOTE(review): this is the legacy statsmodels ARIMA module, which I believe is
# deprecated/removed in newer statsmodels releases — confirm against the
# installed version before upgrading. Migrating also affects any later use of
# `model_fit` that relies on the legacy results API.

# ARIMA model with order (p, d, q) = (2, 0, 2) on the temperature series.
# disp=0 is passed to fit(), presumably to silence optimizer output.
model = ARIMA(temp_df['Temperature'], order=(2, 0, 2))
model_fit = model.fit(disp=0)
print(model_fit.summary())

In [None]:
# Residual diagnostics: the residuals as a line plot (left panel) and their
# kernel density estimate (right panel).
residuals = pd.DataFrame(model_fit.resid)
fig, ax = plt.subplots(nrows=1, ncols=2)
residuals.plot.line(title="Residuals", ax=ax[0])
residuals.plot.kde(title='Density', ax=ax[1])
plt.show()

In [None]:
# Actual vs fitted values, drawn on a wide, short canvas.
# NOTE(review): plot_predict is called as a method of the fitted results
# object; this presumably depends on the legacy ARIMA results API — confirm it
# exists in the installed statsmodels version.
plt.rcParams['figure.figsize'] = (12, 3)
plt.rcParams['figure.dpi'] = 120
model_fit.plot_predict(dynamic=False)
plt.show()