# Time Series Descriptive and Classical Methods

# Concept Session

## Demo - 2.1: Time Series Components

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.tsa.api import SimpleExpSmoothing
from statsmodels.tsa.api import ExponentialSmoothing

# ignore warnings
import warnings
warnings.filterwarnings('ignore')

In [2]:
# Show the signature and docstring of seasonal_decompose (interactive reference only).
help(seasonal_decompose)

Help on function seasonal_decompose in module statsmodels.tsa.seasonal:

seasonal_decompose(x, model='additive', filt=None, period=None, two_sided=True, extrapolate_trend=0)
    Seasonal decomposition using moving averages.
    
    Parameters
    ----------
    x : array_like
        Time series. If 2d, individual series are in columns. x must contain 2
        complete cycles.
    model : {"additive", "multiplicative"}, optional
        Type of seasonal component. Abbreviations are accepted.
    filt : array_like, optional
        The filter coefficients for filtering out the seasonal component.
        The concrete moving average method used in filtering is determined by
        two_sided.
    period : int, optional
        Period of the series. Must be used if x is not a pandas object or if
        the index of x does not have  a frequency. Overrides default
        periodicity of x if x is a pandas object with a timeseries index.
    two_sided : bool, optional
        The moving a

In [4]:
# read the dataset; the 'Month' column is parsed as dates
# NOTE(review): the path is relative to the working directory — the CSV must be
# present there, otherwise this cell raises FileNotFoundError.
mrf = pd.read_csv("DS3_C1_S2_MRF_Data_Concept.csv", parse_dates=['Month'])


FileNotFoundError: [Errno 2] No such file or directory: 'DS3_C1_S2_MRF_Data_Concept.csv'

In [None]:
# preview the first rows of the loaded frame
mrf.head()

In [None]:
# add a new date column: one date per month from Jan 2006 through May 2016
# NOTE(review): the assignment assumes this range has exactly one entry per
# row of mrf — confirm against the CSV's row count.
d1=pd.date_range(start="1/1/2006", end="5/31/2016",freq="M")
mrf['tdate']=d1

In [None]:
# set time data as index: the 'tdate' column becomes the row index
mrf=mrf.set_index('tdate')
print(mrf)

In [None]:
# keep only the 'Close Price' column; the double brackets give a one-column DataFrame
closeprice = mrf[['Close Price']]
print(closeprice.head())

In [None]:
# different components of the Time series data
# period = 12 since the data is monthly
#
# `period` is the keyword listed in the seasonal_decompose signature; `freq`
# is not part of it. Passing the pandas Series (instead of a bare NumPy array)
# keeps the date index, so all four panels share the same time axis.
decomposition = seasonal_decompose(closeprice['Close Price'], period=12)
d_trend = decomposition.trend
d_seasonal = decomposition.seasonal
d_residual = decomposition.resid

# plotting all the 4 graphs, one per row, on the axes created by plt.subplots
fig, ax = plt.subplots(4, 1, figsize=(20, 12))

panels = [
    (closeprice, 'MRF Stock Data - as it is'),
    (d_trend, 'Trend'),
    (d_seasonal, 'Seasonal'),
    (d_residual, 'Residual'),
]
for axis, (series, label) in zip(ax, panels):
    axis.plot(series, label=label)
    axis.legend(loc='best')

plt.tight_layout()

In the above figure we can see the original series along with its individual time series components. The trend component shows an upward trend. The seasonal component shows the seasonal pattern in the data. The residual shows the irregular component.

## Demo - 2.2: Moving Average

In [None]:
# window size (number of consecutive rows) used for the moving average
lags = 4
# rolling mean of the close price over `lags` rows
moving_avg = closeprice.rolling(window=lags).mean()

In [None]:
# put the actual close price and its moving average side by side
df_ts = pd.DataFrame({'actual':closeprice['Close Price'], 'ma':moving_avg['Close Price']})
print(df_ts)

In [None]:
# mean of the first `lags` actual values
# (presumably a manual check against the first non-null moving-average value — confirm)
df_ts.actual.head(lags).mean()

In [None]:
# plot the actual and moving average on the same axes for comparison
plt.plot(df_ts.actual, color='blue',label = 'Actual')
plt.plot(df_ts.ma, color='red', label = 'Moving Average')
# the title records which window size produced the red line
plt.title('Actual vs Moving Average for lags = ' + str(lags))
plt.legend()
plt.show()

A moving average helps smooth the data. In this graph we can see that applying the moving average has reduced the randomness and short-term fluctuations in the closing price over time.

In [None]:
# Show the signature and docstring of SimpleExpSmoothing (interactive reference only).
help(SimpleExpSmoothing)

## Demo - 2.3: Exponential Smoothing

Let's consider the 4 plots from Demo - 2.1

Comparing the range of the original data with the range of the seasonal component, we find that the seasonal range is very small and can be ignored.

Hence we can say there is no seasonality in this MRF dataset.

The seasonal component must be judged relative to the scale of the original series, and not just by its shape.

### Simple Exponent Smoothing (no trend/seasonality)

In [None]:
# Simple Exponent - when there is no trend/seasonality
def simplesmoothing(data,alpha):
    """Fit simple exponential smoothing with a fixed smoothing level.

    Parameters
    ----------
    data : series of observations handed to SimpleExpSmoothing
    alpha : smoothing level passed to ``fit``; optimisation is switched off

    Returns
    -------
    tuple
        ``(fitted_model, frame)`` where ``frame`` has an 'actual' column with
        the input data and a 'forecast' column with the model's fitted values.
    """
    fitted = SimpleExpSmoothing(data).fit(smoothing_level=alpha, optimized=False)
    comparison = pd.DataFrame({'actual': data, 'forecast': fitted.fittedvalues})
    return fitted, comparison

In [None]:
# select the 'Close Price' column as a Series; this is the input to the smoothing models
data = closeprice['Close Price']

In [None]:
# create forecast for different values of alpha. Plot and understand the charts

# dictionary to store the forecast for each alpha (key: alpha, value: forecast column)
d_simple_exp = {}
# NOTE(review): alpha = 0 is included — presumably as a "no updating" baseline; confirm it is intended
alphas = [0, 0.2, 0.6, 0.8]

for a in alphas:
    # only the forecast column is kept; the returned model is discarded on the next iteration
    model, df = simplesmoothing(data,a)
    d_simple_exp[a] = df.forecast

In [None]:
colors = ['yellow','blue','green','orange']

# draw the actual series first, then one smoothed line per alpha
fig, ax = plt.subplots(1, 1, figsize=(20, 12))
plt.plot(data, marker='o', color='black', label='Actual Close Price')

# pair each alpha with its line colour instead of indexing both lists
for alpha, color in zip(alphas, colors):
    plt.plot(
        d_simple_exp[alpha],
        marker='.',
        color=color,
        label='Simple Smoothening : alpha=' + str(alpha),
    )

plt.title('Simple Exponent Smoothing')
plt.legend()
plt.show()

In the above time series plot using simple exponential smoothing, we can observe that the line for a higher value of alpha fits the actual close price line more closely than the other lines. Such a line adjusts to changing conditions more quickly.

### Double Exponent Smoothing (trend, but no seasonality)

In [None]:
# Double Exponent - when there is a trend, no seasonality (Holt's method)
# parameters: alpha (smoothing_level), beta (smoothing_slope)
# NOTE(review): newer statsmodels releases name the trend parameter
# `smoothing_trend` — confirm which keyword the installed version expects.

model = ExponentialSmoothing(data,trend='additive').fit(smoothing_level=0.2, smoothing_slope=0.5)

# fitted values from double exponent smoothing
f_dexp = model.fittedvalues
print(f_dexp)

In [None]:
# plot the actual and smooth data on the same axes
plt.plot(data,marker='o',color='black',label='Actual Close Price')
# the bracketed values in the label are the level and trend smoothing constants used in the fit
plt.plot(f_dexp, marker='.',color='yellow',label='Double Exp Smoothing [0.2, 0.5]')
plt.legend()
plt.title('Double Exponent Smoothing')

In the above double exponential smoothing plot, the series is smoothed with a level smoothing constant of 0.2 and a trend smoothing constant of 0.5, so that the fitted line can follow the trend in the data.

### Triple Exponent Smoothing (trend and seasonality)

In [None]:
# read the data; the 'DATE' column becomes the index and is parsed as dates
path="DS3_C1_S2_Elecprod_Data_Concept.csv"
elec_data = pd.read_csv(path,index_col='DATE',parse_dates=True)

In [None]:
# preview the first rows; head() has to be the cell's last expression to be
# displayed, and printing the whole frame would flood the output
elec_data.head()

In [None]:
# plot the full electricity production series
plt.plot(elec_data)
# leftover experiment, kept disabled: plt.plot(data[0:60])

In [None]:
# build model: additive trend and additive seasonality with a 12-step season
model = ExponentialSmoothing(elec_data.Value,trend='add',seasonal='add',seasonal_periods=12).fit()
# printing the results object itself only shows its repr; the summary
# lists the fitted smoothing parameters
print(model.summary())

In [None]:
# plot the actual and triple smoothing values
# plt.subplots returns (figure, axes): unpack both rather than binding the
# whole tuple to `ax`, then draw on the axes explicitly
fig, ax = plt.subplots(figsize = (20,10))
ax.plot(elec_data.Value, marker='o',color='black',label='Actual Elec Prod')
ax.plot(model.fittedvalues,marker='.',color='red',label='Triple Exp Smoothing')
ax.legend()

ax.set_title('Triple Exponent Smoothing')

In the above plot we can see an upward trend over the years together with a yearly seasonal pattern.

In [None]:
# forecast the next 10 steps with the fitted triple exponential smoothing model
model.forecast(10)

## Demo - 2.4: Time Series - Stationarity Check

In [None]:
import pandas as pd
import matplotlib.pyplot as plt

# dickey fuller test for stationarity check
from statsmodels.tsa.stattools import adfuller

# ignore warnings
import warnings
warnings.filterwarnings('ignore')

In [None]:
# read the file (same MRF CSV as Demo 2.1, loaded here with the default integer index)
path="DS3_C1_S2_MRF_Data_Concept.csv"
stock = pd.read_csv(path)
# take the feature to forecast; double brackets keep it as a one-column DataFrame
mystock = stock[['Close Price']]
print(mystock)

In [None]:
# ADF test to determine data stationarity
def checkStationarity(data, column='Close Price', alpha=0.05):
    """Run the Augmented Dickey-Fuller test on one column and report the verdict.

    Parameters
    ----------
    data : DataFrame-like
        Object indexable by ``column``; that column is passed to ``adfuller``.
    column : str, optional
        Column to test. Defaults to 'Close Price', the column the function
        previously hard-coded.
    alpha : float, optional
        Significance level the p-value is compared against. Defaults to 0.05.

    Returns
    -------
    str
        Message containing the p-value and whether the data is treated as
        stationary (p-value < alpha) or not.
    """
    # element 1 of adfuller's result is the p-value
    pvalue = adfuller(data[column])[1]
    print(pvalue)

    if pvalue < alpha:
        msg = "pvalue={}. Data is Stationary. Proceed to model building".format(pvalue)
    else:
        msg = "pvalue={}. Data is not Stationary. Make the data stationary before model building".format(pvalue)

    return msg

In [None]:
# function call: run the stationarity check on the raw close price and print the verdict
print(checkStationarity(mystock))

In [None]:
# difference the data by 1 (first-order differencing): each value minus the
# previous one. The lag has to be 1 to match the stated order; shift(2) would
# compute a lag-2 difference instead.
diff_mystock = mystock - mystock.shift(1)
print(diff_mystock)

In [None]:
# remove the Nulls from the differenced data
# (the shift leaves NaN at the start of the series)
print('Before removing Nulls,total records = ', len(diff_mystock))
# reassign instead of mutating in place, so the cell reads as input -> output
diff_mystock = diff_mystock.dropna()
print('After removing Nulls, total records = ', len(diff_mystock))
print(diff_mystock)

In [None]:
# check if the differenced data is stationary
# print the returned verdict, as the earlier check on the raw data does
print(checkStationarity(diff_mystock))

In [None]:
# plot the Actual data and Differenced data side by side
# plt.subplots returns (figure, axes): create both panels up front instead of
# binding the tuple to `ax` and adding subplots on top of an unused axes
fig, (ax_actual, ax_diff) = plt.subplots(1, 2, figsize = (10,10))

ax_actual.plot(mystock,color='red')
ax_actual.set_title('Actual Data')

ax_diff.plot(diff_mystock,color='blue')
ax_diff.set_title('Differenced Data')

# figure-level title; a second plt.title() call would overwrite the
# 'Differenced Data' title on the right-hand panel
fig.suptitle("Actual vs Differenced data")

In the above figure, the plot of the actual data shows a trend, which makes the data non-stationary; this means we cannot yet proceed with a time series model. After differencing, the trend component is no longer visible in the plot, which makes the data stationary and indicates that we can now proceed with the time series model.