## 4.1 - Time Series Analysis

#### Get the data

Data from: https://datamarket.com/data/set/22u3/international-airline-passengers-monthly-totals-in-thousands-jan-49-dec-60 

Ref: Time Series Analysis: Forecasting and Control (1970) Box, Jenkins and Reinsel.

In [None]:
import pandas as pd

# Path of the passenger CSV, resolved relative to the current working directory.
fname = 'AirPassengers.csv'
data = pd.read_csv(filepath_or_buffer=fname)

# Show the first rows to confirm the file parsed as expected.
data.head()

In [None]:
# Count missing values per column (isna is the same check as isnull).
data.isna().sum()

In [None]:
# Inspect each column's dtype as loaded from the CSV, before any conversion.
data.dtypes

In [None]:
# Parse the 'Month' column into pandas datetimes and write it back in place.
month_as_datetime = pd.to_datetime(data['Month'])
data['Month'] = month_as_datetime

data.head()

In [None]:
# Re-check the dtypes now that 'Month' has been passed through pd.to_datetime.
data.dtypes

In [None]:
# Demonstrate the .dt accessor: year component of the first few 'Month' values.
data['Month'].head().dt.year

In [None]:
# Use 'Month' as the index so later cells can filter and plot against data.index.
data = data.set_index(keys='Month')

data.head()

In [None]:
%matplotlib inline

data.plot(grid='on')

In [None]:
from datetime import datetime

# Zoom in on January 1959 through December 1960 (both endpoints included).
start_date = datetime(1959, 1, 1)
end_date = datetime(1960, 12, 1)

# Boolean mask over the index selecting rows inside the window.
in_window = (start_date <= data.index) & (data.index <= end_date)

# `grid` is a boolean option; pass True rather than relying on the
# truthiness of the string 'on'. The semicolon hides the Axes repr.
data[in_window].plot(grid=True);

#### Time Series Decomposition

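Additive model: the observed series is modelled as the sum of a trend, a seasonal component and a residual.

$$Y(t) = \mathrm{Trend}(t) + \mathrm{Seasonality}(t) + \mathrm{Residual}(t)$$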

In [None]:
import statsmodels.api as sm

# Decompose the series with the additive model; the result exposes the
# fitted components (e.g. decomposition.trend, used further below).
decomposition = sm.tsa.seasonal_decompose(data, model='additive')

# Bind the returned figure to a name so it is not also echoed as the cell's
# output value. Outside a notebook, call decomposition.plot() on its own line.
fig = decomposition.plot()

In [None]:
import matplotlib

# Set the default figure size to 12 x 8 (width, height) for figures created
# from here on; the original note describes this as double the default size.
matplotlib.rcParams.update({'figure.figsize': [12.0, 8.0]})

In [None]:
import matplotlib.pyplot as plt
import matplotlib.dates as mdates

# Overlay the observed series and the trend from the decomposition on one axes.
fig, ax = plt.subplots()
ax.grid(True)

# Major x ticks: one per year (placed in January), labelled with the 4-digit year.
year = mdates.YearLocator(month=1)
year_format = mdates.DateFormatter("%Y")
ax.xaxis.set_major_locator(year)
ax.xaxis.set_major_formatter(year_format)

# Minor x ticks: every third month, shown only as additional grid lines.
month = mdates.MonthLocator(interval=3)
# NOTE: month_format is defined but no minor formatter is installed, so minor
# ticks stay unlabelled. Kept as-is in case other cells refer to it.
month_format = mdates.DateFormatter("%m")
ax.xaxis.set_minor_locator(month)
ax.xaxis.grid(True, which='minor')

# Draw through the explicit axes (rather than the implicit pyplot state) and
# label each line so the legend tells the observed data from the trend.
ax.plot(data.index, data['AirPassengers'], c='blue', label='Observed')
ax.plot(decomposition.trend.index, decomposition.trend, c='red', label='Trend')

# Units follow the data source description (monthly totals in thousands).
ax.set_title('Air passengers: observed series and additive trend')
ax.set_xlabel('Month')
ax.set_ylabel('Passengers (thousands)')
ax.legend();