In [4]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt 
%matplotlib inline
from matplotlib.pylab import rcParams
# Default size for every figure in this notebook: 15 wide by 6 tall (inches).
rcParams['figure.figsize'] = (15, 6)

In [11]:
# Relative path to the AirPassengers CSV; reused by the later read_csv call.
FILE_TO_READ = './data/AirPassengers.csv'

# First pass: load with default parsing to inspect the raw columns and dtypes.
# 'Month' comes back as plain strings (dtype object) at this stage.
data = pd.read_csv(FILE_TO_READ)
print(data.head(), data.dtypes, sep='\n')

     Month  #Passengers
0  1949-01          112
1  1949-02          118
2  1949-03          132
3  1949-04          129
4  1949-05          121
Month          object
#Passengers     int64
dtype: object


In [15]:
def dateparse(dates):
    """Parse 'YYYY-MM' month label(s) into pandas datetimes.

    Accepts a single string or an array/Index of strings; the day defaults
    to the 1st of the month. Uses `pd.to_datetime` because `pd.datetime`
    was deprecated in pandas 1.0 and removed in pandas 2.0.
    """
    return pd.to_datetime(dates, format='%Y-%m')


# Re-read the CSV with 'Month' as the index, then convert that index to a
# DatetimeIndex explicitly. `read_csv(date_parser=...)` is deprecated since
# pandas 2.0, so the parsing is done after loading instead; this form works
# on both old and new pandas versions.
data = pd.read_csv(FILE_TO_READ, index_col='Month')
data.index = dateparse(data.index)

print(data.head())

            #Passengers
Month                  
1949-01-01          112
1949-02-01          118
1949-03-01          132
1949-04-01          129
1949-05-01          121


In [16]:
# Inspect the index to confirm it is a DatetimeIndex (dtype datetime64[ns]).
# Note that no frequency is attached to it (freq=None in the output below).
data.index

DatetimeIndex(['1949-01-01', '1949-02-01', '1949-03-01', '1949-04-01',
               '1949-05-01', '1949-06-01', '1949-07-01', '1949-08-01',
               '1949-09-01', '1949-10-01',
               ...
               '1960-03-01', '1960-04-01', '1960-05-01', '1960-06-01',
               '1960-07-01', '1960-08-01', '1960-09-01', '1960-10-01',
               '1960-11-01', '1960-12-01'],
              dtype='datetime64[ns]', name='Month', length=144, freq=None)

In [17]:
# Pull the passenger counts out as a Series indexed by Month, then preview
# the first ten observations.
ts = data.loc[:, '#Passengers']
ts.head(n=10)

Month
1949-01-01    112
1949-02-01    118
1949-03-01    132
1949-04-01    129
1949-05-01    121
1949-06-01    135
1949-07-01    148
1949-08-01    148
1949-09-01    136
1949-10-01    119
Name: #Passengers, dtype: int64

In [18]:
# 1. Specify the index label as a string constant:

ts.loc['1949-01-01']


112

In [19]:
# 2. Import the datetime library and build the index label with `datetime`:
from datetime import datetime
ts.loc[datetime(year=1949, month=1, day=1)]

112

In [22]:
# Label-based slice: everything up to and including 1949-05-01.
ts.loc[:'1949-05-01']

Month
1949-01-01    112
1949-02-01    118
1949-03-01    132
1949-04-01    129
1949-05-01    121
Name: #Passengers, dtype: int64

In [23]:
# Partial-string indexing: a year alone selects every observation in 1949.
ts.loc['1949']

Month
1949-01-01    112
1949-02-01    118
1949-03-01    132
1949-04-01    129
1949-05-01    121
1949-06-01    135
1949-07-01    148
1949-08-01    148
1949-09-01    136
1949-10-01    119
1949-11-01    104
1949-12-01    118
Name: #Passengers, dtype: int64

In [24]:
# A TS is said to be stationary if its statistical properties such as mean, variance remain constant over time. 
# But why is it important? 
# Most of the TS models work on the assumption that the TS is stationary. 
# Intuitively, we can say that if a TS has a particular behaviour over time, 
# there is a very high probability that it will follow the same in the future. 
# Also, the theories related to stationary series are more mature and easier to implement as compared to non-stationary series.

In [25]:
# Definition of Stationarity 
#  1. constant mean
#  2. constant variance
#  3. an autocovariance that does not depend on time.

In [26]:
# How, then, do we check the stationarity of a time series?

# 1. Plotting Rolling Statistics: We can plot the moving average or moving variance and see if it varies with time. 
#    By moving average/variance I mean that at any instant ‘t’, we’ll take the average/variance of the last year, 
#    i.e. last 12 months. But again this is more of a visual technique.

# 2. Dickey-Fuller Test: This is one of the statistical tests for checking stationarity. 
#    Here the null hypothesis is that the TS is non-stationary. 
#    The test results comprise a Test Statistic and some Critical Values for different confidence levels. 
#    If the ‘Test Statistic’ is less than the ‘Critical Value’, 
#    we can reject the null hypothesis and say that the series is stationary

