In [3]:
import numpy as np
import pandas as pd

In [4]:
import cufflinks as cf
# Set cufflinks' defaults (offline plotting, 'ggplot' theme) for the
# DataFrame/Series `.iplot(...)` calls used throughout this notebook.
cf.set_config_file(offline=True, world_readable=True, theme='ggplot')
from plotly.offline import init_notebook_mode, iplot
from plotly import graph_objs as go

# Initialize plotly's offline notebook mode before any figure is shown.
init_notebook_mode(connected=True)

def configure_plotly_browser_state(*_event_args):
  """Emit the RequireJS configuration that points `plotly` at the plotly.js CDN.

  The function is registered below as a `pre_run_cell` callback, so the
  script block is displayed again before every cell execution.

  Args:
    *_event_args: Ignored. IPython documents `pre_run_cell` callbacks as
      receiving an `info` object; accepting (and discarding) positional
      arguments keeps the callback usable both when that object is passed
      and when the function is called with no arguments.

  Returns:
    None. The HTML snippet is sent to the notebook front end via `display`.
  """
  # Import from the public display module instead of relying on the implicit
  # `display` builtin and the internal `IPython.core.display` path.
  from IPython.display import HTML, display
  display(HTML('''
        <script src="/static/components/requirejs/require.js"></script>
        <script>
          requirejs.config({
            paths: {
              base: '/static/base',
              plotly: 'https://cdn.plot.ly/plotly-latest.min.js?noext',
            },
          });
        </script>
        '''))
import IPython

# NOTE(review): every re-run of this cell registers one more callback, because
# the `def` above creates a new function object each time.
IPython.get_ipython().events.register('pre_run_cell', configure_plotly_browser_state)

In [50]:
# Load the passenger counts, parsing the 'Month' column as dates and using it
# as the index. Doing the parsing inside `read_csv` avoids the
# `infer_datetime_format` argument of `pd.to_datetime`, which pandas 2.x
# deprecates (format inference is the default behaviour there).
df = pd.read_csv(
    "data/AirPassengers.csv",
    parse_dates=["Month"],
    index_col="Month",
)
df.head()


Unnamed: 0_level_0,#Passengers
Month,Unnamed: 1_level_1
1949-01-01,112
1949-02-01,118
1949-03-01,132
1949-04-01,129
1949-05-01,121


In [6]:
# Plot only the raw passenger counts. Later cells add derived columns to `df`
# in place, so selecting the column explicitly keeps this chart unchanged when
# the cell is re-run after them.
df[['#Passengers']].iplot(xTitle="Date", yTitle="Passengers")

## Testing for stationarity

In [18]:
# Rolling mean and standard deviation over a 12-observation window
# (the index is monthly, so the window spans one year).
window_12 = df.rolling(window=12)
rollmean = window_12.mean()
rollstd = window_12.std()


In [19]:
from plotly import graph_objs as go

# Overlay the rolling mean, the rolling standard deviation and the original
# series in a single figure. One loop replaces three copy-pasted `add_trace`
# calls; each trace uses its own index for the x axis.
fig = go.Figure()
for trace_name, series in (
    ('mean', rollmean['#Passengers']),
    ('std', rollstd['#Passengers']),
    ('origin', df['#Passengers']),
):
    fig.add_trace(go.Scatter(
        x=series.index,
        y=series,
        name=trace_name, mode='lines'))

# Label the figure so it can be read without the surrounding code.
fig.update_layout(
    title='Rolling mean and standard deviation (window = 12)',
    xaxis_title='Date',
    yaxis_title='Passengers')

fig.show()

### ADF

In [9]:
from statsmodels.tsa.stattools import adfuller

def adf_test (df):
    """Run the Augmented Dickey-Fuller test and print a labelled summary.

    Args:
        df: The observations handed to `adfuller`. Despite the name, the call
            in this notebook passes a single column (a Series).

    Returns:
        None. The summary Series is printed, not returned.
    """
    # With autolag set, the first five entries of the result are: statistic,
    # p-value, lags used, observation count and the critical-value mapping.
    statistic, p_value, used_lags, n_obs, critical_values = adfuller(df, autolag='AIC')[:5]
    summary = {
        'Test Statistic': statistic,
        'p-value': p_value,
        '#Lags Used': used_lags,
        'Number of Observations Used': n_obs,
    }
    # One extra row per significance level reported by the test.
    summary.update(
        ('Critical Value (%s)' % level, threshold)
        for level, threshold in critical_values.items()
    )
    print(pd.Series(summary))
    
# Run the ADF test on the untransformed passenger counts.
# NOTE(review): in the output recorded below the p-value is ~0.99, so the
# test's unit-root null hypothesis is not rejected for the raw series.
adf_test(df["#Passengers"])

Test Statistic                   0.815369
p-value                          0.991880
#Lags Used                      13.000000
Number of Observations Used    130.000000
Critical Value (1%)             -3.481682
Critical Value (5%)             -2.884042
Critical Value (10%)            -2.578770
dtype: float64


### KPSS

In [12]:
from statsmodels.tsa.stattools import kpss

def kpss_test (df):
    """Run the KPSS stationarity test and print a labelled summary.

    Args:
        df: The observations handed to `kpss`. Despite the name, the call in
            this notebook passes a single column (a Series).

    Returns:
        None. The summary Series is printed, not returned.
    """
    # `nlags` is the keyword current statsmodels documents for the lag
    # selection; the older `lags` spelling was deprecated in its favour.
    kpsstest = kpss(df, regression='c', nlags='auto')
    kpss_output = pd.Series(kpsstest[0:3], index =['Test Statistic', 'p-value','Lags Used'])
    # The fourth entry maps each significance level to its critical value.
    for key, value in kpsstest[3].items():
        kpss_output['Critical Value (%s)'%key] = value
    print(kpss_output)
    
# Run the KPSS test on the untransformed passenger counts.
# NOTE(review): in the output recorded below the statistic (~1.65) exceeds
# every listed critical value and the reported p-value sits at 0.01.
kpss_test(df['#Passengers'])
        

Test Statistic           1.651312
p-value                  0.010000
Lags Used                8.000000
Critical Value (10%)     0.347000
Critical Value (5%)      0.463000
Critical Value (2.5%)    0.574000
Critical Value (1%)      0.739000
dtype: float64



p-value is smaller than the indicated p-value



## From non-stationary to stationary

### Differencing

In [24]:
# Difference the series against the value 7 observations earlier and plot it.
# NOTE(review): the data is monthly, so a lag of 7 looks unintended (1 or 12
# would be the usual choices) - confirm before relying on this chart.
df['#Passengers_diff'] = df['#Passengers'].diff(periods=7)
df['#Passengers_diff'].dropna().iplot()

### log + diff

In [36]:
# Log-transform the series, then take the lag-n difference of the result.
n = 1
df['#Passengers_log'] = np.log(df['#Passengers'])
df['#Passengers_log_diff'] = df['#Passengers_log'].diff(periods=n)
# Plot the transformed series first, then its differenced version.
for column, title in (
    ('#Passengers_log', 'log'),
    ('#Passengers_log_diff', 'log + diff'),
):
    df[column].dropna().iplot(title=title)

### Square root + diff

In [67]:
# Take the square root of the series, then the lag-n difference of the result.
n = 1
df['#Passengers_root'] = np.sqrt(df['#Passengers'])
df['#Passengers_root_diff'] = df['#Passengers_root'].diff(periods=n)
# Plot the transformed series first, then its differenced version.
for column, title in (
    ('#Passengers_root', 'root'),
    ('#Passengers_root_diff', 'root + diff'),
):
    df[column].dropna().iplot(title=title)

### power + diff

In [55]:
# Raise the series to the 10th power, then take the lag-n difference.
n=1
# Cast to float before exponentiating: the counts are integers, and the 10th
# power of even the first value shown (112) exceeds the int64 range, so the
# integer computation wraps around silently and yields meaningless numbers.
df['#Passengers_pow'] = np.power(df['#Passengers'].astype('float64'), 10)
df['#Passengers_pow_diff'] = df['#Passengers_pow'] - df['#Passengers_pow'].shift(n)
# The titles previously read 'root' / 'root + diff', copied from the
# square-root cell; they now describe this transform.
df['#Passengers_pow'].dropna().iplot(title='power')
df['#Passengers_pow_diff'].dropna().iplot(title='power + diff')

### Box - Cox

In [71]:
from scipy.stats import boxcox

# Box-Cox transform the series, then take the lag-n difference of the result.
n = 1
# `boxcox` hands back the transformed values together with the lambda it used.
df['#Passengers_box'], lam = boxcox(df['#Passengers'])
df['#Passengers_box_diff'] = df['#Passengers_box'].diff(periods=n)
print('Lambda: {}'.format(lam))
# Plot the transformed series first, then its differenced version.
for column, title in (
    ('#Passengers_box', 'box'),
    ('#Passengers_box_diff', 'box + diff'),
):
    df[column].dropna().iplot(title=title)

Lambda: 0.14802265137037945
