# Introduction to Statsmodels

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import scipy
%matplotlib inline

In [None]:
from pandas_datareader import data as web

# Fetch the Yahoo quotes for one ticker and keep only the adjusted close,
# stored as a single-column frame whose column is named after the ticker.
ticker = 'TSLA'
adj_close = web.DataReader(ticker, data_source='yahoo', start='2017-1-1')['Adj Close']
data = adj_close.to_frame(name=ticker)

In [None]:
# Preview the first five rows of the downloaded price frame.
data.head(n=5)

In [None]:
# You can safely ignore the warning:
#  Please use the pandas.tseries module instead. from pandas.core import datetools
import statsmodels.api as sm

## Using Statsmodels to get the trend
The Hodrick-Prescott filter separates a time series $y_t$ into a trend component $\tau_t$ and a cyclical component $\zeta_t$:

$y_t = \tau_t + \zeta_t$

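The components are determined by minimizing the following quadratic loss function, where $\lambda$ is the smoothing parameter that penalizes changes in the growth rate of the trend: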

$\min_{\\{ \tau_{t}\\} }\sum_{t=1}^{T}\zeta_{t}^{2}+\lambda\sum_{t=1}^{T}\left[\left(\tau_{t}-\tau_{t-1}\right)-\left(\tau_{t-1}-\tau_{t-2}\right)\right]^{2}$

In [None]:
# Reference only (intentionally left commented out, never executed): an HP-filter
# call on a `df.realgdp` column. No `df` with a `realgdp` column is created in the
# cells above, so these lines cannot be run as they stand.
# gdp_cycle, gdp_trend = sm.tsa.filters.hpfilter(df.realgdp)
# df[['trend','realgdp']]["2000-03-31":].plot(figsize=(12,8))

In [None]:
# Plot the raw series that the filters below will decompose.
data['TSLA'].plot(figsize=(12,6))
# The column holds the TSLA 'Adj Close' values loaded by the DataReader cell,
# so the axis is a price axis (the previous "REAL GDP" label was wrong).
plt.ylabel("Adj Close price")

In [None]:
# Split the price series into its HP-filter cyclical and trend parts.
# lamb=1600 is the library default, spelled out here so the smoothing strength
# is visible; statsmodels documents that value as the suggestion for quarterly data.
cycle, trend = sm.tsa.filters.hpfilter(data["TSLA"], lamb=1600)

In [None]:
# Draw the HP-filter cyclical component on its own 12x6 figure.
cycle_fig, cycle_ax = plt.subplots(figsize=(12, 6))
cycle_ax.plot(cycle)

In [None]:
# Overlay the HP trend, the raw prices and the cyclical component.
# The cycle is shifted up by 200 so it sits in the same price band as the others.
overlay_fig, overlay_ax = plt.subplots(figsize=(12, 6))
overlay_lines = (
    (trend, "Trend"),
    (data.TSLA, "TSLA"),
    (cycle + 200, "Cycle"),
)
for line_values, line_label in overlay_lines:
    overlay_ax.plot(line_values, label=line_label)
overlay_ax.legend()
overlay_ax.set_ylabel("Price")
overlay_ax.set_xlabel("Date")
overlay_ax.set_title("Note that cycle is offset by 200")

## Seasonal decomposition with `seasonal_decompose` (from `statsmodels.tsa.seasonal`)

In [None]:
from statsmodels.tsa.seasonal import seasonal_decompose

# Series to decompose: the adjusted-close column loaded earlier.
tsla = data.TSLA

# One seasonal cycle is taken to be 252 observations (same value the notebook
# used before), i.e. roughly one year of daily trading rows.
SEASONAL_PERIOD = 252

# `period=` replaces the deprecated `freq=` keyword, which newer statsmodels
# releases no longer accept. Requires statsmodels >= 0.11.
# The multiplicative model expresses the series as trend * seasonal * resid.
result = seasonal_decompose(tsla, model='multiplicative', period=SEASONAL_PERIOD)

In [None]:
# Render the standard decomposition figure; binding the returned Figure to a
# name keeps the notebook from echoing it a second time as the cell output.
decomposition_fig = result.plot()

In [None]:
# Scale the observed series by its own maximum so it tops out at 1.0.
observed_values = result.observed.values
observed = observed_values / observed_values.max()

# Scale the decomposition trend the same way.
# The trend from seasonal_decompose is a centred moving average, so it has NaN
# at both ends; a plain .max() would return NaN and turn the whole result into
# NaN, hence np.nanmax.
# The result gets its own name so it does not overwrite `trend` from the
# HP-filter cell, which a later plot still draws against the raw prices.
trend_values = result.trend.values
trend_norm = trend_values / np.nanmax(trend_values)
# observed

In [None]:
# Collect the decomposition components in one frame.
# `resid` and `trend` are Series and supply the index; `seasonal` and
# `observed` are plain arrays of the same length.
# Note: `observed` is the max-normalised series computed in the previous cell,
# while the other three columns are the raw decomposition outputs.
df = pd.DataFrame({
    "seasonal": result.seasonal.values,
    "resid": result.resid,
    "observed": observed,
    "trend": result.trend,
})

# One panel per column, stacked vertically. `subplots` must be a bool (or a
# grouping of column names); the 4x1 grid shape belongs in `layout`.
df.plot(title="tsla", figsize=(15, 12), subplots=True, layout=(4, 1))

In [None]:
# Compare the seasonal factor with the max-normalised observed series.
plt.figure(figsize=(12,6))
# Each line needs a label, otherwise plt.legend() has nothing to show.
plt.plot(df.seasonal, label="seasonal")
plt.plot(df.observed, color='green', label="observed")

# Both series are unitless ratios (a multiplicative seasonal factor and
# price / max price), so the axis is not a price axis.
plt.ylabel("Relative value")
plt.xlabel("Date")
plt.legend()
plt.title("seasonal & observed")

In [None]:
# Re-draw the HP-filter overlay (trend, raw prices, shifted cycle).
# The HP components are recomputed here under their own names because the
# normalisation cell further up rebinds `trend`, so reusing that name would
# not plot the HP trend on the price scale.
hp_cycle, hp_trend = sm.tsa.filters.hpfilter(data.TSLA)

plt.figure(figsize=(12,6))
plt.plot(hp_trend, label = "Trend")
plt.plot(data.TSLA , label = "TSLA")
# Shift the cycle up by 200 so it is visible next to the price-level lines.
plt.plot(hp_cycle + 200, label = "Cycle")
plt.legend()
plt.ylabel("Price")
plt.xlabel("Date")
plt.title("Note that cycle is offset by 200")

In [None]:
# Import the submodule explicitly: a bare `import scipy` is not guaranteed to
# expose `scipy.stats` on every SciPy version.
import scipy.stats

# Significance level used for the verdict below.
ALPHA = 0.05

# Pearson correlation between the seasonal factor and the normalised prices.
# NOTE(review): the p-value assumes independent observations; both inputs are
# time series, so treat the significance verdict as indicative only.
corr, pVal = scipy.stats.pearsonr(df.seasonal, df.observed)

print("corr is: ", corr)
if pVal < ALPHA:
    print("P-value is significant! There is a correlation, ", pVal)
    print("Given our significance level of α = 0.05, There is sufficient evidence to conclude that there is a significant linear relationship between x and y because the correlation coefficient is significantly different from 0.")
else:
    # Always report an outcome so a non-significant result is not silent.
    print("P-value is not significant; there is insufficient evidence of a linear relationship, ", pVal)