In [6]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go

from statsmodels.tsa.stattools import adfuller
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from statsmodels.tsa.stattools import kpss
from statsmodels.tsa.holtwinters import SimpleExpSmoothing, Holt

In [7]:
%%bigquery df_equity
select * from equity_prices.rbs_equity where Date > "2015-01-01" order by Date 

In [8]:
df_equity.shape

(1391, 7)

In [9]:
df_equity.dtypes

Date         object
Open         object
High         object
Low          object
Close        object
Adj_Close    object
Volume       object
dtype: object

In [10]:
df_equity['Date'] = pd.to_datetime(df_equity['Date'], errors='coerce')

In [11]:
df_equity.dtypes

Date         datetime64[ns]
Open                 object
High                 object
Low                  object
Close                object
Adj_Close            object
Volume               object
dtype: object

In [12]:
df_equity = df_equity[df_equity['Date']>= '01-01-2018']
df_equity.head()

Unnamed: 0,Date,Open,High,Low,Close,Adj_Close,Volume
758,2018-01-02,277.200012,279.700012,276.100006,277.299988,241.92572,6870581
759,2018-01-03,277.0,278.600006,275.5,275.5,240.355347,10660425
760,2018-01-04,276.299988,279.399994,275.600006,278.0,242.536407,12082941
761,2018-01-05,278.0,279.299988,276.100006,276.299988,241.053284,9925316
762,2018-01-08,276.5,279.5,275.799988,278.700012,243.147156,12129375


In [13]:
df_equity.shape

(633, 7)

#### First get a complete data set
Note: our equity prices have missing values (weekends, public holidays).  We should fill in these missing values using the values from the previous day then we will have a set of continuous dates/values ("forward filling").  This will also pre-empt warnings from the prediction methods below.

In [14]:
df_equity.set_index('Date', inplace=True, drop=True)
df_equity.head()

Unnamed: 0_level_0,Open,High,Low,Close,Adj_Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2018-01-02,277.200012,279.700012,276.100006,277.299988,241.92572,6870581
2018-01-03,277.0,278.600006,275.5,275.5,240.355347,10660425
2018-01-04,276.299988,279.399994,275.600006,278.0,242.536407,12082941
2018-01-05,278.0,279.299988,276.100006,276.299988,241.053284,9925316
2018-01-08,276.5,279.5,275.799988,278.700012,243.147156,12129375


In [15]:
df_equity = df_equity.resample('D').ffill().reset_index()
df_equity.head(10)

Unnamed: 0,Date,Open,High,Low,Close,Adj_Close,Volume
0,2018-01-02,277.200012,279.700012,276.100006,277.299988,241.92572,6870581
1,2018-01-03,277.0,278.600006,275.5,275.5,240.355347,10660425
2,2018-01-04,276.299988,279.399994,275.600006,278.0,242.536407,12082941
3,2018-01-05,278.0,279.299988,276.100006,276.299988,241.053284,9925316
4,2018-01-06,278.0,279.299988,276.100006,276.299988,241.053284,9925316
5,2018-01-07,278.0,279.299988,276.100006,276.299988,241.053284,9925316
6,2018-01-08,276.5,279.5,275.799988,278.700012,243.147156,12129375
7,2018-01-09,276.0,281.299988,274.700012,280.5,244.717499,10644412
8,2018-01-10,285.5,293.600006,283.600006,293.399994,255.971893,27692422
9,2018-01-11,293.0,297.5,289.600006,296.799988,258.938171,14965757


Note that the missing dates have been forward filled with the previous day's values.
Now set the index to be the Date because we will want a Series with a DateTime index for methods below.

In [16]:
df_equity.set_index('Date', inplace=True, drop=True)
df_equity.tail()

Unnamed: 0_level_0,Open,High,Low,Close,Adj_Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2020-06-28,121.449997,123.5,118.099998,118.5,118.5,17556806
2020-06-29,117.550003,121.800003,116.800003,121.199997,121.199997,15808442
2020-06-30,121.550003,123.0,119.199997,121.599998,121.599998,18530987
2020-07-01,121.800003,122.75,118.300003,119.449997,119.449997,12948349
2020-07-02,121.5,126.230003,120.052002,125.0,125.0,27146092


Get a Series of just the Close values.

In [17]:
equity_data = pd.Series(df_equity['Close'], index= df_equity.index)
equity_data.tail()

Date
2020-06-28    118.500000
2020-06-29    121.199997
2020-06-30    121.599998
2020-07-01    119.449997
2020-07-02    125.000000
Name: Close, dtype: object

In [19]:
equity_data[0] = equity_data[0].astype(float)

AttributeError: 'str' object has no attribute 'astype'

### Simple Exponential Smoothing (SES)

SES has one parameter called level (or smoothing constant or smoothing_level in statsmodels), denoted by alpha in the equations. This is a percentage of the error that you want to fold back into the level. It can be set from 0 to 1, 0 meaning apply no error correction and 1 meaning account for all the error.

In [18]:
ses_model = SimpleExpSmoothing(equity_data)
ses_fit1 = ses_model.fit()
ses_pred1 = ses_fit1.forecast(30)

ValueError: Pandas data cast to numpy dtype of object. Check input data with np.asarray(data).

In [None]:
ses_fit1.model.params['smoothing_level']

In [None]:
df_ses = ses_pred1.to_frame(name="vals")
# turn datetime index into a column...
df_ses.reset_index(level=0, inplace=True)
df_ses.columns = ['Date', 'Predicted_Close']
df_ses.head()