In [3]:
import json
import os
import time

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests
from polygon import RESTClient
from sklearn.linear_model import LinearRegression
# Render every float in pandas output with exactly three decimal places.
pd.set_option('display.float_format', lambda x: '%.3f' % x)

In [4]:
# Function to get data for a single ticker

def getTickerDailyData(client, ticker="IBM", start="2023-01-01", end="2023-02-01"):
    """Download daily open/close/high/low prices for a single ticker.

    One request is made through ``client.stocks_equities_daily_open_close`` for
    every business day (Monday-Friday, ``freq='B'``) from ``start`` to ``end``
    inclusive. A day whose request raises is skipped, so the result only holds
    days that returned data. (Presumably this is what happens on market
    holidays, which the business-day calendar does not exclude.)

    Parameters
    ----------
    client : object
        Must expose ``stocks_equities_daily_open_close(symbol=..., date=...)``
        returning an object with ``from_``, ``open``, ``close``, ``high`` and
        ``low`` attributes.
    ticker : str
        Symbol to request; also stored in the ``ticker`` column.
    start, end : str or datetime-like
        Bounds of the date range, passed to ``pd.date_range``.

    Returns
    -------
    pandas.DataFrame
        Columns ``date, open, close, high, low, ticker``, one row per day that
        was fetched successfully (empty when no day succeeded).
    """
    print(f'Starting data pull for {ticker}...')
    # Use a dedicated name for the timer so the `start` date argument is not
    # overwritten before the date range is built.
    start_time = time.time()
    rows = []
    skipped = 0
    last_error = None
    for business_day in pd.date_range(start, end, freq='B'):
        day = business_day.strftime('%Y-%m-%d')
        try:
            response = client.stocks_equities_daily_open_close(symbol=ticker, date=day)
            rows.append([pd.to_datetime(response.from_), response.open, response.close,
                         response.high, response.low, ticker])
        except Exception as exc:
            # Best effort: skip days that fail, but keep count so a run where
            # every request fails (bad key, rate limit, ...) is not silent.
            # `Exception` leaves KeyboardInterrupt free to stop a long pull.
            skipped += 1
            last_error = exc
    elapsed_time = time.time() - start_time
    if skipped:
        print(f'Skipped {skipped} business day(s) with no data; last error: {last_error!r}')
    print(f'Function run time:{time.strftime("%H:%M:%S", time.gmtime(elapsed_time))}')
    return pd.DataFrame(rows, columns=['date', 'open', 'close', 'high', 'low', 'ticker'])
    

In [None]:
# Get data from start_date to end_date for the ticker

# Read the Polygon API key from the environment instead of storing it in the
# notebook, where it would be committed and shared along with the outputs.
# NOTE(review): a key that has ever been saved in this notebook should be
# treated as exposed and rotated.
api_key = os.environ.get("POLYGON_API_KEY")
if not api_key:
    raise RuntimeError("Set the POLYGON_API_KEY environment variable before running this cell.")

client = RESTClient(auth_key=api_key)
ticker = "F"
start_date = "2018-01-01"
end_date = "2023-02-01"

# One request is made per business day in the range, so this cell is slow.
ezpw_data = getTickerDailyData(client, ticker=ticker, start=start_date, end=end_date)

Starting data pull for F...


In [None]:
# Function that takes in two values from the ticker data and plots them

def createPlot(xvalue, yvalue, xlabel, ylabel, title):
    """Draw one line chart of ``yvalue`` against ``xvalue`` and show it.

    The axes get the given x/y labels and title, the x tick labels are rotated
    45 degrees, the grid is toggled via ``ax.grid()`` and the figure is laid
    out tightly before being displayed. Nothing is returned.
    """
    figure, axes = plt.subplots()
    axes.plot(xvalue, yvalue)

    axes.set_xlabel(xlabel)
    axes.set_ylabel(ylabel)
    axes.set_title(title)

    # Slant the x tick labels so long labels (e.g. dates) do not overlap.
    for label in axes.get_xticklabels():
        label.set_rotation(45)

    axes.grid()
    figure.tight_layout()
    plt.show()

In [None]:
# Plot the daily open price over the downloaded date range.
createPlot(
    xvalue=ezpw_data["date"].values,
    yvalue=ezpw_data["open"].values,
    xlabel="date",
    ylabel="price",
    title="Open Prices",
)

 Build Model

Our first model will be a SA-ARISMA (Seasonally Adjusted, Autoregressive, Integrated, Simple Moving Average) model that uses only the series itself, predicting one day in advance and then incorporating that data into its next daily prediction.

In [None]:
# NOTE: only the last expression of a cell is rendered, so this preview
# produces no output here.
ezpw_data.head()

# Take an explicit copy: columns are added to this frame in later cells, and
# assigning into a column selection of `ezpw_data` triggers pandas'
# SettingWithCopyWarning.
ezpw_data_open = ezpw_data[['date', 'open', 'ticker']].copy()

In [None]:
# Preview the first rows of the date/open/ticker subset.
ezpw_data_open.head()

In [None]:
# Lagged opens: column ar<k> holds the open price from k rows earlier.
for lag in (1, 2, 3, 4):
    ezpw_data_open[f"ar{lag}"] = ezpw_data_open["open"].shift(lag)

# Five-row rolling mean of the one-row lag, so the current row's own open is
# never part of its moving average.
ezpw_data_open["SMA"] = ezpw_data_open["ar1"].rolling(window=5).mean()

# Difference between the two most recent lagged opens.
ezpw_data_open["int"] = ezpw_data_open["ar1"] - ezpw_data_open["ar2"]

# Constant column of ones.
ezpw_data_open["intercept"] = np.ones(len(ezpw_data_open))

In [None]:
# Preview the engineered columns; the earliest rows hold NaN wherever a lag
# or the rolling mean is not yet defined.
ezpw_data_open.head()

In [None]:
# (rows, columns) of the feature table.
ezpw_data_open.shape

In [None]:
# Training window: rows 5-499 by position. Row 5 is the first row where every
# feature is defined (SMA is a 5-row rolling mean over ar1, which itself only
# starts at row 1). Copy the slice because prediction and error columns are
# added to it later; assigning into a bare slice raises SettingWithCopyWarning.
train_data = ezpw_data_open.iloc[5:500].copy()

In [None]:
# Preview the first training rows.
train_data.head()

In [None]:
# Linear regression estimator created with its default settings.
reg = LinearRegression()

In [None]:
# Fit on the training window: regress the open price on ar1, ar2, SMA and int.
# `fit` returns the estimator it was called on, so `fit_model` and `reg` refer
# to the same object.
fit_model = reg.fit(train_data[['ar1','ar2', 'SMA', 'int']], train_data[['open']])

In [None]:
# Fitted coefficients, in the order the feature columns were passed to fit
# (ar1, ar2, SMA, int).
fit_model.coef_

In [None]:
# Fitted intercept term of the regression.
fit_model.intercept_

In [None]:
# Constructor settings of the estimator (not the fitted coefficients).
fit_model.get_params()

In [None]:
# In-sample predictions: score the same rows the model was fitted on.
train_data['prediction_insample'] = fit_model.predict(train_data[['ar1','ar2', 'SMA', 'int']])

In [None]:
# Preview the training rows together with the new prediction_insample column.
train_data.head()

In [None]:
# Residual (actual open minus in-sample prediction) and its squared and
# absolute forms, one value per training row.
train_data["error"] = train_data["open"] - train_data["prediction_insample"]
train_data["sq_error"] = train_data["error"].pow(2)
train_data["abs_error"] = train_data["error"].abs()

In [None]:
# Display the training frame, now including the error columns.
train_data

In [None]:
# Mean error, mean squared error and mean absolute error over the training rows.
train_data[['error', 'sq_error', 'abs_error']].mean()

In [None]:
# Compare actual opens with the in-sample predictions for the first 20
# training rows.
fig, ax = plt.subplots()
ax.plot(train_data.date.values[:20], train_data.open.values[:20], label="actual open")
ax.plot(train_data.date.values[:20], train_data.prediction_insample.values[:20], label="in-sample prediction")

ax.set(xlabel='date', ylabel='price', title="Open Prices")
for tick in ax.get_xticklabels():
    tick.set_rotation(45)
ax.grid()
# Label the two series; without a legend the lines cannot be told apart.
ax.legend()
fig.tight_layout()
plt.show()

In [None]:
# Test window: every row from position 500 onward, i.e. the rows after the
# training slice. Copy the slice because prediction and error columns are added
# to it later; assigning into a bare slice raises SettingWithCopyWarning.
test_data = ezpw_data_open.iloc[500:].copy()

In [None]:
# Display the test rows.
test_data

In [None]:
# Out-of-sample predictions: score the test rows with `fit_model`, the
# estimator fitted on the training window.
test_data['prediction_oos'] = fit_model.predict(test_data[['ar1','ar2', 'SMA', 'int']])

In [None]:
# Preview the test rows together with the new prediction_oos column.
test_data.head()

In [None]:
# Residual (actual open minus out-of-sample prediction) and its squared and
# absolute forms, one value per test row.
test_data["error"] = test_data["open"] - test_data["prediction_oos"]
test_data["sq_error"] = test_data["error"].pow(2)
test_data["abs_error"] = test_data["error"].abs()

In [None]:
# Display the test frame, now including the out-of-sample error columns.
test_data

In [None]:
# Mean error, mean squared error and mean absolute error over the test rows.
test_data[['error', 'sq_error', 'abs_error']].mean()

In [None]:
# Compare actual opens with the out-of-sample predictions for the first 20
# test rows.
fig, ax = plt.subplots()
ax.plot(test_data.date.values[:20], test_data.open.values[:20], label="actual open")
ax.plot(test_data.date.values[:20], test_data.prediction_oos.values[:20], label="out-of-sample prediction")

ax.set(xlabel='date', ylabel='price', title="Open Prices")
for tick in ax.get_xticklabels():
    tick.set_rotation(45)
ax.grid()
# Label the two series; without a legend the lines cannot be told apart.
ax.legend()
fig.tight_layout()
plt.show()

In [None]:
# Fit a separate estimator on the test window. Calling `reg.fit` again would
# refit the shared `reg` object in place (fit returns the estimator itself),
# silently replacing the training-period coefficients behind `fit_model` too.
fit_model2 = LinearRegression().fit(test_data[['ar1','ar2', 'SMA', 'int']], test_data[['open']])

In [None]:
# Predictions from the model fitted on the test window itself, i.e. in-sample
# for these rows.
test_data['prediction_inSample']= fit_model2.predict(test_data[['ar1','ar2', 'SMA', 'int']])

In [None]:
# Display the test frame, now including the prediction_inSample column.
test_data

In [None]:
# Residual (actual open minus the test-window model's own fit) and its
# squared and absolute forms, one value per test row.
test_data["in_sample_error"] = test_data["open"] - test_data["prediction_inSample"]
test_data["in_sample_sq_error"] = test_data["in_sample_error"].pow(2)
test_data["in_sample_abs_error"] = test_data["in_sample_error"].abs()

In [None]:
# Mean error, mean squared error and mean absolute error of the test-window
# model on its own fitting rows.
test_data[['in_sample_error', 'in_sample_sq_error', 'in_sample_abs_error']].mean()