In [4]:
import os
import numpy as np
import pandas as pd
import xgboost as xgb
import matplotlib.pyplot as plt
from xgboost import plot_importance, plot_tree
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split, GridSearchCV
import plotly as py
import plotly.io as pio
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
from warnings import simplefilter
import pyEX as p
simplefilter(action='ignore', category=FutureWarning)
simplefilter(action='ignore', category=DeprecationWarning)
import talib as t
import warnings
warnings.filterwarnings("ignore")
c = p.Client(api_token=YOUR API KEY)
sym='IEX'
timeframe='1y'
df = c.chartDF(symbol=sym, timeframe=timeframe)
init_notebook_mode(connected=True)

layout=go.Layout(paper_bgcolor='rgba(0,0,0,0)', plot_bgcolor='rgba(103, 128, 159, .8)')
fig = go.Figure(layout=layout)
fig.update_layout(
    font_color="white",
)
templated_fig = pio.to_templated(fig)
pio.templates['my_template'] = templated_fig.layout.template
pio.templates.default = 'my_template'
macd, macdsignal,macdhist=t.MACD(df['close'].values.astype(float))
df['macd']=macd
df['macdsignal']=macdsignal
df = df.iloc[33:] # Remove inital starting point because of moving averages and MACD line
df = df[:-1]      # Because of shifting close price

df = df.iloc[33:] # Because of moving averages and MACD line
df = df[:-1]      # Because of shifting close price

df.index = range(len(df))

test_size  = 0.15
valid_size = 0.15

test_split_idx  = int(df.shape[0] * (1-test_size))
valid_split_idx = int(df.shape[0] * (1-(valid_size+test_size)))

train_df  = df.loc[:valid_split_idx].copy()
valid_df  = df.loc[valid_split_idx+1:test_split_idx].copy()
test_df   = df.loc[test_split_idx+1:].copy()
#Overview of train, test, and validation sets 
fig = go.Figure()
fig.add_trace(go.Scatter(x=train_df.date, y=train_df.close, name='Training'))
fig.add_trace(go.Scatter(x=valid_df.date, y=valid_df.close, name='Validation'))
fig.add_trace(go.Scatter(x=test_df.date,  y=test_df.close,  name='Test'))
fig.show()


In [5]:
#Predict closing price 
drop_cols = ['date', 'volume', 'open', 'low', 'high', 'symbol', 'id', 'key', 'updated', 'label', 'subkey']

train_df = train_df.drop(drop_cols, 1)
valid_df = valid_df.drop(drop_cols, 1)
test_df  = test_df.drop(drop_cols, 1)

y_train = train_df['close'].copy()
X_train = train_df.drop(['close'], 1)

y_valid = valid_df['close'].copy()
X_valid = valid_df.drop(['close'], 1)

y_test  = test_df['close'].copy()
X_test  = test_df.drop(['close'], 1)

#Setup model parameters 
parameters = {
    'n_estimators': [100, 200, 300, 400],
    'learning_rate': [0.001, 0.005, 0.01, 0.05],
    'max_depth': [8, 10, 12, 15],
    'gamma': [0.001, 0.005, 0.01, 0.02],
    'random_state': [42]
}
eval_set = [(X_train, y_train), (X_valid, y_valid)]
#Initialize model
model = xgb.XGBRegressor(eval_set=eval_set, objective='reg:squarederror', verbose = False)
clf = GridSearchCV(model, parameters)
#Train model using prior year worth of data 
model = xgb.XGBRegressor(eval_set = [(X_train, y_train), (X_valid, y_valid)], objective='reg:squarederror')
model.fit(X_train, y_train, eval_set=eval_set, verbose=False)

Parameters: { eval_set } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.




XGBRegressor(base_score=0.5, booster='gbtree', colsample_bylevel=1,
             colsample_bynode=1, colsample_bytree=1,
             eval_set=[(     changeOverTime  marketChangeOverTime   uOpen  uClose    uHigh      uLow  \
0         -0.031987             -0.031987  195.00  193.38  195.640  192.7600   
1         -0.029033             -0.029033  195.92  193.97  195.920  192.4900   
2         -0.018521             -0.018521  196.90  196.07  197.610  194.1700   
3         -0.011563             -0.011563  196.63  197.46  199.500  195.1...
             gamma=0, gpu_id=-1, importance_type='gain',
             interaction_constraints='', learning_rate=0.300000012,
             max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
             monotone_constraints='()', n_estimators=100, n_jobs=6,
             num_parallel_tree=1, random_state=0, reg_alpha=0, reg_lambda=1,
             scale_pos_weight=1, subsample=1, tree_method='exact',
             validate_parameters=1, verbosit

In [6]:
#Predict using untrained recent data 
y_pred = model.predict(X_test)
print(f'y_true = {np.array(y_test)[:50]}')
print(f'y_pred = {y_pred[:50]}')

y_true = [146.63 151.52 151.   146.09 143.96 138.51 136.38 133.2  138.11 136.88
 131.18 125.81 119.36 119.99 114.65 116.48 123.36 114.   125.96 115.7
 130.85 123.14 134.87 144.45 135.69 146.24 148.21]
y_pred = [148.76001 151.12624 151.18861 148.80087 148.92197 149.394   148.81812
 148.73984 148.77823 148.9082  149.35857 149.434   149.58478 149.12738
 149.58563 149.0926  149.0226  149.58478 149.0226  149.58478 149.0226
 149.58478 149.02129 149.25014 149.25908 149.39438 149.08868]


In [None]:

predicted_prices = df.loc[test_split_idx+1:].copy()
predicted_prices['Close'] = y_pred

fig = make_subplots(rows=2, cols=1)
fig.add_trace(go.Scatter(x=df.date, y=df.close,
                         name='Truth',
                         marker_color='LightSkyBlue'), row=1, col=1)

fig.add_trace(go.Scatter(x=predicted_prices.date,
                         y=predicted_prices.close,
                         name='Prediction',
                         marker_color='MediumPurple'), row=1, col=1)

fig.add_trace(go.Scatter(x=predicted_prices.date,
                         y=y_test,
                         name='Truth',
                         marker_color='LightSkyBlue',
                         showlegend=False), row=2, col=1)

fig.add_trace(go.Scatter(x=predicted_prices.date,
                         y=y_pred,
                         name='Prediction',
                         marker_color='MediumPurple',
                         showlegend=False), row=2, col=1)

fig.show()