In [40]:
# General packages
import pandas as pd
import numpy as np
import hvplot.pandas
import datetime as dt

# Packages related to machine learning
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
from dateutil.relativedelta import relativedelta
    #for nueral networs
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam

# needed for API
import pandas_datareader as pdr
import yfinance as yfin
yfin.pdr_override()
from dotenv import load_dotenv
import os
import json
import requests

# Sentiment Score
from bs4 import BeautifulSoup
import matplotlib.pyplot as plt
from urllib.request import urlopen
from urllib.request import Request
import nltk
nltk.download('vader_lexicon')
from nltk.sentiment.vader import SentimentIntensityAnalyzer

#turn off warning signs for cleaner code
from warnings import filterwarnings
filterwarnings("ignore")

#import tools
from functions.vix_mod import vix_analysis
from functions.spy_mod import spy_analysis
from functions.econ_mod import get_econ_data
from functions.sent_mod import market_sent

ModuleNotFoundError: No module named 'tensorflow.keras.layers.core'

In [102]:
def create_variable_tables ():
    spy_df = spy_analysis()
    econ_df = get_econ_data()
    vix_df, cluster_model = vix_analysis()
    sentiment_df = market_sent()

    # Set Up DataFrame for Testing
    X_prep = pd.concat([vix_df, spy_df], axis=1)
    X_prep['y']=X_prep['spy_change'].shift(-1)
    X_prep = X_prep.dropna()
    X_prep = pd.concat([X_prep, econ_df, sentiment_df], axis=1)
    X_prep = X_prep.dropna(subset='spy_close')
    X_prep[np.isnan(X_prep)] = 0

    X_full = X = X_prep.drop(columns=['spy_close','y','high','low'])
    y_full = y = X_prep['y']

    # Create Variable Tables
    X_0 = X_prep[X_prep['labels']==0].drop(columns=['spy_close','vix_close','y'])
    X_1 = X_prep[X_prep['labels']==1].drop(columns=['spy_close','vix_close','y'])
    X_2 = X_prep[X_prep['labels']==2].drop(columns=['spy_close','vix_close','y'])

    y_0 = X_prep[X_prep['labels']==0]['y']
    y_1 = X_prep[X_prep['labels']==1]['y']
    y_2 = X_prep[X_prep['labels']==2]['y']
    
    return X_full, X_0, X_1 , X_2, y_full, y_0, y_1, y_2

X_full, X_0, X_1 , X_2, y_full, y_0, y_1, y_2 = create_variable_tables()
X_full.head()

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed


Unnamed: 0,vix_close,vix_change,labels,vix_days_in_label,vix_con_direction,volume,spy_change,volume_change,spy_con_direction,3_day_change,...,MORTGAGE30US,PRIME,MICH,TOTALSA,UMCSENT,HOUST,RECPROUSM156N,REAINTRATREARAT1YE,REAINTRATREARAT10Y,Sentiment
2003-09-08,18.26,0.004953,0.0,21.0,1.0,32632800.0,0.008265,0.031466,1.0,0.003095,...,0.018987,-0.058824,0.12,-0.053444,-0.017917,0.057829,0.02,-0.417762,0.083081,0.0
2003-09-09,18.85,0.032311,0.0,22.0,2.0,35053200.0,-0.006558,0.074171,-1.0,-0.003965,...,0.018987,-0.058824,0.12,-0.053444,-0.017917,0.057829,0.02,-0.417762,0.083081,0.0
2003-09-10,20.01,0.061538,2.0,1.0,3.0,45904900.0,-0.010097,0.309578,-2.0,-0.008461,...,0.018987,-0.058824,0.12,-0.053444,-0.017917,0.057829,0.02,-0.417762,0.083081,0.0
2003-09-11,19.25,-0.037981,0.0,1.0,-1.0,38396300.0,0.002942,-0.163569,1.0,-0.013695,...,0.018987,-0.058824,0.12,-0.053444,-0.017917,0.057829,0.02,-0.417762,0.083081,0.0
2003-09-12,18.68,-0.02961,0.0,2.0,-2.0,42524800.0,0.001858,0.107523,2.0,-0.00534,...,-0.043478,-0.058824,0.12,-0.053444,-0.017917,0.057829,0.02,-0.417762,0.083081,0.0


In [95]:
def create_train_test ():
    # Define train periods
    X_full, X_0, X_1 , X_2, y_full, y_0, y_1, y_2 = create_variable_tables()
    start_train = X_full.index.min()
    last_day = dt.datetime.strptime('2021-01-01', '%Y-%m-%d').date()
    end_train = last_day

    # Define test period
    start_test = last_day
    end_test = X_full.index.max()


    #Create train Data Frames
    X_full_train = X_full.loc[start_train: end_train]
    y_full_train = y_full.loc[start_train: end_train]

    # Create test DataFrames
    X_full_test = X_full.loc[start_test: end_test]
    y_full_test = y_full.loc[start_test: end_test]
    
    return X_full_train, y_full_train, X_full_test, y_full_test

X_full_train, y_full_train, X_full_test, y_full_test = create_train_test()
display(X_full_test.head())
display(y_full_test.head())

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed


Unnamed: 0,vix_close,vix_change,labels,vix_days_in_label,vix_con_direction,volume,spy_change,volume_change,spy_con_direction,3_day_change,...,MORTGAGE30US,PRIME,MICH,TOTALSA,UMCSENT,HOUST,RECPROUSM156N,REAINTRATREARAT1YE,REAINTRATREARAT10Y,Sentiment
2021-01-04,26.969999,0.185494,2.0,43.0,1.0,110210800.0,-0.013614,0.403589,-1.0,-0.007188,...,0.003759,-0.235294,0.2,0.023631,-0.021066,-0.029679,0.16,-0.123681,0.011604,0.0
2021-01-05,25.34,-0.060437,2.0,44.0,-1.0,66426200.0,0.006887,-0.39728,1.0,-0.001774,...,0.003759,-0.235294,0.2,0.023631,-0.021066,-0.029679,0.16,-0.123681,0.011604,0.0
2021-01-06,25.07,-0.010655,2.0,45.0,-2.0,107997700.0,0.005978,0.62583,2.0,-0.000883,...,0.003759,-0.235294,0.2,0.023631,-0.021066,-0.029679,0.16,-0.123681,0.011604,0.0
2021-01-07,22.370001,-0.107698,2.0,46.0,-3.0,68766800.0,0.014857,-0.363257,3.0,0.027956,...,-0.007491,-0.235294,0.2,0.023631,-0.021066,-0.029679,0.16,-0.123681,0.011604,0.0
2021-01-08,21.559999,-0.036209,2.0,47.0,-4.0,71677200.0,0.005698,0.042323,4.0,0.026742,...,-0.007491,-0.235294,0.2,0.023631,-0.021066,-0.029679,0.16,-0.123681,0.011604,0.0


2021-01-04    0.688736
2021-01-05    0.597839
2021-01-06    1.485749
2021-01-07    0.569767
2021-01-08   -0.674078
Name: y, dtype: float64

In [96]:
display(X_full_test.head())
display(y_full_test.head())

Unnamed: 0,vix_close,vix_change,labels,vix_days_in_label,vix_con_direction,volume,spy_change,volume_change,spy_con_direction,3_day_change,...,MORTGAGE30US,PRIME,MICH,TOTALSA,UMCSENT,HOUST,RECPROUSM156N,REAINTRATREARAT1YE,REAINTRATREARAT10Y,Sentiment
2021-01-04,26.969999,0.185494,2.0,43.0,1.0,110210800.0,-0.013614,0.403589,-1.0,-0.007188,...,0.003759,-0.235294,0.2,0.023631,-0.021066,-0.029679,0.16,-0.123681,0.011604,0.0
2021-01-05,25.34,-0.060437,2.0,44.0,-1.0,66426200.0,0.006887,-0.39728,1.0,-0.001774,...,0.003759,-0.235294,0.2,0.023631,-0.021066,-0.029679,0.16,-0.123681,0.011604,0.0
2021-01-06,25.07,-0.010655,2.0,45.0,-2.0,107997700.0,0.005978,0.62583,2.0,-0.000883,...,0.003759,-0.235294,0.2,0.023631,-0.021066,-0.029679,0.16,-0.123681,0.011604,0.0
2021-01-07,22.370001,-0.107698,2.0,46.0,-3.0,68766800.0,0.014857,-0.363257,3.0,0.027956,...,-0.007491,-0.235294,0.2,0.023631,-0.021066,-0.029679,0.16,-0.123681,0.011604,0.0
2021-01-08,21.559999,-0.036209,2.0,47.0,-4.0,71677200.0,0.005698,0.042323,4.0,0.026742,...,-0.007491,-0.235294,0.2,0.023631,-0.021066,-0.029679,0.16,-0.123681,0.011604,0.0


2021-01-04    0.688736
2021-01-05    0.597839
2021-01-06    1.485749
2021-01-07    0.569767
2021-01-08   -0.674078
Name: y, dtype: float64

In [97]:
def scale_x_data ():
    # fit Scale X Variables
    X_full_train, y_full_train, X_full_test, y_full_test = create_train_test()
    scaler = StandardScaler()
    X_full_scaler = scaler.fit(X_full_train)
    
    # Scale X Variables
    X_full_train_scaled = X_full_scaler.transform(X_full_train)
    X_full_test_scaled = X_full_scaler.transform(X_full_test)

    
    return X_full_train_scaled, X_full_test_scaled, y_full_train, y_full_test

print(X_full_test_scaled[:5,3])
display(X_full_test.head())

[-0.35440236 -0.34465723 -0.3349121  -0.32516697 -0.31542184]


Unnamed: 0,vix_close,vix_change,labels,vix_days_in_label,vix_con_direction,volume,spy_change,volume_change,spy_con_direction,3_day_change,...,MORTGAGE30US,PRIME,MICH,TOTALSA,UMCSENT,HOUST,RECPROUSM156N,REAINTRATREARAT1YE,REAINTRATREARAT10Y,Sentiment
2021-01-04,26.969999,0.185494,2.0,43.0,1.0,110210800.0,-0.013614,0.403589,-1.0,-0.007188,...,0.003759,-0.235294,0.2,0.023631,-0.021066,-0.029679,0.16,-0.123681,0.011604,0.0
2021-01-05,25.34,-0.060437,2.0,44.0,-1.0,66426200.0,0.006887,-0.39728,1.0,-0.001774,...,0.003759,-0.235294,0.2,0.023631,-0.021066,-0.029679,0.16,-0.123681,0.011604,0.0
2021-01-06,25.07,-0.010655,2.0,45.0,-2.0,107997700.0,0.005978,0.62583,2.0,-0.000883,...,0.003759,-0.235294,0.2,0.023631,-0.021066,-0.029679,0.16,-0.123681,0.011604,0.0
2021-01-07,22.370001,-0.107698,2.0,46.0,-3.0,68766800.0,0.014857,-0.363257,3.0,0.027956,...,-0.007491,-0.235294,0.2,0.023631,-0.021066,-0.029679,0.16,-0.123681,0.011604,0.0
2021-01-08,21.559999,-0.036209,2.0,47.0,-4.0,71677200.0,0.005698,0.042323,4.0,0.026742,...,-0.007491,-0.235294,0.2,0.023631,-0.021066,-0.029679,0.16,-0.123681,0.011604,0.0


In [105]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from kerastuner.tuners import RandomSearch

X_full_train_scaled, X_full_test_scaled, y_full_train, y_full_test = scale_x_data()

# Create nueral network
nn = Sequential()

# add first hidden layer
nn.add(Dense(units=30, input_dim=46, activation="relu"))
# add second hidden layer
nn.add(Dense(units=15, activation="relu"))
# add third hidden layer
nn.add(Dense(units=5, activation="relu"))
# add fourth hidden layer
#nn.add(Dense(units=5, activation="relu"))
# Output layer
nn.add(Dense(units=1, activation="linear"))

# Compile the model
nn.compile(loss="mean_squared_error", optimizer='adam', metrics=['mean_squared_error'])

# Fit the model
model_full = nn.fit(X_full_train_scaled, y_full_train, epochs=100, batch_size=100)

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100


In [106]:
model_loss, model_accuracy = nn.evaluate(X_full_test, y_full_test)


y_pred = nn.predict(X_full_test)
pred_df = pd.DataFrame()
y_pred = pd.DataFrame(y_pred)
pred_df['pred']=y_pred
real = pd.DataFrame(y_full_test.values)
pred_df['real'] = real
new_df = pd.DataFrame(X_full_test)
pred_df.head(65)

pred_df['adjust'] = np.where(pred_df['pred']>0,pred_df['real']*-1,pred_df['real']*(1))
pred_df['base'] = np.where(abs(pred_df['pred'])>15,0,pred_df['pred'])
pred_df['p'] = np.where(pred_df['pred']>=0,1,-1)
pred_df['r'] = np.where(pred_df['real']>=0,1,-1)
pred_df['call'] = np.where(pred_df['p']==pred_df['r'],1,0)

prod = (1+pred_df/100).cumprod()-1
prod[100:200].hvplot()

print(pred_df['call'].sum()/pred_df['call'].count())

prod = (1+pred_df/100).cumprod()-1

print(pred_df[30:90].head(50))
prod[30:90].hvplot(y=["adjust","real","base"])

0.4859154929577465
        pred      real    adjust      base  p  r  call
30 -0.000508 -0.004256 -0.004256 -0.000508 -1 -1     1
31 -0.000508 -0.001766 -0.001766 -0.000508 -1 -1     1
32 -0.000508 -0.007692 -0.007692 -0.000508 -1 -1     1
33 -0.000508  0.001214  0.001214 -0.000508 -1  1     0
34 -0.000508  0.011019  0.011019 -0.000508 -1  1     0
35 -0.000508 -0.024096 -0.024096 -0.000508 -1 -1     1
36 -0.000508 -0.005153 -0.005153 -0.000508 -1 -1     1
37 -0.000508  0.024240  0.024240 -0.000508 -1  1     0
38 -0.000508 -0.007803 -0.007803 -0.000508 -1 -1     1
39 -0.000508 -0.013246 -0.013246 -0.000508 -1 -1     1
40 -0.000508 -0.012375 -0.012375 -0.000508 -1 -1     1
41 -0.000508  0.018397  0.018397 -0.000508 -1  1     0
42 -0.000508 -0.004979 -0.004979 -0.000508 -1 -1     1
43 -0.000508  0.014278  0.014278 -0.000508 -1  1     0
44 -0.000508  0.006225  0.006225 -0.000508 -1  1     0
45 -0.000508  0.010139  0.010139 -0.000508 -1  1     0
46 -0.000508  0.001347  0.001347 -0.000508 -1 