In [1]:
import pandas as pd
import quandl
import matplotlib.pyplot as plt
import math
import datetime
import yfinance as yf
from sklearn.model_selection import train_test_split
import tensorflow as tf
import numpy as np
from sklearn.preprocessing import StandardScaler

from statsmodels.tsa.stattools import adfuller

Data = pd.DataFrame()

In [None]:
start_date = datetime.datetime(2014,12,12)

# S&P500 future data from "CHRIS/CME_SP1" database
Data1 = quandl.get("CHRIS/CME_SP1", authtoken="U_Gp39gZutpHmaFgC65Y", start_date=start_date)
# number of declining and advancing stocks
declining = quandl.get("URC/NYSE_DEC", authtoken="U_Gp39gZutpHmaFgC65Y", start_date=start_date)
advancing = quandl.get("URC/NYSE_ADV", authtoken="U_Gp39gZutpHmaFgC65Y", start_date=start_date)
adv_vol = quandl.get("URC/NYSE_ADV_VOL", authtoken="U_Gp39gZutpHmaFgC65Y", start_date=start_date)
dec_vol = quandl.get("URC/NYSE_DEC_VOL", authtoken="U_Gp39gZutpHmaFgC65Y", start_date=start_date)

In [None]:
Data['Close'] = Data1['Last']
Data['declining'] = declining['Numbers of Stocks']
Data['advancing'] = advancing['Numbers of Stocks']
Data['dec_vol'] = dec_vol['Numbers of Stocks']
Data['adv_vol'] = adv_vol['Numbers of Stocks']

In [None]:
Data.to_csv("data.csv")

In [2]:
Data = pd.read_csv("data.csv", parse_dates=True, index_col='Date')

In [3]:
Data.head()

Unnamed: 0_level_0,Close,declining,advancing,dec_vol,adv_vol
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2014-12-12,1997.5,2519.0,663.0,3378800000.0,713534200.0
2014-12-15,1991.0,2407.0,778.0,3433819000.0,848258700.0
2014-12-16,1971.2,1833.0,1335.0,2611648000.0,2256718000.0
2014-12-17,2013.0,373.0,2833.0,320082400.0,216829300.0
2014-12-18,2064.0,625.0,2570.0,509800100.0,4112185000.0


In [4]:
Data2 = pd.read_csv("local_future.csv")
Data3  = pd.read_csv('local_data.csv')

Data2['Date'] = pd.to_datetime(Data2['Date'])
Data2.set_index('Date', inplace=True)

Data3['Date'] = pd.to_datetime(Data3['Date'])
Data3.set_index('Date', inplace=True)

Data['PCR'] = Data3['S&P PUT-CALL RATIO']

df = yf.Ticker('^VIX').history(start="2014-12-13")

Data['VIX'] = df['Close']

# find the TRIN value using the number and volume of advancing and declining stocks
AD_ratio = Data['advancing'] / Data['declining']
AD_vol_ratio = Data['adv_vol'].divide(Data['dec_vol'])

TRIN = AD_ratio / AD_vol_ratio

Data['TRIN'] = TRIN

Data['TRIN'] = Data['TRIN'].apply(lambda x: math.log(x))

In [5]:
Data = Data.loc[:'2018-08-10']
Data = Data.dropna()

In [6]:
ma1 = 5
ma2 = 15
ma3 = 40
window_size = 15
predict_days = 1

Data['Return'] = Data['Close'].pct_change()
Data['MA'+str(ma1)] = Data['Return'].rolling(ma1).mean()
Data['MA'+str(ma2)] = Data['Return'].rolling(ma2).mean()
Data['MA'+str(ma3)] = Data['Return'].rolling(ma3).mean()

In [7]:
Data['After_' + str(predict_days)] = Data['Close'].shift(-predict_days)

In [8]:
threshold = 0.01
for index, row in Data.iterrows():
    if (row['After_' + str(predict_days)] / row['Close'] - 1) > threshold:
        Data.loc[index, 'Label'] = 0
    elif (row['After_' + str(predict_days)] / row['Close'] - 1) < -threshold:
        Data.loc[index, 'Label'] = 1
    else:
        Data.loc[index, 'Label'] = 2

In [9]:
print(sum(Data['Label'] == 0))
print(sum(Data['Label'] == 1))
print(sum(Data['Label'] == 2))

90
73
753


In [10]:
Data.head(10)

Unnamed: 0_level_0,Close,declining,advancing,dec_vol,adv_vol,PCR,VIX,TRIN,Return,MA5,MA15,MA40,After_1,Label
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
2014-12-12,1997.5,2519.0,663.0,3378800000.0,713534200.0,2.49,21.08,0.220203,,,,,1991.0,2.0
2014-12-15,1991.0,2407.0,778.0,3433819000.0,848258700.0,2.46,20.42,0.268833,-0.003254,,,,1971.2,2.0
2014-12-16,1971.2,1833.0,1335.0,2611648000.0,2256718000.0,2.04,23.57,-0.170953,-0.009945,,,,2013.0,0.0
2014-12-17,2013.0,373.0,2833.0,320082400.0,216829300.0,2.63,19.440001,2.416981,0.021205,,,,2064.0,0.0
2014-12-18,2064.0,625.0,2570.0,509800100.0,4112185000.0,1.05,16.809999,-0.673781,0.025335,,,,2066.7,2.0
2014-12-19,2066.7,1129.0,2042.0,1765002000.0,3944460000.0,1.7,16.49,-0.211563,0.001308,0.00693,,,2072.5,2.0
2014-12-22,2072.5,1342.0,1837.0,1561649000.0,1731905000.0,1.53,15.25,0.210493,0.002806,0.008142,,,2079.1,2.0
2014-12-23,2079.1,1153.0,2033.0,949546600.0,2025055000.0,1.85,14.8,-0.190222,0.003185,0.010768,,,2078.0,2.0
2014-12-24,2078.0,1577.0,1527.0,742866300.0,623306400.0,1.62,14.37,0.143259,-0.000529,0.006421,,,2084.2,2.0
2014-12-26,2084.2,1041.0,2084.0,511420000.0,1106867000.0,1.09,14.5,-0.07799,0.002984,0.001951,,,2085.6,2.0


In [11]:
Data = pd.get_dummies(Data, columns = ['Label'] )

In [15]:
Data.head(2)

Unnamed: 0_level_0,Close,declining,advancing,dec_vol,adv_vol,PCR,VIX,TRIN,Return,MA5,MA15,MA40,After_1,Label_0.0,Label_1.0,Label_2.0
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
2014-12-12,1997.5,2519.0,663.0,3378800000.0,713534163.0,2.49,21.08,0.220203,,,,,1991.0,0,0,1
2014-12-15,1991.0,2407.0,778.0,3433819000.0,848258739.0,2.46,20.42,0.268833,-0.003254,,,,1971.2,0,0,1


In [16]:
ml_data = Data.drop(['Close', 'After_' + str(predict_days)], axis=1)

ml_data = ml_data.dropna()

In [17]:
ml_data.values.shape

(876, 14)

In [19]:
ml_data.head(2)

Unnamed: 0_level_0,declining,advancing,dec_vol,adv_vol,PCR,VIX,TRIN,Return,MA5,MA15,MA40,Label_0.0,Label_1.0,Label_2.0
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
2015-02-11,1700.0,1457.0,1986305000.0,1488967000.0,1.85,16.959999,0.133945,0.0016,0.003544,0.001304,0.000891,0,0,1
2015-02-12,733.0,2435.0,618533500.0,3081230000.0,1.56,15.34,-0.405176,0.008957,0.002872,0.000924,0.001196,0,0,1


In [20]:
ml_train = ml_data[:700]
ml_test = ml_data[700:875]

sc = StandardScaler()
ml_train = sc.fit_transform(ml_train)
ml_test = sc.transform(ml_test)

print(ml_train.shape)
print(ml_test.shape)

(700, 14)
(175, 14)


In [21]:
batch_size = 1

def windowed_dataset(series, window_size, batch_size):
    ds = tf.data.Dataset.from_tensor_slices(series)
    ds = ds.window(window_size, shift=1, drop_remainder=True)
    ds = ds.flat_map(lambda w: w.batch(window_size))
    ds = ds.map(lambda w: (w[:window_size, :11], w[-1][11:]))
    ds = ds.batch(batch_size).prefetch(1)
    return ds

In [22]:
ds = windowed_dataset(ml_train, window_size, batch_size)
ds

<PrefetchDataset shapes: ((None, None, 11), (None, 3)), types: (tf.float64, tf.float64)>

In [23]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, Bidirectional

model = Sequential([
    Bidirectional(LSTM(40, return_sequences=True), input_shape=[window_size, 11]),
    Bidirectional(LSTM(40, return_sequences=True)),
    Dense(50, activation='relu'),
    Dense(20, activation='relu'),
    Dense(3, activation='softmax')
])

In [24]:
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
bidirectional (Bidirectional (None, 15, 80)            16640     
_________________________________________________________________
bidirectional_1 (Bidirection (None, 15, 80)            38720     
_________________________________________________________________
dense (Dense)                (None, 15, 50)            4050      
_________________________________________________________________
dense_1 (Dense)              (None, 15, 20)            1020      
_________________________________________________________________
dense_2 (Dense)              (None, 15, 3)             63        
Total params: 60,493
Trainable params: 60,493
Non-trainable params: 0
_________________________________________________________________


In [25]:
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

In [26]:
history = model.fit(ds, epochs=100, verbose=1)

W0916 18:24:04.099664 46004 deprecation.py:323] From C:\Users\trimu\Miniconda3\envs\tf_20\lib\site-packages\tensorflow\python\ops\math_grad.py:1250: add_dispatch_support.<locals>.wrapper (from tensorflow.python.ops.array_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

Epoch 79/100
Epoch 80/100
Epoch 81/100
Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100


In [53]:
prediction = model.predict(ds)

In [57]:
prediction.shape

(686, 15, 3)

In [48]:
prediction

array([0, 0, 2, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2,
       2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 1, 0, 0, 0, 1, 1,
       1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 2, 0,
       0, 0, 0, 0, 0, 0, 0, 2, 2, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 2, 1, 2,
       1, 1, 1, 1, 0, 1, 1, 1, 2, 2, 2, 1, 2, 1, 1, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1,
       1, 0, 0, 0, 0, 2, 2, 0, 0, 0, 0, 0, 0, 0, 2, 1, 1, 2, 2, 2, 1, 2,
       2, 1, 1, 2, 1, 1, 2, 0, 0, 0, 0, 0, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1,
       1, 2, 1, 1, 1, 2, 1, 2, 1, 0, 1, 2, 1, 1, 2, 1, 2, 1, 1, 1, 1, 0,
       0, 0, 2, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

In [46]:
original = np.argmax(ml_train[:, 11:], axis=1)
original

array([2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 1, 2, 0, 2, 0,
       2, 0, 2, 2, 2, 2, 1, 2, 2, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 0, 2, 2, 2, 0, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2, 0,
       1, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 1, 2, 0, 0, 2, 2, 2, 2, 2, 2,
       2, 2, 1, 2, 0, 2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 1, 1,
       1, 1, 2, 0, 0, 2, 1, 1, 0, 2, 1, 0, 1, 2, 2, 2, 0, 2, 2, 1, 2, 1,
       2, 2, 2, 1, 2, 0, 2, 0, 0, 2, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 0,
       2, 2, 2, 0, 2, 2, 0, 2, 2, 2, 2, 1, 2, 2, 1, 1, 0, 2, 0, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 1, 0, 2, 1, 2, 2, 1, 2, 0, 0, 1, 1, 0, 0, 2, 2,
       2, 0, 2, 2, 1, 2, 1, 1, 1, 2, 2, 1, 0, 1, 2, 1, 2, 0, 1, 0, 1, 2,
       0, 2, 1, 2, 2, 1, 1, 2, 2, 1, 0, 0, 0, 2, 2, 0, 1, 2, 0, 2, 2, 0,
       2, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 0, 1, 2, 2, 0, 2, 2, 2, 2, 2, 2, 2, 2,

In [43]:
from sklearn.metrics import accuracy_score

In [44]:
accuracy_score(original[14:], prediction)

0.12244897959183673

In [None]:
ds = windowed_dataset(ml_test.values, window_size, batch_size)

In [None]:
prediction = model.predict(ds)
prediction = prediction.argmax(axis=1)

In [None]:
prediction

In [None]:
original = np.argmax(ml_test[['0', '1','2']].values, axis=1)[:-4]

In [None]:
original

In [None]:
accuracy_score(original, prediction)

In [None]:
ml_train.shape

In [None]:
prediction.shape

In [None]:
ml_test.shape