# Import Libraries

In [22]:
import pandas_datareader as web
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
# import pywt
import seaborn
from statsmodels.robust import mad
from scipy import signal
import data_reader, features
from alpha_vantage.timeseries import TimeSeries 
import keras
import tensorflow as tf
from keras.models import Model
from keras.layers import Dense, Dropout, LSTM, Input, Activation, concatenate
from keras import optimizers
# import numpy as np
from tensorflow import set_random_seed

In [23]:
def calc_returns(df):
    """Add simple-return, log-return and direction columns to a price frame.

    The first column of ``df`` is treated as the price series. Three columns
    are written onto ``df`` itself, so the caller's object is modified:

    * ``'returns'``     -- period-over-period percentage change of the price
    * ``'log-returns'`` -- first difference of the natural log of the price
    * ``'up-down'``     -- ``np.sign`` of the log-return

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose first column holds the prices. Any other columns are
        left untouched and are not used in the calculation.

    Returns
    -------
    tuple of (pandas.DataFrame, pandas.DataFrame)
        ``df`` with the three new columns, and the result of ``df.dropna()``
        (the first row always has no previous price, so it is NaN in the
        derived columns).
    """
    prices = df.iloc[:, 0]
    # Work from the price column explicitly. ``df.pct_change()`` yields one
    # column per input column, which cannot be stored in the single
    # 'returns' column as soon as the frame holds more than the price
    # (including when this function is called twice on the same frame).
    df['returns'] = prices.pct_change()
    df['log-returns'] = np.log(prices).diff()
    df['up-down'] = np.sign(df['log-returns'])
    df_dropna = df.dropna()
    return df, df_dropna

def remove_na(df):
    """Return only the rows of ``df`` whose ``'returns'`` entry is present.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a ``'returns'`` column.

    Returns
    -------
    pandas.DataFrame
        The rows of ``df`` where ``'returns'`` is not NaN/None; ``df`` itself
        is not modified.
    """
    has_return = ~df['returns'].isna()
    return df.loc[has_return]


def get_cwt_features(scale_bot,scale_top,scale_incr,data,time_axis=None,index=None):
    """Compute wavelet features for ``data`` and return them as a DataFrame.

    The scales ``np.arange(scale_bot, scale_top, step=scale_incr)`` are
    passed, together with the time axis and ``data``, to
    ``features.plot_wavelet``. Its result is wrapped in a DataFrame,
    transposed, and labelled with ``index``.

    Parameters
    ----------
    scale_bot, scale_top, scale_incr : number
        Start (inclusive), stop (exclusive) and step of the scale range.
    data : array-like
        Series to transform.
    time_axis : array-like, optional
        Time coordinates handed to ``features.plot_wavelet``. When omitted,
        the notebook-level variable ``time`` is used, as before.
    index : pandas.Index or array-like, optional
        Row labels for the returned frame. When omitted, the index of the
        notebook-level ``returns`` frame is used, as before.

    Returns
    -------
    pandas.DataFrame
        Transposed ``features.plot_wavelet`` output with ``index`` as its
        row labels.

    Raises
    ------
    ValueError
        If the transposed result does not have ``len(index)`` rows.
    """
    scales = np.arange(scale_bot,scale_top,step=scale_incr)

    # Fall back to the notebook globals only when the caller did not supply
    # the values; passing them explicitly keeps the function independent of
    # whichever cell last rebound `time` / `returns`.
    if time_axis is None:
        time_axis = time
    if index is None:
        index = returns.index

    # NOTE(review): plot_wavelet is assumed to return one row per scale and
    # one column per sample (hence the transpose) -- confirm in features.py.
    cwt = features.plot_wavelet(time_axis, data, scales)
    cwt_features = pd.DataFrame(cwt).T
    cwt_features.index = index
    return cwt_features

def prep_features(data,history_points):
    """Stack overlapping look-back windows of ``data`` into one array.

    Window ``j`` is a copy of ``data[j:j + history_points]``. There are
    ``len(data) - history_points`` windows, so the final row of ``data``
    never appears in any window.

    Parameters
    ----------
    data : numpy.ndarray
        Samples ordered along the first axis.
    history_points : int
        Number of consecutive samples per window.

    Returns
    -------
    numpy.ndarray
        The windows stacked along a new leading axis; an empty 1-D array
        when no full window fits.
    """
    window_count = len(data) - history_points
    windows = []
    for first in range(window_count):
        last = first + history_points
        windows.append(data[first:last].copy())
    return np.array(windows)

def prep_labels(data,history_points):
    """Collect the sample that follows each look-back window as its label.

    Label ``j`` is a copy of ``data[j + history_points]``, i.e. every sample
    from position ``history_points`` onward, in order.

    Parameters
    ----------
    data : numpy.ndarray
        Label series ordered along the first axis.
    history_points : int
        Length of the look-back window the labels are paired with.

    Returns
    -------
    numpy.ndarray
        The labels with an extra trailing axis of length 1.
    """
    targets = []
    for position in range(history_points, len(data)):
        targets.append(data[position].copy())
    return np.array(targets)[..., np.newaxis]

def split_data(feats, labels, test_split):
    """Cut features and labels into a leading and a trailing part.

    No shuffling is done: the first ``int(n * test_split)`` rows become the
    training part and the remainder the test part. Despite its name,
    ``test_split`` is therefore the fraction of rows that goes to
    *training* (e.g. ``0.9`` keeps 90 % for training).

    Parameters
    ----------
    feats, labels : numpy.ndarray
        Arrays with the same number of rows (checked with ``assert``).
    test_split : float
        Fraction of rows placed in the training part.

    Returns
    -------
    tuple
        ``(feature_train, label_train, feature_test, label_test)``.
    """
    row_count = labels.shape[0]
    assert feats.shape[0] == row_count
    cut = int(row_count * test_split)
    return feats[:cut], labels[:cut], feats[cut:], labels[cut:]

def test(hist_feats,feature_train,feature_test,label_train,label_test,epoch,batch):
    """Train a single-LSTM regressor and predict on the held-out windows.

    Network: LSTM(50) -> Dropout(0.2) -> Dense(64) -> sigmoid -> Dense(1)
    -> linear output, compiled with Adam (lr=0.0005) and MSE loss.

    Parameters
    ----------
    hist_feats : array with at least three axes
        Only its shape is read: axes 1 and 2 define the model input shape.
    feature_train, label_train : arrays
        Training windows and targets; 10 % is held back by Keras as
        validation data (``validation_split=0.1``).
    feature_test, label_test : arrays
        Windows and targets used for evaluation and prediction.
    epoch : int
        Number of training epochs.
    batch : int
        Mini-batch size.

    Returns
    -------
    tuple
        ``(test_predicted, label_test)`` -- the model predictions for
        ``feature_test`` and ``label_test`` passed through unchanged.

    Side effects
    ------------
    Prints the value returned by ``model.evaluate`` on the test data.
    """
    window_len, feature_count = hist_feats.shape[1], hist_feats.shape[2]
    lstm_input = Input(shape=(window_len, feature_count), name='lstm_input')

    # The network is a plain chain, so apply the layers in order.
    layer_chain = (
        LSTM(50, name='lstm_0'),
        Dropout(0.2, name='lstm_dropout_0'),
        Dense(64, name='dense_0'),
        Activation('sigmoid', name='sigmoid_0'),
        Dense(1, name='dense_1'),
        Activation('linear', name='linear_output'),
    )
    tensor = lstm_input
    for layer in layer_chain:
        tensor = layer(tensor)

    model = Model(inputs=lstm_input, outputs=tensor)
    model.compile(optimizer=optimizers.Adam(lr=0.0005), loss='mse')

    model.fit(
        x=feature_train,
        y=label_train,
        batch_size=batch,
        epochs=epoch,
        shuffle=True,
        validation_split=0.1,
    )
    print(model.evaluate(feature_test, label_test))

    return model.predict(feature_test), label_test

# Not used by the notebook cells shown here: experimental two-LSTM variant of test().
def test2(hist_feats,feature_train,feature_test,label_train,label_test,epoch,batch=None):
    """Train a two-stage (stacked) LSTM regressor and predict on test data.

    Network: LSTM(50, sequences) -> Dropout -> Dense(64) -> sigmoid ->
    Dense(1) -> LSTM(50) -> Dropout -> Dense(64) -> sigmoid -> Dense(1)
    -> sigmoid output, compiled with Adam (lr=0.0005) and MSE loss.

    Fixes relative to the earlier version, which could not build a model:

    * ``batch`` is now a parameter instead of an undefined/global name.
    * Every layer has a unique name (Keras rejects duplicate layer names
      within one model).
    * The first LSTM returns the full sequence, so the second LSTM receives
      the 3-D input it requires; the Dense layers in between act on the
      last axis of each time step.

    Parameters
    ----------
    hist_feats : array with at least three axes
        Only its shape is read: axes 1 and 2 define the model input shape.
    feature_train, label_train : arrays
        Training windows and targets; 10 % is held back as validation data.
    feature_test, label_test : arrays
        Windows and targets used for evaluation and prediction.
    epoch : int
        Number of training epochs.
    batch : int, optional
        Mini-batch size. ``None`` lets Keras use its own default.

    Returns
    -------
    tuple
        ``(test_predicted, label_test)``.

    Side effects
    ------------
    Prints the value returned by ``model.evaluate`` on the test data.
    """
    feat_shape_ax1 = hist_feats.shape[1]
    feat_shape_ax2 = hist_feats.shape[2]
    lstm_input = Input(shape=(feat_shape_ax1, feat_shape_ax2), name='lstm_input')

    # Stage 1: keep the time axis so the second LSTM has a sequence to read.
    x = LSTM(50, return_sequences=True, name='lstm_0')(lstm_input)
    x = Dropout(0.2, name='lstm_dropout_0')(x)
    x = Dense(64, name='dense_0')(x)
    x = Activation('sigmoid', name='sigmoid_0')(x)
    x = Dense(1, name='dense_1')(x)

    # Stage 2: collapses the sequence to a single vector per sample.
    y = LSTM(50, name='lstm_1')(x)
    y = Dropout(0.2, name='lstm_dropout_1')(y)
    y = Dense(64, name='dense_2')(y)
    y = Activation('sigmoid', name='sigmoid_1')(y)
    y = Dense(1, name='dense_3')(y)

    # NOTE(review): the layer is named 'linear_output' but applies a sigmoid,
    # so predictions are confined to (0, 1) and can never be negative.
    # test() uses a linear activation here -- confirm which one is intended.
    output = Activation('sigmoid', name='linear_output')(y)
    model = Model(inputs=lstm_input, outputs=output)

    adam = optimizers.Adam(lr=0.0005)

    model.compile(optimizer=adam, loss='mse')

    model.fit(x=feature_train, y=label_train, batch_size=batch, epochs=epoch, shuffle=True, validation_split=0.1)
    evaluation = model.evaluate(feature_test, label_test)
    print(evaluation)

    test_predicted = model.predict(feature_test)
    return test_predicted, label_test

def test_stats(predicted, real):
    c = 0
    s = 0
    for i in range(len(predicted)):
        if (predicted[i] > 0) and (real[i] > 0):
            c = c+1
        if (predicted[i] < 0) and (real[i] < 0):
            c = c+1
        s = s+1
    print('da',c/s)
    pct_correct_da = c/s
    
    return pct_correct_da

In [6]:
# Date range and ticker for the download.
start = '2018-07-01'
end = '2019-01-01'
ticker = 'AAPL'

# data_reader is a project-local module; the returned frame is expected to
# contain an 'adjusted close' column (used below).
df = data_reader.download(ticker,start,end)


# Despite the name, `opens` holds the 'adjusted close' prices.
# calc_returns adds 'returns', 'log-returns' and 'up-down' to it; `returns`
# is the same frame with NaN rows dropped.
opens = df['adjusted close'].to_frame()
opens, returns = calc_returns(opens)
print(opens)


# NOTE: this rebinds `signal`, which the import cell bound to scipy's
# `signal` module (`from scipy import signal`).
signal = df['adjusted close'].dropna().to_numpy()
log_signal = returns['log-returns'].dropna().to_numpy()


# Series handed to the wavelet transform, plus a matching time axis.
# `time` and `returns` are read as globals by get_cwt_features.
data = log_signal
N = len(data)
t0=0
# presumably one sample per day expressed in years -- TODO confirm the unit
dt=1/365
time = np.arange(0, N) * dt + t0


            adjusted close   returns  log-returns  up-down
2018-07-02        182.1920       NaN          NaN      NaN
2018-07-03        179.0188 -0.017417    -0.017570     -1.0
2018-07-05        180.4594  0.008047     0.008015      1.0
2018-07-06        182.9609  0.013862     0.013767      1.0
2018-07-09        185.5014  0.013885     0.013790      1.0
...                    ...       ...          ...      ...
2018-12-24        143.9221 -0.025874    -0.026215     -1.0
2018-12-26        154.0573  0.070421     0.068052      1.0
2018-12-27        153.0575 -0.006490    -0.006511     -1.0
2018-12-28        153.1360  0.000513     0.000513      1.0
2018-12-31        154.6161  0.009665     0.009619      1.0

[126 rows x 4 columns]


In [21]:
# Prediction targets: the log-returns with missing values removed.
lbls = returns['log-returns'].dropna().to_numpy() 
# Scale range for the wavelet features: np.arange(1, 10, 1), i.e. 1..9.
scale_bot = 1
scale_top = 10
scale_incr = 1
# Relies on the globals `time` and `returns` set in the data-loading cell.
cwt_features = get_cwt_features(scale_bot,scale_top,scale_incr,data)


In [18]:
# Put direction, log-return and wavelet features side by side.
# `opens` still contains its first row (no previous price, so NaN), whereas
# `lbls` was built after dropna(); `results` is therefore one row longer
# than `lbls` until dropna() is applied to it.
results = pd.concat([opens['up-down'],opens['log-returns'],cwt_features],axis=1,sort=False)
print('num lbls:',len(lbls))
print('num results:',len(results))

num lbls: 125
num results: 126


In [24]:
# ---------------------------------------------------------------------------
# Full-history run: download prices, build features, sweep hyper-parameters.
# ---------------------------------------------------------------------------
start = '2002-01-01'
end = '2019-01-10'
ticker = 'AAPL'

df = data_reader.download(ticker,start,end)

# Despite the name, `opens` holds the 'adjusted close' prices. calc_returns
# adds the return columns; `returns` is the same frame with NaN rows dropped.
opens = df['adjusted close'].to_frame()
opens, returns = calc_returns(opens)
print(opens)

# NOTE: rebinding `signal` hides scipy's `signal` module imported at the top.
signal = df['adjusted close'].dropna().to_numpy()
log_signal = returns['log-returns'].dropna().to_numpy()

# Series for the wavelet transform and its time axis. get_cwt_features reads
# `time` and `returns` as globals.
data = log_signal
N = len(data)
t0=0
dt=1/365
time = np.arange(0, N) * dt + t0

# Prediction targets: the log-returns with missing values removed.
lbls = returns['log-returns'].dropna().to_numpy()

# Fraction of windows used for training (split_data takes the leading part).
test_split = 0.9

report_columns = ['Epoch','Batch Size','CWT Top','CWT Incr','Hist points','Pct Acc']
# Rows are collected in a list and turned into a frame once at the end;
# DataFrame.append is gone in current pandas and copies the frame each call.
report_rows = []

# Hyper-parameter grid. Loop order: CWT top scale > batch size > history
# length > epochs.
batches = [8]
tops = [10]
hist_list = [10]
epochs = [150]
for i in tops:
    scale_bot = 1
    scale_top = i
    scale_incr = 1

    # The wavelet features and the feature matrix depend only on the scale
    # range, so build them once per `i` rather than once per combination.
    # NOTE(review): the transform is computed over the whole series before
    # windowing and splitting. If it is not causal, each row may contain
    # information from later dates (look-ahead) -- confirm before trusting
    # the reported accuracy.
    cwt_features = get_cwt_features(scale_bot,scale_top,scale_incr,data)
    results = pd.concat([opens['up-down'],opens['log-returns'],cwt_features],axis=1,sort=False)
    feats = results.dropna().to_numpy()

    for b in batches:
        for k in hist_list:
            for e in epochs:

                print("running: hist " + str(k) + ", cwt top "+ str(i) + ",batch "+str(b) )

                history_points = k
                hist_feats = prep_features(feats,history_points)
                hist_labels = prep_labels(lbls,history_points)

                feature_train, label_train, feature_test, label_test = split_data(hist_feats,hist_labels,test_split)

                epoch = e
                batch = b
                # NOTE(review): no random seed is set in this cell, so the
                # result can differ from run to run.
                predicted, real = test(hist_feats,feature_train,feature_test,label_train,label_test,epoch,batch)
                pct_da = test_stats(predicted, real)

                report_rows.append({'Epoch':e,'Batch Size':b,'CWT Top':i,'CWT Incr':scale_incr,'Hist points':k,'Pct Acc':pct_da})

                # Save the per-sample predictions of this combination.
                test_datapoints = pd.DataFrame(data={'Pred':predicted.T[0],'Actual':real.T[0]})
                fname_test_dpoints = str(ticker)+'_datapoints_e'+str(e)+'b'+str(b)+'cwttop'+str(i)+'cwtinc'+str(scale_incr)+'h'+str(k)+'pctacc'+str(pct_da)+'.csv'
                data_reader.save_df(test_datapoints,fname_test_dpoints)

report = pd.DataFrame(report_rows, columns=report_columns)
data_reader.save_df(report,'AAPL_hypertests.csv')

            adjusted close   returns  log-returns  up-down
2002-01-02          1.4407       NaN          NaN      NaN
2002-01-03          1.4580  0.012008     0.011937      1.0
2002-01-04          1.4648  0.004664     0.004653      1.0
2002-01-07          1.4159 -0.033383    -0.033953     -1.0
2002-01-08          1.3980 -0.012642    -0.012723     -1.0
...                    ...       ...          ...      ...
2019-01-04        145.3238  0.042689     0.041803      1.0
2019-01-07        145.0003 -0.002226    -0.002229     -1.0
2019-01-08        147.7645  0.019063     0.018884      1.0
2019-01-09        150.2738  0.016982     0.016839      1.0
2019-01-10        150.7541  0.003196     0.003191      1.0

[4286 rows x 4 columns]
running: hist 10, cwt top 10,batch 8
Instructions for updating:
If using Keras pass *_constraint arguments to layers.


  result = getattr(ufunc, method)(*inputs, **kwargs)



Train on 3462 samples, validate on 385 samples
Epoch 1/150
Epoch 2/150
Epoch 3/150
Epoch 4/150
Epoch 5/150
Epoch 6/150
Epoch 7/150
Epoch 8/150
Epoch 9/150
Epoch 10/150
Epoch 11/150
Epoch 12/150
Epoch 13/150
Epoch 14/150
Epoch 15/150
Epoch 16/150
Epoch 17/150
Epoch 18/150
Epoch 19/150
Epoch 20/150
Epoch 21/150
Epoch 22/150
Epoch 23/150
Epoch 24/150
Epoch 25/150
Epoch 26/150
Epoch 27/150
Epoch 28/150
Epoch 29/150
Epoch 30/150
Epoch 31/150
Epoch 32/150
Epoch 33/150
Epoch 34/150
Epoch 35/150
Epoch 36/150
Epoch 37/150
Epoch 38/150
Epoch 39/150
Epoch 40/150
Epoch 41/150
Epoch 42/150
Epoch 43/150
Epoch 44/150
Epoch 45/150
Epoch 46/150
Epoch 47/150
Epoch 48/150
Epoch 49/150
Epoch 50/150
Epoch 51/150
Epoch 52/150
Epoch 53/150
Epoch 54/150
Epoch 55/150
Epoch 56/150
Epoch 57/150
Epoch 58/150
Epoch 59/150
Epoch 60/150
Epoch 61/150
Epoch 62/150
Epoch 63/150
Epoch 64/150
Epoch 65/150
Epoch 66/150
Epoch 67/150
Epoch 68/150
Epoch 69/150
Epoch 70/150
Epoch 71/150


Epoch 72/150
Epoch 73/150
Epoch 74/150
Epoch 75/150
Epoch 76/150
Epoch 77/150
Epoch 78/150
Epoch 79/150
Epoch 80/150
Epoch 81/150
Epoch 82/150
Epoch 83/150
Epoch 84/150
Epoch 85/150
Epoch 86/150
Epoch 87/150
Epoch 88/150
Epoch 89/150
Epoch 90/150
Epoch 91/150
Epoch 92/150
Epoch 93/150
Epoch 94/150
Epoch 95/150
Epoch 96/150
Epoch 97/150
Epoch 98/150
Epoch 99/150
Epoch 100/150
Epoch 101/150
Epoch 102/150
Epoch 103/150
Epoch 104/150
Epoch 105/150
Epoch 106/150
Epoch 107/150
Epoch 108/150
Epoch 109/150
Epoch 110/150
Epoch 111/150
Epoch 112/150
Epoch 113/150
Epoch 114/150
Epoch 115/150
Epoch 116/150
Epoch 117/150
Epoch 118/150
Epoch 119/150
Epoch 120/150
Epoch 121/150
Epoch 122/150
Epoch 123/150
Epoch 124/150
Epoch 125/150
Epoch 126/150
Epoch 127/150
Epoch 128/150
Epoch 129/150
Epoch 130/150
Epoch 131/150
Epoch 132/150
Epoch 133/150
Epoch 134/150
Epoch 135/150
Epoch 136/150
Epoch 137/150
Epoch 138/150
Epoch 139/150
Epoch 140/150
Epoch 141/150
Epoch 142/150
Epoch 143/150
Epoch 144/150


Epoch 145/150
Epoch 146/150
Epoch 147/150
Epoch 148/150
Epoch 149/150
Epoch 150/150
0.00011031695306475196
da 0.7850467289719626
