In [111]:
import pandas as pd
import keras
import numpy as np
from itertools import islice
from sklearn.model_selection import train_test_split
from stldecompose import decompose
from matplotlib import pyplot
from keras.models import model_from_json
#https://stackoverflow.com/questions/48356464/how-to-model-convolutional-recurrent-network-crnn-in-keras
#https://machinelearningmastery.com/timedistributed-layer-for-long-short-term-memory-networks-in-python/
#https://stackoverflow.com/questions/43034960/many-to-one-and-many-to-many-lstm-examples-in-keras
#https://github.com/keras-team/keras/issues/6063
#https://stackoverflow.com/questions/34357617/append-2d-array-to-3d-array-extending-third-dimension

In [112]:
def rolling_window(a, window, step_size):
    shape = a.shape[:-1] + (a.shape[-1] - window +2  - step_size, window)
    strides = a.strides + (a.strides[-1] * step_size,)
    return np.lib.stride_tricks.as_strided(a, shape=shape, strides=strides)

In [113]:
def smape(A, F):
    return 100/len(A) * np.sum(2 * np.abs(F - A) / (np.abs(A) + np.abs(F)))

In [114]:
in_win = 15
out_win = 12

### CMC Data Processing

In [115]:
# data_bt = pd.read_csv("cmc_btc.csv")
# data_bt['Date'] = pd.to_datetime(data_bt['Date'])
# data_bt = data_bt.sort_values(by=['Date']).reset_index(drop=True)
# data_bt.head()

In [116]:
# date_col = pd.to_datetime(data_bt["Date"])
# data_open = data_bt["Open*"]
# data_high = data_bt["High"]
# data_low = data_bt["Low"]
# data_close = data_bt["Close**"]
# data_vol = data_bt["Volume"]
# data_mark = data_bt["Market Cap"]

### BITFINEX DATA PROCESSING

In [117]:
data_bt = pd.read_csv("BITFINEX_SPOT_BTC_USD/1DAY_2013-03-31&2018-09-20.csv")
data_bt['time_close'] = pd.to_datetime(data_bt['time_close'])
data_bt = data_bt.sort_values(by=['time_close']).reset_index(drop=True)
data_bt.head()

Unnamed: 0,price_open,price_high,price_low,price_close,volume_traded,trades_count,time_close,time_open,time_period_start,time_period_end,market_cap
0,93.25,100.0,93.03,93.10001,390.827224,55,2013-03-31 23:36:44,2013-03-31T22:07:48.0000000Z,2013-03-31T22:00:00.000000Z,2013-04-01T00:00:00.0000000Z,
1,93.17,105.9,92.49999,102.37,4919.654127,627,2013-04-01 23:41:42,2013-04-01T00:05:39.0000000Z,2013-04-01T00:00:00.000000Z,2013-04-02T00:00:00.0000000Z,
2,102.8,118.388067,99.0,117.98999,9084.832816,1205,2013-04-02 23:54:35,2013-04-02T00:00:11.0000000Z,2013-04-02T00:00:00.000000Z,2013-04-03T00:00:00.0000000Z,
3,116.579097,146.88,101.51088,134.952969,12909.402178,2502,2013-04-03 23:59:37,2013-04-03T00:07:29.0000000Z,2013-04-03T00:00:00.000000Z,2013-04-04T00:00:00.0000000Z,
4,131.779686,143.0,119.0,132.681,6910.100414,1456,2013-04-04 23:50:09,2013-04-04T00:02:15.0000000Z,2013-04-04T00:00:00.000000Z,2013-04-05T00:00:00.0000000Z,


In [118]:
data_bt.dropna(inplace=True)
data_bt.reset_index(drop=True, inplace=True)
data_bt.isna().any()

price_open           False
price_high           False
price_low            False
price_close          False
volume_traded        False
trades_count         False
time_close           False
time_open            False
time_period_start    False
time_period_end      False
market_cap           False
dtype: bool

In [119]:
date_col = pd.to_datetime(data_bt["time_close"])
data_open = data_bt["price_open"]
data_high = data_bt["price_high"]
data_low = data_bt["price_low"]
data_close = data_bt["price_close"]
data_vol = data_bt["volume_traded"]
data_mark = data_bt["market_cap"]

### Log Transformation

In [120]:
log_open = np.log(data_open)
log_high = np.log(data_high)
log_low = np.log(data_low)
log_close = np.log(data_close)
log_vol = np.log(data_vol)
log_mark = np.log(data_mark)

### STL Decompostion

In [121]:
log_open = pd.DataFrame(log_open)
log_open = log_open.set_index(date_col)

log_high = pd.DataFrame(log_high)
log_high = log_high.set_index(date_col)

log_low = pd.DataFrame(log_low)
log_low = log_low.set_index(date_col)

log_close = pd.DataFrame(log_close)
log_close = log_close.set_index(date_col)

log_vol = pd.DataFrame(log_vol)
log_vol = log_vol.set_index(date_col)

log_mark = pd.DataFrame(log_mark)
log_mark = log_mark.set_index(date_col)

In [122]:
stl_open = decompose(log_open)
stl_high = decompose(log_high)
stl_low = decompose(log_low)
stl_close = decompose(log_close)
stl_vol = decompose(log_vol)
stl_mark = decompose(log_mark)

### Deasonal TS component

In [123]:
deseason_open  = (stl_open.resid + stl_open.trend).iloc[:,0]
deseason_high  = (stl_high.resid + stl_high.trend).iloc[:,0]
deseason_low   = (stl_low.resid + stl_low.trend).iloc[:,0]
deseason_close = (stl_close.resid + stl_close.trend).iloc[:,0]
deseason_vol   = (stl_vol.resid + stl_vol.trend).iloc[:,0]
deseason_mark  = (stl_mark.resid + stl_mark.trend).iloc[:,0]

### Input Normalized Window

In [124]:
date_col_win_in = rolling_window(date_col[0:(len(data_bt)-out_win)], in_win, 1)
date_col_win_in_exp = np.expand_dims(date_col_win_in, axis=2)

data_open_win_in = rolling_window(deseason_open[0:(len(deseason_open)-out_win)], in_win, 1)
data_open_win_in = pd.DataFrame(data_open_win_in) 
norm_open_win_in = data_open_win_in.subtract(data_open_win_in.iloc[:,in_win-1], axis='index')
norm_open_win_in_exp = np.expand_dims(norm_open_win_in.values, axis=2)

data_high_win_in = rolling_window(deseason_high[0:(len(deseason_high)-out_win)], in_win, 1)
data_high_win_in = pd.DataFrame(data_high_win_in) 
norm_high_win_in = data_high_win_in.subtract(data_high_win_in.iloc[:,in_win-1], axis='index')
norm_high_win_in_exp = np.expand_dims(norm_high_win_in.values, axis=2)

data_low_win_in = rolling_window(deseason_low[0:(len(deseason_low)-out_win)], in_win, 1)
data_low_win_in = pd.DataFrame(data_low_win_in) 
norm_low_win_in = data_low_win_in.subtract(data_low_win_in.iloc[:,in_win-1], axis='index')
norm_low_win_in_exp = np.expand_dims(norm_low_win_in.values, axis=2)

data_close_win_in = rolling_window(deseason_close[0:(len(deseason_close)-out_win)], in_win, 1)
data_close_win_in = pd.DataFrame(data_close_win_in) 
norm_close_win_in = data_close_win_in.subtract(data_close_win_in.iloc[:,in_win-1], axis='index')
norm_close_win_in_exp = np.expand_dims(norm_close_win_in.values, axis=2)

data_vol_win_in = rolling_window(deseason_vol[0:(len(deseason_vol)-out_win)], in_win, 1)
data_vol_win_in = pd.DataFrame(data_vol_win_in) 
norm_vol_win_in = data_vol_win_in.subtract(data_vol_win_in.iloc[:,in_win-1], axis='index')
norm_vol_win_in_exp = np.expand_dims(norm_vol_win_in.values, axis=2)

data_mark_win_in = rolling_window(deseason_mark[0:(len(deseason_mark)-out_win)], in_win, 1)
data_mark_win_in = pd.DataFrame(data_mark_win_in) 
norm_mark_win_in = data_mark_win_in.subtract(data_mark_win_in.iloc[:,in_win-1], axis='index')
norm_mark_win_in_exp = np.expand_dims(norm_mark_win_in.values, axis=2)

  app.launch_new_instance()


### Output Normalized Window

In [125]:
date_col_win_out = rolling_window(date_col[in_win:len(data_bt)], out_win, 1)
date_col_win_out_exp = np.expand_dims(date_col_win_out, axis=2)

data_high_win_out = rolling_window(deseason_high[in_win:len(deseason_high)], out_win, 1)
data_high_win_out = pd.DataFrame(data_high_win_out) 
norm_high_win_out = data_high_win_out.subtract(data_high_win_in.iloc[:,out_win-1], axis='index').values
#norm_high_win_out_exp = np.expand_dims(norm_high_win_out.values, axis=2)

data_low_win_out = rolling_window(deseason_low[in_win:len(deseason_high)], out_win, 1)
data_low_win_out = pd.DataFrame(data_low_win_out) 
norm_low_win_out = data_low_win_out.subtract(data_low_win_in.iloc[:,out_win-1], axis='index').values
#norm_low_win_out_exp = np.expand_dims(norm_low_win_out.values, axis=2)

data_close_win_out = rolling_window(deseason_close[in_win:len(deseason_high)], out_win, 1)
data_close_win_out = pd.DataFrame(data_close_win_out) 
norm_close_win_out = data_close_win_out.subtract(data_close_win_in.iloc[:,out_win-1], axis='index').values
#norm_close_win_out_exp = np.expand_dims(norm_close_win_out.values, axis=2)

  app.launch_new_instance()


### Stacking Data for Training in High Dimension

In [126]:
x_all = np.dstack((norm_open_win_in_exp, norm_high_win_in_exp, norm_low_win_in_exp
                   ,norm_close_win_in_exp, norm_vol_win_in_exp, norm_mark_win_in_exp))
x_all.shape

(1934, 15, 6)

In [127]:
#y_all = np.dstack((norm_high_win_out_exp, norm_low_win_out_exp, norm_close_win_out_exp))
y_all = norm_close_win_out
y_all.shape

(1934, 12)

### Train Validation Test Split

In [128]:
#X_train, X_test, y_train,  = train_test_split(x_all, y_all, test_size=0.10,shuffle=False)

msk = np.random.rand(len(x_all)) < 0.81

X_train = x_all[msk]
X_test = x_all[~msk]

Y_train = y_all[msk]
Y_test = y_all[~msk]

print(X_test.shape, X_train.shape,Y_train.shape,Y_test.shape)

(383, 15, 6) (1551, 15, 6) (1551, 12) (383, 12)


### Building Deep Learning Architecture using Keras

In [129]:
from keras import Sequential
from keras.layers import LSTM, RepeatVector, TimeDistributed, Dense, Activation,Permute,Conv1D,GaussianNoise,Dropout,regularizers,Conv2D,Reshape
from keras.optimizers import SGD, nadam,adam
from keras.callbacks import ReduceLROnPlateau,EarlyStopping

In [130]:
hidden_neurons = 180
batch_size = 1
model = Sequential()  
#model.add(LSTM(batch_input_shape = (batch_size,15,6), units=hidden_neurons, stateful = False, return_sequences=False))  
model.add(LSTM(units=hidden_neurons, batch_input_shape=(batch_size,X_train.shape[1], X_train.shape[2]),stateful = True, return_sequences=False))
model.add(GaussianNoise(0.005))
model.add(Dense(out_win,use_bias=False, bias_initializer='zeros',kernel_regularizer=regularizers.l2(0.0008)))
optm = nadam(lr=0.01)
model.compile(loss='mean_squared_error', optimizer="nadam", metrics=['accuracy'])  

In [131]:
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_3 (LSTM)                (1, 180)                  134640    
_________________________________________________________________
gaussian_noise_3 (GaussianNo (1, 180)                  0         
_________________________________________________________________
dense_3 (Dense)              (1, 12)                   2160      
Total params: 136,800
Trainable params: 136,800
Non-trainable params: 0
_________________________________________________________________


### Training Model

In [132]:
reduce_lr = ReduceLROnPlateau(monitor='val_loss',patience=3, min_lr=0.00001,factor=0.1)
for i in range(40):
    model.fit(X_train, Y_train,
              epochs=1,
              batch_size=batch_size,
              verbose=2,
              validation_data = (X_test, Y_test))
    model.reset_states()

#early_stopping = EarlyStopping(monitor='val_loss', patience=5)
# model.fit(X_train, Y_train,
#           epochs=50,
#           batch_size=batch_size,
#           verbose=2,
#           callbacks=[reduce_lr],
#           validation_data = (X_test, Y_test))

# Save the weights
model.save_weights('model_weights.h5')

# Save the model architecture
with open('model_architecture.json', 'w') as f:
    f.write(model.to_json())


Train on 1551 samples, validate on 383 samples
Epoch 1/1
 - 23s - loss: 0.0147 - acc: 0.1135 - val_loss: 0.0135 - val_acc: 0.1854
Train on 1551 samples, validate on 383 samples
Epoch 1/1
 - 23s - loss: 0.0124 - acc: 0.1225 - val_loss: 0.0133 - val_acc: 0.1514
Train on 1551 samples, validate on 383 samples
Epoch 1/1
 - 29s - loss: 0.0122 - acc: 0.1328 - val_loss: 0.0129 - val_acc: 0.1540
Train on 1551 samples, validate on 383 samples
Epoch 1/1
 - 25s - loss: 0.0119 - acc: 0.1438 - val_loss: 0.0133 - val_acc: 0.1749
Train on 1551 samples, validate on 383 samples
Epoch 1/1
 - 29s - loss: 0.0119 - acc: 0.1483 - val_loss: 0.0127 - val_acc: 0.1775
Train on 1551 samples, validate on 383 samples
Epoch 1/1
 - 29s - loss: 0.0119 - acc: 0.1502 - val_loss: 0.0130 - val_acc: 0.1462
Train on 1551 samples, validate on 383 samples
Epoch 1/1
 - 27s - loss: 0.0120 - acc: 0.1573 - val_loss: 0.0130 - val_acc: 0.1044
Train on 1551 samples, validate on 383 samples
Epoch 1/1
 - 30s - loss: 0.0117 - acc: 0.15

In [133]:
# Model reconstruction from JSON file
with open('model_architecture.json', 'r') as f:
    model = model_from_json(f.read())

# Load weights into the new model
model.load_weights('model_weights.h5')

### Carrying out Prediction on test data set

In [134]:
y_pred = []
for i in range(0,len(X_test)):
    pred = model.predict(X_test[i:(i+1),:,:])
    y_pred.append(pred[0])

In [135]:
y_pred = pd.DataFrame(y_pred).values

### Renormalizing and seasonalizing the predictions and true labels

In [136]:
season_high  = pd.DataFrame(rolling_window(stl_high.seasonal.iloc[:,0][in_win:len(stl_high.seasonal)], out_win, 1))
season_low   = pd.DataFrame(rolling_window(stl_low.seasonal.iloc[:,0][in_win:len(stl_low.seasonal)], out_win, 1))
season_close = pd.DataFrame(rolling_window(stl_close.seasonal.iloc[:,0][in_win:len(stl_close.seasonal)], out_win, 1))

  app.launch_new_instance()


In [137]:
season_high_pr  = pd.DataFrame(rolling_window(stl_high.seasonal.iloc[:,0][0:(len(stl_high.seasonal)-out_win)], in_win, 1))
season_high_pr.drop(season_high_pr.columns[0:(in_win-out_win)], axis=1, inplace=True)
season_high_pr.columns = np.arange(len(season_high_pr.columns))
season_low_pr   = pd.DataFrame(rolling_window(stl_low.seasonal.iloc[:,0][0:(len(stl_low.seasonal)-out_win)], in_win, 1))
season_low_pr.drop(season_low_pr.columns[0:(in_win-out_win)], axis=1, inplace=True)
season_low_pr.columns = np.arange(len(season_low_pr.columns))
season_close_pr = pd.DataFrame(rolling_window(stl_close.seasonal.iloc[:,0][0:(len(stl_close.seasonal)-out_win)], in_win, 1))
season_close_pr.drop(season_close_pr.columns[0:(in_win-out_win)], axis=1, inplace=True)
season_close_pr.columns = np.arange(len(season_close_pr.columns))

  app.launch_new_instance()


In [138]:
X_train.shape[0]

1551

#### Transforming Predictions

In [139]:
#high_pred  = pd.DataFrame(y_pred[:,:,0])
#low_pred   = pd.DataFrame(y_pred[:,:,1])
close_pred = pd.DataFrame(y_pred[:,])
print(close_pred.shape)
#high_denorm_pred  = high_pred.add(data_high_win_in.iloc[X_train.shape[0]:,out_win-1].reset_index(drop=True), axis='index')
#low_denorm_pred   = low_pred.add(data_low_win_in.iloc[X_train.shape[0]:,out_win-1].reset_index(drop=True), axis='index')
close_denorm_pred = close_pred.add(data_close_win_in.iloc[X_train.shape[0]:,out_win-1].reset_index(drop=True), axis='index')
print(close_denorm_pred.shape)
#deseason_high_pred  = high_denorm_pred.add(season_high_pr.iloc[X_train.shape[0]:,].reset_index(drop=True), axis='index')
#deseason_low_pred   = low_denorm_pred.add(season_low_pr.iloc[X_train.shape[0]:,].reset_index(drop=True), axis='index')
deseason_close_pred = close_denorm_pred.add(season_close_pr.iloc[X_train.shape[0]:,].reset_index(drop=True), axis='index')
print(deseason_close_pred.shape)
#pred_high  = np.exp(deseason_high_pred)
#pred_low   = np.exp(deseason_low_pred)
pred_close = np.exp(deseason_close_pred)

(383, 12)
(383, 12)
(383, 12)


In [140]:
pred_close.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11
0,4374.79379,4465.549247,4402.68664,4383.525484,4285.779348,4394.70374,4605.85264,4457.998286,4438.393867,4487.992572,4522.759326,4507.631432
1,4109.871391,4153.873394,4210.278911,4137.852812,4180.057559,4076.374341,3962.872918,3939.722726,3871.764142,3906.317787,3847.893243,3892.889186
2,4136.90251,4224.764186,4186.168798,4235.007012,4197.51958,4177.739026,4209.242339,4165.485387,4220.254411,4278.610474,4279.455037,4330.235568
3,4309.731383,4318.179514,4343.708727,4375.46306,4349.500257,4293.791129,4256.428008,4274.013375,4264.739306,4291.201455,4322.648775,4302.031697
4,4385.603275,4429.187255,4363.531167,4269.472711,4132.911513,4115.480135,4187.99133,4170.336392,4204.299295,4270.725687,4251.237723,4185.008495


#### Transforming True Labels

In [141]:
#high_test  = pd.DataFrame(y_test[:,:,0])
#low_test   = pd.DataFrame(y_test[:,:,1])
close_test = pd.DataFrame(Y_test[:,])

#high_denorm_test  = high_test.add(data_high_win_in.iloc[X_train.shape[0]:,out_win-1].reset_index(drop=True), axis='index')
#low_denorm_test   = low_test.add(data_low_win_in.iloc[X_train.shape[0]:,out_win-1].reset_index(drop=True), axis='index')
close_denorm_test = close_test.add(data_close_win_in.iloc[X_train.shape[0]:,out_win-1].reset_index(drop=True), axis='index')

#deseason_high_test  = high_denorm_test.add(season_high.iloc[X_train.shape[0]:,].reset_index(drop=True))
#deseason_low_test   = low_denorm_test.add(season_low.iloc[X_train.shape[0]:,].reset_index(drop=True))
deseason_close_test = close_denorm_test.add(season_close.iloc[X_train.shape[0]:,].reset_index(drop=True))

#test_high  = np.exp(deseason_high_test)
#test_low   = np.exp(deseason_low_test)
test_close = np.exp(deseason_close_test)

In [142]:
Y_test.shape

(383, 12)

In [143]:
from matplotlib import pyplot as plt

In [144]:
#X_test[1,:,1]
date_col_win_test = rolling_window(date_col[0:(len(data_bt)-out_win)], in_win, 1)[X_train.shape[0]:]
date_col_win_test = pd.DataFrame(date_col_win_test)

data_open_win_test = rolling_window(data_open[0:(len(data_open)-out_win)], in_win, 1)[X_train.shape[0]:]
data_open_win_test = pd.DataFrame(data_open_win_test) 

data_high_win_test = rolling_window(data_high[0:(len(data_high)-out_win)], in_win, 1)[X_train.shape[0]:]
data_high_win_test = pd.DataFrame(data_high_win_test) 


data_low_win_test = rolling_window(data_low[0:(len(data_low)-out_win)], in_win, 1)[X_train.shape[0]:]
data_low_win_test = pd.DataFrame(data_low_win_test) 


data_close_win_test = rolling_window(data_close[0:(len(data_close)-out_win)], in_win, 1)[X_train.shape[0]:]
data_close_win_test = pd.DataFrame(data_close_win_test) 


data_vol_win_test = rolling_window(data_vol[0:(len(data_vol)-out_win)], in_win, 1)[X_train.shape[0]:]
data_vol_win_test = pd.DataFrame(data_vol_win_test) 


data_mark_win_test = rolling_window(data_mark[0:(len(data_mark)-out_win)], in_win, 1)[X_train.shape[0]:]
data_mark_win_test = pd.DataFrame(data_mark_win_test) 

  app.launch_new_instance()


In [145]:
date_col_win_pred = pd.DataFrame(date_col_win_out[X_train.shape[0]:])
date_col_win_pred.shape

(383, 12)

In [146]:
print(type(data_high_win_test))
data_high_win_test.shape

<class 'pandas.core.frame.DataFrame'>


(383, 15)

In [147]:
date_col_win_pred.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11
0,2017-08-23 23:59:59,2017-08-24 23:59:58,2017-08-25 23:59:58,2017-08-26 23:59:51,2017-08-27 23:59:48,2017-08-28 23:59:58,2017-08-29 23:59:54,2017-08-30 23:59:59,2017-08-31 23:59:47,2017-09-01 23:59:54,2017-09-02 23:59:59,2017-09-03 23:59:57
1,2017-08-24 23:59:58,2017-08-25 23:59:58,2017-08-26 23:59:51,2017-08-27 23:59:48,2017-08-28 23:59:58,2017-08-29 23:59:54,2017-08-30 23:59:59,2017-08-31 23:59:47,2017-09-01 23:59:54,2017-09-02 23:59:59,2017-09-03 23:59:57,2017-09-04 23:59:58
2,2017-08-25 23:59:58,2017-08-26 23:59:51,2017-08-27 23:59:48,2017-08-28 23:59:58,2017-08-29 23:59:54,2017-08-30 23:59:59,2017-08-31 23:59:47,2017-09-01 23:59:54,2017-09-02 23:59:59,2017-09-03 23:59:57,2017-09-04 23:59:58,2017-09-05 23:59:52
3,2017-08-26 23:59:51,2017-08-27 23:59:48,2017-08-28 23:59:58,2017-08-29 23:59:54,2017-08-30 23:59:59,2017-08-31 23:59:47,2017-09-01 23:59:54,2017-09-02 23:59:59,2017-09-03 23:59:57,2017-09-04 23:59:58,2017-09-05 23:59:52,2017-09-06 23:59:47
4,2017-08-27 23:59:48,2017-08-28 23:59:58,2017-08-29 23:59:54,2017-08-30 23:59:59,2017-08-31 23:59:47,2017-09-01 23:59:54,2017-09-02 23:59:59,2017-09-03 23:59:57,2017-09-04 23:59:58,2017-09-05 23:59:52,2017-09-06 23:59:47,2017-09-07 23:59:56


In [158]:
%matplotlib qt
for index,row in pred_close.iterrows():
    plt.figure()
    plt.plot(date_col_win_pred.iloc[index,:],pred_close.iloc[index,:],color='r',marker = '.')
    plt.plot(date_col_win_test.iloc[index,:],data_close_win_test.iloc[index,:],color='g',marker = '.')
    plt.plot(date_col_win_pred.iloc[index,:],test_close.iloc[index,:],color='b',marker = '.')
    if index == 25:
        break
plt.show()



In [149]:

error_high = []
error_low = []
error_close = []

# for index,row in pred_high.iterrows():
#     error_high.append(smape(test_high.iloc[index,:], pred_high.iloc[index,:]))

# for index,row in pred_low.iterrows():
#     error_low.append(smape(test_low.iloc[index,:], pred_low.iloc[index,:]))
    
for index,row in pred_close.iterrows():
    error_close.append(smape(test_close.iloc[index,:], pred_close.iloc[index,:]))

## SMAPE ERROR

In [150]:
# print("High prediciton SMAPE score : ",sum(error_high)/len(error_high))
# print("Low prediciton SMAPE score : ",sum(error_low)/len(error_low))
print("Close prediciton SMAPE score : ",sum(error_close)/len(error_close))
#print("Overall SMAPE score : ",((sum(error_high)/len(error_high)) + (sum(error_low)/len(error_low)) + (sum(error_close)/len(error_close)))/3)

Close prediciton SMAPE score :  9.594184534156367


In [151]:
close_pred_check = data_close_win_test.iloc[:,in_win-1]-pred_close.iloc[:,0]
close_true_check = data_close_win_test.iloc[:,in_win-1]-test_close.iloc[:,0]

# high_pred_check = data_high_win_test.iloc[:,in_win-1]-pred_high.iloc[:,0]
# high_true_check = data_high_win_test.iloc[:,in_win-1]-test_high.iloc[:,0]

# low_pred_check = data_low_win_test.iloc[:,in_win-1]-pred_low.iloc[:,0]
# low_true_check = data_low_win_test.iloc[:,in_win-1]-test_low.iloc[:,0]

### F-Score , Precision Recall, True Positive Rate & True Negative Rate

In [152]:
close_tp = 0
close_tn = 0
close_fp = 0
close_fn = 0

# high_tp = 0
# high_tn = 0
# high_fp = 0
# high_fn = 0

# low_tp = 0
# low_tn = 0
# low_fp = 0
# low_fn = 0
for i in range(0,len(close_pred_check)):
    if(close_true_check[i] >= 0  and close_pred_check[i] >= 0):
        close_tp += 1
    if(close_true_check[i] >= 0  and close_pred_check[i] < 0):
        close_tn += 1 
    if(close_true_check[i] < 0  and close_pred_check[i] < 0):
        close_fp += 1
    if(close_true_check[i] < 0  and close_pred_check[i] >= 0):
        close_fn += 1

#     if(high_true_check[i] >= 0  and high_pred_check[i] >= 0):
#         high_tp += 1
#     if(high_true_check[i] >= 0  and high_pred_check[i] < 0):
#         high_tn += 1 
#     if(high_true_check[i] < 0  and high_pred_check[i] < 0):
#         high_fp += 1
#     if(high_true_check[i] < 0  and high_pred_check[i] >= 0):
#         high_fn += 1
    
#     if(low_true_check[i] >= 0  and low_pred_check[i] >= 0):
#         low_tp += 1
#     if(low_true_check[i] >= 0  and low_pred_check[i] < 0):
#         low_tn += 1 
#     if(low_true_check[i] < 0  and low_pred_check[i] < 0):
#         low_fp += 1
#     if(low_true_check[i] < 0  and low_pred_check[i] >= 0):
#         low_fn += 1


In [153]:
def precision(tp,fp):
    return(tp/(tp + fp))

def recall(tp,fn):
    return(tp/(tp + fn))

def f_score(precision,recall):
    return(2*((precision * recall)/(precision + recall)))

def tp_rate(tp,fn):
    return(tp/(tp+fn))

def fp_rate(fp,tn):
    return(fp/(fp+tn))

def Accuracy(TP,TN,FP,FN):
    return (TP + TN) / (TP + TN + FP + FN)

### Close

In [154]:
close_precision  = [precision(close_tp,close_fp) , precision(close_tn,close_fn)]
close_recall     = [recall(close_tp,close_fn) , recall(close_tn,close_fp)]
close_f_score    = [f_score(close_precision[0],close_recall[0]) , f_score(close_precision[1],close_recall[1])]
close_tp_rate    = [tp_rate(close_tp,close_fn), tp_rate(close_tn,close_fp)]
close_fp_rate    = [fp_rate(close_fp,close_tn) , fp_rate(close_fn,close_tp)]
close_Accuracy   = [Accuracy(close_tp,close_tn,close_fp,close_fn), Accuracy(close_tn,close_tp,close_fn,close_fp)]

print("close precision : ",close_precision)
print("close recall    : ",close_recall)
print("close f_score   : ",close_f_score)
print("close tp_rate   : ",close_tp_rate)
print("close fp_rate   : ",close_fp_rate)
print("close Accuracy  : ",close_Accuracy)

close precision :  [0.42857142857142855, 0.5104166666666666]
close recall    :  [0.7235294117647059, 0.2300469483568075]
close f_score   :  [0.5382932166301969, 0.3171521035598705]
close tp_rate   :  [0.7235294117647059, 0.2300469483568075]
close fp_rate   :  [0.7699530516431925, 0.27647058823529413]
close Accuracy  :  [0.4490861618798956, 0.4490861618798956]


### High

In [155]:
# high_precision  = [precision(high_tp,high_fp) , precision(high_tn,high_fn)]
# high_recall     = [recall(high_tp,high_fn) , recall(high_tn,high_fp)]
# high_f_score    = [f_score(high_precision[0],high_recall[0]) , f_score(high_precision[1],high_recall[1])]
# high_tp_rate    = [tp_rate(high_tp,high_fn), tp_rate(high_tn,high_fp)]
# high_fp_rate    = [fp_rate(high_fp,high_tn) , fp_rate(high_fn,high_tp)]
# high_Accuracy   = [Accuracy(high_tp,high_tn,high_fp,high_fn), Accuracy(high_tn,high_tp,high_fn,high_fp)]

# print("high precision : ",high_precision)
# print("high recall    : ",high_recall)
# print("high f_score   : ",high_f_score)
# print("high tp_rate   : ",high_tp_rate)
# print("high fp_rate   : ",high_fp_rate)
# print("high Accuracy  : ",high_Accuracy)

### Low

In [156]:
# low_precision  = [precision(low_tp,low_fp) , precision(low_tn,low_fn)]
# low_recall     = [recall(low_tp,low_fn) , recall(low_tn,low_fp)]
# low_f_score    = [f_score(low_precision[0],low_recall[0]) , f_score(low_precision[1],low_recall[1])]
# low_tp_rate    = [tp_rate(low_tp,low_fn), tp_rate(low_tn,low_fp)]
# low_fp_rate    = [fp_rate(low_fp,low_tn) , fp_rate(low_fn,low_tp)]
# low_Accuracy   = [Accuracy(low_tp,low_tn,low_fp,low_fn), Accuracy(low_tn,low_tp,low_fn,low_fp)]

# print("low precision : ",low_precision)
# print("low recall    : ",low_recall)
# print("low f_score   : ",low_f_score)
# print("low tp_rate   : ",low_tp_rate)
# print("low fp_rate   : ",low_fp_rate)
# print("low Accuracy ,: ",low_Accuracy)