## Import Library

In [176]:
import numpy as np

import neuralnetwork

from neuralnetwork import layers
from neuralnetwork import activations

## Test Model Summary

### Model from IF4071 Lecture Slide

In [177]:
model1 = neuralnetwork.Sequential()

model1.add(layers.LSTM(1, input_shape=(2, 2)))
model1.add(layers.Dense(1, activation=activations.ReLU))

model1.summary()


Model: Sequential
Layer (type)                       Output Shape                       Param #       
lstm_1 (LSTM)                      (None, 1)                          16               
--------------------------------------------------------------------------------
dense_1 (Dense)                    (None, 1)                          2                
Total params: 18



### Model with 32 Timesteps, 6 Features, and 10 Units LSTM

In [178]:
model2 = neuralnetwork.Sequential()

model2.add(layers.LSTM(10, input_shape=(32, 6)))
model2.add(layers.Dense(1, activation=activations.ReLU))

model2.summary()


Model: Sequential
Layer (type)                       Output Shape                       Param #       
lstm_1 (LSTM)                      (None, 10)                         680              
--------------------------------------------------------------------------------
dense_1 (Dense)                    (None, 1)                          11               
Total params: 691



## Test LSTM

### IF4071 Lecture Forward Propagation Example

In [179]:
# Define Weights

_x = np.array([[[1, 2]], [[.5, 3]]])
uf = np.array([[.7, .45]])
ui = np.array([[.95, .8]])
uc = np.array([[.45, .25]])
uo = np.array([[.6, .4]])

wf = np.array([[.1]])
wi = np.array([[.8]])
wc = np.array([[.15]])
wo = np.array([[.25]])

bf = np.array([[.15]])
bi = np.array([[.65]])
bc = np.array([[.2]])
bo = np.array([[.1]])

In [180]:
layer1 = layers.LSTM(1, input_shape=(2, 2))

layer1.set_w(wf, wi, wc, wo, bf, bi, bc, bo)
layer1.set_u(uf, ui, uc, uo)

layer1._init_cp()
layer1._init_hp()

layer1.forward_propagation(_x, debug=True)

Timestep 1
ft	: [[0.8519528]]
it	: [[0.96083428]]
~ct	: [[0.81775408]]
Ct	: [[0.78572615]]
ot	: [[0.81757448]]
ht	: [[0.5363134]]

Timestep 2
ft	: [[0.87030197]]
it	: [[0.98118397]]
~ct	: [[0.84980402]]
Ct	: [[1.5176331]]
ot	: [[0.84993334]]
ht	: [[0.77198111]]



### LSTM Forward Propagation with Random Weights

In [181]:
layer2 = layers.LSTM(1, input_shape=(10, 4))

layer2.init_layer()

layer2.forward_propagation(np.random.rand(10, 1, 4), debug=True)

Timestep 1
ft	: [[0.7931159]]
it	: [[0.78563256]]
~ct	: [[0.97894898]]
Ct	: [[0.76909419]]
ot	: [[0.76603451]]
ht	: [[0.49516657]]

Timestep 2
ft	: [[0.89421653]]
it	: [[0.872839]]
~ct	: [[0.98102541]]
Ct	: [[1.54401397]]
ot	: [[0.76172148]]
ht	: [[0.69529358]]

Timestep 3
ft	: [[0.905968]]
it	: [[0.90885862]]
~ct	: [[0.98199773]]
Ct	: [[2.29132435]]
ot	: [[0.77203303]]
ht	: [[0.75640057]]

Timestep 4
ft	: [[0.88493104]]
it	: [[0.84590884]]
~ct	: [[0.98113859]]
Ct	: [[2.85761785]]
ot	: [[0.78302092]]
ht	: [[0.77787717]]

Timestep 5
ft	: [[0.93956411]]
it	: [[0.92145445]]
~ct	: [[0.99538397]]
Ct	: [[3.60211617]]
ot	: [[0.84206026]]
ht	: [[0.84080916]]

Timestep 6
ft	: [[0.91638895]]
it	: [[0.9071891]]
~ct	: [[0.99043165]]
Ct	: [[4.19944825]]
ot	: [[0.81566689]]
ht	: [[0.81529974]]

Timestep 7
ft	: [[0.930176]]
it	: [[0.88712963]]
~ct	: [[0.99295787]]
Ct	: [[4.78710832]]
ot	: [[0.82332157]]
ht	: [[0.82320714]]

Timestep 8
ft	: [[0.93440474]]
it	: [[0.92494456]]
~ct	: [[0.98872536]]
Ct	: 

## LSTM Forward Propagation with BitCoin Data

### Prepare Dataset

In [182]:
import pandas as pd

pd.options.mode.chained_assignment = None

training_data   = pd.read_csv("dataset/bitcoin/training.csv")
test_data       = pd.read_csv("dataset/bitcoin/test.csv")

In [183]:
training_data

Unnamed: 0,Date,Open,High,Low,Close,Volume,Market Cap
0,"Jul 31, 2017",2763.24,2889.62,2720.61,2875.34,860575000,45535800000
1,"Jul 30, 2017",2724.39,2758.53,2644.85,2757.18,705943000,44890700000
2,"Jul 29, 2017",2807.02,2808.76,2692.80,2726.45,803746000,46246700000
3,"Jul 28, 2017",2679.73,2897.45,2679.73,2809.01,1380100000,44144400000
4,"Jul 27, 2017",2538.71,2693.32,2529.34,2671.78,789104000,41816500000
...,...,...,...,...,...,...,...
1551,"May 02, 2013",116.38,125.60,92.28,105.21,-,1292190000
1552,"May 01, 2013",139.00,139.89,107.72,116.99,-,1542820000
1553,"Apr 30, 2013",144.00,146.93,134.05,139.00,-,1597780000
1554,"Apr 29, 2013",134.44,147.49,134.00,144.54,-,1491160000


In [184]:
test_data

Unnamed: 0,Date,Open,High,Low,Close,Volume,Market Cap
0,"Aug 07, 2017",3212.78,3397.68,3180.89,3378.94,1482280000,52987300000
1,"Aug 06, 2017",3257.61,3293.29,3155.6,3213.94,1105030000,53720900000
2,"Aug 05, 2017",2897.63,3290.01,2874.83,3252.91,1945700000,47778200000
3,"Aug 04, 2017",2806.93,2899.33,2743.72,2895.89,1002120000,46276200000
4,"Aug 03, 2017",2709.56,2813.31,2685.14,2804.73,804797000,44666400000
5,"Aug 02, 2017",2727.13,2762.53,2668.59,2710.67,1094950000,44950800000
6,"Aug 01, 2017",2871.3,2921.35,2685.61,2718.26,1324670000,47321800000


### Predict Missing Values in Volume Feature Using Linear Regression 

In [185]:
def change_date_format(date):
    month_ = { 'Jan': '1', 'Feb': '2', 'Mar': '3', 'Apr': '4', 'May': '5', 'Jun': '6', 'Jul': '7', 'Aug': '8', 'Sep': '9', 'Oct': '10', 'Nov': '11', 'Dec': '12' }

    year    = date[8:12]
    month   = month_[date[0:3]]
    day     = date[4:6]

    return '{0}/{1}/{2}'.format(year, month, day)

In [186]:
training_data['Date'] = training_data['Date'].apply(lambda date: change_date_format(date))
training_data = training_data.set_index(['Date'])[::-1]

test_data['Date'] = test_data['Date'].apply(lambda date: change_date_format(date))
test_data = test_data.set_index(['Date'])[::-1]

In [187]:
training_data

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Market Cap
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2013/4/28,135.30,135.98,132.10,134.21,-,1500520000
2013/4/29,134.44,147.49,134.00,144.54,-,1491160000
2013/4/30,144.00,146.93,134.05,139.00,-,1597780000
2013/5/01,139.00,139.89,107.72,116.99,-,1542820000
2013/5/02,116.38,125.60,92.28,105.21,-,1292190000
...,...,...,...,...,...,...
2017/7/27,2538.71,2693.32,2529.34,2671.78,789104000,41816500000
2017/7/28,2679.73,2897.45,2679.73,2809.01,1380100000,44144400000
2017/7/29,2807.02,2808.76,2692.80,2726.45,803746000,46246700000
2017/7/30,2724.39,2758.53,2644.85,2757.18,705943000,44890700000


In [188]:
test_data

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Market Cap
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2017/8/01,2871.3,2921.35,2685.61,2718.26,1324670000,47321800000
2017/8/02,2727.13,2762.53,2668.59,2710.67,1094950000,44950800000
2017/8/03,2709.56,2813.31,2685.14,2804.73,804797000,44666400000
2017/8/04,2806.93,2899.33,2743.72,2895.89,1002120000,46276200000
2017/8/05,2897.63,3290.01,2874.83,3252.91,1945700000,47778200000
2017/8/06,3257.61,3293.29,3155.6,3213.94,1105030000,53720900000
2017/8/07,3212.78,3397.68,3180.89,3378.94,1482280000,52987300000


In [189]:
training_data['Market Cap'] = training_data['Market Cap'].apply(lambda e: float(e.replace(",","")))
test_data['Market Cap'] = test_data['Market Cap'].apply(lambda e: float(e.replace(",","")))

training_data['Volume'] = training_data['Volume'].apply(lambda e: float(e.replace(",","")) if e != '-' else '-')
test_data['Volume'] = test_data['Volume'].apply(lambda e: float(e.replace(",","")) if e != '-' else '-')


In [190]:
test_data

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Market Cap
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2017/8/01,2871.3,2921.35,2685.61,2718.26,1324670000.0,47321800000.0
2017/8/02,2727.13,2762.53,2668.59,2710.67,1094950000.0,44950800000.0
2017/8/03,2709.56,2813.31,2685.14,2804.73,804797000.0,44666400000.0
2017/8/04,2806.93,2899.33,2743.72,2895.89,1002120000.0,46276200000.0
2017/8/05,2897.63,3290.01,2874.83,3252.91,1945700000.0,47778200000.0
2017/8/06,3257.61,3293.29,3155.6,3213.94,1105030000.0,53720900000.0
2017/8/07,3212.78,3397.68,3180.89,3378.94,1482280000.0,52987300000.0


In [191]:
from sklearn.model_selection import train_test_split

temp = training_data[training_data['Volume'] != '-']

X_temp = temp.loc[:, ['Open', 'Close', 'Low', 'High', 'Market Cap']]
y_temp = temp.loc[:, ['Volume']]

X_temp_train, X_temp_test, y_temp_train, y_temp_test = train_test_split(X_temp, y_temp, test_size=0.1, random_state=20)

In [192]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import explained_variance_score

reg = LinearRegression().fit(X_temp, y_temp)

y_pred = reg.predict(X_temp_test)

explained_variance_score(y_temp_test, y_pred)

0.9012728856978025

In [193]:
need_fill = training_data[training_data['Volume'] == '-'].loc[:, ['Open', 'Close', 'Low', 'High', 'Market Cap']]
training_data.loc[need_fill.index.to_list(), ['Volume']] = reg.predict(need_fill)

In [None]:
from sklearn.preprocessing import MinMaxScaler

scaler = MinMaxScaler(feature_range=(0, 1))

X_train = pd.DataFrame(scaler.fit_transform(training_data), columns=['Open', 'High', 'Low', 'Close', 'Volume', 'Market Cap'])
X_test = pd.DataFrame(scaler.transform(test_data), columns=['Open', 'High', 'Low', 'Close', 'Volume', 'Market Cap'])