## **Example 1:**

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.layers import Dropout
from keras.layers import Activation
from keras.models import load_model

In [None]:
df = pd.read_csv('https://raw.githubusercontent.com/YALINYAN-YU/DAV-Capstone/main/consolidated_coin_data.csv')
df.head()

Unnamed: 0,Currency,Date,Open,High,Low,Close,Volume,Market Cap
0,tezos,"Dec 04, 2019",1.29,1.32,1.25,1.25,46048752,824588509
1,tezos,"Dec 03, 2019",1.24,1.32,1.21,1.29,41462224,853213342
2,tezos,"Dec 02, 2019",1.25,1.26,1.2,1.24,27574097,817872179
3,tezos,"Dec 01, 2019",1.33,1.34,1.25,1.25,24127567,828296390
4,tezos,"Nov 30, 2019",1.31,1.37,1.31,1.33,28706667,879181680


In [None]:
df.count 

<bound method DataFrame.count of          Currency          Date  Open  ... Close      Volume   Market Cap
0           tezos  Dec 04, 2019  1.29  ...  1.25  46,048,752  824,588,509
1           tezos  Dec 03, 2019  1.24  ...  1.29  41,462,224  853,213,342
2           tezos  Dec 02, 2019  1.25  ...  1.24  27,574,097  817,872,179
3           tezos  Dec 01, 2019  1.33  ...  1.25  24,127,567  828,296,390
4           tezos  Nov 30, 2019  1.31  ...  1.33  28,706,667  879,181,680
...           ...           ...   ...  ...   ...         ...          ...
28939  bitcoin-sv  May 02, 2013  3.78  ...  3.37           0   58,287,979
28940  bitcoin-sv  May 01, 2013  4.29  ...  3.80           0   65,604,596
28941  bitcoin-sv  Apr 30, 2013  4.40  ...  4.30           0   74,020,918
28942  bitcoin-sv  Apr 29, 2013  4.37  ...  4.38           0   75,388,964
28943  bitcoin-sv  Apr 28, 2013  4.30  ...  4.35           0   74,636,938

[28944 rows x 8 columns]>

We split the dataset up into a training and test set, and standardise its features. Standardisation is good practice as it reduces overfitting in cases where variance for some features may be higher than others.



In [None]:
# We want 80% of the data to be used for training, and 20% for testing
n_train_rows = int(df.shape[0]*.8)-1

# Split into train and test sets but keep all 9 columns
train = df.iloc[:n_train_rows, :]
test = df.iloc[n_train_rows:, :]

# The total rows of the two datasets should equal the total amount of rows in your CSV
print(train.shape)
print(test.shape)

(23154, 8)
(5790, 8)


In [None]:
#check the data type
df.dtypes



Currency      object
Date          object
Open          object
High          object
Low           object
Close         object
Volume        object
Market Cap    object
dtype: object

In [None]:
df['Market Cap'] = df['Market Cap'].astype(float)

ValueError: ignored

In [23]:
df['Volume'] = df['Volume'].astype(float)

ValueError: ignored

In [24]:
df['Open'] = df['Open'].astype(float)

ValueError: ignored

In [22]:
df['Market Cap'] = pd.to_numeric(df['Market Cap'],errors='coerce')

In [None]:
# Normalise features
sc = MinMaxScaler(feature_range = (0, 1))
training_set_scaled = sc.fit_transform(train.values)
test_set_scaled = sc.fit_transform(test.values)

ValueError: ignored

The LSTM model requires us to organise the data in blocks. Our data is grouped at one-minute intervals and we’ll use blocks of 50 minutes to predict the next block.



In [None]:
steps = 50

In [None]:
# Prepare the training data
x_train = []
y_train = []

for i in range(steps, training_set_scaled.shape[0]-steps):
    x_train.append(training_set_scaled[i-steps:i, :])
    y_train.append(training_set_scaled[i, :])

x_train, y_train = np.array(x_train), np.array(y_train)
print(x_train.shape)

In [None]:
# Prepare the test data
x_test = []
y_test = []

for i in range(steps, test_set_scaled.shape[0]):
    x_test.append(test_set_scaled[i-steps:i, :])
    y_test.append(test_set_scaled[i, :])

x_test, y_test = np.array(x_test), np.array(y_test)
print(x_test.shape)

**The model**
Now it’s time to train our model. We choose what type of model we want to use; sequential in this case, and we decide our hyper-parameters.

The model I’m using is relatively straightforward, containing 5 hidden layers with 50 neurons each, and a dropout in between every one of those hidden layers. We use the mean-squared-error loss function, the Adam optimiser, set the batch size at 32, and go through this network for 10 epochs.

Deciding on hyper-parameters： still searching;
(and it’s worth testing out multiple options to understand what works best on your test data and in production. )

Optimising ：still searching

use 1 dense;
use MSE as loss;

In [None]:
model = Sequential()

In [None]:
epochs = 10

model.add(LSTM(units=50, return_sequences = True, input_shape = (x_train.shape[1],9)))
model.add(Dropout(0.2))
model.add(LSTM(units=50, return_sequences = True))
model.add(Dropout(0.2))
model.add(LSTM(units=50, return_sequences = True))
model.add(Dropout(0.2))
model.add(LSTM(units=50))
model.add(Dropout(0.2))
model.add(Dense(units=9))
model.compile(loss="mse", optimizer="adam")
model.fit(x_train, y_train, batch_size = 32, epochs = epochs)
model.summary

model.save("multiple_features_"+str(steps)+"_steps_"+str(epochs)+"_epochs.h5")
print("Saved model to disk")

### **Example2: mock the temperature case**

In [None]:
from keras.models import Sequential

from keras import layers

from keras.optimizers import RMSprop

runs tough 2 dense layer; (compare the above use 1 dense layer)

use mae as the loss; (compare the above use mse as the loss)

In [None]:
model = Sequential()
# model.add(layers.Flatten(input_shape=(lookback // step, float_data.shape[-1])))
# the original dateset is 2-dementional in my case, should i still apply layers.flatten here?


model.add(layers.Dense(32, activation='relu'))
model.add(layers.Dense(1))

In [None]:
model.compile(RMSprop(),loss='mae')
history=model.fit_generator(train=gen,
                      steps_per_epoch=500,
                      epochs=20,
                      validation_data=val_gen,
                      validation_steps=val_steps)


NameError: ignored

## another LSTM try:


In [18]:
from keras.models import Sequential

from keras import layers

from keras.optimizers import RMSprop


num_layers = 1
learning_rate = 0.005
size_layer = 128
timestamp = 5
epoch = 500
dropout_rate = 0.6

In [19]:
dates = pd.to_datetime(df.iloc[:, 0]).tolist()

ParserError: ignored

## Example 3:Useing gates recurrent layer:(cheaper to run compared with LSTM)



In [None]:
from keras.models import Sequential

from keras import layers

from keras.optimizers import RMSprop

In [None]:
model = Sequential()
model.add(layer.GRU(32, INPUT_SHAPE=(None, float_data.shape[-1])))
model.add(layers.Dense(1))

model.compile(optimizer=RMSprop(),loss='mae')
history=model.fit_generator(train=gen,
                      steps_per_epoch=500,
                      epochs=20,
                      validation_data=val_gen,
                      validation_steps=val_steps)

NameError: ignored

## Example 4:
Using LSTM-CNN combined


Parallel ML comparing:



ML example1:

Step 1: Choosing the data;
Step 2: Choosing the model;
Step 3: Building the Model;
Step 4: Training the Model


import numpy as np
import matplotlib.pyplot as mpl
from sklearn.preprocessing import scale
from TFANN import ANNR
from google.colab import files