In [1]:
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import seaborn as sns

import tensorflow.keras as keras
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Dense, BatchNormalization, Dropout, Input, Flatten, PReLU, LeakyReLU, Activation
from tensorflow.keras.losses import MeanSquaredLogarithmicError, MeanAbsolutePercentageError
from tensorflow.keras.optimizers import Adam, SGD, RMSprop
from tensorflow.keras.activations import linear, relu
from tensorflow.keras.metrics import MeanSquaredLogarithmicError, MeanAbsolutePercentageError, RootMeanSquaredError
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from numpy import sqrt
import os

from warnings import filterwarnings
filterwarnings('ignore')

In [2]:
if not os.path.isdir('model/'):
    os.mkdir('model')
best_cp = ModelCheckpoint('best_model.h5', save_best_only=True)
all_cp = ModelCheckpoint('model.h5', save_best_only=False)
es_cb = EarlyStopping(patience=10, restore_best_weights=True)

In [3]:
df = pd.read_csv('./data/coinbaseUSD_1-min_data_2014-12-01_to_2019-01-09.csv')

In [4]:
df.head()

Unnamed: 0,Timestamp,Open,High,Low,Close,Volume_(BTC),Volume_(Currency),Weighted_Price
0,1417411980,300.0,300.0,300.0,300.0,0.01,3.0,300.0
1,1417412040,,,,,,,
2,1417412100,,,,,,,
3,1417412160,,,,,,,
4,1417412220,,,,,,,


In [5]:
df.isnull().sum()

Timestamp                 0
Open                 109069
High                 109069
Low                  109069
Close                109069
Volume_(BTC)         109069
Volume_(Currency)    109069
Weighted_Price       109069
dtype: int64

In [6]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2099760 entries, 0 to 2099759
Data columns (total 8 columns):
 #   Column             Dtype  
---  ------             -----  
 0   Timestamp          int64  
 1   Open               float64
 2   High               float64
 3   Low                float64
 4   Close              float64
 5   Volume_(BTC)       float64
 6   Volume_(Currency)  float64
 7   Weighted_Price     float64
dtypes: float64(7), int64(1)
memory usage: 128.2 MB


In [7]:
df.isnull().sum() / df.shape[0]

Timestamp            0.000000
Open                 0.051944
High                 0.051944
Low                  0.051944
Close                0.051944
Volume_(BTC)         0.051944
Volume_(Currency)    0.051944
Weighted_Price       0.051944
dtype: float64

In [8]:
df = df.dropna()

In [9]:
df.head()

Unnamed: 0,Timestamp,Open,High,Low,Close,Volume_(BTC),Volume_(Currency),Weighted_Price
0,1417411980,300.0,300.0,300.0,300.0,0.01,3.0,300.0
7,1417412400,300.0,300.0,300.0,300.0,0.01,3.0,300.0
51,1417415040,370.0,370.0,370.0,370.0,0.01,3.7,370.0
77,1417416600,370.0,370.0,370.0,370.0,0.026556,9.82555,370.0
1436,1417498140,377.0,377.0,377.0,377.0,0.01,3.77,377.0


In [10]:
df = df.reset_index().drop(['index'], axis=1)

In [11]:
df.head()

Unnamed: 0,Timestamp,Open,High,Low,Close,Volume_(BTC),Volume_(Currency),Weighted_Price
0,1417411980,300.0,300.0,300.0,300.0,0.01,3.0,300.0
1,1417412400,300.0,300.0,300.0,300.0,0.01,3.0,300.0
2,1417415040,370.0,370.0,370.0,370.0,0.01,3.7,370.0
3,1417416600,370.0,370.0,370.0,370.0,0.026556,9.82555,370.0
4,1417498140,377.0,377.0,377.0,377.0,0.01,3.77,377.0


In [12]:
df['Timestamp'].min(), df['Timestamp'].max()

(1417411980, 1546898760)

In [13]:
df['Open'].min(), df['Open'].max()

(0.06, 19891.99)

In [14]:
df['High'].min(), df['High'].max()

(0.06, 19891.99)

In [16]:
df['High'][0:10], df['Open'][0:10]

(0    300.0
 1    300.0
 2    370.0
 3    370.0
 4    377.0
 5    378.0
 6    378.0
 7    378.0
 8    378.0
 9    378.0
 Name: High, dtype: float64,
 0    300.00
 1    300.00
 2    370.00
 3    370.00
 4    377.00
 5    377.75
 6    378.00
 7    378.00
 8    378.00
 9    378.00
 Name: Open, dtype: float64)

In [17]:
df.nunique() / df.shape[0]

Timestamp            1.000000
Open                 0.223386
High                 0.206552
Low                  0.217447
Close                0.223730
Volume_(BTC)         0.738064
Volume_(Currency)    0.998683
Weighted_Price       0.897246
dtype: float64

In [19]:
df['Volume_(Currency)'].unique()

array([3.00000000e+00, 3.70000000e+00, 9.82554980e+00, ...,
       4.77564731e+03, 1.08142419e+04, 7.02118355e+03])

In [20]:
df['Volume_(Currency)'].unique().shape

(1988070,)

In [21]:
df.shape[0]

1990691

In [22]:
df['Weighted_Price'].min(), df['Weighted_Price'].max()

(0.06, 19891.987528)

In [23]:
X = df.iloc[:,df.columns!='Weighted_Price'].values
y = df['Weighted_Price'].values

In [24]:
scaler = StandardScaler()
X = scaler.fit_transform(X)

In [25]:
y = np.log10(y)

In [26]:
X.shape

(1990691, 7)

In [27]:
def train_test_val_split(X, y, val_size=0.15, test_size=0.1):
    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=val_size)
    X_train, X_test, y_train, y_test = train_test_split(X_train, y_train, test_size=test_size)
    return X_train, X_val, X_test, y_train, y_val, y_test

In [28]:
X_train, X_val, X_test, y_train, y_val, y_test = train_test_val_split(X,y)

In [29]:
X_train.shape

(1522878, 7)

In [33]:
model = Sequential()
model.add(Input(shape=(7,)))
model.add(Dense(7))
model.add(Activation(LeakyReLU(0.1)))
model.add(Dense(14))
model.add(Activation(LeakyReLU(0.1)))
model.add(Dense(28))
model.add(Activation(LeakyReLU(0.1)))
model.add(Dense(14))
model.add(Activation(LeakyReLU(0.1)))
model.add(Dense(7))
model.add(Activation(LeakyReLU(0.1)))
model.add(Dense(3))
model.add(Activation(LeakyReLU(0.1)))
model.add(Dense(1, activation='linear'))
model.compile(loss='mse', metrics=[RootMeanSquaredError(name='rmse'), MeanSquaredLogarithmicError(name='msle')])
model.summary()

Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_1 (Dense)              (None, 7)                 56        
_________________________________________________________________
activation (Activation)      (None, 7)                 0         
_________________________________________________________________
dense_2 (Dense)              (None, 14)                112       
_________________________________________________________________
activation_1 (Activation)    (None, 14)                0         
_________________________________________________________________
dense_3 (Dense)              (None, 28)                420       
_________________________________________________________________
activation_2 (Activation)    (None, 28)                0         
_________________________________________________________________
dense_4 (Dense)              (None, 14)               

In [36]:
history = model.fit(X_train, y_train, epochs=100, validation_data=(X_val, y_val), callbacks=[best_cp, all_cp, es_cb])

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
