In [1]:
import numpy as np
import pandas
import seaborn
import matplotlib.pyplot as plt

In [2]:
# Read the Bitstamp BTC/USD 1-minute history CSV into a DataFrame.
df = pandas.read_csv(
    filepath_or_buffer='../input/bitcoin-historical-data/bitstampUSD_1-min_data_2012-01-01_to_2021-03-31.csv'
)

In [3]:
# Peek at the first five rows of the raw frame.
df.head(n=5)

In [4]:
from tabulate import tabulate

# One row per column: its name, non-null count, maximum and minimum.
info = [
    [name, column.count(), column.max(), column.min()]
    for name, column in df.items()
]
print(tabulate(info, headers=['Feature', 'Count', 'Max', 'Min'], tablefmt='orgtbl'))

# Exploratory Data Analysis Part 2

In [5]:
# Count the missing entries in each column.
print(df.isnull().sum(axis=0))

There are more than **1 million** unrecorded timestamps.

In [6]:
# Discard every row that contains at least one missing value.
df = df.dropna(axis=0, how='any')

In [7]:
# Sanity check: total number of missing cells left in the frame.
print(f'total missing values : {df.isna().sum().sum()!s}')

In [8]:
# Keep only the rows whose Timestamp lies within 650000 of the newest one
# (presumably Unix seconds, i.e. roughly the last 7.5 days -- TODO confirm the unit),
# then print the newest Timestamp.
df = df.loc[df['Timestamp'] > df['Timestamp'].max() - 650000]
print(df['Timestamp'].max())

In [9]:
# Renumber the rows from 0; the old index is discarded instead of being kept as a column.
df = df.reset_index(level=None, drop=True)

In [10]:
# Show the first five rows of the trimmed, re-indexed frame.
df.head(n=5)

In [11]:
# Draw one histogram per column, write the figure to disk, then display it.
# The figure is saved before plt.show() because showing releases the current figure;
# a single plt.show() is enough (the original called it twice).
df.hist(figsize = (10,9))
plt.savefig('histogram.png')
plt.show()

In [12]:
# Pairwise correlation of the columns, rendered as an annotated heatmap.
# The heatmap must be drawn BEFORE saving/showing: saving or showing first writes out an
# empty canvas and pushes the heatmap onto a fresh default-size figure.
plt.figure(figsize = (10,10))
m = df.corr()
seaborn_plot = seaborn.heatmap(m, vmin = -1.0, annot = True, square = True)

# Both file names produced by the original cell are kept; each now contains the heatmap.
seaborn_plot.figure.savefig('Correlation.png')
seaborn_plot.figure.savefig("output.png")
plt.show()

In [13]:
# Remove the columns that are not used as model inputs or as the target.
df = df.drop(columns=['Timestamp', 'Low', 'High', 'Volume_(BTC)', 'Weighted_Price'])

In [14]:
# Summarise the remaining columns: name, non-null count, maximum and minimum.
info = [
    [name, column.count(), column.max(), column.min()]
    for name, column in df.items()
]
print(tabulate(info, headers=['Feature', 'Count', 'Max', 'Min'], tablefmt='orgtbl'))

In [15]:
# Two stacked panels over the same slice of rows (the final 500 when the frame holds at
# least 500 rows): opening price on top, traded currency volume underneath.
first_row, last_row = df.shape[0] - 500, df.shape[0]
fig, (ax_open, ax_volume) = plt.subplots(2, 1, figsize=(10, 10))

ax_open.plot(df['Open'].values[first_row:last_row])
ax_open.set_xlabel('Time period')
ax_open.set_ylabel('Opening price')
ax_open.set_title('Opening price of Bitcoin for last 500 timestamps')

ax_volume.plot(df['Volume_(Currency)'].values[first_row:last_row])
ax_volume.set_xlabel('Time period')
ax_volume.set_ylabel('Volume Traded')
ax_volume.set_title('Volume traded of Bitcoin for last 500 timestamps')

fig.savefig('my_image.png')
plt.show()

# Creating the arrays

In [16]:
# a: feature matrix built from every column except 'Close'; b: the 'Close' column as the target.
# copy=True keeps the arrays independent of the DataFrame, as np.array(...) did.
a = df.drop(columns=['Close']).to_numpy(copy=True)
b = df['Close'].to_numpy(copy=True)

In [17]:
# Shapes of the feature matrix and the target vector, one per line.
print(a.shape, b.shape, sep='\n')

# Data Scaling


In [19]:
from sklearn.preprocessing import StandardScaler

# Standardise each feature column to zero mean and unit variance. The fitted scaler is kept
# in `feature_scaler` (instead of being thrown away) so the identical transform can be
# applied to new inputs later.
# NOTE(review): the scaler is fitted on all rows before the train/test split, so test-set
# statistics leak into training; consider fitting on the training portion only.
feature_scaler = StandardScaler()
a = feature_scaler.fit_transform(a)

In [20]:
# StandardScaler works on 2-D input, so the 1-D target is lifted to a single column,
# standardised, and flattened back to 1-D.
# The fitted scaler is kept in `target_scaler` so standardised predictions can be mapped
# back to prices with target_scaler.inverse_transform(...).
target_scaler = StandardScaler()
t = np.reshape(b, (-1,1))
b = target_scaler.fit_transform(t).reshape(-1)

# Creating the time series datasets
Each sample uses the past **500** timestamps, which is approximately 8 hours of 1-minute data.

In [21]:
# Scaling must not change the shapes; print both to confirm.
print(a.shape, b.shape, sep='\n')

In [22]:
size = 500

# Sliding windows: each sample holds the `size` consecutive feature rows that end just
# before row `stop`, and its label is the target value at row `stop` itself.
xa_temp = np.array([a[stop - size:stop] for stop in range(size, a.shape[0])])
ya_temp = np.array([b[stop] for stop in range(size, a.shape[0])])

In [23]:
# Shapes of the windowed inputs and their labels, one per line.
print(xa_temp.shape, ya_temp.shape, sep='\n')

# Train test split

In [24]:
from sklearn.model_selection import train_test_split

# Chronological hold-out: the first 80% of the windows train the model and the final 20%
# test it. The windows are built with stride 1, so neighbouring samples share 499 of their
# 500 rows; a shuffled split would put near-duplicates of every test window into the
# training set and make the test error meaningless. Keeping the order also means slices of
# y_test are consecutive in time, which the "price vs time" plots below rely on.
# random_state is omitted because it has no effect when shuffle=False.
X_train, X_test, y_train, y_test = train_test_split(
    xa_temp, ya_temp, test_size = 0.2, shuffle = False
)

In [25]:
# Training inputs and labels.
print(X_train.shape, y_train.shape, sep='\n')

In [26]:
# Test inputs and labels.
print(X_test.shape, y_test.shape, sep='\n')

# Models (RNN and LSTM)

In [27]:
from tensorflow import keras 
from tensorflow.keras.layers import LSTM
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Dropout
from tensorflow.keras.layers import SimpleRNN
from tensorflow.keras.layers import BatchNormalization

from tensorflow.keras.layers import Input

In [28]:
def layer(hidden1, timesteps = 500, n_features = 2):
    """Build and compile a single-layer SimpleRNN regression model.

    Architecture: Input -> SimpleRNN (tanh, final step only) -> BatchNormalization
    -> Dropout(0.2) -> Dense(1, linear). Compiled with mean-squared-error loss and
    the Adam optimizer.

    Args:
        hidden1: Number of units in the SimpleRNN layer.
        timesteps: Length of each input sequence. Defaults to 500, the value that
            was previously hard-coded.
        n_features: Number of features per time step. Defaults to 2, the value that
            was previously hard-coded.

    Returns:
        The compiled keras Sequential model.
    """
    model = keras.models.Sequential()

    # Input: one sequence of `timesteps` steps with `n_features` values each.
    model.add(Input(shape = (timesteps, n_features)))

    # Recurrent layer; return_sequences=False emits only the final step's output.
    model.add(SimpleRNN(hidden1, activation = 'tanh', return_sequences = False))
    model.add(BatchNormalization())
    model.add(Dropout(0.2))

    # Single linear unit for the regression output.
    model.add(Dense(1, activation = 'linear'))

    model.compile(loss = 'mean_squared_error', optimizer = 'adam')

    return model

In [29]:
# Instantiate the SimpleRNN model with 10 recurrent units and print its layer summary.
model = layer(hidden1=10)
model.summary()

In [30]:
from tensorflow.keras.callbacks import ModelCheckpoint

# Write the model to ./bit_model.h5 whenever val_loss improves, so the file always holds
# the best epoch seen so far.
checkp = ModelCheckpoint(
    filepath='./bit_model.h5',
    monitor='val_loss',
    save_best_only=True,
    verbose=1,
)

In [31]:
import time
# Wall-clock start for the SimpleRNN training run; the matching `end` is taken in a
# later cell, after model.fit has finished.
beg = time.time()

In [None]:
# Train for 10 epochs in mini-batches of 32; `checkp` stores the best epoch by val_loss.
# NOTE(review): the test split doubles as validation data, so checkpoint selection sees
# the test set.
model.fit(
    x=X_train,
    y=y_train,
    batch_size=32,
    epochs=10,
    validation_data=(X_test, y_test),
    callbacks=[checkp],
)

In [None]:
# Wall-clock end of the training run started at `beg`.
end = time.time()

In [None]:
from tensorflow.keras.models import load_model

# Replace the in-memory model with the checkpointed one (the epoch with the lowest val_loss).
model = load_model(filepath='./bit_model.h5')

In [None]:
# Predict the (standardised) closing price for every test window.
pred = model.predict(x=X_test)

In [None]:
# Shape of the raw prediction array.
print(np.shape(pred))

In [None]:
# Flatten the predictions to 1-D so they line up element-wise with y_test.
pred = np.reshape(pred, -1)

In [None]:
from sklearn.metrics import mean_squared_error

# Mean squared error between the test targets and the flattened predictions
# (both are in standardised units, not prices).
print(f'MSE : {mean_squared_error(y_test, pred)!s}')

In [None]:
# Overlay actual and predicted values for test samples 2040-2059 and save the figure.
fig, ax = plt.subplots(figsize=(15, 8))
ax.plot(y_test[2040:2060])
ax.plot(pred[2040:2060])
ax.set_xlabel('Time', fontsize=20)
ax.set_ylabel('Price', fontsize=20)
ax.set_title('Closing Price vs Time (using SimpleRNN)')
ax.legend(['Actual price', 'Predicted price'])
fig.savefig('RNN')
plt.show()

In [None]:
# Elapsed wall-clock time between the `beg` and `end` stamps taken around training.
print(f'Time taken for SimpleRNN model to learn : {end-beg!s} sec.')

In [None]:
def layerls(hidden1, timesteps = 500, n_features = 2):
    """Build and compile a single-layer LSTM regression model.

    Architecture: Input -> LSTM (tanh, final step only) -> BatchNormalization
    -> Dropout(0.2) -> Dense(1, linear). Compiled with mean-squared-error loss and
    the Adam optimizer.

    Args:
        hidden1: Number of units in the LSTM layer.
        timesteps: Length of each input sequence. Defaults to 500, the value that
            was previously hard-coded.
        n_features: Number of features per time step. Defaults to 2, the value that
            was previously hard-coded.

    Returns:
        The compiled keras Sequential model.
    """
    model = keras.models.Sequential()

    # Input: one sequence of `timesteps` steps with `n_features` values each.
    model.add(Input(shape = (timesteps, n_features)))

    # LSTM layer; return_sequences=False emits only the final step's output.
    model.add(LSTM(hidden1, activation = 'tanh', return_sequences = False))
    model.add(BatchNormalization())
    model.add(Dropout(0.2))

    # Single linear unit for the regression output.
    model.add(Dense(1, activation = 'linear'))

    model.compile(loss = "mean_squared_error", optimizer = 'adam')

    return model

In [None]:
# Instantiate the LSTM model with 256 units and print its layer summary.
model = layerls(hidden1=256)
model.summary()

In [None]:
# Write the LSTM to ./bit_model_lstm.h5 whenever val_loss improves.
checkp = ModelCheckpoint(
    filepath='./bit_model_lstm.h5',
    monitor='val_loss',
    save_best_only=True,
    verbose=1,
)

In [None]:
# Restart the wall-clock timer for the LSTM training run (overwrites the earlier `beg`).
beg = time.time()

In [None]:
# Training stopped early because the available TPU quota ran out; the MSE also varies
# between runs because the weights are initialised randomly.
model.fit(
    x=X_train,
    y=y_train,
    batch_size=32,
    epochs=10,
    validation_data=(X_test, y_test),
    callbacks=[checkp],
)

In [None]:
# Wall-clock end of the LSTM training run started at `beg`.
end = time.time()

In [None]:
from tensorflow.keras.models import load_model

# Evaluate the best checkpoint rather than the last-epoch weights, mirroring the SimpleRNN
# workflow. Without this, './bit_model_lstm.h5' is written but never used and the two
# models' MSE values are not comparable (best epoch vs. final epoch).
model = load_model('./bit_model_lstm.h5')

# `z` starts as an alias of the raw prediction array; it is flattened in the next cell.
pred = model.predict(X_test)
z = pred

In [None]:
# Flatten the LSTM predictions to 1-D; `pred` itself keeps its original shape.
z = np.reshape(z, -1)

In [None]:
# Mean squared error of the LSTM on the test windows (standardised units).
print(f'MSE : {mean_squared_error(y_test, z)!s}')

In [None]:
# Overlay actual and predicted values for test samples 2040-2059 and save the figure.
# `pred` is the unflattened prediction array here; a single-column slice plots as one line.
fig, ax = plt.subplots(figsize=(10, 7))
ax.plot(y_test[2040:2060])
ax.plot(pred[2040:2060])
ax.set_xlabel('Time', fontsize=20)
ax.set_ylabel('Price', fontsize=20)
ax.set_title('Closing Price vs Time (using LSTM)')
ax.legend(['Actual price', 'Predicted price'])
fig.savefig('LSTM')
plt.show()

In [None]:
# Elapsed wall-clock time for LSTM training. The unit suffix is added so the message
# matches the SimpleRNN timing report, which ends in ' sec.'.
print('Time taken by LSTM to learn : ' + str(end-beg) + ' sec.')