In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches

from datetime import datetime
from sklearn.linear_model import LinearRegression
from fastai.tabular.all import add_datepart
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import Dense, Dropout, LSTM

### Load dataset and convert to daily closing price data

In [None]:
# Load the raw price file and discard its first 2,000,000 rows (positional slice).
df = pd.read_csv("dataset/btcusd.csv").iloc[2000000:]

# False -> collapse the rows to one closing price per day (current setting).
# True  -> keep every row at the file's native (presumably per-minute) resolution.
minute_resolution = False

def get_date(long):
    """Return the calendar day of a millisecond epoch timestamp as 'YYYY-MM-DD'.

    The timestamp is interpreted in the local timezone of the machine running
    the notebook (``datetime.fromtimestamp`` is called without a ``tz``).
    """
    moment = datetime.fromtimestamp(long / 1000)  # ms -> s
    return format(moment, '%Y-%m-%d')
def get_time(long):
    """Return the time of day of a millisecond epoch timestamp as 'HH:MM:SS'.

    The timestamp is interpreted in the local timezone of the machine running
    the notebook (``datetime.fromtimestamp`` is called without a ``tz``).
    """
    moment = datetime.fromtimestamp(long / 1000)  # ms -> s
    return format(moment, '%H:%M:%S')
def get_hour(long):
    """Return the hour of day (0-23, as an int) of a millisecond epoch timestamp.

    The timestamp is interpreted in the local timezone of the machine running
    the notebook (``datetime.fromtimestamp`` is called without a ``tz``).
    """
    seconds = long / 1000
    return datetime.fromtimestamp(seconds).hour

# Tag every row with the calendar day it falls on (see get_date).
df['date_string'] = df['time'].apply(get_date)

# Only the closing price is modelled, so the other raw columns are dropped in
# both resolution modes.
df = df.drop(columns=['time', 'open', 'high', 'low', 'volume'])

if minute_resolution:
    # Keep one sample per original row; just expose the day under the name 'date'.
    df = df.rename(columns={'date_string': 'date'})
    data_df = df
else:
    # Daily close = 'close' of the last row recorded for each day, taken
    # positionally (iloc[-1]) so a trailing NaN is kept rather than skipped.
    # groupby sorts the day strings, and 'YYYY-MM-DD' sorts chronologically.
    #
    # The per-row 'time_string' and 'hour' columns that used to be built here
    # were never read afterwards and cost two extra Python-level passes over
    # every row, so they are no longer computed.
    data_df = (
        df.groupby('date_string')['close']
        .apply(lambda day_closes: day_closes.iloc[-1])
        .rename_axis('date')
        .reset_index()
    )
data_df

# Linear Regression

### Create Train/Test Set

In [None]:
# Fraction of the rows used to fit the linear model; the rest is held out.
train_percent = 0.8

# add_datepart is called for its in-place effect on the frame it is given
# (its return value is ignored), so it is run on a copy to leave data_df intact.
new_df = data_df.copy()
add_datepart(new_df, 'date')

# Discard the 'Elapsed' column that add_datepart produced.
new_df = new_df.drop(columns='Elapsed')
new_df

In [None]:
# Number of training rows as a float; consumers truncate it with int().
train_size = train_percent * len(new_df)
train_size

In [None]:
# Ordered split without shuffling: the first int(train_size) rows fit the
# model, every later row is held out for validation.
train, valid = new_df.iloc[:int(train_size)], new_df.iloc[int(train_size):]

# 'close' is the regression target; all remaining columns are features.
x_train, y_train = train.drop(columns='close'), train['close']
x_valid, y_valid = valid.drop(columns='close'), valid['close']

### Model

In [None]:
# Ordinary least-squares fit of the closing price on the date-derived features.
# fit() returns the estimator itself, so construction and fitting are chained.
model = LinearRegression().fit(x_train, y_train)

### Results

In [None]:
preds = model.predict(x_valid)

# Root-mean-square error of the predictions on the held-out rows.
rms = np.sqrt(np.mean((np.asarray(y_valid) - np.asarray(preds)) ** 2))
print('RMS: ' + str(rms))

# Switches pandas' chained-assignment warning off for the rest of the session;
# `valid` is a slice of new_df, so the column assignment below would otherwise warn.
pd.set_option('mode.chained_assignment', None)
valid['Predictions'] = preds

# Re-apply the row labels of the matching new_df slices (the labels the two
# frames already carry from the split).
train.index, valid.index = new_df.index[:int(train_size)], new_df.index[int(train_size):]

In [None]:
# Font size is set before the axes exist so every label picks it up.
plt.rcParams.update({'font.size': 22})
fig, ax = plt.subplots(figsize=(20, 10))

ax.set_title('Bitcoin Closing Price')
ax.set_xlabel('Dates')
ax.set_ylabel('Closing Price')

# First line: fitted range. Next two lines: held-out closes and the model's predictions.
ax.plot(train['close'])
ax.plot(valid[['close', 'Predictions']])

# The legend is assembled by hand; the hex codes are the colours the three
# lines above are expected to get from matplotlib's default colour cycle.
blue_patch = mpatches.Patch(color='#1f77b4', label='Train/Validation Data')
orange_patch = mpatches.Patch(color='#ff7f0e', label='Test Data')
green_patch = mpatches.Patch(color='#2ca02c', label='Prediction')
ax.legend(handles=[blue_patch, orange_patch, green_patch])

plt.show()

# LSTM

### Choose Hyperparameters

In [None]:
# --- data ---
# NOTE: the LSTM cells in this notebook hard-code a 60/20/20 split and do not
# read this value.
train_percent = 0.8

# --- network ---
num_timesteps = 10  # length of the look-back window fed to the LSTM
num_units = 50      # units in each LSTM layer

# --- training ---
num_epochs, batch_size = 10, 1
loss_type, optimizer_type = 'mean_squared_error', 'adam'

### Create Train/Validation/Test Set

In [None]:
# Independent (deep) copy so the LSTM section cannot alter data_df.
new_data = data_df.copy(deep=True)
new_data

In [None]:
# 60/20/20 train/validation/test split, sized from the frame that is actually
# being split. (These used to be taken from `new_df`, the linear-regression
# frame, which made this section depend on the previous one having been run.)
# The sizes are kept as floats; consumers truncate them with int().
train_size = len(new_data) * 0.6
valid_size = len(new_data) * 0.2
test_size = len(new_data) * 0.2

In [None]:
# Index the prices by date. The frame is rebuilt from data_df instead of being
# mutated in place, so re-running this cell gives the same result rather than
# failing on the already-removed 'date' column.
new_data = data_df.set_index('date')

# 2-D array of the remaining value column(s), one row per date.
dataset = new_data.values

# Contiguous, ordered slices: head -> train, middle -> validation, tail -> test.
train = dataset[:int(train_size), :]
print(len(train))
valid = dataset[int(train_size):int(train_size) + int(valid_size), :]
print(len(valid))
test = dataset[-int(test_size):, :]
print(len(test))

In [None]:
# Learn the min/max from the training rows only. Fitting on the whole series
# would leak the value range of the validation/test periods into training.
# Held-out values may therefore fall outside [0, 1] after scaling.
scaler = MinMaxScaler(feature_range=(0, 1))
scaler.fit(train)
scaled_data = scaler.transform(dataset)

if len(train) <= num_timesteps:
    raise ValueError(
        f"need more than {num_timesteps} training rows to build a window, got {len(train)}"
    )

# Each sample is the num_timesteps scaled prices preceding row i; its target is
# the scaled price at row i. Only rows inside the training slice are used.
x_train = np.array([
    scaled_data[i - num_timesteps:i, 0]
    for i in range(num_timesteps, len(train))
])
y_train = scaled_data[num_timesteps:len(train), 0].copy()

# The LSTM expects (samples, timesteps, features); there is a single feature.
x_train = np.reshape(x_train, (x_train.shape[0], x_train.shape[1], 1))

### Model

In [None]:
# Two stacked LSTM layers followed by a single-unit dense output.
# The first LSTM returns the full sequence so the second one can consume it.
model = Sequential([
    LSTM(units=num_units, return_sequences=True, input_shape=(x_train.shape[1], 1)),
    LSTM(units=num_units),
    Dense(1),
])

model.compile(optimizer=optimizer_type, loss=loss_type)
model.fit(x_train, y_train, batch_size=batch_size, epochs=num_epochs, verbose=2)

### Results

In [None]:
def evaluate(inputs, labels):
    """Predict closing prices for one slice of the series and score them.

    Uses the notebook-level ``model``, ``scaler`` and ``num_timesteps``.

    Args:
        inputs: 2-D array of scaled prices; column 0 is read. Row ``i`` is
            predicted from rows ``i - num_timesteps`` .. ``i - 1``, so the
            first ``num_timesteps`` rows get no prediction.
        labels: unscaled prices for the same rows as ``inputs``. Accepted as
            a column ``(n, 1)`` or flat ``(n,)``.

    Returns:
        Tuple ``(rms, closing_price)``: the root-mean-square error against
        ``labels[num_timesteps:]`` and the inverse-scaled predictions as a
        column array.
    """
    windows = np.array([
        inputs[end - num_timesteps:end, 0]
        for end in range(num_timesteps, inputs.shape[0])
    ])
    # (samples, timesteps) -> (samples, timesteps, 1), the shape the model was fit on.
    windows = np.reshape(windows, (windows.shape[0], windows.shape[1], 1))

    closing_price = model.predict(windows)
    closing_price = scaler.inverse_transform(closing_price.reshape(-1, 1))

    # Force labels into a column so the subtraction is element-wise; a flat
    # (n,) array would otherwise broadcast against (m, 1) into an (m, m) matrix.
    targets = np.asarray(labels)[num_timesteps:].reshape(-1, 1)
    rms = np.sqrt(np.mean(np.power(targets - closing_price, 2)))
    return rms, closing_price

In [None]:
# Scale the training rows with the already-fitted scaler and score the model on them.
inputs = new_data[:int(train_size)].values
inputs = inputs.reshape(-1, 1)
inputs = scaler.transform(inputs)

rms, closing_price = evaluate(inputs, train)
# Printed, like the validation and test scores, instead of a bare string expression.
print('Train RMS: ' + str(rms))

In [None]:
# Validation rows: the int(valid_size) rows that follow the training rows,
# scaled with the already-fitted scaler.
inputs = scaler.transform(
    new_data.iloc[int(train_size):int(train_size) + int(valid_size)].values.reshape(-1, 1)
)

rms, closing_price = evaluate(inputs, valid)
print('Valid RMS: ' + str(rms))

In [None]:
# Test rows: the last int(test_size) rows, scaled with the already-fitted scaler.
inputs = scaler.transform(new_data.iloc[-int(test_size):].values.reshape(-1, 1))

# closing_price from this call is what the plotting cells use afterwards.
rms, closing_price = evaluate(inputs, test)
print('Test RMS: ' + str(rms))

In [None]:
# Frames for plotting. The first num_timesteps test rows have no prediction
# (they only serve as look-back context), so they are shown with the history.
train = new_data[:int(train_size)+int(valid_size)+num_timesteps]

# Explicit copy: the new column is written to an independent frame, not to a
# slice of new_data, so no chained-assignment warning depends on a pandas
# option having been set elsewhere.
test = new_data[-int(test_size)+num_timesteps:].copy()
# closing_price is a column array; flatten it to one value per row.
test['Predictions'] = np.asarray(closing_price).ravel()

In [None]:
# Font size is set before the axes exist so every label picks it up.
plt.rcParams.update({'font.size': 22})
fig, ax = plt.subplots(figsize=(20, 10))

ax.set_title('Bitcoin Closing Price')
ax.set_xlabel('Dates')
ax.set_ylabel('Closing Price')

# First line: history. Next two lines: actual test closes and the LSTM's predictions.
ax.plot(train['close'])
ax.plot(test[['close', 'Predictions']])

# The legend is assembled by hand; the hex codes are the colours the three
# lines above are expected to get from matplotlib's default colour cycle.
blue_patch = mpatches.Patch(color='#1f77b4', label='Train/Validation Data')
orange_patch = mpatches.Patch(color='#ff7f0e', label='Test Data')
green_patch = mpatches.Patch(color='#2ca02c', label='Prediction')
ax.legend(handles=[blue_patch, orange_patch, green_patch])

# Label every 40th position with the date found at that position in new_data.
tick_positions = range(len(new_data.index))[::40]
ax.set_xticks(tick_positions)
ax.set_xticklabels(new_data.index.values[::40], rotation=90)

plt.show()