# Notebook LSTM model

This notebook shows the implementation of an LSTM-RNN model for the forecasting of the price fluctuation of the meme coins: SHIBA, DOGE, and MONA.

In [8]:
# Import all modules needed
import math
import yfinance as yf
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler 
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow import keras
import cv2
from tensorflow.keras.layers import Dense, LSTM, Dropout

In [134]:
# Load the datasets
df_shib = pd.read_csv('C:/Users/deann/Documents/University/Thesis/Thesis_git/datasets/SHIB_data.csv')
df_doge = pd.read_csv('C:/Users/deann/Documents/University/Thesis/Thesis_git/datasets/DOGE_data.csv')
df_mona = pd.read_csv('C:/Users/deann/Documents/University/Thesis/Thesis_git/datasets/MONA_data.csv')

In [135]:
# Function for creating percentage change
def create_perc_change(data, steps):
    y_perc_change = []
    data_len = data.shape[0]
    for i in range(steps, data_len):
        if data[i-steps, 0] == 0:
            y_perc_change.append(0)
        else:
            perc_change = ((data[i, 0] - data[i-steps, 0])/data[i-steps, 0])
            y_perc_change.append(perc_change)
    y_perc_change = np.array(y_perc_change)
    return y_perc_change

In [136]:
# Function for baseline RMSE (calculated between the present day and and test day which is the present day but shifted one day in advance)
def baseline(df):
    y = create_perc_change(df, 1)
    y = np.reshape(y,(y.size, 1))
    training_data_len = math.ceil(len(df)* 0.85)
    baseline_y_test = y[training_data_len:]
    baseline_y_true = y[training_data_len-1:-1]
    baseline_rmse = np.sqrt(np.mean(baseline_y_test - baseline_y_true)**2)
    return baseline_rmse

In [144]:
def model(path, n_lookback, n_forecast, coin, com):
    # Load dataset
    df_pre = pd.read_csv(path)

    # Without compound
    if com == 'without':
        # Feature selection
        df = df_pre[['Close', 'Open', 'High', 'Low', 'Adj Close']].to_numpy()

        # Create percentage array
        y = create_perc_change(df, n_lookback)
        y = np.reshape(y,(y.size, 1))
        
        # Scale the data for normalization
        scaler = MinMaxScaler(feature_range=(-1,1))
        scaler2 = MinMaxScaler(feature_range=(0,1))

        # Remove the first n_lookback days 
        df = df[n_lookback:]
        df_scaled = scaler.fit_transform(df)

        # Length of training data
        training_data_len = math.ceil(len(df_scaled)* 0.85)

        # Create X and Y sets by looking n_lookback days in the past and n_forecast days in the future
        X, Y = [], []
        for i in range(n_lookback, len(y) - n_forecast + 1):
            X.append(df_scaled[i - n_lookback: i])
            Y.append(y[i: i + n_forecast])
        X, Y = np.array(X), np.array(Y)
        Y_reshaped = np.reshape(Y, (Y.shape[0], Y.shape[1]))
        Y_scaled = scaler2.fit_transform(Y_reshaped)

        # Create training and test sets
        X_train, y_train = X[:training_data_len], Y_scaled[:training_data_len]
        X_test, y_test = X[training_data_len:], Y_scaled[training_data_len:]
    
        # LSTM Model
        model = keras.Sequential()
        model.add(LSTM(128, return_sequences=True, input_shape=(X_train.shape[1],5)))
        model.add(Dropout(0.1))
        model.add(LSTM(64, return_sequences=True))
        model.add(Dropout(0.1))
        model.add(LSTM(64, return_sequences=True))
        model.add(Dropout(0.1))
        model.add(LSTM(128, return_sequences=False))
        model.add(Dropout(0.1))
        model.add(Dense(n_forecast))#change
        model.compile(optimizer='adam', loss='mean_squared_error')
        epochs = 30
        history = model.fit(X_train, y_train, batch_size= 16, epochs=epochs, validation_data=(X_test, y_test), verbose=0)

        # Plot training and validation loss
        '''
        loss_train = np.array(history.history['loss'])
        es = np.array([x for x in range(1, epochs+1)])
        loss_val = history.history['val_loss']
        plt.plot(es, loss_train, 'g', label='Training loss')
        plt.plot(es, loss_val, 'b', label='validation loss')
        plt.title(f'Training and Validation loss with {n_lookback} days Lookback and {n_forecast} days Forecast')
        plt.xlabel('Epochs')
        plt.ylabel('Loss')
        plt.legend()
        plt.show()
        '''

        # Predictions using the model
        preds = model.predict(X_test)

        # Unscale the data
        pred_unscaled = scaler2.inverse_transform(preds)
        y_test_unscaled = scaler2.inverse_transform(y_test)

        # Calculate the RMSE value using the predicted values and the test values
        rmse_without = np.sqrt(np.mean(pred_unscaled - y_test_unscaled)**2)

        print(f'RSME of {coin} without compound, lookback={n_lookback} and forecast={n_forecast} is {rmse_without}')
        return rmse_without

    # With compound
    if com == 'with':
        # Feature selection
        df = df_pre[['Close', 'Open', 'High', 'Low', 'Adj Close', 'com']].to_numpy()

        # Create percentage array
        y = create_perc_change(df, n_lookback)
        y = np.reshape(y,(y.size, 1))

        # Scale the data for normalization
        scaler = MinMaxScaler(feature_range=(-1,1))
        scaler2 = MinMaxScaler(feature_range=(0,1))

        # Remove the first n_lookback days 
        df = df[n_lookback:]
        df_scaled = scaler.fit_transform(df)

        # Length of training data
        training_data_len = math.ceil(len(df_scaled)* 0.85)

        # Create X and Y sets by looking n_lookback days in the past and n_forecast days in the future
        X = []
        Y = []
        for i in range(n_lookback, len(y) - n_forecast + 1):
            X.append(df_scaled[i - n_lookback: i])
            Y.append(y[i: i + n_forecast])
        X, Y = np.array(X), np.array(Y)
        Y_reshaped = np.reshape(Y, (Y.shape[0], Y.shape[1]))
        Y_scaled = scaler2.fit_transform(Y_reshaped)

        # Create training and test sets
        X_train, y_train = X[:training_data_len], Y_scaled[:training_data_len]
        X_test, y_test = X[training_data_len:], Y_scaled[training_data_len:]
    
        #LSTM Model
        print('Start lstm')
        model = keras.Sequential()
        model.add(LSTM(128, return_sequences=True, input_shape=(X_train.shape[1],6)))
        model.add(Dropout(0.1))
        model.add(LSTM(64, return_sequences=True))
        model.add(Dropout(0.1))
        model.add(LSTM(64, return_sequences=True))
        model.add(Dropout(0.1))
        model.add(LSTM(128, return_sequences=False))
        model.add(Dropout(0.1))
        model.add(Dense(n_forecast))#change
        model.compile(optimizer='adam', loss='mean_squared_error')
        epochs = 30
        history = model.fit(X_train, y_train, batch_size= 16, epochs=epochs, validation_data=(X_test, y_test), verbose=0)

        # Plot training and validation loss
        '''
        loss_train = np.array(history.history['loss'])
        es = np.array([x for x in range(1, epochs+1)])
        loss_val = history.history['val_loss']
        plt.plot(es, loss_train, 'g', label='Training loss')
        plt.plot(es, loss_val, 'b', label='validation loss')
        plt.title(f'Training and Validation loss with {n_lookback} days Lookback and {n_forecast} days Forecast')
        plt.xlabel('Epochs')
        plt.ylabel('Loss')
        plt.legend()
        plt.show()
        '''

        # Predictions using the model
        preds = model.predict(X_test)

        # Unscale the data
        pred_unscaled = scaler2.inverse_transform(preds)
        y_test_unscaled = scaler2.inverse_transform(y_test)

        # Calculate the RMSE value using the predicted values and the test values
        rmse_with = np.sqrt(np.mean(pred_unscaled - y_test_unscaled)**2)

        print(f'RSME of {coin} with compound, lookback={n_lookback} and forecast={n_forecast} is {rmse_with}')
        return rmse_with
    else:
        print('Type with or without')
    
    return 

### Run models for the coins with and without compound value

In [148]:
# Function for running the models
def run_models(path, name):
    n_days = [1, 7, 14, 30]
    df_rmse = pd.DataFrame({})
    for com in ['with', 'without']:
        for lookback in n_days:
            vals = []
            for forecast in n_days:
                rmse_value = model(path, lookback, forecast, name, com)
                vals.append(rmse_value)
            df_rmse[f'lookback {lookback}'] = np.array(vals)
        df_rmse.to_csv(f'{name}_{com}.csv')
    return df_rmse

In [149]:
# DOGE
doge = run_models('C:/Users/deann/Documents/University/Thesis/Thesis_git/datasets/DOGE_data.csv', 'DOGE')

# SHIB
shib = run_models('C:/Users/deann/Documents/University/Thesis/Thesis_git/datasets/SHIB_data.csv', 'SHIB')

# MONA
mona = run_models('C:/Users/deann/Documents/University/Thesis/Thesis_git/datasets/MONA_data.csv', 'MONA')

Start lstm
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
RSME of DOGE with compound, lookback=1 and forecast=1 is 0.033531852118672476
Start lstm
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
RSME of DOGE with compound, lookback=1 and forecast=7 is 0.0005408629474084816
Start lstm
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/

In [25]:
def show_results():
    coins = ['DOGE', 'SHIB', 'MONA']
    com = ['with', 'without']
    for x in coins:
        for y in com:
            df = pd.read_csv(f'../datasets/{x}_{y}.csv')
            print('-----------------------------------------------------------------')
            print(x, y)
            print(df)
            
    return

In [27]:
# Results from personal tests
show_results()

-----------------------------------------------------------------
DOGE with
   Unnamed: 0  lookback 1  lookback 7  lookback 14  lookback 30
0           0    0.033532    0.035473     0.133565     0.553537
1           1    0.000541    0.010231     0.012066     0.526891
2           2    0.001370    0.187386     0.303103     0.200326
3           3    0.016191    0.275655     0.257948     0.582836
-----------------------------------------------------------------
DOGE without
   Unnamed: 0  lookback 1  lookback 7  lookback 14  lookback 30
0           0    0.019396    0.010266     0.174793     0.578921
1           1    0.002634    0.038693     0.904174     0.632449
2           2    0.006331    0.058777     0.029181     0.082988
3           3    0.008791    0.102983     0.147216     0.955297
-----------------------------------------------------------------
SHIB with
   Unnamed: 0  lookback 1  lookback 7  lookback 14  lookback 30
0           0    0.013127    0.417654     0.562075     0.032512
1