# LSTM RNN Stock Predictor Using Fear and Greed Index

In [147]:
import numpy as np
import pandas as pd
import plotly.express as px

In [148]:
# Set the random seed for reproducibility
from numpy.random import seed
seed(1)
from tensorflow import random
random.set_seed(2)

## Data Preparation

In [149]:
# Load the fear and greed sentiment data for Bitcoin
df = pd.read_csv(
    './data/btc_sentiment.csv', 
    index_col='date', 
    infer_datetime_format=True, 
    parse_dates=True,
)

df = df.drop(columns='fng_classification')
df.head()

Unnamed: 0_level_0,fng_value
date,Unnamed: 1_level_1
2019-07-29,19
2019-07-28,16
2019-07-27,47
2019-07-26,24
2019-07-25,42


In [150]:
# Load the historical closing prices for Bitcoin
df2 = pd.read_csv(
    './data/btc_historic.csv', 
    index_col='Date', 
    infer_datetime_format=True, 
    parse_dates=True,
)['Close'].to_frame()

df2 = df2.sort_index()

df2.tail()

Unnamed: 0_level_0,Close
Date,Unnamed: 1_level_1
2019-07-25,9882.429688
2019-07-26,9847.450195
2019-07-27,9478.320313
2019-07-28,9531.769531
2019-07-29,9529.889648


In [151]:
# Join the data into a single DataFrame
df = df.join(df2, how="inner")

df.tail()

Unnamed: 0,fng_value,Close
2019-07-25,42,9882.429688
2019-07-26,24,9847.450195
2019-07-27,47,9478.320313
2019-07-28,16,9531.769531
2019-07-29,19,9529.889648


In [152]:
df.head()

Unnamed: 0,fng_value,Close
2018-02-01,30,9114.719727
2018-02-02,15,8870.820313
2018-02-03,40,9251.269531
2018-02-04,24,8218.049805
2018-02-05,11,6937.080078


In [153]:
# This function accepts the column number for the features (X) and the target (y)
# It chunks the data up with a rolling window of Xt-n to predict Xt
# It returns a numpy array of X and y
def window_data(df, window, feature_col_number, target_col_number):
    X = []
    y = []
    for i in range(len(df) - window - 1):
        features = df.iloc[i:(i + window), feature_col_number]
        target = df.iloc[(i + window), target_col_number]
        X.append(features)
        y.append(target)
    return np.array(X), np.array(y).reshape(-1, 1)

In [200]:
# Predict Closing Prices using a 10 day window of previous fng values
# Then, experiment with window sizes anywhere from 1 to 10 and see how the model performance changes
window_size = 1

# Column index 0 is the 'fng_value' column
# Column index 1 is the `Close` column
feature_column = 0
target_column = 1
X, y = window_data(df, window_size, feature_column, target_column)

In [201]:
# Use 70% of the data for training and the remainder for testing
# To avoid the dataset being randomized, we will manually split the data using array slicing.
split = int(0.7 * len(X))
X_train = X[: split]
X_test = X[split:]
y_train = y[: split]
y_test = y[split:]

In [202]:
from sklearn.preprocessing import MinMaxScaler

# Create a MinMaxScaler object
scaler = MinMaxScaler()

# Fit the MinMaxScaler object with the training feature data X_train
scaler.fit(X_train)

# Scale the features training and testing sets
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# Fit the MinMaxScaler object with the training target data y_train
scaler.fit(y_train)

# Scale the target training and testing sets
y_train = scaler.transform(y_train)
y_test = scaler.transform(y_test)

In [203]:
# Reshape the features for the model
X_train = X_train.reshape((X_train.shape[0], X_train.shape[1], 1))
X_test = X_test.reshape((X_test.shape[0], X_test.shape[1], 1))

---

## Build and Train the LSTM RNN

Train the model with the training data.

In [204]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout

In [205]:
# Build the LSTM model
num_units = 11
dropout_fraction = 0.2

model = Sequential([
    
    LSTM(
        units=num_units,
        return_sequences=True,
        input_shape=(X_train.shape[1], 1),
    ),
    Dropout(dropout_fraction),

    # Layer 2
    LSTM(units=num_units),
    Dropout(dropout_fraction),

    # Output layer
    Dense(1),
])

In [206]:
# Compile the model
model.compile(optimizer="adam", loss="mean_squared_error")

In [207]:
# Summarize the model
model.summary()

Model: "sequential_13"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm_27 (LSTM)              (None, 1, 11)             572       
                                                                 
 dropout_27 (Dropout)        (None, 1, 11)             0         
                                                                 
 lstm_28 (LSTM)              (None, 11)                1012      
                                                                 
 dropout_28 (Dropout)        (None, 11)                0         
                                                                 
 dense_13 (Dense)            (None, 1)                 12        
                                                                 
Total params: 1,596
Trainable params: 1,596
Non-trainable params: 0
_________________________________________________________________


In [208]:
# Train the model
# Use at least 10 epochs
# Do not shuffle the data
model.fit(X_train, y_train, epochs=12, shuffle=False, batch_size=1, verbose=1)

Epoch 1/12
Epoch 2/12
Epoch 3/12
Epoch 4/12
Epoch 5/12
Epoch 6/12
Epoch 7/12
Epoch 8/12
Epoch 9/12
Epoch 10/12
Epoch 11/12
Epoch 12/12


<keras.callbacks.History at 0x7fe5a1a67040>

---

## Model Performance

Evaluate the model using the test data. 

In [209]:
# Evaluate the model
model.evaluate(X_test, y_test)



0.1257622390985489

In [210]:
# Make some predictions
predicted = model.predict(X_test)



In [211]:
# Recover the original prices instead of the scaled version
scaler.fit(y[split: ])

predicted_prices = scaler.inverse_transform(predicted)
real_prices = scaler.inverse_transform(y_test.reshape(-1, 1))

In [212]:
# Create a DataFrame of Real and Predicted values
stocks = pd.DataFrame({
    "Real": real_prices.ravel(),
    "Predicted": predicted_prices.ravel()
}, index = df.index[-len(real_prices): ]) 

stocks.tail()

Unnamed: 0,Real,Predicted
2019-07-25,10977.770978,5562.270996
2019-07-26,11101.000112,4757.622559
2019-07-27,11061.916867,5646.029297
2019-07-28,10649.481115,4912.111816
2019-07-29,10709.20093,5856.609375


In [213]:
# Plot the real vs predicted values as a line chart
px.line(
    stocks, 
    width=1000, 
    height=600, 
    title="BTC Price Prediction Based on Crypto Fear & Greed Index History",
    labels=dict(
        index='Date', 
        value='BTC Price ($)', 
        variable='BTC Price ($)'
    )
)