In [17]:
# **************************************************************************** #
#        _____                                      ______________  __         #
#       / ___/______________ _____  ____  __  __   / ____/_  __/ / / /         #
#       \__ \/ ___/ ___/ __ `/ __ \/ __ \/ / / /  / __/   / / / /_/ /          #
#      ___/ / /__/ /  / /_/ / /_/ / /_/ / /_/ /  / /___  / / / __  /           #
#     /____/\___/_/   \__,_/ .___/ .___/\__, /  /_____/ /_/ /_/ /_/            #
#         ____            /_/ __/_/    /____/                                  #
#        / __ \________  ____/ (_)____/ /_____  _____                          #
#       / /_/ / ___/ _ \/ __  / / ___/ __/ __ \/ ___/                          #
#      / ____/ /  /  __/ /_/ / / /__/ /_/ /_/ / /                              #
#     /_/   /_/   \___/\__,_/_/\___/\__/\____/_/                               #
#                                                                              #
# **************************************************************************** #
#                                                                              #
#                  JAMES REVELLO's WGU CAPSTONE PROJECT                        #
#                         STUDENT ID: 010649181                                #
#                                                                              #
# **************************************************************************** #
#  To use this Notebook, please do the following:                              #
#   1. Press the Play Button, top left of each cell. Checkmark = success.      #
#   2. Click interactive buttons and select fields as needed.                  #
#   3. Click the "Install Dependencies" button if first use                    #
#   4. If "eth_data.csv" is not in current instance:                           #
#       - Click Folder icon in left toolbar.                                   #
#       - Click the Upload Icon and select "eth_data.csv" in browser.          #
# **************************************************************************** #

In [18]:
# **************************************************************************** #
# IMPORT PACKAGES                                                              #
# **************************************************************************** #
import math
import datetime
import tensorflow as tf
import io
import numpy as np
import pandas as pd
import seaborn as sb
import matplotlib.pyplot as plt
import ipywidgets as widgets
from ipywidgets.widgets import Label, FloatProgress, FloatSlider, Button
from ipywidgets.widgets import Layout, HBox, VBox, Output
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_absolute_error
from sklearn.model_selection import train_test_split
from IPython.display import display, clear_output
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Dense, LSTM, Dropout, Input

# Load the Ethereum price history from the Kaggle CSV into `df`.
# (Downloaded from https://www.kaggle.com/datasets/varpit94/ethereum-data)
# Thank you varpit94 for hosting it on Kaggle!
# The path is relative, so "eth_data.csv" must sit in the notebook's working
# directory (see the upload steps in the header cell).

df = pd.read_csv('eth_data.csv')

In [8]:
# **************************************************************************** #
# INSTALL DEPENDENCIES                                                         #
# **************************************************************************** #

install_button = widgets.Button(
    description="Install Dependencies",
    button_style="info"
)

def install_packages(b):
    with output:
        clear_output()
        print("Installing packages...")
        !pip install ipywidgets
        !pip install tensorflow
        !pip install ipython
        !pip install ipywidgets
        !pip install jupyterthemes
        print("Installation complete.")

install_button.on_click(install_packages)

output = widgets.Output()
display(install_button, output)

Button(button_style='info', description='Install Dependencies', style=ButtonStyle())

Output()

In [9]:
# **************************************************************************** #
# SELECT DATE RANGE                                                            #
# **************************************************************************** #

# Parse the 'Date' column into datetimes so the min/max, Timedelta arithmetic
# and range comparisons below operate on dates rather than strings.
df['Date'] = pd.to_datetime(df['Date'])

# Earliest and latest dates in the dataset, formatted as %Y-%m-%d strings.
# Kept for use with CSVs covering other date ranges; not referenced elsewhere
# in the cells shown here.
min_date = df['Date'].min().strftime('%Y-%m-%d')
max_date = df['Date'].max().strftime('%Y-%m-%d')

# The slider starts at the first date in the dataset. No 60-day offset is
# applied here; the 60-day minimum span is enforced when a range is selected
# (see on_date_change / filter_dataframe).
adjusted_start_date = df['Date'].min()

# The slider stops 7 days before the last date in the dataset.
adjusted_end_date = df['Date'].max() - pd.Timedelta(days=7)
# One entry per calendar day between the two bounds (inclusive).
adjusted_date_range = pd.date_range(start=adjusted_start_date, end=adjusted_end_date)

# Date range slider instead of button or dropdown to prevent issues
#     with updating. Options are %Y-%m-%d strings, so the slider's value is a
#     (start, end) tuple of strings; it initially spans the whole range.
date_range_slider = widgets.SelectionRangeSlider(
    options=[date.strftime('%Y-%m-%d') for date in adjusted_date_range],
    index=(0, len(adjusted_date_range) - 1),
    description='Date Range',
    orientation='horizontal',
    layout={'width': '800px'}
)

# Dedicated output area for the slider feedback. Using a cell-specific name
# (instead of the `output` name that several cells rebind) keeps the messages
# under this slider no matter which cells are run afterwards.
date_output = widgets.Output()


def on_date_change(change):
    """Validate the slider selection and echo it below the slider.

    Args:
        change: Mapping whose 'new' entry is the (start, end) pair of
            '%Y-%m-%d' strings, as delivered by the slider's `value` trait.
    """
    with date_output:
        clear_output()
        start_date, end_date = change['new']
        start_date_dt = datetime.datetime.strptime(start_date, '%Y-%m-%d')
        end_date_dt = datetime.datetime.strptime(end_date, '%Y-%m-%d')
        # Enforce the 60-day rule
        if (end_date_dt - start_date_dt).days < 60:
            print("Selected date range must be at least 60 days apart.")
        elif end_date_dt > adjusted_end_date:
            print(f"End date must be before {adjusted_end_date.strftime('%Y-%m-%d')}")
        else:
            print(f"Selected Start Date: {start_date}")
            print(f"Selected End Date: {end_date}")

date_range_slider.observe(on_date_change, names='value')

display(date_range_slider, date_output)

# Show the initial selection without waiting for the first slider move.
on_date_change({'new': date_range_slider.value})

filter_button = widgets.Button(description="Filter DataFrame", button_style="success")
filtered_output = widgets.Output()

# Most recent user-selected slice of `df`; stays None until a valid filter runs.
filtered_df = None


def filter_dataframe(b):
    """Store the rows of `df` inside the slider's date range in `filtered_df`.

    The selection must span at least 60 days and end no later than
    `adjusted_end_date`; otherwise the reason(s) are printed and `filtered_df`
    is left untouched. Any exception is reported in the output area.

    Args:
        b: The clicked button (passed by ipywidgets, unused).
    """
    global filtered_df
    with filtered_output:
        clear_output()
        try:
            start_date, end_date = date_range_slider.value
            span_start = datetime.datetime.strptime(start_date, '%Y-%m-%d')
            span_end = datetime.datetime.strptime(end_date, '%Y-%m-%d')

            too_short = (span_end - span_start).days < 60
            if too_short or span_end > adjusted_end_date:
                # Report every violated rule, not just the first one.
                if too_short:
                    print("Selected date range must be at least 60 days apart.")
                if span_end > adjusted_end_date:
                    print(f"End date must be before {adjusted_end_date.strftime('%Y-%m-%d')}")
                return

            on_or_after_start = df['Date'] >= start_date
            on_or_before_end = df['Date'] <= end_date
            filtered_df = df[on_or_after_start & on_or_before_end]
            print("Filtered DataFrame:")
            # Head and tail together show both ends of the selected range.
            display(filtered_df.head())
            display(filtered_df.tail())
        except Exception as e:
            print(f"Error filtering DataFrame: {e}")

filter_button.on_click(filter_dataframe)

display(filter_button, filtered_output)


SelectionRangeSlider(description='Date Range', index=(0, 1590), layout=Layout(width='800px'), options=('2017-1…

Output()

Button(button_style='success', description='Filter DataFrame', style=ButtonStyle())

Output()

In [10]:
# **************************************************************************** #
# ANALYZE DATA                                                                 #
# **************************************************************************** #

output_area = Output()
histogram_output = Output()
boxplot_output = Output()
linegraph_output = Output()

# Columns shown by the histogram and boxplot buttons.
_DISTRIBUTION_COLUMNS = ['Open', 'High', 'Low', 'Close', 'Volume']


def clear_all():
    """Clear all four output areas so only the latest result is on screen."""
    for area in (output_area, histogram_output, boxplot_output, linegraph_output):
        area.clear_output()


def _has_filtered_data():
    """Return True when a filtered frame exists; otherwise print a hint.

    Without this check every button raised an error when clicked before
    "Filter DataFrame" had produced a frame.
    """
    if globals().get('filtered_df') is None:
        print('No data selected yet - click "Filter DataFrame" first.')
        return False
    return True


def _plot_distributions(draw):
    """Draw one subplot per distribution column using `draw(series)`."""
    plt.figure(figsize=(12, 6))
    for i, col in enumerate(_DISTRIBUTION_COLUMNS):
        plt.subplot(2, 3, i + 1)
        draw(filtered_df[col])
    plt.tight_layout()
    plt.show()


def display_head(change):
    """Show the first rows of the filtered frame."""
    clear_all()
    with output_area:
        if _has_filtered_data():
            display(filtered_df.head())


def display_describe(change):
    """Show summary statistics of the filtered frame."""
    clear_all()
    with output_area:
        if _has_filtered_data():
            display(filtered_df.describe())


def display_histograms(change):
    """Show a histogram for each price/volume column."""
    clear_all()
    with histogram_output:
        if _has_filtered_data():
            _plot_distributions(lambda series: sb.histplot(series))


def display_boxplots(change):
    """Show a boxplot for each price/volume column."""
    clear_all()
    with boxplot_output:
        if _has_filtered_data():
            _plot_distributions(lambda series: sb.boxplot(x=series))


def display_linegraph(change):
    """Plot the closing price over time for the filtered frame."""
    clear_all()
    with linegraph_output:
        if not _has_filtered_data():
            return
        plt.figure(figsize=(20, 6))
        plt.plot(filtered_df["Date"], filtered_df["Close"])
        plt.title("Plot of Closing Prices over Time")
        plt.xlabel("Time")
        plt.ylabel("Closing Price")
        plt.show()


button_head = widgets.Button(description="Head")
button_describe = widgets.Button(description="Describe")
button_histograms = widgets.Button(description="Histograms")
button_boxplots = widgets.Button(description="Boxplots")
button_linegraph = widgets.Button(description="Linegraph")


button_head.on_click(display_head)
button_describe.on_click(display_describe)
button_histograms.on_click(display_histograms)
button_boxplots.on_click(display_boxplots)
button_linegraph.on_click(display_linegraph)


display(VBox([HBox([button_head, button_describe, button_histograms, button_boxplots, button_linegraph]),
              output_area, histogram_output, boxplot_output, linegraph_output]))

VBox(children=(HBox(children=(Button(description='Head', style=ButtonStyle()), Button(description='Describe', …

In [11]:
# **************************************************************************** #
# ADD FEATURES                                                                 #
# **************************************************************************** #


# Parse 'Date' and add 'Year', 'Month', 'Day' columns (applied to filtered_df)
def add_features(df):
    """Return a copy of `df` with calendar features derived from 'Date'.

    'Date' is converted to datetime and 'Year', 'Month' and 'Day' columns are
    added. 'Adj Close' is dropped when present. The input frame is not
    modified.

    Args:
        df: DataFrame with a 'Date' column holding datetimes or parseable
            date strings.

    Returns:
        A new DataFrame with the extra columns.
    """
    # Replace the column rather than writing through .loc[:, 'Date']: on
    # pandas >= 2.0 the .loc form sets values in place, so a string column
    # keeps object dtype and the .dt accessor below would raise.
    featured = df.assign(Date=pd.to_datetime(df['Date']))
    featured = featured.assign(
        Year=featured['Date'].dt.year,
        Month=featured['Date'].dt.month,
        Day=featured['Date'].dt.day,
    )
    return featured.drop(columns=['Adj Close'], errors='ignore')

# Dedicated output area for this cell. A cell-specific name (instead of the
# `output` name several cells rebind) keeps the result under this button even
# after later cells have been run.
features_output = widgets.Output()


def on_button_click(b):
    """Replace `filtered_df` with its feature-augmented copy and preview it.

    Args:
        b: The clicked button (passed by ipywidgets, unused).
    """
    global filtered_df
    with features_output:
        features_output.clear_output()
        # Nothing to augment until the filter cell has produced a frame.
        if globals().get('filtered_df') is None:
            print('No data selected yet - click "Filter DataFrame" first.')
            return
        filtered_df = add_features(filtered_df)
        display(filtered_df.head())

button = widgets.Button(description="Add Features", button_style='info')
button.on_click(on_button_click)
display(button, features_output)


Button(button_style='info', description='Add Features', style=ButtonStyle())

Output()

In [12]:
# **************************************************************************** #
# TRAIN AND PLOT MODEL                                                         #
# **************************************************************************** #

# Function to add features (repeated here in case the previous cell was not run)
def add_features(df):
    """Return a copy of `df` with calendar features derived from 'Date'.

    'Date' is converted to datetime and 'Year', 'Month' and 'Day' columns are
    added. 'Adj Close' is dropped when present. The input frame is not
    modified.

    Args:
        df: DataFrame with a 'Date' column holding datetimes or parseable
            date strings.

    Returns:
        A new DataFrame with the extra columns.
    """
    # Replace the column rather than writing through .loc[:, 'Date']: on
    # pandas >= 2.0 the .loc form sets values in place, so a string column
    # keeps object dtype and the .dt accessor below would raise.
    featured = df.assign(Date=pd.to_datetime(df['Date']))
    featured = featured.assign(
        Year=featured['Date'].dt.year,
        Month=featured['Date'].dt.month,
        Day=featured['Date'].dt.day,
    )
    return featured.drop(columns=['Adj Close'], errors='ignore')


def train_lstm(df, split_date='2021-06-01', epochs=10, batch_size=32):
    """Train a stacked LSTM on closing prices and predict the hold-out period.

    Each sample is a 60-row window of two features (min-max scaled 'Close' and
    its 10-row moving average); the target is the scaled 'Close' of the row
    that follows the window. Rows dated before `split_date` are used for
    training, the remaining rows for testing. The fitted model is saved to
    'trained_lstm_model.keras'.

    Args:
        df: DataFrame with a datetime 'Date' column and a numeric 'Close'
            column.
        split_date: First date that belongs to the test set.
        epochs: Number of training epochs.
        batch_size: Mini-batch size used for training.

    Returns:
        (model, predicted_price, train_data, test_data). `predicted_price` is
        a 1-D array of predicted closes in price units, one per test row from
        position 60 onward. `train_data` and `test_data` are the two splits in
        original (unscaled) units. The last three are also stored in the
        notebook globals of the same name for the evaluation cell.

    Raises:
        ValueError: If either split has 60 rows or fewer, i.e. cannot yield a
            single training or test window.
    """
    global test_data, predicted_price, train_data

    # Window length. The plotting and evaluation cells slice test_data.iloc[60:],
    # so this must stay in step with them.
    window = 60
    feature_columns = ['Close', 'MA_10']

    def make_windows(frame):
        """Turn a scaled frame into (samples, window, features) inputs and targets."""
        values = frame[feature_columns].to_numpy(dtype=float)
        samples = [values[i - window:i] for i in range(window, len(values))]
        inputs = np.array(samples, dtype=float).reshape(-1, window, len(feature_columns))
        return inputs, values[window:, 0]

    prices = df.copy()

    # Moving Average
    prices['MA_10'] = prices['Close'].rolling(window=10).mean()

    # Drop null rows (at least the first 9, which have no complete average).
    prices = prices.dropna()

    # Scale to [0, 1]; gradient-based training is better behaved on inputs of
    # comparable, small magnitude.
    # The target scaler is fitted on the RAW closes. Previously it was fitted
    # after 'Close' had already been scaled, which made inverse_transform an
    # identity and left the "prices" in 0-1 units.
    # NOTE(review): both scalers see the test rows as well; fitting on the
    # training rows only would avoid leaking test-period ranges.
    feature_scaler = MinMaxScaler(feature_range=(0, 1))
    target_scaler = MinMaxScaler(feature_range=(0, 1))
    target_scaler.fit(prices[['Close']])
    scaled = prices.copy()
    scaled[feature_columns] = feature_scaler.fit_transform(prices[feature_columns])

    # Chronological split: everything from split_date onward is held out.
    in_train = prices['Date'] < split_date
    in_test = prices['Date'] >= split_date

    x_train, y_train = make_windows(scaled[in_train])
    x_test, _ = make_windows(scaled[in_test])
    if len(x_train) == 0 or len(x_test) == 0:
        raise ValueError(
            f"Need more than {window} rows on each side of {split_date} "
            f"(got {int(in_train.sum())} training and {int(in_test.sum())} test rows)."
        )

    # Four stacked LSTM layers, each followed by dropout; only the last one
    # collapses the sequence before the single-unit output layer.
    model = Sequential()
    model.add(Input(shape=(x_train.shape[1], x_train.shape[2])))
    for _ in range(3):
        model.add(LSTM(units=50, return_sequences=True))
        model.add(Dropout(0.2))
    model.add(LSTM(units=50))
    model.add(Dropout(0.2))
    model.add(Dense(units=1))

    # Compile the model with the Adam optimizer and MSE loss.
    model.compile(optimizer='adam', loss='mean_squared_error')

    # Fit the model to the training data (few epochs by default to keep the
    # notebook responsive).
    model.fit(x_train, y_train, epochs=epochs, batch_size=batch_size)

    # Predict the test windows and map the outputs back to price units.
    predicted_price = target_scaler.inverse_transform(model.predict(x_test)).flatten()

    # Save the trained model in Keras format for later use
    model.save('trained_lstm_model.keras')

    # Hand back the splits in price units so plots and metrics are directly
    # comparable with predicted_price.
    train_data = prices[in_train]
    test_data = prices[in_test]

    return model, predicted_price, train_data, test_data

def plot_results(train_data, test_data, predicted_price):
    """Plot training closes, test closes and the model's predictions over time.

    Args:
        train_data: Frame with 'Date' and 'Close' columns for the training span.
        test_data: Frame with 'Date' and 'Close' columns for the test span.
        predicted_price: Predictions for the test rows from position 60 onward.
    """
    fig, ax = plt.subplots(figsize=(10, 5), dpi=100)

    ax.plot(train_data['Date'], train_data['Close'], label='Training data')
    ax.plot(test_data['Date'], test_data['Close'], color='blue', label='Actual Price')
    # The first 60 test rows only serve as input for the first prediction.
    prediction_dates = test_data.iloc[60:]['Date']
    ax.plot(prediction_dates, predicted_price, color='orange', label='Predicted Price')

    ax.set_title('Ethereum Closing Price Prediction')
    ax.set_xlabel('Time')
    ax.set_ylabel('Closing Price')
    ax.legend(loc='upper left', fontsize=8)
    plt.show()

# Dedicated output area for the training log and plot. A cell-specific name
# (instead of the `output` name several cells rebind) keeps this cell
# independent of the order in which the others are run.
train_output = widgets.Output()


def on_train_and_plot_button_click(b):
    """Train the LSTM on the full dataset and plot predictions vs. actuals.

    Args:
        b: The clicked button (passed by ipywidgets, unused).
    """
    with train_output:
        train_output.clear_output()
        # NOTE(review): training uses the complete `df`, not the user's
        # `filtered_df` selection - confirm this is intended.
        model, predicted_price, train_data, test_data = train_lstm(df)
        plot_results(train_data, test_data, predicted_price)

train_and_plot_button = widgets.Button(description="Train LSTM and Plot", button_style='info')
train_and_plot_button.on_click(on_train_and_plot_button_click)

display(train_and_plot_button, train_output)


Button(button_style='info', description='Train LSTM and Plot', style=ButtonStyle())

Output()

In [15]:
# **************************************************************************** #
# Evaluate Model                                                               #
# **************************************************************************** #

def evaluate_model(test_data, predicted_price):
    """Print error metrics for the predictions and show a correlation heatmap.

    Args:
        test_data: Frame whose 'Close' values from position 60 onward are the
            ground truth.
        predicted_price: 1-D array of predictions aligned with those rows.
    """
    actual = test_data.iloc[60:]['Close'].values
    forecast = predicted_price

    # Calculate performance metrics
    squared_error = mean_squared_error(actual, forecast)
    metrics = {
        'MSE': squared_error,
        'MAE': mean_absolute_error(actual, forecast),
        'RMSE': math.sqrt(squared_error),
        # Reported as a fraction, not a percentage.
        'MAPE': np.mean(np.abs(forecast - actual) / np.abs(actual)),
    }
    for label, value in metrics.items():
        print(f'{label}: {value}')

    # Heatmap of the pairwise correlations between truth, prediction and error.
    errors = pd.DataFrame({
        'True': actual,
        'Predicted': forecast,
        'Error': forecast - actual,
    })
    plt.figure(figsize=(10, 6))
    sb.heatmap(errors.corr(), annot=True, cmap='coolwarm', vmin=-1, vmax=1)
    plt.title('Heatmap of Prediction Errors')
    plt.show()

output_metrics = widgets.Output()


def on_evaluate_button_click(b):
    """Evaluate the most recent training run stored in the notebook globals.

    Args:
        b: The clicked button (passed by ipywidgets, unused).
    """
    with output_metrics:
        output_metrics.clear_output()
        # test_data / predicted_price only exist after train_lstm() has run;
        # say so instead of failing with a NameError.
        state = globals()
        if 'test_data' not in state or 'predicted_price' not in state:
            print('No training results yet - click "Train LSTM and Plot" first.')
            return
        evaluate_model(state['test_data'], state['predicted_price'])

evaluate_button = widgets.Button(description="Evaluate Model", button_style='success')
evaluate_button.on_click(on_evaluate_button_click)

display(evaluate_button, output_metrics)


Button(button_style='success', description='Evaluate Model', style=ButtonStyle())

Output()

In [19]:
# **************************************************************************** #
# PREDICT NEXT WEEK'S CLOSE                                                    #
# **************************************************************************** #

# Load the model that train_lstm() saved as 'trained_lstm_model.keras'.
# This runs when the cell is executed, so the file must already exist in the
# working directory at that point.
model = load_model('trained_lstm_model.keras')

def predict_next_week(model, df):
    """Forecast the next 7 closing prices by feeding predictions back in.

    The last 60 rows of (scaled 'Close', scaled 10-row moving average) form
    the first input window. After each step the window slides forward by one
    row made of the new prediction and the moving average recomputed from the
    10 most recent closes.

    Args:
        model: Trained Keras model taking input of shape (1, 60, 2).
        df: DataFrame with a numeric 'Close' column, oldest row first.

    Returns:
        1-D array with the 7 predicted closes in price units.

    Raises:
        ValueError: If fewer than 60 rows remain once the rows without a
            complete moving average are dropped.

    NOTE(review): the scaler is refitted on `df`, so it only matches the
    scaling used during training when `df` spans the same value range -
    consider persisting the training scaler alongside the model.
    """
    window, horizon, ma_span = 60, 7, 10
    feature_columns = ['Close', 'MA_10']

    # Work on a copy so the caller's frame is left untouched.
    df = df.copy()
    df['MA_10'] = df['Close'].rolling(window=ma_span).mean()
    df = df.dropna()
    if len(df) < window:
        raise ValueError(
            f"Need at least {window} rows after the {ma_span}-row moving "
            f"average is computed, got {len(df)}."
        )

    raw = df[feature_columns].to_numpy(dtype=float)
    feature_scaler = MinMaxScaler(feature_range=(0, 1))
    scaled_window = feature_scaler.fit_transform(raw)[-window:]

    # Raw closes needed to extend the moving average one step at a time.
    recent_closes = list(raw[-(ma_span - 1):, 0])

    predictions = []
    for _ in range(horizon):
        pred_scaled = float(model.predict(scaled_window[np.newaxis, :, :])[0, 0])
        # Only column 0 (Close) of the inverse transform is meaningful here.
        pred_close = feature_scaler.inverse_transform([[pred_scaled, 0.0]])[0, 0]
        predictions.append(pred_close)

        # MA_10 for the new row is the mean of the latest 10 closes (including
        # the prediction), not an average over the window's MA column.
        recent_closes.append(pred_close)
        next_ma = np.mean(recent_closes[-ma_span:])
        next_row = feature_scaler.transform([[pred_close, next_ma]])[0]
        next_row[0] = pred_scaled  # keep the model's own output, avoid round-trip error
        scaled_window = np.vstack([scaled_window[1:], next_row])

    return np.array(predictions)

output_predictions = widgets.Output()


def on_predict_button_click(b):
    """Forecast next week's closes from the filtered frame and plot them.

    Args:
        b: The clicked button (passed by ipywidgets, unused).
    """
    global filtered_df, model
    with output_predictions:
        output_predictions.clear_output()
        # Nothing to forecast from until the filter cell has produced a frame.
        if globals().get('filtered_df') is None:
            print('No data selected yet - click "Filter DataFrame" first.')
            return
        # Reload on every click so a model retrained after this cell was run
        # is the one that gets used.
        model = load_model('trained_lstm_model.keras')
        predictions = predict_next_week(model, filtered_df)
        print("Predicted next week's closing prices:", predictions)
        plt.figure(figsize=(10,5), dpi=100)
        plt.plot(range(1, 8), predictions, marker='o', linestyle='-', color='blue')
        plt.title("Predicted Next Week's Closing Prices")
        plt.xlabel("Day")
        plt.ylabel("Closing Price")
        plt.show()

predict_button = widgets.Button(description="Predict Next Week", button_style='primary')
predict_button.on_click(on_predict_button_click)

display(predict_button, output_predictions)


Button(button_style='primary', description='Predict Next Week', style=ButtonStyle())

Output()

In [20]:

# Output widget to display the evaluation
output_evaluation = widgets.Output()


# Function to predict closing prices for filtered_df
def predict_for_filtered_df(model, df):
    """Predict the close for every row of `df` that has a full 60-row history.

    Rows without a complete 10-row moving average are dropped first, so the
    k-th prediction belongs to position 60 + k of the frame that remains
    after that drop.

    Args:
        model: Trained Keras model taking input of shape (n, 60, 2).
        df: DataFrame with a numeric 'Close' column, oldest row first.

    Returns:
        1-D array of predicted closes in price units.

    Raises:
        ValueError: If 60 rows or fewer remain after the drop, so that no
            complete input window exists (previously an IndexError).
    """
    window = 60
    feature_columns = ['Close', 'MA_10']

    df = df.copy()
    df['MA_10'] = df['Close'].rolling(window=10).mean()
    df = df.dropna()
    if len(df) <= window:
        raise ValueError(
            f"Need more than {window} rows after the 10-row moving average "
            f"is computed, got {len(df)}."
        )

    feature_scaler = MinMaxScaler(feature_range=(0, 1))
    scaled = feature_scaler.fit_transform(df[feature_columns])

    # One (window, features) sample per predictable row.
    x_test = np.array([scaled[i - window:i] for i in range(window, len(scaled))])

    predictions = model.predict(x_test)
    # The scaler expects two columns; pad with zeros and keep only 'Close'.
    padded = np.hstack((predictions, np.zeros_like(predictions)))
    return feature_scaler.inverse_transform(padded)[:, 0]

# Function to handle button click for evaluation
# (this cell reuses the names on_evaluate_button_click / evaluate_button from
# the "Evaluate Model" cell; the earlier button keeps its original handler)
def on_evaluate_button_click(b):
    """Score the model's predictions on `filtered_df` and show a heatmap.

    Args:
        b: The clicked button (passed by ipywidgets, unused).

    Raises:
        ValueError: If the number of predictions does not match the number of
            aligned actual values.
    """
    with output_evaluation:
        output_evaluation.clear_output()

        if globals().get('filtered_df') is None:
            print('No data selected yet - click "Filter DataFrame" first.')
            return

        # Mirror the preprocessing inside predict_for_filtered_df: rows lacking
        # a full 10-row moving average are dropped BEFORE the 60-row windows
        # are cut, so the actuals must be taken from that same reduced frame.
        # Slicing filtered_df.iloc[60:] directly paired each prediction with
        # the close from 9 rows earlier.
        usable = filtered_df.assign(
            MA_10=filtered_df['Close'].rolling(window=10).mean()
        ).dropna()
        if len(usable) <= 60:
            print("Selected range is too short to evaluate: more than 60 rows "
                  "are needed after the 10-row moving average is computed.")
            return

        # Predict the closing prices for filtered_df
        predictions = predict_for_filtered_df(model, filtered_df)
        actual_values = usable['Close'].iloc[60:].values

        # A mismatch would mean the two preprocessing paths have diverged;
        # fail loudly rather than silently truncating.
        if len(actual_values) != len(predictions):
            raise ValueError(
                f"Got {len(predictions)} predictions for "
                f"{len(actual_values)} actual values."
            )

        # Calculate performance metrics
        mse = mean_squared_error(actual_values, predictions)
        mae = mean_absolute_error(actual_values, predictions)
        rmse = math.sqrt(mse)
        mape = np.mean(np.abs(predictions - actual_values) / np.abs(actual_values))

        # Print performance metrics
        print(f'MSE: {mse}')
        print(f'MAE: {mae}')
        print(f'RMSE: {rmse}')
        print(f'MAPE: {mape}')

        # Heatmap of the pairwise correlations between truth, prediction and error.
        errors = pd.DataFrame({'True': actual_values, 'Predicted': predictions, 'Error': predictions - actual_values})
        plt.figure(figsize=(10, 6))
        sb.heatmap(errors.corr(), annot=True, cmap='coolwarm', vmin=-1, vmax=1)
        plt.title('Heatmap of Prediction Errors')
        plt.show()

# Create a button widget for evaluation
evaluate_button = widgets.Button(description="Evaluate Predictions", button_style='success')
evaluate_button.on_click(on_evaluate_button_click)

# Display the button and output
display(evaluate_button, output_evaluation)


Button(button_style='success', description='Evaluate Predictions', style=ButtonStyle())

Output()

In [None]:
from google.colab import drive
drive.mount('/content/drive')