<a href="https://colab.research.google.com/github/efwoods/tensorflow-mastery/blob/main/Predicting_Bitcoin_Prices_Naive_Bayes_vs_N_Beats_modeling.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Historical Bitcoin Price
!wget https://raw.githubusercontent.com/mrdbourke/tensorflow-deep-learning/main/extras/BTC_USD_2013-10-01_2021-05-18-CoinDesk.csv

In [None]:
import pandas as pd

In [None]:

# Read in the Bitcoin data
# Parse the "Date" column as datetimes and use it as the DataFrame index
df = pd.read_csv('/content/BTC_USD_2013-10-01_2021-05-18-CoinDesk.csv', parse_dates=["Date"], index_col=["Date"])

In [None]:
df.head()

In [None]:
df.info()

In [None]:
# How many samples do we have?
len(df)

In [None]:
df.tail()


In [None]:
# **NOTE** Seasonality is the number of samples per year.

In [None]:
bitcoin_price = pd.DataFrame(df["Closing Price (USD)"]).rename(columns={"Closing Price (USD)": "Price"})

In [None]:
bitcoin_price.head()

In [None]:
import matplotlib.pyplot as plt
bitcoin_price.plot(figsize=(10,7))
plt.ylabel("Price")
plt.xlabel("Date")
plt.legend(fontsize=14)

In [None]:
## Importing time series data with Python's CSV module

In [None]:
import csv
from datetime import datetime

# Read the same CSV with Python's built-in csv module instead of pandas.
timesteps = []
btc_price = []
# newline="" hands line-ending handling to the csv module, as the csv docs recommend for reader input.
with open("/content/BTC_USD_2013-10-01_2021-05-18-CoinDesk.csv", "r", newline="") as f:
  csv_reader = csv.reader(f, delimiter=",")
  next(csv_reader) # skip the header row
  for line in csv_reader:
    timesteps.append(datetime.strptime(line[1], "%Y-%m-%d")) # field 1 is parsed as a YYYY-MM-DD date
    btc_price.append(float(line[2])) # field 2 is read as the closing price (float)

In [None]:
timesteps[:10], btc_price[:10]

In [None]:
import numpy as np

plt.figure(figsize=(10,7))
plt.plot(timesteps, btc_price)
plt.ylabel("BTC Price")
plt.title("Price of bitcoin from 1 Oct 2013 to 18 May 2021", fontsize=16)
plt.xlabel("Date")

In [None]:
# Get bitcoin date array
timesteps = bitcoin_price.index.to_numpy()
prices = bitcoin_price["Price"].to_numpy()

In [None]:
timesteps[:10], prices[:10]

In [None]:
from sklearn.model_selection import train_test_split

## The incorrect way to split the data
X_train, X_test, y_train, y_test = train_test_split(timesteps,
                                                    prices,
                                                    test_size=0.2,
                                                    random_state=42)
X_train.shape, X_test.shape, y_train.shape, y_test.shape

In [None]:
plt.figure(figsize=(10,7))
plt.scatter(X_train, y_train, s=5, label="Train")
plt.scatter(X_test, y_test, s=5, label="Test data")
plt.xlabel("Date")
plt.ylabel("BTC Price")
plt.legend(fontsize=14)

In [None]:
## Create train & test sets for time series

In [None]:
split_size = int(0.8 * len(prices))

# Create train data splits (everything before the split)
X_train, y_train = timesteps[:split_size], prices[:split_size]

# Create test data splits (everything beyond the split)
X_test, y_test = timesteps[split_size:], prices[split_size:]

len(X_train), len(X_test), len(y_train), len(y_test)

In [None]:
# Plot the correctly made splits
plt.figure(figsize=(10,7))
plt.scatter(X_train, y_train, s=5, label="Train")
plt.scatter(X_test, y_test, s=5, label="Test")
plt.xlabel("Date")
plt.ylabel("BTC Price")
plt.legend(fontsize=14)

In [None]:
## Create a plotting function
def plot_split_time_series(timesteps, values, format=".", start=0, end=None, label=None):
  """
  Draw one series of values against its timesteps on the current matplotlib figure.

  Parameters
  ----------
  timesteps : array of timestep values (x-axis)
  values : array of values across time (y-axis)
  format : matplotlib format string for the plot, default "."
  start : index at which plotting starts, default 0 (the first timestep)
  end : index at which plotting stops (exclusive), default None (plot to the end)
  label : legend label for this series, default None (no legend is drawn)
  """
  # A single slice object keeps timesteps and values aligned.
  span = slice(start, end)
  plt.plot(timesteps[span], values[span], format, label=label)

  # Axis decoration shared by every series drawn with this helper
  plt.xlabel("Time")
  plt.ylabel("BTC Price")
  plt.grid(True)
  if label:
    plt.legend(fontsize=14)

def plot_time_series(X_train, X_test, y_train, y_test, training_label="Train_data", testing_label="Test data"):
  """
  Plot the train and test portions of a time series together on a new 10x7 figure.

  Parameters
  ----------
  X_train, X_test : timesteps of the train / test split
  y_train, y_test : values of the train / test split
  training_label : legend label for the train series
  testing_label : legend label for the test series
  """
  plt.figure(figsize=(10,7))
  # Draw train first, then test, each through the single-series helper.
  for steps, series, name in ((X_train, y_train, training_label),
                              (X_test, y_test, testing_label)):
    plot_split_time_series(timesteps=steps, values=series, label=name)


In [None]:
# Test out plotting function
plot_time_series(X_train, X_test, y_train, y_test)

In [None]:
# Hyperparameters:
# Horizon: how far into the future we are predicting
# Window size: number of steps used to predict the horizon

# Hyperparameters:
- **Horizon**: how far into the future we are predicting
- **Window size**: number of steps used to predict the horizon


## Model 0: Naive forecast (baseline)

### Formula

$$\hat{y}_{t} = y_{t-1}$$

For a horizon of 1, the prediction at timestep $t$ ($\hat{y}_{t}$) is equal to the observed value at timestep $t-1$ (the previous timestep).

In [None]:
# Create a naive forecast
naive_forecast = y_test[:-1]
naive_forecast[:10], naive_forecast[-10:]

In [None]:
# Plot naive forecast
plt.figure(figsize=(10,7))
# plot_split_time_series(X_train, y_train, label="Train data")
plot_split_time_series(X_test, y_test, start=350,  format="-", label="Test data")
plot_split_time_series(X_test[1:], naive_forecast, start=350, format="-", label="Naive Forecast")

In [None]:
import tensorflow as tf

In [None]:
# MASE implementation
def mean_absolute_scaled_error(y_true, y_pred):
  """
  Compute the mean absolute scaled error (MASE), assuming no seasonality in the data.

  The MAE of the predictions is divided by the MAE of a one-step naive forecast
  built from y_true itself (each value predicted by the value before it).
  """
  forecast_mae = tf.reduce_mean(tf.abs(y_true-y_pred))

  # MAE of the naive forecast: absolute one-step differences of the true series
  one_step_diffs = y_true[1:] - y_true[:-1]
  naive_mae = tf.reduce_mean(tf.abs(one_step_diffs))

  return forecast_mae / naive_mae



In [None]:
mean_absolute_scaled_error(y_true=y_test[1:], y_pred=naive_forecast).numpy()

In [None]:
# evaluate predictions
# lower scores are better
def evaluate_preds(y_true, y_pred):
  """
  Compute MAE, MSE, RMSE, MAPE and MASE for a set of predictions.

  Both inputs are cast to float32 before any metric is computed.
  Returns a dict keyed "mae", "mse", "rmse", "mape", "mase" whose values
  are the metric tensors converted with .numpy().
  """
  # numpy defaults to float64; do all metric maths in float32
  y_true, y_pred = (tf.cast(t, dtype=tf.float32) for t in (y_true, y_pred))

  # MSE is computed once and reused for RMSE
  squared_error = tf.keras.metrics.mean_squared_error(y_true, y_pred)
  metrics = {
      "mae": tf.keras.metrics.mean_absolute_error(y_true, y_pred), # average absolute gap between prediction and truth
      "mse": squared_error,
      "rmse": tf.sqrt(squared_error),
      "mape": tf.keras.metrics.mean_absolute_percentage_error(y_true, y_pred),
      "mase": mean_absolute_scaled_error(y_true, y_pred),
  }

  return {name: value.numpy() for name, value in metrics.items()}


In [None]:
naive_results = evaluate_preds(y_true=y_test[1:],
                               y_pred=naive_forecast)
naive_results

In [None]:
## Windowing the time series dataset
# Given a series (window) of data, predict the next price (horizon)

# Global variables for window and horizon size
HORIZON = 1 # predict next 1 day
WINDOW_SIZE = 7 # use the past week of Bitcoin data to make the prediction





In [None]:
def get_labelled_windows(x, horizon=HORIZON):
  """
  Split each row of a 2D windowed array into a window and its label(s).

  The last `horizon` columns of every row become the label; all columns
  before them form the window.

  E.g. if horizon is 1:
  Input: [[0, 1, 2, 3, 4, 5, 6, 7]] -> Output: ([[0, 1, 2, 3, 4, 5, 6]], [[7]])

  Parameters
  ----------
  x : 2D array of shape (n_windows, window_size + horizon)
  horizon : number of trailing columns to use as the label

  Returns
  -------
  (windows, labels) : arrays of shape (n_windows, window_size) and (n_windows, horizon)
  """
  # Slice with `-horizon:` rather than index with `-horizon`: indexing picks one
  # column only (wrong for horizon > 1) and drops the label axis.
  return x[:, :-horizon], x[:, -horizon:]


In [None]:
# Test out the window labelling function
test_window, test_label = get_labelled_windows(tf.expand_dims(tf.range(8)+1, axis=0))
print(f"Window: {tf.squeeze(test_window).numpy()} -> Label: {tf.squeeze(test_label).numpy()}")

In [None]:
tf.expand_dims(tf.range(8)+1, axis=0)

In [None]:
def make_windows(x, window_size=WINDOW_SIZE, horizon=HORIZON):
  """
  Turn a 1D array into sequential windows of length window_size, each paired
  with the horizon-sized label that follows it.

  Returns the (windows, labels) pair produced by get_labelled_windows.
  """
  # Each row spans the window plus the horizon that is split off as the label later.
  total_len = window_size + horizon

  # Column offsets inside one window: shape (1, total_len)
  offsets = np.arange(total_len)[np.newaxis, :]

  # Start position of every window that fits in x: shape (n_windows, 1)
  starts = np.arange(len(x) - (total_len - 1))[:, np.newaxis]

  # Broadcasting gives one row of indexes per window: shape (n_windows, total_len)
  window_indexes = offsets + starts

  print(f"Window indexes: \n {window_indexes, window_indexes.shape}")

  # Fancy-index the series with the 2D index array, then split windows from labels.
  windowed_array = x[window_indexes]
  return get_labelled_windows(windowed_array, horizon=horizon)


In [None]:
prices[:7]

In [None]:
full_windows, full_labels = make_windows(prices, window_size=WINDOW_SIZE, horizon=HORIZON)
len(full_windows), len(full_labels)

In [None]:
len(prices)


In [None]:
# View the first 3 windows/labels
for i in range(3):
  print(f"Window: {full_windows[i]} -> Label: {full_labels[i]}")

In [None]:
## Turning windows into training and test sets
full_windows[:5], full_labels[:5]

In [None]:

# make train test splits
def make_train_test_splits(windows, labels, test_split=0.2):
  """
  Split matching windows and labels into train and test portions, in order.

  The first (1 - test_split) fraction goes to train and the remainder to test,
  so the default gives an 80% / 20% train / test split with no shuffling.

  Returns (train_windows, test_windows, train_labels, test_labels).
  """
  # Index of the first test sample
  cut = int(len(windows) * (1-test_split))
  train_part, test_part = slice(None, cut), slice(cut, None)
  return windows[train_part], windows[test_part], labels[train_part], labels[test_part]

In [None]:
train_windows, test_windows, train_labels, test_labels = make_train_test_splits(full_windows, full_labels)
len(train_windows), len(test_windows), len(train_labels), len(test_labels)

In [None]:
train_windows[:5], test_labels[:5]

In [None]:
y_train

In [None]:
# Check to see if train labels are the same (before and after window split)
np.array_equal(np.squeeze(train_labels[:-HORIZON-1]), y_train[WINDOW_SIZE:])


In [None]:
# CREATE A MODELING CHECKPOINT

# Compare each of the best performance with another model's best performance
# i.e. if the model's best performance is on epoch 55, but we are training to 100 epochs
# then we will load and evaluate the model saved on epoch 55.


# Create a modeling checkpoint using the ModelCheckpoint callback from the TensorFlow API

import os

# Create a function to implement a ModelCheckpoint with a specific filename
def create_model_checkpoint(model_name, save_path="model_experiments"):
  """
  Build a ModelCheckpoint callback that saves to <save_path>/<model_name>.

  The callback is created with save_best_only=True and verbose=0.
  """
  checkpoint_path = os.path.join(save_path, model_name)
  return tf.keras.callbacks.ModelCheckpoint(filepath=checkpoint_path,
                                            save_best_only=True,
                                            verbose=0)

In [None]:
import tensorflow as tf
from tensorflow.keras import layers

In [None]:
# Hyperparameters are values you adjust
# Parameters are values the model learns on its own

# set random seed for reproducible results
tf.random.set_seed(42)

# 1. Construct model
model_1 = tf.keras.Sequential([
    layers.Dense(128, activation="relu"),
    layers.Dense(HORIZON, activation='linear')
], name="model_1_dense")

# 2. Compile the model
model_1.compile(loss="mae",
                optimizer=tf.keras.optimizers.Adam(),
                metrics=["mae","mse"])

# 3. Fit the model
model_1.fit(x=train_windows,
            y=train_labels,
            epochs=100,
            verbose=1,
            batch_size=128,
            validation_data=(test_windows, test_labels),
            callbacks=[create_model_checkpoint(model_name=model_1.name)])


In [None]:
# Evaluate model on test data
model_1.evaluate(test_windows, test_labels)

In [None]:
# load in the saved best performing model_1 and evaluate it on test data
model_1 = tf.keras.models.load_model("model_experiments/model_1_dense/")
model_1.evaluate(test_windows, test_labels)

In [None]:
naive_results

In [None]:
## making a forecast on the dataset
# 1. Take in a model
# 2. Take in input data
# 3. Passes the input data to the model's predict() method
# 4. Returns the predictions

def make_preds(model, input_data):
  """
  Use model to make predictions on input_data.

  Calls model.predict(input_data) and returns the result with all
  size-1 dimensions squeezed out.
  """
  raw_preds = model.predict(input_data)
  squeezed_preds = tf.squeeze(raw_preds) # e.g. a 1D array of predictions when each prediction is a single value
  return squeezed_preds



In [None]:
# make predictions using model_1
model_1_preds = make_preds(model_1, test_windows)
len(model_1_preds), model_1_preds[:10]


In [None]:
# Evaluate preds
model_1_results = evaluate_preds(y_true=tf.squeeze(test_labels),
                                 y_pred=model_1_preds)
model_1_results

In [None]:
# plot the model 1 predictions
offset = 300
plt.figure(figsize=(10,7))
plot_split_time_series(timesteps=X_test[-len(test_windows):],
                       values=test_labels[:,],
                       start=offset,
                       label="Test Data")

plot_split_time_series(timesteps=X_test[-len(test_windows):],
                       values=model_1_preds,
                       start=offset,
                       format="-",
                       label="Model 1 Predictions")


In [None]:
HORIZON = 1
WINDOW_SIZE = 30

# Make window data with appropriate horizon and window sizes
full_windows, full_labels = make_windows(prices, window_size=WINDOW_SIZE, horizon=HORIZON)
len(full_windows), len(full_labels)


# Revisit [Lecture 319 - 330](https://www.udemy.com/course/tensorflow-developer-certificate-machine-learning-zero-to-mastery/learn/lecture/27423946#overview)



# NBeats [Algorithm](https://arxiv.org/pdf/1905.10437.pdf)
![figure 1 from N-BEATS paper, the algorithm we're going to build](https://raw.githubusercontent.com/mrdbourke/tensorflow-deep-learning/main/images/10-figure-1-nbeats-paper-annotated.png)

In [None]:
HORIZON = 1
WINDOW_SIZE = 7

In [None]:
# Create NBeatsBlock custom layer
class NBeatsBlock(tf.keras.layers.Layer):
  """
  Single N-BEATS block: a stack of ReLU Dense layers followed by a linear
  "theta" Dense layer whose output is split into a backcast and a forecast.
  """
  def __init__(self, # the constructor takes all the hyperparameters for the layer
               input_size: int,
               theta_size: int,
               horizon: int,
               n_neurons: int,
               n_layers: int,
               **kwargs): # **kwargs is forwarded to the parent Layer class (e.g. input_shape, trainable, name)
    """
    Parameters
    ----------
    input_size : number of leading theta outputs returned as the backcast
    theta_size : number of units in the theta (output) layer
    horizon : number of trailing theta outputs returned as the forecast
    n_neurons : number of units in each hidden Dense layer
    n_layers : number of hidden Dense layers
    **kwargs : passed through to tf.keras.layers.Layer
    """
    super().__init__(**kwargs)
    self.input_size = input_size
    self.theta_size = theta_size
    self.horizon = horizon
    self.n_neurons = n_neurons
    self.n_layers = n_layers

    # Block contains a stack of n_layers fully connected layers, each with ReLU activation
    self.hidden = [tf.keras.layers.Dense(n_neurons, activation="relu") for _ in range(n_layers)]

    # Output of block is a theta layer with linear activation
    self.theta_layer = tf.keras.layers.Dense(theta_size, activation="linear", name="theta")

  def call(self, inputs): # the call method is what runs when the layer is called
    """Run inputs through the hidden stack and theta layer; return (backcast, forecast)."""
    x = inputs
    for layer in self.hidden: # pass inputs through each hidden layer
      x = layer(x)
    theta = self.theta_layer(x)
    # Output the backcast and forecast from theta:
    # the first input_size columns are the backcast, the last horizon columns the forecast

    backcast, forecast = theta[:, :self.input_size], theta[:, -self.horizon:]

    return backcast, forecast

In [None]:
# Set up dummy NBeatsBlock layer to represent inputs and outputs
tf.random.set_seed(42)
dummy_nbeats_block_layer = NBeatsBlock(input_size=WINDOW_SIZE,
                                       theta_size=WINDOW_SIZE+HORIZON,
                                       horizon=HORIZON,
                                       n_neurons=128,
                                       n_layers=4)


In [None]:
# Create dummy inputs (have to be same size as input_size)
dummy_inputs = tf.expand_dims(tf.range(WINDOW_SIZE) + 1, axis=0) # input shape to the model has to reflect Dense layer input requirements (ndim=2)
dummy_inputs

In [None]:
# Pass dummy inputs to dummy NBeatsBlock layer
backcast, forecast = dummy_nbeats_block_layer(dummy_inputs)
# These are the activation outputs of the theta layer (they'll be random because the model has not been trained)
print(f"Backcast: {tf.squeeze(backcast.numpy())}")
print(f"Forecast: {tf.squeeze(forecast.numpy())}")



In [None]:
## Preparing data for the NBeats Algorithm
## Using a performant data pipeline

bitcoin_price.head()

In [None]:
# Add windowed columns
bitcoin_price_nbeats = bitcoin_price.copy()
for i in range(WINDOW_SIZE):
  bitcoin_price_nbeats[f"Price+{i+1}"] = bitcoin_price_nbeats["Price"].shift(periods=i+1)
bitcoin_price_nbeats.head()

In [None]:
# Make features and labels
X = bitcoin_price_nbeats.dropna().drop("Price", axis=1)
y = bitcoin_price_nbeats.dropna()["Price"]

# Make train and test sets
split_size = int(len(X) * 0.8)
X_train, y_train = X[:split_size], y[:split_size]
X_test, y_test = X[split_size:], y[split_size:]
len(X_train), len(y_train), len(X_test), len(y_test)


In [None]:
# Time to make our dataset performant using tf.data API
train_features_dataset = tf.data.Dataset.from_tensor_slices(X_train)
train_labels_dataset = tf.data.Dataset.from_tensor_slices(y_train)

test_features_dataset = tf.data.Dataset.from_tensor_slices(X_test)
test_labels_dataset = tf.data.Dataset.from_tensor_slices(y_test)

# combine labels and features by zipping together -> (features, labels)
train_dataset = tf.data.Dataset.zip((train_features_dataset, train_labels_dataset))
test_dataset = tf.data.Dataset.zip((test_features_dataset, test_labels_dataset))

# Batch and prefetch
BATCH_SIZE = 1024
train_dataset = train_dataset.batch(BATCH_SIZE).prefetch(tf.data.AUTOTUNE)
test_dataset = test_dataset.batch(BATCH_SIZE).prefetch(tf.data.AUTOTUNE)

train_dataset, test_dataset

In [None]:
# Hyperparameters for the NBeats algorithm
N_EPOCHS = 5000
N_NEURONS = 512
N_LAYERS = 4
N_STACKS = 30

INPUT_SIZE = WINDOW_SIZE * HORIZON
THETA_SIZE = INPUT_SIZE + HORIZON

INPUT_SIZE, THETA_SIZE


In [None]:
# Make tensors
tensor_1 = tf.range(10) + 10
tensor_2 = tf.range(10)

# Subtract
subtracted = layers.subtract([tensor_1, tensor_2])

# Add
added = layers.add([tensor_1, tensor_2])

# Get outputs
print(f"Input tensors: {tensor_1.numpy()} & {tensor_2.numpy()}")
print(f"Subtracted: {subtracted.numpy()}")
print(f"Added: {added.numpy()}")

In [None]:
### Building, compiling, and fitting the N-BEATS algorithm
%%time
tf.random.set_seed(42)

#1. setup an instance of the NBeatsBlock
nbeats_block_layer = NBeatsBlock(input_size=INPUT_SIZE,
                                 theta_size=THETA_SIZE,
                                 horizon=HORIZON,
                                 n_neurons=N_NEURONS,
                                 n_layers=N_LAYERS,
                                 name="InitialBlock")

# 2. Create input to stack
stack_input = layers.Input(shape=(INPUT_SIZE), name="stack_input")

# 3. Create initial backcast and forecast input (backwards prediction + horizon prediction)
residuals, forecast = nbeats_block_layer(stack_input)

# 4. Create stacks of block layers
for i, _ in enumerate(range(N_STACKS-1)): # first stack is already created in step 3 above

  # 5. Use the NBeatsBlock to calculate the backcast as well as the forecast
  backcast, block_forecast = NBeatsBlock(
    input_size=INPUT_SIZE,
    theta_size=THETA_SIZE,
    horizon=HORIZON,
    n_neurons=N_NEURONS,
    n_layers=N_LAYERS,
    name=f"NBeatsBlock_{i}"
  )(residuals) # pass in the residuals

# 6. Create the double residual stacking
residuals = layers.subtract([residuals, backcast], name=f"subtract_{i}")
forecast = layers.add([forecast, block_forecast], name=f"add_{i}")

# 7. Put the stack model together
model_7 = tf.keras.Model(inputs=stack_input, outputs=forecast, name="model_7_NBEATS")

# 8. Compile model with MAE loss
model_7.compile(loss="mae",optimizer=tf.keras.optimizers.Adam())

# 9. Fit the model with EarlyStopping and ReduceLROnPlateau callbacks
model_7.fit(train_dataset,
            epochs=N_EPOCHS,
            validation_data=test_dataset,
            verbose=0,
            callbacks=[tf.keras.callbacks.EarlyStopping(monitor="val_loss",patience=200,restore_best_weights=True),tf.keras.callbacks.ReduceLROnPlateau(monitor="val_loss",patience=100,verbose=1)])

In [None]:
# Evaluate N-BEATS model on the test dataset
model_7.evaluate(test_dataset)

# Make predictions with N-BEATS model
model_7_preds = make_preds(model_7, test_dataset)
model_7_preds[:10]

# Evaluate N-BEATS model preds
model_7_results = evaluate_preds(y_true=y_test,y_pred=model_7_preds)
model_7_results

In [None]:
model_1_results

In [None]:
naive_results

In [None]:
# Plotting the N-BEATS architecture
from tensorflow.keras.utils import plot_model
plot_model(model_7)