Import the excel file into a workable dataframe

In [None]:
import pandas as pd

# Load the FRED export, keep only rows that have a DGS10 value, and give the
# date column a shorter name. Built as one chain so df is assigned exactly once.
df = (
    pd.read_excel('FRED.xlsx')
    .dropna(subset=['DGS10'])
    .rename(columns={'observation_date': 'Date'})
)
display(df.head())
print(df.shape)

Plot the data to see what we are dealing with--visualize the data

In [None]:
import matplotlib.pyplot as plt
# Line chart of DGS10 against Date, drawn through explicit figure/axes handles.
fig, ax = plt.subplots(figsize=(10, 5))
ax.plot(df['Date'], df['DGS10'])
ax.set_title('FRED Data Over Time')
ax.set_xlabel('Date')
ax.set_ylabel('DGS10')
plt.xticks(rotation=45)  # tilt the date labels so they do not overlap
fig.tight_layout()
plt.show()

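Model for one-step-ahead prediction. It slides a fixed-size window over the series, and during testing every window is filled with actual observed values (including values from the test period), so each prediction is only one step beyond known data.
- This is not exactly what we want for long-term future predictions.
- It predicts only one value ahead.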

In [None]:
import numpy as np
from tensorflow import keras
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.preprocessing import StandardScaler  

def predict_and_evaluate_column_one_step_ahead(column_name):
    """Train an LSTM on the first 80% of a column and score one-step-ahead predictions.

    Each sample is a window of 60 consecutive observations and the target is
    the observation that follows it. At test time every window is built from
    the actual series (not from earlier predictions), so each prediction is
    exactly one step ahead of known data.

    Args:
        column_name (str): Column of the module-level ``df`` to model.

    Returns:
        tuple: ``(predictions_df, mape)`` where ``predictions_df`` has the
        columns 'Date' and 'Predictions' (one row per test observation, in the
        original units) and ``mape`` is the mean absolute percentage error of
        those predictions against the actual test values.

    Raises:
        ValueError: If the training slice is not longer than the window, so no
            training sample can be formed.
    """
    window = 60  # number of consecutive observations the model sees per sample
    training_data_percent = .8  # fraction of the series used for training

    dataset = df.filter([column_name]).values  # 2-D array with one column
    training_data_len = int(np.ceil(len(dataset) * training_data_percent))
    if training_data_len <= window:
        raise ValueError(
            f"Need more than {window} training observations to build a window, "
            f"got {training_data_len}."
        )

    # Fit the scaler on the training slice only. Fitting on the full series
    # would let the test period's mean/variance leak into training.
    scaler = StandardScaler()
    scaler.fit(dataset[:training_data_len])
    scaled_data = scaler.transform(dataset)

    # Build (window -> next value) training pairs from the scaled training slice.
    train_scaled = scaled_data[:training_data_len, 0]
    x_train = np.array(
        [train_scaled[i - window:i] for i in range(window, len(train_scaled))]
    )
    y_train = train_scaled[window:]
    # Keras LSTM layers expect input shaped (samples, timesteps, features).
    x_train = x_train.reshape(x_train.shape[0], window, 1)

    # Stop once the training loss has not improved for 5 epochs and roll back
    # to the best weights seen.
    early_stop = EarlyStopping(monitor='loss', patience=5, restore_best_weights=True)

    model = keras.models.Sequential()
    # First recurrent layer returns the full sequence so a second LSTM can be stacked.
    model.add(keras.layers.LSTM(64, return_sequences=True, input_shape=(window, 1)))
    # Second recurrent layer returns only its final output.
    model.add(keras.layers.LSTM(64, return_sequences=False))
    # Fully connected layer on top of the LSTM output.
    model.add(keras.layers.Dense(128, activation='relu'))
    # Dropout randomly zeroes half of the activations during training (regularization).
    model.add(keras.layers.Dropout(.5))
    # Single output unit: the predicted next (scaled) value.
    model.add(keras.layers.Dense(1))
    model.compile(optimizer='adam',
                  loss='mae',
                  metrics=[keras.metrics.RootMeanSquaredError()])

    model.fit(x_train, y_train, batch_size=32, epochs=20, verbose=1, callbacks=[early_stop])

    # Test windows start `window` rows before the split so the first test
    # observation already has a full history behind it.
    test_scaled = scaled_data[training_data_len - window:, 0]
    x_test = np.array(
        [test_scaled[i - window:i] for i in range(window, len(test_scaled))]
    )
    x_test = x_test.reshape(x_test.shape[0], window, 1)

    predictions = model.predict(x_test)
    predictions = scaler.inverse_transform(predictions)  # back to original units

    # Pair each prediction with the date of the observation it targets.
    prediction_dates = df['Date'][training_data_len:].reset_index(drop=True)
    predictions_df = pd.DataFrame({'Date': prediction_dates, 'Predictions': predictions.flatten()})

    # Mean Absolute Percentage Error against the actual test values.
    # NOTE: a true value of exactly 0 makes this ratio infinite.
    y_true = df[column_name][training_data_len:].reset_index(drop=True)
    y_pred = predictions_df['Predictions']
    mape = np.mean(np.abs((y_true - y_pred) / y_true)) * 100

    return predictions_df, mape



Run the one step ahead algorithm

In [None]:
# Run the one-step-ahead model on DGS10. The function is defined as
# predict_and_evaluate_column_one_step_ahead; the shorter name used here
# before is not defined in this notebook and raised NameError.
predictions_df, mape = predict_and_evaluate_column_one_step_ahead('DGS10')

Model for true backtest/forecast. 

Differences:
- This model feeds its own predictions back into the input: it uses the same sliding-window setup as the one-step-ahead model, but the window is progressively filled with predicted values instead of actual observations.
- It predicts the next value, appends it to the input window, and repeats this for the length of the test set.

In [33]:
from keras_tuner import RandomSearch
from tensorflow.keras.optimizers import Adam

def build_model(hp, input_shape):
    """Build and compile a two-layer LSTM regressor from tuner hyperparameters.

    Args:
        hp: Keras Tuner hyperparameter object used to sample layer sizes,
            dropout rate and learning rate.
        input_shape: Shape passed to the first LSTM layer as ``input_shape``.

    Returns:
        A compiled ``keras.models.Sequential`` model using MAE loss and an
        RMSE metric.
    """
    # Sample every hyperparameter up front, in the order the layers use them.
    first_lstm_units = hp.Int('lstm_units', min_value=32, max_value=128, step=32)
    second_lstm_units = hp.Int('lstm_units2', min_value=32, max_value=128, step=32)
    dense_units = hp.Int('dense_units', min_value=64, max_value=256, step=64)
    dropout_rate = hp.Float('dropout', min_value=0.2, max_value=0.7, step=0.1)
    learning_rate = hp.Float('lr', min_value=1e-4, max_value=1e-2, sampling='LOG')

    layers = [
        # Returns the full sequence so the second LSTM can consume it.
        keras.layers.LSTM(
            units=first_lstm_units,
            return_sequences=True,
            input_shape=input_shape,
        ),
        keras.layers.LSTM(units=second_lstm_units, return_sequences=False),
        keras.layers.Dense(units=dense_units, activation='relu'),
        keras.layers.Dropout(rate=dropout_rate),
        # Single output unit for the predicted next value.
        keras.layers.Dense(1),
    ]

    model = keras.models.Sequential()
    for layer in layers:
        model.add(layer)

    model.compile(
        optimizer=Adam(learning_rate=learning_rate),
        loss='mae',
        metrics=[keras.metrics.RootMeanSquaredError()],
    )
    return model

def predict_and_evaluate_column_true_backtest_tuned(column_name):
    """Tune an LSTM on the first 80% of a column and backtest it recursively.

    Hyperparameters are chosen with a Keras Tuner random search over
    ``build_model``. The best model then forecasts the whole test period
    recursively: it starts from the last 60 training observations, predicts
    one value, appends that prediction to the window, drops the oldest value,
    and repeats. No actual test observation is ever fed to the model.

    Args:
        column_name (str): Column of the module-level ``df`` to model.

    Returns:
        tuple: ``(predictions_df, mape)`` where ``predictions_df`` has the
        columns 'Date' and 'Predictions' (one row per test observation, in the
        original units) and ``mape`` is the mean absolute percentage error of
        those predictions against the actual test values.

    Raises:
        ValueError: If the training slice is not longer than the window, so no
            training sample can be formed.
    """
    window = 60  # number of consecutive observations the model sees per sample
    training_data_percent = .8  # fraction of the series used for training

    dataset = df.filter([column_name]).values  # 2-D array with one column
    training_data_len = int(np.ceil(len(dataset) * training_data_percent))
    if training_data_len <= window:
        raise ValueError(
            f"Need more than {window} training observations to build a window, "
            f"got {training_data_len}."
        )

    # Fit the scaler on the training slice only. Fitting on the full series
    # would let the test period's mean/variance leak into training.
    scaler = StandardScaler()
    scaler.fit(dataset[:training_data_len])
    scaled_data = scaler.transform(dataset)

    # Build (window -> next value) training pairs from the scaled training slice.
    train_scaled = scaled_data[:training_data_len, 0]
    x_train = np.array(
        [train_scaled[i - window:i] for i in range(window, len(train_scaled))]
    )
    y_train = train_scaled[window:]
    # Keras LSTM layers expect input shaped (samples, timesteps, features).
    x_train = x_train.reshape(x_train.shape[0], window, 1)

    # NOTE(review): the objective is the *training* loss, which tends to favor
    # the largest / least regularized configurations; consider a validation
    # split with objective='val_loss'.
    # NOTE(review): Keras Tuner reloads an existing project found under
    # directory/project_name unless overwrite=True is passed, so stale trials
    # from an earlier run may be reused.
    tuner = RandomSearch(
        lambda hp: build_model(hp, (window, 1)),
        objective='loss',
        max_trials=10,
        executions_per_trial=1,
        directory='tuner_dir',
        project_name='fred_lstm'
    )

    tuner.search(x_train, y_train, epochs=20, batch_size=32, verbose=1)
    best_model = tuner.get_best_models(num_models=1)[0]

    # Seed the rolling window with the last `window` training observations.
    test_len = len(scaled_data) - training_data_len
    input_seq = scaled_data[training_data_len - window:training_data_len, 0].copy()
    predictions = []

    for _ in range(test_len):
        x_input = input_seq.reshape((1, window, 1))
        # verbose=0: otherwise Keras prints a progress bar for every single step.
        next_value = best_model.predict(x_input, verbose=0)[0, 0]
        predictions.append(next_value)
        # Slide the window: drop the oldest value, append the new prediction.
        input_seq = np.append(input_seq[1:], next_value)

    predictions = np.array(predictions).reshape(-1, 1)
    predictions = scaler.inverse_transform(predictions)  # back to original units

    # Pair each prediction with the date of the observation it targets.
    prediction_dates = df['Date'][training_data_len:].reset_index(drop=True)
    predictions_df = pd.DataFrame({'Date': prediction_dates, 'Predictions': predictions.flatten()})

    # Mean Absolute Percentage Error against the actual test values.
    # NOTE: a true value of exactly 0 makes this ratio infinite.
    y_true = df[column_name][training_data_len:].reset_index(drop=True)
    y_pred = predictions_df['Predictions']
    mape = np.mean(np.abs((y_true - y_pred) / y_true)) * 100

    return predictions_df, mape

Run the true backtest model

In [None]:
# Run the tuned recursive backtest on the 'DGS10' column. This rebinds the
# notebook-level predictions_df and mape names that the plotting cell reads.
predictions_df, mape = predict_and_evaluate_column_true_backtest_tuned('DGS10')

Function that plots all training data against the predictions and test data.

In [37]:
def plot_predictions(column_name, predictions_df, mape):
    """
    Draw training data, test data and predictions on one chart, then print the MAPE.

    The module-level ``df`` is split at the 80% mark; rows before the split
    are drawn as training data and rows from the split onward as test data.

    Args:
        column_name (str): Column of ``df`` to plot; also used as the y-axis
            label and in the title.
        predictions_df (pandas.DataFrame): DataFrame with 'Date' and 'Predictions' columns.
        mape (float): Mean Absolute Percentage Error to report, in percent.
    """
    training_data_percent = .8

    # Compute the split row once and slice both halves from it.
    split_at = int(np.ceil( len(df) * training_data_percent ))
    train, test = df[:split_at], df[split_at:]

    # (x values, y values, legend label, line color) for each plotted series.
    series_to_plot = (
        (train['Date'], train[column_name], 'Training Data', 'blue'),
        (test['Date'], test[column_name], 'Test Data', 'orange'),
        (predictions_df['Date'], predictions_df['Predictions'], 'Predictions', 'green'),
    )

    plt.figure(figsize=(12,8))
    for x_values, y_values, label, color in series_to_plot:
        plt.plot(x_values, y_values, label=label, color=color)
    plt.xlabel('Date')
    plt.ylabel(column_name)
    plt.title(f'{column_name} Data')
    plt.legend()
    plt.show()
    print(f"Mean Absolute Percentage Error (MAPE) for {column_name}: {mape:.2f}%")

Call plot_predictions function

In [None]:
# Plot DGS10 actuals against whichever predictions_df / mape were assigned
# most recently by one of the model cells.
plot_predictions('DGS10', predictions_df, mape)