In [1]:
import os
import tensorflow as tf
from methods.config import *
from methods.clean_data import Data_Prep
from methods.build_nn_model import build_model
from methods.nn import get_NN_results
from methods.var import get_VAR_results
from methods.model_results import get_model_details
from methods.data_methods import prepare_model_data, remove_outliers, prepare_X
from methods.plot import plot_variables, plot_results, cluster_columns
from methods.residual_bootstrap import get_prediction_intervals
from methods.fund_forecast import reverse_diff
import shap

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
Using TensorFlow backend.
  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Target series to forecast. The reverse-transform step later in this notebook
# only handles the four names listed in the trailing comment.
variable = "CPIAUCSL"  # Select from CPIAUCSL, GS5, RPI and UNRATE

In [3]:
END_YEAR = 2019  # Latest model
OUTPUT_STEPS = 24  # Max forecast
# Look up the stored details for this end year / variable / forecast length.
model_details = get_model_details(END_YEAR, variable, OUTPUT_STEPS)
# Look-back is stored in years and converted to steps at 12 per year
# (presumably monthly observations — confirm against the data source).
look_back_steps = int(model_details["look_back_years"] * 12)
number_of_pca = model_details["number_of_pca"]

# Train Model

In [4]:
# Build the supervised-learning dataset that the model for `variable` is trained on.
data_prep = Data_Prep(LATEST_DATA_PATH, TRANSFORM_PATH)
data_prep.transform_to_supervised_learning(
    NA_CUTOFF,
    [variable],
    # NOTE(review): looks like the number of forecast steps (equals OUTPUT_STEPS
    # today) — confirm against Data_Prep before replacing the literal.
    24,
    start=f"{START_YEAR}-01-01",
    end="2023-01-01",
)
dataset = data_prep.supervised_dataset
full_dataset = dataset["transformed_data"]

Variable BOGMBASE not found in transformation dictionary


In [5]:
# Turn the transformed dataset into the arrays the model consumes. The result
# is indexed by "train_X" / "train_Y" here and by "val_X" / "val_Y" further down.
data = prepare_model_data(
    window=full_dataset,
    X_variables=dataset["X_variables"],
    Y_variables=dataset["Y_variables"],
    val_steps=VAL_STEPS,
    look_back=look_back_steps,
    test_steps=1,
    remove_outlier=REMOVE_OUTLIER,
    number_of_pca=number_of_pca,
    target_variables=dataset["target_variables"],
)

# Drop leading samples so that each training array's length is an exact
# multiple of BATCH_SIZE.
for split_name in ("train_X", "train_Y"):
    split_values = data[split_name]
    data[split_name] = split_values[len(split_values) % BATCH_SIZE :]

In [6]:
# Build the network from the prepared data and the stored model details, then
# print its layer-by-layer summary.
model = build_model(data, model_details)
model.summary()

Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv1d (Conv1D)              (None, 23, 32)            3936      
_________________________________________________________________
max_pooling1d (MaxPooling1D) (None, 11, 32)            0         
_________________________________________________________________
dropout (Dropout)            (None, 11, 32)            0         
_________________________________________________________________
conv1d_1 (Conv1D)            (None, 10, 96)            6240      
_________________________________________________________________
max_pooling1d_1 (MaxPooling1 (None, 5, 96)             0         
_________________________________________________________________
dropout_1 (Dropout)          (None, 5, 96)             0         
____

In [7]:
# Stop training once the *training* loss has failed to improve for 5 epochs.
# NOTE(review): validation data is supplied below, yet "loss" rather than
# "val_loss" is monitored — confirm this is intentional.
early_stopping = tf.keras.callbacks.EarlyStopping(monitor="loss", patience=5)

model.fit(
    x=data["train_X"],
    y=data["train_Y"],
    validation_data=(data["val_X"], data["val_Y"]),
    epochs=EPOCHS,
    batch_size=BATCH_SIZE,
    callbacks=[early_stopping],
    verbose=0,
)

Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


<tensorflow.python.keras.callbacks.History at 0x15e9a7fa550>

# Residual Bootstrapping

In [8]:
import numpy as np

# Join the training and validation samples along the first axis (the default
# for np.concatenate) so residuals can be computed over both splits.
full_X = np.concatenate([data["train_X"], data["val_X"]])
full_Y = np.concatenate([data["train_Y"], data["val_Y"]])

In [9]:
# Residual = observed - predicted. The percentiles of these residuals are later
# *added* to new predictions, so they must describe where observations fell
# relative to the prediction; the opposite sign would mirror the interval
# whenever the residuals are biased or skewed.
residuals = full_Y - model.predict(full_X)

In [10]:
# Residual percentiles along the first axis, computed in one call.
# 5th/95th bound a 90% band and 20th/80th a 60% band; the _10 / _40 suffixes
# match the total share (10% / 40%) left outside each band.
lower_10, lower_40, upper_40, upper_10 = np.percentile(
    residuals, [5, 20, 80, 95], axis=0
)

# Predict Latest

In [11]:
# Second pass over the same source files, this time for prediction.
latest_data_prep = Data_Prep(LATEST_DATA_PATH, TRANSFORM_PATH)
latest_data_prep.transform_to_supervised_learning(
    NA_CUTOFF,
    [variable],
    # NOTE(review): 0 here versus 24 in the training cell — presumably so no
    # future target steps are required and the newest rows are kept; confirm.
    0,
    start=f"{START_YEAR}-01-01",
    end="2023-01-01",
)
latest_dataset = latest_data_prep.supervised_dataset
latest_full_dataset = latest_dataset["transformed_data"]

Variable BOGMBASE not found in transformation dictionary


In [12]:
# Prepare the latest inputs, reusing the X / target variable lists of the
# training dataset.
window = remove_outliers(latest_full_dataset, 0, 0, REMOVE_OUTLIER)
X_data = prepare_X(
    window, dataset["X_variables"], 0, 1, number_of_pca, dataset["target_variables"]
)

# Collect every sliding window of `look_back_steps` consecutive rows; a start
# position is kept only if its window ends inside X_data.
X = [
    X_data[start_ix : start_ix + look_back_steps]
    for start_ix in range(len(X_data))
    if start_ix + look_back_steps <= len(X_data)
]

In [13]:
from numpy import array

# Target column of the transformed dataset; its last timestamp anchors the
# forecast dates.
historical_data = latest_full_dataset[f"{variable}(t)"]

# Run the model on every window and keep the output of the last one.
predicted_series = model.predict(array(X))[-1]

# Month-start dates generated from the last observation; the first generated
# date is dropped, leaving one date per predicted step.
last_observed = historical_data.index[-1]
forecast_length = len(predicted_series)
predicted_index = pd.date_range(
    start=last_observed, periods=forecast_length + 1, freq="MS"
)[1:]
predicted_data = pd.Series(data=predicted_series, index=predicted_index)

# Shift the point forecast by each residual percentile to get the band edges.
predict_lower_10, predict_lower_40, predict_upper_10, predict_upper_40 = (
    predicted_data + offset for offset in (lower_10, lower_40, upper_10, upper_40)
)

In [14]:
def reverse_transform(series, variables_t, variables_t_1):
    """Undo the transformation of ``series`` for the notebook-level ``variable``.

    The branch taken depends on the global ``variable``:

    * ``"GS5"`` / ``"UNRATE"``: ``reverse_diff(variables_t, series)`` is
      converted to an array, its first element dropped, and the result
      returned as ``1 + value / 100``.
    * ``"RPI"``: ``exp(series)``; the two seed arguments are not used.
    * ``"CPIAUCSL"``: ``reverse_diff`` is seeded with
      ``log(variables_t) - log(variables_t_1)``, the result exponentiated and
      its first element dropped.

    ``reverse_diff`` is presumably an inverse-differencing helper seeded with
    its first argument — confirm in ``methods.fund_forecast``.

    Args:
        series: pandas Series of transformed values; its index is reused for
            the returned Series.
        variables_t: raw value used as the seed (latest of the two).
        variables_t_1: raw value one step before ``variables_t``.

    Returns:
        pandas.Series indexed like ``series``.

    Raises:
        ValueError: if ``variable`` is not one of the supported names.
    """
    if variable in ("GS5", "UNRATE"):
        data = 1 + np.array(reverse_diff(variables_t, series))[1:] / 100
    elif variable == "RPI":
        data = np.exp(series)
    elif variable == "CPIAUCSL":
        data = np.exp(
            reverse_diff(np.log(variables_t) - np.log(variables_t_1), series)
        )[1:]
    else:
        # Previously this fell through and failed with an UnboundLocalError on
        # `data`; fail with a message that names the problem instead.
        raise ValueError(
            f"No reverse transform defined for variable {variable!r}; "
            "expected one of CPIAUCSL, GS5, RPI, UNRATE"
        )
    return pd.Series(data=data, index=series.index)

In [15]:
# Raw values at the first two historical timestamps seed the reverse transform;
# the transformed series is therefore passed from its third element onwards.
raw_target = data_prep.raw_data[variable]
first_date, second_date = historical_data.index[0], historical_data.index[1]

reverse_historical_variables_t = raw_target.loc[second_date]
reverse_historical_variables_t_1 = raw_target.loc[first_date]
reverse_historical_data = reverse_transform(
    historical_data.iloc[2:],
    reverse_historical_variables_t,
    reverse_historical_variables_t_1,
)

In [16]:
def reverse_predicted_data_func(predicted_data):
    """Reverse-transform a forecast series, seeded from the end of the history.

    The raw values of ``variable`` at the last and second-to-last timestamps
    of ``historical_data`` are passed to ``reverse_transform`` as the seeds.

    Args:
        predicted_data: series of transformed forecast values.

    Returns:
        Whatever ``reverse_transform`` returns for ``predicted_data``.
    """
    last_date = historical_data.index[-1]
    previous_date = historical_data.index[-2]
    seed_t = data_prep.raw_data[variable].loc[last_date]
    seed_t_1 = data_prep.raw_data[variable].loc[previous_date]
    return reverse_transform(predicted_data, seed_t, seed_t_1)

In [17]:
# Reverse-transform the point forecast and each band edge, in the same order
# as the names on the left-hand side.
(
    reverse_predicted_data,
    reverse_predict_lower_10,
    reverse_predict_lower_40,
    reverse_predict_upper_10,
    reverse_predict_upper_40,
) = (
    reverse_predicted_data_func(forecast)
    for forecast in (
        predicted_data,
        predict_lower_10,
        predict_lower_40,
        predict_upper_10,
        predict_upper_40,
    )
)

In [18]:
import plotly.graph_objects as go

# Forecast on the transformed scale (the values the model is trained on).
# Each entry is (series, legend label, line style). The "0.1" / "0.4" labels
# mark the bands built from the 5th/95th and 20th/80th residual percentiles.
band_10_style = dict(color="cyan", dash="dash")
band_40_style = dict(color="paleturquoise", dash="dash")
trace_specs = [
    (historical_data, "Historical", dict(color="orange")),
    (predicted_data, "Predicted", dict(color="blue")),
    (predict_lower_10, "0.1", band_10_style),
    (predict_lower_40, "0.4", band_40_style),
    (predict_upper_10, "0.1", band_10_style),
    (predict_upper_40, "0.4", band_40_style),
]

fig = go.Figure()
names = set()
for trace_series, trace_label, trace_line in trace_specs:
    fig.add_trace(
        go.Scatter(
            x=trace_series.index,
            y=trace_series.values,
            name=trace_label,
            mode="lines",
            line=trace_line,
            # Lower and upper edges share one legend entry; grouping them means
            # clicking that entry toggles both edges instead of only the first.
            legendgroup=trace_label,
            showlegend=trace_label not in names,
        )
    )
    names.add(trace_label)

# The title suffix tells this figure apart from the reverse-transformed one.
fig.update_layout(
    plot_bgcolor="white",
    title=f"{VARIABLES_MAP[variable]} (transformed scale)",
    xaxis_title="Date",
)

In [19]:
import plotly.graph_objects as go

# Forecast after reverse_transform has been applied to history and predictions.
# Each entry is (series, legend label, line style). The "0.1" / "0.4" labels
# mark the bands built from the 5th/95th and 20th/80th residual percentiles.
band_10_style = dict(color="cyan", dash="dash")
band_40_style = dict(color="paleturquoise", dash="dash")
trace_specs = [
    (reverse_historical_data, "Historical", dict(color="orange")),
    (reverse_predicted_data, "Predicted", dict(color="blue")),
    (reverse_predict_lower_10, "0.1", band_10_style),
    (reverse_predict_lower_40, "0.4", band_40_style),
    (reverse_predict_upper_10, "0.1", band_10_style),
    (reverse_predict_upper_40, "0.4", band_40_style),
]

fig = go.Figure()
names = set()
for trace_series, trace_label, trace_line in trace_specs:
    fig.add_trace(
        go.Scatter(
            x=trace_series.index,
            y=trace_series.values,
            name=trace_label,
            mode="lines",
            line=trace_line,
            # Lower and upper edges share one legend entry; grouping them means
            # clicking that entry toggles both edges instead of only the first.
            legendgroup=trace_label,
            showlegend=trace_label not in names,
        )
    )
    names.add(trace_label)

# The title suffix tells this figure apart from the transformed-scale one.
fig.update_layout(
    plot_bgcolor="white",
    title=f"{VARIABLES_MAP[variable]} (reverse-transformed)",
    xaxis_title="Date",
)