In [1]:
import os
import matplotlib.pyplot as plt
import pathlib
import PIL

# Data Manipulation
import numpy as np
import pandas as pd

# Deep Learning
import tensorflow as tf
import tensorflow_datasets as tfds

# TensorFlow
from tensorflow.keras import Sequential, layers
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.layers import Normalization
from tensorflow.keras.layers.experimental import preprocessing
from tensorflow.keras.preprocessing import image_dataset_from_directory
from tensorflow.keras.wrappers.scikit_learn import KerasClassifier
from tensorflow.keras import models, layers


# Sklearn
from sklearn import set_config
set_config(display="diagram")

from sklearn.compose import make_column_transformer, make_column_selector
from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV
from sklearn.pipeline import make_pipeline


# 1 - DATA MANIPULATION
import pandas as pd
import numpy as np

# 2 - DATA VISUALISATION
import matplotlib.pyplot as plt
import seaborn as sns

# 3 - STATISTICS
from statsmodels.graphics.gofplots import qqplot

# 4 - MACHINE LEARNING
## 4.1 - Preprocessing

### 4.1.1 - Scalers
from sklearn.preprocessing import StandardScaler, RobustScaler, MinMaxScaler


### 4.1.3 - Crossvalidation, Training, Model
from sklearn.model_selection import cross_validate
from sklearn.model_selection import train_test_split


import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split


# Make all figures tiny for readability purpose
from matplotlib import rcParams
rcParams['figure.figsize'] = (5,3)
import macrobond_data_api as mda
from macrobond_data_api.web import WebClient

from macrobond_data_api.web import WebClient
from macrobond_data_api.common.types import SearchFilter

from sklearn.pipeline import make_pipeline
from sklearn.pipeline import Pipeline
import numpy as np

from sklearn.pipeline import Pipeline, make_pipeline
from sklearn.impute import KNNImputer
from sklearn.preprocessing import RobustScaler
import pandas as pd


In [2]:
Target = 'Advance Estimate From BEA'
Drop = ['GDP Nowcast', 'Quarter being forecasted', 'Advance Estimate From BEA', 'Publication Date of Advance Estimate',
       'Days until advance estimate', 'Forecast Error', 'Data releases']

In [None]:
gdpnow = pd.read_csv('data/gdpnow_daily_df.csv', index_col='Unnamed: 0', parse_dates=True) #date_parser=dateparse)
# Ensure that load_df index is in the same date format
gdpnow.index = pd.to_datetime(gdpnow.index)

In [3]:
X = gdpnow.drop(columns=Drop)
y = gdpnow['Advance Estimate From BEA']
y= y[-X.shape[0]:]

In [30]:
y = y.fillna(method='ffill')
y = pd.Series(y)

90

In [4]:

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2)

# Replace NaN values with forward fill
y = y.fillna(method='ffill')
y = pd.Series(y)

# Replace NaN values with the mean
# # # Calculate the mean excluding NaN values
mean_val_y_test = np.nanmean(y_test)
y_test[np.isnan(y_test)] = mean_val_y_test
y_test = pd.Series(y_test)


In [5]:
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler


# Build the preprocessing pipeline
preproc = make_pipeline(
    SimpleImputer(strategy='mean'),# Impute missing values via linear interpolation
    StandardScaler()  # Standardize features by removing the mean and scaling to unit variance
)

X_train = preproc.fit_transform(X_train)
X_test = preproc.transform(X_test)
new_data = preproc.transform(new_data)

In [32]:
new_data = preproc.transform(new_data)
len(new_data)



90

In [6]:
def create_sequences(X, y, n_steps):
    Xs, ys = [], []
    for i in range(len(X) - n_steps):
        Xs.append(X[i:(i + n_steps)])
        ys.append(y[i + n_steps])
    return np.array(Xs), np.array(ys)

# Number of time steps you want to look back
n_steps = 62

# Create sequences using the updated function
X_train, y_train = create_sequences(X_train, y_train, n_steps)
X_test, y_test = create_sequences(X_test, y_test, n_steps)

print(f"Shape of X_train: {X_train.shape}")
print(f"Shape of y_train: {y_train.shape}")

print(f"Shape of X_test: {X_test.shape}")
print(f"Shape of y_test: {y_test.shape}")


Shape of X_train: (7002, 62, 674)
Shape of y_train: (7002,)
Shape of X_test: (1704, 62, 674)
Shape of y_test: (1704,)


In [35]:
import numpy as np

def create_single_sequence_for_prediction(data, n_steps):
    # Assuming data is already preprocessed and in the correct shape
    if len(data) >= n_steps:
        # Extract the last n_steps values for the prediction
        X = np.array(data[-n_steps:]).reshape(1, n_steps, -1)  # Reshape for model input
        return X
    else:
        print("Not enough data to create a sequence.")
        return None

# Example usage with your single series 'new_data' and 'n_steps'
n_steps = 62  # Number of time steps you want to look back
# Assuming 'new_data' is a numpy array or a list with the recent observations
X_for_prediction = create_single_sequence_for_prediction(new_data, n_steps)

# Now 'X_for_prediction' is ready to be used with your model for making the prediction
# Assuming you have a trained model named 'model'
# prediction = model.predict(X_for_prediction)


In [36]:
X_for_prediction.shape

(1, 62, 674)

In [27]:
def create_single_sequence(data, n_steps):
    Xs, ys = [], []
    for i in range(len(data) - n_steps):
        Xs.append(data[i:(i + n_steps)])
        ys.append(data[i + n_steps])
    return np.array(Xs), np.array(ys)

# Example usage with your single series 'new_data' and 'n_steps'
n_steps = 62  # Number of time steps you want to look back
new_data = create_single_sequence(new_data, n_steps)


In [8]:
# Count and print the number of NaNs in each dataset
print(f"NaN in X_train: {np.isnan(X_train).sum()}")
print(f"NaN in y_train: {np.isnan(y_train).sum()}")
print(f"NaN in X_test: {np.isnan(X_test).sum()}")
print(f"NaN in y_test: {np.isnan(y_test).sum()}")

NaN in X_train: 0
NaN in y_train: 0
NaN in X_test: 0
NaN in y_test: 0


In [9]:
# ## model architechture ##
from tensorflow.keras import models, layers

# The Normalization Layer
# normalizer = Normalization()  # Instantiate a "normalizer" layer
# normalizer.adapt(X_train) # "Fit" it on the train set
# Assuming X_train is your preprocessed training data ready for the RNN

timesteps = X_train.shape[1]  # Number of timesteps in each sequence
features = X_train.shape[2]  # Number of features per timestep


def initialize_model(input_shape):
    model = models.Sequential([
        layers.LSTM(5, input_shape=input_shape, activation='tanh'),
        layers.Dense(10, activation="relu"),
        layers.Dropout(rate=0.2),
        layers.Dense(1, activation='linear')  # Assuming a regression task
    ])


    return model

# You can now proceed to train model_LSTM with your data


In [10]:
import tensorflow as tf
from tensorflow.keras.optimizers.schedules import ExponentialDecay

input_shape=(timesteps, features)
initial_learning_rate = 0.001  # Start with default Adam value

# # Define the learning rate schedule
lr_schedule = ExponentialDecay(
    initial_learning_rate=initial_learning_rate,
    decay_steps=5000,  # Learning rate will decay every 5000 steps
    decay_rate=0.7,  # Learning rate will be multiplied by 0.7 at each decay step
    staircase=True  # If True, learning rate changes at discrete intervals, making the decay stepwise
)

# Initialize the RMSprop optimizer with the ExponentialDecay learning rate schedule
optimizer = tf.keras.optimizers.RMSprop(learning_rate=lr_schedule)

# Initialize the optimizer with the learning rate schedule
# optimizer = tf.keras.optimizers.Adam(learning_rate=lr_schedule)

# 2. Initializing the model
model = initialize_model(input_shape)

model.compile(optimizer=optimizer,
                loss='mse',
                metrics=['mae'])

# 4. Training the neural net
es = EarlyStopping(patience=20, restore_best_weights=True)
history = model.fit(
    X_train,
    y_train,
    epochs=1000,
    validation_split = 0.2,
    shuffle = True,
    batch_size=8,
    callbacks = [es],
    verbose = 1
)


Epoch 1/1000


2024-03-12 15:03:14.121378: W tensorflow/core/platform/profile_utils/cpu_utils.cc:128] Failed to get CPU frequency: 0 Hz


Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000
Epoch 9/1000
Epoch 10/1000
Epoch 11/1000
Epoch 12/1000
Epoch 13/1000
Epoch 14/1000
Epoch 15/1000
Epoch 16/1000
Epoch 17/1000
Epoch 18/1000
Epoch 19/1000
Epoch 20/1000
Epoch 21/1000
Epoch 22/1000
Epoch 23/1000
Epoch 24/1000
Epoch 25/1000
Epoch 26/1000
Epoch 27/1000
Epoch 28/1000
Epoch 29/1000
Epoch 30/1000
Epoch 31/1000
Epoch 32/1000
Epoch 33/1000
Epoch 34/1000
Epoch 35/1000
Epoch 36/1000
Epoch 37/1000
Epoch 38/1000
Epoch 39/1000
Epoch 40/1000
Epoch 41/1000
Epoch 42/1000
Epoch 43/1000
Epoch 44/1000
Epoch 45/1000
Epoch 46/1000


In [11]:
result = model.evaluate(X_test, y_test)



In [38]:
predictions = model.predict(X_test)
predictions



array([[195.48323],
       [195.48291],
       [195.4766 ],
       ...,
       [195.48318],
       [195.48297],
       [195.45094]], dtype=float32)

In [24]:
loss, mae = model.evaluate(X_test, y_test)




In [None]:
def plot_loss_mae(history):
    # Setting figures
    fig, (ax1, ax2) = plt.subplots(1,2, figsize=(13,4))

    # Create the plots
    ax1.plot(history.history['loss'])
    ax1.plot(history.history['val_loss'])

    ax2.plot(history.history['mae'])
    ax2.plot(history.history['val_mae'])

    # Set titles and labels
    ax1.set_title('Model loss')
    ax1.set_ylabel('Loss')
    ax1.set_xlabel('Epoch')

    ax2.set_title('MAE')
    ax2.set_ylabel('MAE')
    ax2.set_xlabel('Epoch')

    # Set limits for y-axes
    ax1.set_ylim(ymin=0, ymax=200)
    ax2.set_ylim(ymin=0, ymax=20)

    # Generate legends
    ax1.legend(['Train', 'Validation'], loc='best')
    ax2.legend(['Train', 'Validation'], loc='best')

    # Show grids
    ax1.grid(axis="x",linewidth=0.5)
    ax1.grid(axis="y",linewidth=0.5)

    ax2.grid(axis="x",linewidth=0.5)
    ax2.grid(axis="y",linewidth=0.5)

    plt.show()