In [7]:
import pandas as pd
import numpy as np
import random
import tensorflow as tf
from tensorflow.keras import regularizers
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

import wandb
wandb.login()
from wandb.keras import WandbMetricsLogger, WandbModelCheckpoint

# set seed for reproducibility
random_seed = 1

In [8]:
df = pd.read_csv('fred_230718.csv', index_col='Date', parse_dates=True)
df = df.asfreq('QS')
earliest_date = '1963-01-01'
latest_date = '2021-10-01'
# # filter df index to be between earliest_date and latest_date
df = df.loc[(df.index >= earliest_date) & (df.index <= latest_date)]
df.dropna(axis=1, inplace=True)
print(f'feature count: {len(df.columns) - 1}')
print(f'row count: {len(df)}')
df.head()

feature count: 25
row count: 236


Unnamed: 0_level_0,GDP,GDPC1,GDPPOT,CPIAUCSL,CPILFESL,GDPDEF,M1V,M2V,DFF,UNRATE,...,MANEMP,DSPIC96,PCE,PCEDG,PSAVERT,DSPI,INDPRO,HOUST,GPDI,MSPUS
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1963-01-01,621.672,3628.306,3662.738125,30.44,31.5,17.134,4.178,1.69,3.0,5.7,...,15545.0,2541.1,374.4,53.1,10.9,430.0,26.0448,1244.0,99.689,17800.0
1963-04-01,629.752,3669.02,3701.698767,30.48,31.7,17.164,4.194,1.675,3.0,5.7,...,15602.0,2547.1,376.4,53.2,10.7,431.1,26.7473,1689.0,101.65,18000.0
1963-07-01,644.444,3749.681,3741.388301,30.69,31.8,17.187,4.248,1.68,3.0,5.6,...,15646.0,2572.6,384.4,55.5,10.1,438.0,27.0445,1614.0,104.612,17900.0
1963-10-01,653.938,3774.264,3781.880559,30.75,32.0,17.326,4.269,1.672,3.5,5.5,...,15714.0,2617.3,386.0,54.2,11.5,447.0,27.5578,1779.0,107.189,18500.0
1964-01-01,669.822,3853.835,3822.450115,30.94,32.2,17.381,4.345,1.685,3.25,5.6,...,15715.0,2652.8,396.8,57.9,10.7,455.3,27.882,1603.0,110.474,18500.0


set target and create, train, validate, and test datasets and then scale and transform them so they will work better with the neural network

In [9]:
target = 'MSPUS'

drop_cols = ['GDPC1', 'GDPPOT', 'CPIAUCSL', 'CPILFESL', 'GDPDEF'
            , 'M2V'
            , 'DSPIC96', 'PCE', 'PCEDG', 'DSPI', 'INDPRO', 'GPDI']
df.drop(columns=drop_cols, inplace=True)

y = df[target]
X = df.drop(columns=[target]).shift(1).dropna()
y = y.loc[X.index] # Make sure y and X have the same rows after dropna

# https://datascience.stackexchange.com/questions/15135/train-test-validation-set-splitting-in-sklearn
X_train_full, X_test, y_train_full, y_test = train_test_split(X, y, test_size=0.2, random_state=random_seed)
X_train, X_valid, y_train, y_valid = train_test_split(X_train_full, y_train_full, test_size=0.25, random_state=random_seed)

scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_valid_scaled = scaler.transform(X_valid)  # validation data should also be scaled
X_test_scaled = scaler.transform(X_test)

# Log-transform the target variable
y_train_log = np.log1p(y_train)
y_valid_log = np.log1p(y_valid)

In [10]:
from wandb.keras import WandbMetricsLogger, WandbModelCheckpoint

def train_model(X_train, y_train, 
                X_valid, y_valid,
                layer_sizes=[100, 100], 
                activation="relu", 
                kernel_initializer="he_normal", 
                loss='mse',
                learning_rate=0.001, 
                epochs=100,
                batch_norm=False,
                l1_l2=False,
                l1=.01,
                l2=.01,
                metrics=['mse'],
                wandb = 'y'):

    # Create a sequential model
    model = tf.keras.Sequential()

    # Add batch normalization and dense layers according to the layer_sizes
    for size in layer_sizes:
        if batch_norm:
            model.add(tf.keras.layers.BatchNormalization())
        if l1_l2:
            model.add(tf.keras.layers.Dense(size, activation=activation, kernel_initializer=kernel_initializer
                                            , kernel_regularizer=regularizers.l1_l2(l1=l1, l2=l2)))
        else:
            model.add(tf.keras.layers.Dense(size, activation=activation, kernel_initializer=kernel_initializer))

    # Add a final Dense layer with no activation
    model.add(tf.keras.layers.Dense(1))

    # Create the optimizer with the custom learning rate
    sgd = tf.keras.optimizers.SGD(learning_rate=learning_rate)

    # Compile the model
    model.compile(loss=loss, optimizer=sgd, metrics=metrics)
    
    if wandb == 'y':
        # Add WandbMetricsLogger to log metrics and WandbModelCheckpoint to log model checkpoints
        wandb_callbacks = [
            WandbMetricsLogger(),
            # WandbModelCheckpoint(filepath="my_model_{epoch:02d}"),
            # WandbModelCheckpoint(filepath="my_model_best", save_best_only=True, monitor='val_loss'),
        ]

        # Train the model using the scaled data
        model.fit(X_train, y_train, epochs=epochs, validation_data=(X_valid, y_valid), callbacks=wandb_callbacks)
    else:
        # Train the model using the scaled data
        model.fit(X_train, y_train, epochs=epochs, validation_data=(X_valid, y_valid))
    
    return model

In [11]:
sweep_config = {
    'method': 'bayes',  # can be grid, random, or bayes
    'metric': {
        'name': 'val_loss',
        'goal': 'minimize'
    },
    'parameters': {
        'learning_rate': {
            'min': 0.0001,
            'max': 0.01
        },
        'activation': {
            'values': ['relu']
        }, 
        'kernel_initializer': {
            'values': ['he_normal']
        }, 
        'loss': {
            'values': ['mse']
        }, 
        # 'activation': {
        #     'values': ['relu', 'tanh', 'sigmoid', 'elu', 'selu', 'softplus']  # more activation functions
        # }, 
        # 'kernel_initializer': {
        #     'values': ['he_normal', 'glorot_uniform', 'glorot_normal', 'lecun_normal']  # more initializers
        # }, 
        # 'loss': {
        #     'values': ['mse', 'mae', 'logcosh', 'huber']  # more loss functions
        # },
        'epoch': {
            'values': [100]
        }, 
        # 'epoch': {
        #     'min': 10,
        #     'max': 500
        # },
        'batch_norm': {
            'values': [True, False]
        }, 
        'l1_l2': {
            'values': [True]
        },        
        'l1': {
            'min': 0.0001,
            'max': 0.5
        },
        'l2': {
            'min': 0.0001,
            'max': 0.5
        },
        'metrics': {
            'values': ['mae']
        }
        # 'metrics': {
        #     'values': ['mae', 'mse', 'mape', 'msle']  # adding more metrics
        # }
    }
}

def train(config=None):
    with wandb.init(config=config):
        config = wandb.config
        model = train_model(
            X_train=X_train_scaled, y_train=y_train_log,
            X_valid=X_test_scaled, y_valid=y_valid_log,
            activation=config.activation, kernel_initializer=config.kernel_initializer, 
            loss=config.loss, learning_rate=config.learning_rate, 
            epochs=config.epoch, batch_norm=config.batch_norm, 
            l1_l2=config.l1_l2, l1=config.l1, l2=config.l2, 
            metrics=config.metrics)

sweep_id = wandb.sweep(sweep_config, project="fred_dnn_sweep")
wandb.agent(sweep_id, train, count=100)

Create sweep with ID: azovk6yv
Sweep URL: https://wandb.ai/grantbell/fred_dnn_sweep/sweeps/azovk6yv


[34m[1mwandb[0m: Agent Starting Run: vcaadsj6 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_norm: False
[34m[1mwandb[0m: 	epoch: 100
[34m[1mwandb[0m: 	kernel_initializer: he_normal
[34m[1mwandb[0m: 	l1: 0.1669997122667422
[34m[1mwandb[0m: 	l1_l2: True
[34m[1mwandb[0m: 	l2: 0.319413580861995
[34m[1mwandb[0m: 	learning_rate: 0.0044572913522162435
[34m[1mwandb[0m: 	loss: mse
[34m[1mwandb[0m: 	metrics: mae
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

0,1
epoch/epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
epoch/learning_rate,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch/loss,█▇▆▆▅▅▄▄▃▃▃▃▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch/mae,█▃▂▂▂▂▂▁▁▂▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch/val_loss,█▇▆▆▅▅▄▄▃▃▃▃▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch/val_mae,█▇▅▅▅▅▄▄▄▄▄▄▄▃▄▃▃▃▃▃▃▃▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁

0,1
epoch/epoch,99.0
epoch/learning_rate,0.00446
epoch/loss,1.60642
epoch/mae,0.2842
epoch/val_loss,2.63928
epoch/val_mae,0.89567


[34m[1mwandb[0m: Ctrl + C detected. Stopping sweep.


the best epoch/val_mae is run 94
https://wandb.ai/grantbell/fred_dnn_sweep/runs/qgc616q6?workspace=user-grantbell

In [13]:
def evaluate_model(model, X_test, y_test, log_target=False):
    # When predicting, transform the predictions back
    y_pred = model.predict(X_test)
    if log_target:
        y_pred = np.expm1(y_pred).flatten()  # inverse of np.log1p(), make it 1D


    # compute the RMSE on the original scale
    mse = np.mean(tf.keras.losses.MSE(y_test, y_pred))
    print('Test set MSE:', mse)
    rmse = np.sqrt(mse)
    print('Test set RMSE:', rmse)
    rmspe = (np.sqrt(np.mean(np.square((y_test - y_pred) / y_test)))) * 100
    print('Test set RMSPE (%):', rmspe)
    
print('default')
model = train_model(X_train=X_train_scaled, y_train=y_train_log, X_valid=X_test_scaled, y_valid=y_valid_log
                    , wandb='n')
evaluate_model(model, X_test_scaled, y_test, log_target=True)
print('run 94')
model = train_model(X_train=X_train_scaled, y_train=y_train_log, X_valid=X_test_scaled, y_valid=y_valid_log
                    , wandb='n'
                    , l1 = .2484
                    , l1_l2=True
                    , l2 = .3598
                    , learning_rate=.00264)
evaluate_model(model, X_test_scaled, y_test, log_target=True)

default
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100


next things to try
- shuffle within the train function so the train, test and validate sets are different each time?
- different number and size of layers
- different lags (e.i 1 year instead of one quarter) and/or combinations of lags
- different static number or make dynamic of epochs