In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras import regularizers
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Set seeds for reproducibility.
# random_seed is passed explicitly to train_test_split further down; on its own
# that leaves Keras weight initialisation and batch shuffling unseeded, so also
# seed Python's `random`, NumPy and TensorFlow in one call.
random_seed = 1
tf.keras.utils.set_random_seed(random_seed)

In [2]:
# Sample window; both bounds are inclusive.
earliest_date = '1963-01-01'
latest_date = '2021-10-01'

# Read the FRED extract indexed by date, put it on a quarter-start ('QS') grid,
# keep only the rows inside the sample window, then drop every column that
# still contains a missing value.
df = (
    pd.read_csv('fred_230718.csv', index_col='Date', parse_dates=True)
    .asfreq('QS')
    .loc[lambda frame: (frame.index >= earliest_date) & (frame.index <= latest_date)]
    .dropna(axis=1)
)
df.head()

Unnamed: 0_level_0,GDP,GDPC1,GDPPOT,CPIAUCSL,CPILFESL,GDPDEF,M1V,M2V,DFF,UNRATE,...,MANEMP,DSPIC96,PCE,PCEDG,PSAVERT,DSPI,INDPRO,HOUST,GPDI,MSPUS
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1963-01-01,621.672,3628.306,3662.738125,30.44,31.5,17.134,4.178,1.69,3.0,5.7,...,15545.0,2541.1,374.4,53.1,10.9,430.0,26.0448,1244.0,99.689,17800.0
1963-04-01,629.752,3669.02,3701.698767,30.48,31.7,17.164,4.194,1.675,3.0,5.7,...,15602.0,2547.1,376.4,53.2,10.7,431.1,26.7473,1689.0,101.65,18000.0
1963-07-01,644.444,3749.681,3741.388301,30.69,31.8,17.187,4.248,1.68,3.0,5.6,...,15646.0,2572.6,384.4,55.5,10.1,438.0,27.0445,1614.0,104.612,17900.0
1963-10-01,653.938,3774.264,3781.880559,30.75,32.0,17.326,4.269,1.672,3.5,5.5,...,15714.0,2617.3,386.0,54.2,11.5,447.0,27.5578,1779.0,107.189,18500.0
1964-01-01,669.822,3853.835,3822.450115,30.94,32.2,17.381,4.345,1.685,3.25,5.6,...,15715.0,2652.8,396.8,57.9,10.7,455.3,27.882,1603.0,110.474,18500.0


Set the target, then create the training, validation, and test datasets. The features are standardized and the target is log-transformed so that they work better with the neural network.

In [3]:
target = 'MSPUS'

# Predictors are all other series lagged by one row (one quarter on the QS grid);
# the shift leaves the first row empty, so it is dropped.
X = df.drop(columns=[target]).shift(1).dropna()
# Take the target on exactly the rows that survived the dropna above.
y = df.loc[X.index, target]

# 80/20 train_full/test, then 75/25 train/valid inside train_full
# (i.e. roughly 60/20/20 overall).
# https://datascience.stackexchange.com/questions/15135/train-test-validation-set-splitting-in-sklearn
# NOTE(review): train_test_split shuffles by default, so test quarters are
# interleaved with training quarters -- confirm that is intended for
# time-ordered data.
X_train_full, X_test, y_train_full, y_test = train_test_split(
    X, y, test_size=0.2, random_state=random_seed
)
X_train, X_valid, y_train, y_valid = train_test_split(
    X_train_full, y_train_full, test_size=0.25, random_state=random_seed
)

# Fit the scaler on the training rows only, then apply the same statistics
# to the validation and test rows.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_valid_scaled = scaler.transform(X_valid)
X_test_scaled = scaler.transform(X_test)

# Log-transform the target used for fitting and validation
y_train_log, y_valid_log = (np.log1p(part) for part in (y_train, y_valid))

Let's create functions to train and evaluate models.

In [4]:
def train_model(X_train, y_train,
                X_valid, y_valid,
                layer_sizes=[100, 100],
                activation="relu",
                kernel_initializer="he_normal",
                learning_rate=0.001,
                epochs=100,
                batch_norm=False,
                l1_l2=False,
                l1=.01,
                l2=.01):
    """Build, compile and fit a fully connected regression network.

    One hidden Dense layer is created per entry in ``layer_sizes``. When
    ``batch_norm`` is true a BatchNormalization layer is inserted in front of
    every hidden layer; when ``l1_l2`` is true every hidden layer gets an
    L1/L2 kernel regularizer with the given ``l1`` and ``l2`` factors. The
    output is a single linear unit. The model is compiled with MSE loss and
    plain SGD at ``learning_rate`` and fitted for ``epochs`` epochs, reporting
    the loss on ``(X_valid, y_valid)`` after each one.

    Returns the fitted ``tf.keras.Sequential`` model.
    """
    net = tf.keras.Sequential()

    for units in layer_sizes:
        if batch_norm:
            net.add(tf.keras.layers.BatchNormalization())

        # Options shared by every hidden layer; the regularizer is only
        # attached when requested (a fresh instance per layer).
        dense_options = dict(activation=activation,
                             kernel_initializer=kernel_initializer)
        if l1_l2:
            dense_options["kernel_regularizer"] = regularizers.l1_l2(l1=l1, l2=l2)
        net.add(tf.keras.layers.Dense(units, **dense_options))

    # Single linear output unit for regression
    net.add(tf.keras.layers.Dense(1))

    net.compile(loss="mse",
                optimizer=tf.keras.optimizers.SGD(learning_rate=learning_rate))
    net.fit(X_train, y_train, epochs=epochs, validation_data=(X_valid, y_valid))

    return net

In [5]:
def evaluate_model(model, X_test, y_test, log_target=False):
    """Print MSE, RMSE and RMSPE of ``model`` on a held-out set.

    Parameters
    ----------
    model : object exposing ``predict(X)`` (e.g. a fitted Keras model).
    X_test : features, passed unchanged to ``model.predict``.
    y_test : array-like of true target values on the original (un-logged) scale.
    log_target : bool, default False
        Set to True when the model was trained on ``np.log1p(y)``; predictions
        are mapped back with ``np.expm1`` before scoring.

    All three metrics are computed on the original scale. RMSPE divides by
    ``y_test``, so it is not meaningful where the true target is zero.
    """
    # Keras returns predictions shaped (n, 1). Flatten unconditionally: left as
    # (n, 1), the array would broadcast against a length-n target instead of
    # lining up element by element, giving wrong metrics (or an error).
    y_pred = np.asarray(model.predict(X_test), dtype=float).reshape(-1)
    if log_target:
        y_pred = np.expm1(y_pred)  # inverse of np.log1p()
    y_true = np.asarray(y_test, dtype=float).reshape(-1)

    errors = y_true - y_pred

    mse = np.mean(np.square(errors))
    print('Test set MSE:', mse)
    rmse = np.sqrt(mse)
    print('Test set RMSE:', rmse)
    rmspe = np.sqrt(np.mean(np.square(errors / y_true))) * 100
    print('Test set RMSPE (%):', rmspe)

Now we'll test some different regularization settings and architectures.

In [6]:
print('no regularization')
# Validate on the validation split: X_valid_scaled and y_valid_log come from the
# same rows. The test features are kept out of training and only used for the
# final evaluation below.
model = train_model(X_train=X_train_scaled, y_train=y_train_log, X_valid=X_valid_scaled, y_valid=y_valid_log)
evaluate_model(model, X_test_scaled, y_test, log_target=True)

no regularization
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epo

In [7]:
print('batch norm regularization')
# Validate on the validation split: X_valid_scaled and y_valid_log come from the
# same rows. The test features are kept out of training and only used for the
# final evaluation below.
model = train_model(X_train=X_train_scaled, y_train=y_train_log, X_valid=X_valid_scaled, y_valid=y_valid_log, batch_norm=True)
evaluate_model(model, X_test_scaled, y_test, log_target=True)

batch norm regularization
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76

In [8]:
print('L1/L2 regularization')
# Validate on the validation split: X_valid_scaled and y_valid_log come from the
# same rows. The test features are kept out of training and only used for the
# final evaluation below.
model = train_model(X_train=X_train_scaled, y_train=y_train_log, X_valid=X_valid_scaled, y_valid=y_valid_log, l1_l2=True)
evaluate_model(model, X_test_scaled, y_test, log_target=True)

L1/L2 regularization
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100


In [9]:
print('batch norm & L1/L2 regularization')
# Validate on the validation split: X_valid_scaled and y_valid_log come from the
# same rows. The test features are kept out of training and only used for the
# final evaluation below.
model = train_model(X_train=X_train_scaled, y_train=y_train_log, X_valid=X_valid_scaled, y_valid=y_valid_log, batch_norm=True, l1_l2=True)
evaluate_model(model, X_test_scaled, y_test, log_target=True)

batch norm & L1/L2 regularization
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100


In [10]:
# Three hidden layers, 500 epochs, batch norm + L1/L2.
# Validate on the validation split: X_valid_scaled and y_valid_log come from the
# same rows. The test features are kept out of training and only used for the
# final evaluation below.
model = train_model(epochs=500, layer_sizes=[100, 100, 100], X_train=X_train_scaled, y_train=y_train_log, X_valid=X_valid_scaled, y_valid=y_valid_log, batch_norm=True, l1_l2=True)
evaluate_model(model, X_test_scaled, y_test, log_target=True)

Epoch 1/500
Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500
Epoch 17/500
Epoch 18/500
Epoch 19/500
Epoch 20/500
Epoch 21/500
Epoch 22/500
Epoch 23/500
Epoch 24/500
Epoch 25/500
Epoch 26/500
Epoch 27/500
Epoch 28/500
Epoch 29/500
Epoch 30/500
Epoch 31/500
Epoch 32/500
Epoch 33/500
Epoch 34/500
Epoch 35/500
Epoch 36/500
Epoch 37/500
Epoch 38/500
Epoch 39/500
Epoch 40/500
Epoch 41/500
Epoch 42/500
Epoch 43/500
Epoch 44/500
Epoch 45/500
Epoch 46/500
Epoch 47/500
Epoch 48/500
Epoch 49/500
Epoch 50/500
Epoch 51/500
Epoch 52/500
Epoch 53/500
Epoch 54/500
Epoch 55/500
Epoch 56/500
Epoch 57/500
Epoch 58/500
Epoch 59/500
Epoch 60/500
Epoch 61/500
Epoch 62/500
Epoch 63/500
Epoch 64/500
Epoch 65/500
Epoch 66/500
Epoch 67/500
Epoch 68/500
Epoch 69/500
Epoch 70/500
Epoch 71/500
Epoch 72/500
Epoch 73/500
Epoch 74/500
Epoch 75/500
Epoch 76/500
Epoch 77/500
Epoch 78

In [11]:
# Four hidden layers, 1,000 epochs, batch norm + L1/L2.
# Validate on the validation split: X_valid_scaled and y_valid_log come from the
# same rows. The test features are kept out of training and only used for the
# final evaluation below.
model = train_model(epochs=1_000, layer_sizes=[100, 100, 100, 100], X_train=X_train_scaled, y_train=y_train_log, X_valid=X_valid_scaled, y_valid=y_valid_log, batch_norm=True, l1_l2=True)
evaluate_model(model, X_test_scaled, y_test, log_target=True)

Epoch 1/1000
Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000
Epoch 9/1000
Epoch 10/1000
Epoch 11/1000
Epoch 12/1000
Epoch 13/1000
Epoch 14/1000
Epoch 15/1000
Epoch 16/1000
Epoch 17/1000
Epoch 18/1000
Epoch 19/1000
Epoch 20/1000
Epoch 21/1000
Epoch 22/1000
Epoch 23/1000
Epoch 24/1000
Epoch 25/1000
Epoch 26/1000
Epoch 27/1000
Epoch 28/1000
Epoch 29/1000
Epoch 30/1000
Epoch 31/1000
Epoch 32/1000
Epoch 33/1000
Epoch 34/1000
Epoch 35/1000
Epoch 36/1000
Epoch 37/1000
Epoch 38/1000
Epoch 39/1000
Epoch 40/1000
Epoch 41/1000
Epoch 42/1000
Epoch 43/1000
Epoch 44/1000
Epoch 45/1000
Epoch 46/1000
Epoch 47/1000
Epoch 48/1000
Epoch 49/1000
Epoch 50/1000
Epoch 51/1000
Epoch 52/1000
Epoch 53/1000
Epoch 54/1000
Epoch 55/1000
Epoch 56/1000
Epoch 57/1000
Epoch 58/1000
Epoch 59/1000
Epoch 60/1000
Epoch 61/1000
Epoch 62/1000
Epoch 63/1000
Epoch 64/1000
Epoch 65/1000
Epoch 66/1000
Epoch 67/1000
Epoch 68/1000
Epoch 69/1000
Epoch 70/1000
Epoch 71/1000
Epoch 72/1000
E

In [None]:
import time

# Load the MNIST digits and rescale pixel values by 1/255.
# NOTE(review): this rebinds y_train and y_test, names the regression cells
# earlier in the notebook also use; re-running those cells after this one would
# pick up the MNIST labels instead.
mnist = tf.keras.datasets.mnist
train_split, test_split = mnist.load_data()
x_train, y_train = train_split
x_test, y_test = test_split
x_train = x_train / 255.0
x_test = x_test / 255.0

# Create a more complex DNN model
def create_model():
    """Return an uncompiled dense classifier for 28x28 inputs.

    Architecture: Flatten, then three blocks of Dense(512, relu) followed by
    Dropout(0.2), then a 10-unit softmax output (class probabilities).

    An earlier, smaller variant (Flatten -> Dense(128, relu) -> Dropout(0.2)
    -> Dense(10)) was actually slower on GPU; batch size wasn't specified for
    that run either.
    """
    stack = [tf.keras.layers.Flatten(input_shape=(28, 28))]
    for _ in range(3):
        stack.append(tf.keras.layers.Dense(512, activation='relu'))
        stack.append(tf.keras.layers.Dropout(0.2))
    stack.append(tf.keras.layers.Dense(10, activation='softmax'))
    return tf.keras.models.Sequential(stack)

# Train and evaluate the model, returning the time taken
def train_and_evaluate(model, x_train, y_train, x_test, y_test):
    """Compile, fit and evaluate ``model``; return the fit time in seconds.

    Parameters
    ----------
    model : uncompiled Keras model whose output layer produces class
        probabilities (as the softmax layer in ``create_model`` does).
    x_train, y_train : training inputs and integer class labels.
    x_test, y_test : held-out inputs and labels, used only for ``evaluate``.

    Returns
    -------
    float
        Elapsed wall-clock seconds spent inside ``model.fit`` (compile and
        evaluate are not included in the timing).
    """
    # The model ends in a softmax layer, so the loss must treat its output as
    # probabilities. The string alias selects sparse categorical cross-entropy
    # with the default from_logits=False.
    model.compile(optimizer='adam',
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])

    # perf_counter is monotonic and high-resolution, so the measurement is not
    # disturbed by system clock adjustments.
    start_time = time.perf_counter()
    # Explicit batch_size=512 (an earlier run used Keras' default batch size).
    model.fit(x_train, y_train, epochs=5, batch_size=512)
    elapsed = time.perf_counter() - start_time

    model.evaluate(x_test, y_test, verbose=2)

    return elapsed

# Check if a GPU is available
# Time training on the GPU and the CPU when TensorFlow can see a GPU;
# otherwise time the default (CPU) device only.
gpu_devices = tf.config.list_physical_devices('GPU')
if not gpu_devices:
    print("GPU not available, training with CPU only")
    model = create_model()
    cpu_time = train_and_evaluate(model, x_train, y_train, x_test, y_test)
    print("Time taken to train with CPU: {:.2f} seconds".format(cpu_time))
else:
    print("GPU is available")

    # A fresh model is built inside each device scope so both runs start from
    # newly initialised weights.
    with tf.device('/GPU:0'):  # train with GPU
        model = create_model()
        gpu_time = train_and_evaluate(model, x_train, y_train, x_test, y_test)
        print("Time taken to train with GPU: {:.2f} seconds".format(gpu_time))

    with tf.device('/CPU:0'):  # train with CPU
        model = create_model()
        cpu_time = train_and_evaluate(model, x_train, y_train, x_test, y_test)
        print("Time taken to train with CPU: {:.2f} seconds".format(cpu_time))

    # Reduction in training time relative to the CPU run, in percent.
    time_saved = cpu_time - gpu_time
    speed_increase = time_saved / cpu_time * 100
    print("GPU is {:.2f}% faster than CPU".format(speed_increase))