# Neural Network Hyperparameter Grid Search

In [1]:
import tensorflow as tf
import random
import numpy as np
from sklearn.model_selection import train_test_split
import pandas as pd
import seaborn as sns
import warnings
from sklearn.metrics import r2_score
warnings.filterwarnings('ignore')
tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)
import matplotlib.pyplot as plt
from keras.callbacks import EarlyStopping
from sklearn.utils import shuffle
from tqdm import tqdm

2023-02-26 00:09:28.260137: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F AVX512_VNNI FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-02-26 00:09:28.360289: I tensorflow/core/util/port.cc:104] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2023-02-26 00:09:28.898787: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: :/home/tychi/miniconda3/lib/
2023-02-26 00:09:28.898874: W tensorflow/compiler/xla/stre

In [2]:
# Load the full training table and shuffle its rows. The shuffle is seeded so
# that the row order (and therefore every downstream split) is the same after
# a kernel restart.
training_data = pd.read_csv('../Datasets/training_data_full.csv')
training_data = shuffle(training_data, random_state=42)

In [3]:
# Columns that are excluded from the model inputs.
non_numerical_features = set([
    'StateAbbr','StateDesc','CityName','CountyFIPS','TractFIPS'
])

# Column the network is trained to predict.
target_features = set(['DIABETES_3Y_Change_Percentage'])

# Keep every remaining column in the order it appears in the CSV. Building
# this list from a set difference would make the column order depend on
# Python's per-process string hashing, so the feature order fed to the network
# could change between kernel sessions.
# NOTE(review): assumes every remaining column is numeric - confirm against the dataset.
numerical_features = [
    column for column in training_data.columns
    if column not in non_numerical_features and column not in target_features
]

features = training_data[numerical_features]
labels = training_data['DIABETES_3Y_Change_Percentage']

In [4]:
# Hold out 33% of the rows as the test set; random_state fixes the seed used
# by the splitter.
X_train, X_test, y_train, y_test = train_test_split(
    features,
    labels,
    test_size=0.33,
    random_state=42,
)

In [5]:
# Normalization layer that every model built by build_model() starts with.
normalizer = tf.keras.layers.Normalization(axis=-1)
# Fit the normalisation statistics on the training split only, so nothing
# about the held-out test rows leaks into the models.
normalizer.adapt(X_train)

2023-02-26 00:09:30.554065: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:967] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2023-02-26 00:09:30.558506: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:967] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2023-02-26 00:09:30.558716: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:967] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2023-02-26 00:09:30.559252: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F AVX512_VNNI FMA
To enable them i

In [6]:
def build_model(layer_config, learning_rate=0.001, add_dropout=False, dropout_rate=0.2):
    """Build and compile a fully connected regression network.

    The model starts with the module-level ``normalizer`` layer, followed by
    one ReLU ``Dense`` layer per entry in ``layer_config`` and a single linear
    output unit. It is compiled with Adam and mean-squared-error loss.

    Args:
        layer_config: Iterable of hidden-layer widths, in order.
        learning_rate: Learning rate passed to the Adam optimizer.
        add_dropout: If true, a ``Dropout`` layer follows every hidden layer.
        dropout_rate: Rate used by those ``Dropout`` layers (only read when
            ``add_dropout`` is true).

    Returns:
        The compiled ``tf.keras.Sequential`` model.
    """
    model = tf.keras.Sequential([
        normalizer
    ])

    for layer_size in layer_config:
        # hidden dense layer
        model.add(tf.keras.layers.Dense(layer_size, activation='relu'))

        # some studies suggest adding dropout layer after each dense layer
        if add_dropout:
            model.add(tf.keras.layers.Dropout(dropout_rate))

    # output layer: one linear unit for the regression target
    model.add(tf.keras.layers.Dense(1, activation='linear'))

    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate),
        loss="mean_squared_error"
    )
    return model

# Grid Search

In [7]:
# Hidden-layer widths to try; each inner list is one architecture.
layer_config_options = [
    [50,25,5],
    [30,15,3],
    [20,10,2],
    [20,5]
]

# Adam learning rates to try.
learning_rate_options = [
    0.1,
    0.01,
    0.001,
]

# Whether to insert a dropout layer after every hidden layer.
add_dropout_options = [
    True,
    False
]

# Mini-batch sizes to try.
batch_size_options = [
    2, 16, 32
]

# Stop a run once val_loss has not improved for 3 consecutive epochs. When
# that happens, roll the model back to the weights of its best epoch instead
# of keeping the weights from the last (worse) epochs.
early_stop_callback = EarlyStopping(
    monitor='val_loss',
    patience=3,
    restore_best_weights=True
)

In [8]:
combination_count =\
    len(layer_config_options) * len(learning_rate_options) *\
    len(add_dropout_options) * len(batch_size_options)

print(f'Combinations to try: {combination_count}')

# Seed Python, NumPy and TensorFlow so weight initialisation and batch
# shuffling are repeatable from a fresh kernel.
tf.keras.utils.set_random_seed(42)

# Early stopping needs data to monitor, but using the test rows for that and
# then reporting test_r2 on the same rows would bias the score upwards.
# Carve a validation split out of the training data instead and keep the
# test split untouched until scoring.
X_fit, X_val, y_fit, y_val = train_test_split(
    X_train, y_train, test_size=0.2, random_state=42
)

result_list = [] # later to turn into dataframe

# The context manager closes the bar even if a run is interrupted.
with tqdm(total=combination_count) as pbar:
    for layer_config in layer_config_options:
        for learning_rate in learning_rate_options:
            for add_dropout in add_dropout_options:
                for batch_size in batch_size_options:
                    nn_model = build_model(
                        layer_config, learning_rate, add_dropout
                    )

                    nn_model.fit(
                        X_fit,
                        y_fit,
                        validation_data=(X_val, y_val),
                        batch_size=batch_size,
                        epochs=50, # upper bound; early stopping usually ends sooner
                        callbacks=[early_stop_callback],
                        verbose=0
                    )

                    # make predictions (verbose=0 keeps Keras' own progress
                    # output from interleaving with the tqdm bar)
                    y_test_predictions = nn_model.predict(X_test, verbose=0)
                    y_train_predictions = nn_model.predict(X_fit, verbose=0)

                    # calculate r2 scores: train_r2 on the rows the model was
                    # fitted on, test_r2 on rows it has never seen
                    train_r2 = r2_score(y_fit, y_train_predictions)
                    test_r2 = r2_score(y_test, y_test_predictions)

                    result_list.append({
                        'layer_config': str(layer_config),
                        'learning_rate': learning_rate,
                        'add_dropout': add_dropout,
                        'batch_size': batch_size,
                        'train_r2': train_r2,
                        'test_r2': test_r2
                    })

                    pbar.update(1)

Combinations to try: 72


  0%|                                                                                            | 0/72 [00:00<?, ?it/s]2023-02-26 00:09:35.222398: I tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:630] TensorFloat-32 will be used for the matrix multiplication. This will only be logged once.
2023-02-26 00:09:35.229876: I tensorflow/compiler/xla/service/service.cc:173] XLA service 0x1fc9d700 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
2023-02-26 00:09:35.229906: I tensorflow/compiler/xla/service/service.cc:181]   StreamExecutor device (0): NVIDIA GeForce RTX 3080, Compute Capability 8.6
2023-02-26 00:09:35.234696: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:268] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
2023-02-26 00:09:35.329002: I tensorflow/tsl/platform/default/subprocess.cc:304] Start cannot spawn child process: No such file or directory
2023-02-26 00:09:35.357372:



  1%|█                                                                               | 1/72 [14:28<17:07:16, 868.12s/it]



  3%|██▎                                                                              | 2/72 [15:35<7:43:07, 396.96s/it]



  4%|███▍                                                                             | 3/72 [16:30<4:36:52, 240.76s/it]



  6%|████▌                                                                            | 4/72 [27:53<7:50:41, 415.32s/it]



  7%|█████▋                                                                           | 5/72 [29:29<5:35:13, 300.20s/it]



  8%|██████▊                                                                          | 6/72 [30:00<3:49:44, 208.85s/it]



 10%|███████▉                                                                         | 7/72 [46:31<8:23:12, 464.50s/it]



 11%|█████████                                                                        | 8/72 [48:15<6:13:07, 349.81s/it]



 12%|██████████▏                                                                      | 9/72 [49:02<4:27:40, 254.93s/it]



 14%|███████████                                                                     | 10/72 [54:26<4:45:39, 276.44s/it]



 15%|████████████▏                                                                   | 11/72 [55:12<3:29:15, 205.83s/it]



 17%|█████████████▎                                                                  | 12/72 [56:36<2:48:56, 168.95s/it]



 18%|██████████████                                                                | 13/72 [1:20:39<9:05:27, 554.70s/it]



 19%|███████████████▏                                                              | 14/72 [1:22:59<6:55:11, 429.50s/it]



 21%|████████████████▎                                                             | 15/72 [1:24:37<5:13:00, 329.48s/it]



 22%|█████████████████▎                                                            | 16/72 [1:40:17<7:59:09, 513.39s/it]



 24%|██████████████████▍                                                           | 17/72 [1:42:48<6:10:37, 404.31s/it]



 25%|███████████████████▌                                                          | 18/72 [1:44:00<4:34:07, 304.58s/it]



 26%|████████████████████▌                                                         | 19/72 [1:52:33<5:24:09, 366.98s/it]



 28%|█████████████████████▋                                                        | 20/72 [1:53:28<3:57:00, 273.48s/it]



 29%|██████████████████████▊                                                       | 21/72 [1:54:19<2:55:37, 206.61s/it]



 31%|███████████████████████▊                                                      | 22/72 [2:02:30<4:03:25, 292.11s/it]



 32%|████████████████████████▉                                                     | 23/72 [2:03:10<2:56:37, 216.28s/it]



 33%|██████████████████████████                                                    | 24/72 [2:03:45<2:09:31, 161.91s/it]



 35%|███████████████████████████                                                   | 25/72 [2:13:16<3:43:02, 284.74s/it]



 36%|████████████████████████████▏                                                 | 26/72 [2:14:23<2:48:07, 219.29s/it]



 38%|█████████████████████████████▎                                                | 27/72 [2:15:08<2:05:23, 167.20s/it]



 39%|██████████████████████████████▎                                               | 28/72 [2:22:13<2:59:19, 244.53s/it]



 40%|███████████████████████████████▍                                              | 29/72 [2:24:05<2:26:45, 204.77s/it]



 42%|████████████████████████████████▌                                             | 30/72 [2:25:04<1:52:34, 160.82s/it]



 43%|█████████████████████████████████▌                                            | 31/72 [2:41:29<4:38:51, 408.09s/it]



 44%|██████████████████████████████████▋                                           | 32/72 [2:47:38<4:24:17, 396.45s/it]



 46%|███████████████████████████████████▊                                          | 33/72 [2:50:36<3:35:02, 330.82s/it]



 47%|████████████████████████████████████▊                                         | 34/72 [3:05:55<5:21:16, 507.29s/it]



 49%|█████████████████████████████████████▉                                        | 35/72 [3:07:48<3:59:55, 389.08s/it]



 50%|███████████████████████████████████████                                       | 36/72 [3:10:51<3:16:18, 327.18s/it]



 51%|████████████████████████████████████████                                      | 37/72 [3:19:05<3:40:01, 377.18s/it]



 53%|█████████████████████████████████████████▏                                    | 38/72 [3:21:04<2:49:54, 299.84s/it]



 54%|██████████████████████████████████████████▎                                   | 39/72 [3:21:39<2:01:17, 220.52s/it]



 56%|███████████████████████████████████████████▎                                  | 40/72 [3:28:48<2:30:55, 282.98s/it]



 57%|████████████████████████████████████████████▍                                 | 41/72 [3:29:55<1:52:46, 218.26s/it]



 58%|█████████████████████████████████████████████▌                                | 42/72 [3:30:21<1:20:16, 160.54s/it]



 60%|██████████████████████████████████████████████▌                               | 43/72 [3:38:34<2:05:46, 260.23s/it]



 61%|███████████████████████████████████████████████▋                              | 44/72 [3:40:13<1:38:53, 211.89s/it]



 62%|████████████████████████████████████████████████▊                             | 45/72 [3:40:54<1:12:16, 160.61s/it]



 64%|█████████████████████████████████████████████████▊                            | 46/72 [3:46:57<1:35:53, 221.30s/it]



 65%|██████████████████████████████████████████████████▉                           | 47/72 [3:49:36<1:24:23, 202.55s/it]



 67%|████████████████████████████████████████████████████                          | 48/72 [3:51:06<1:07:33, 168.91s/it]



 68%|█████████████████████████████████████████████████████                         | 49/72 [4:12:58<3:16:10, 511.75s/it]



 69%|██████████████████████████████████████████████████████▏                       | 50/72 [4:17:34<2:41:43, 441.06s/it]



 71%|███████████████████████████████████████████████████████▎                      | 51/72 [4:19:23<1:59:30, 341.44s/it]



 72%|████████████████████████████████████████████████████████▎                     | 52/72 [4:30:01<2:23:27, 430.38s/it]



 74%|█████████████████████████████████████████████████████████▍                    | 53/72 [4:35:50<2:08:35, 406.07s/it]



 75%|██████████████████████████████████████████████████████████▌                   | 54/72 [4:39:24<1:44:29, 348.31s/it]



 76%|███████████████████████████████████████████████████████████▌                  | 55/72 [4:44:07<1:33:10, 328.86s/it]



 78%|████████████████████████████████████████████████████████████▋                 | 56/72 [4:44:46<1:04:30, 241.93s/it]



 79%|███████████████████████████████████████████████████████████████▎                | 57/72 [4:45:08<43:56, 175.74s/it]



 81%|██████████████████████████████████████████████████████████████▊               | 58/72 [4:55:35<1:12:35, 311.14s/it]



 82%|█████████████████████████████████████████████████████████████████▌              | 59/72 [4:56:10<49:28, 228.38s/it]



 83%|██████████████████████████████████████████████████████████████████▋             | 60/72 [4:56:37<33:36, 168.08s/it]



 85%|██████████████████████████████████████████████████████████████████            | 61/72 [5:10:40<1:07:55, 370.54s/it]



 86%|████████████████████████████████████████████████████████████████████▉           | 62/72 [5:11:47<46:33, 279.38s/it]



 88%|██████████████████████████████████████████████████████████████████████          | 63/72 [5:13:08<32:59, 219.99s/it]



 89%|███████████████████████████████████████████████████████████████████████         | 64/72 [5:19:27<35:40, 267.57s/it]



 90%|████████████████████████████████████████████████████████████████████████▏       | 65/72 [5:22:29<28:13, 241.92s/it]



 92%|█████████████████████████████████████████████████████████████████████████▎      | 66/72 [5:23:50<19:20, 193.48s/it]



 93%|██████████████████████████████████████████████████████████████████████████▍     | 67/72 [5:47:16<46:27, 557.40s/it]



 94%|███████████████████████████████████████████████████████████████████████████▌    | 68/72 [5:52:20<32:05, 481.28s/it]



 96%|████████████████████████████████████████████████████████████████████████████▋   | 69/72 [5:54:35<18:52, 377.60s/it]



 97%|█████████████████████████████████████████████████████████████████████████████▊  | 70/72 [6:11:18<18:49, 564.96s/it]



 99%|██████████████████████████████████████████████████████████████████████████████▉ | 71/72 [6:14:03<07:25, 445.15s/it]



100%|████████████████████████████████████████████████████████████████████████████████| 72/72 [6:15:36<00:00, 339.36s/it]

In [None]:
# One row per hyperparameter combination, one column per recorded key.
result_df = pd.DataFrame(data=result_list)

In [10]:
# Write the grid-search results to disk, without the index column.
result_df.to_csv(path_or_buf='./NeuralNetworkResults.csv', index=False)

In [28]:
# Read the saved results back from disk into result_df.
result_df = pd.read_csv(filepath_or_buffer='./NeuralNetworkResults.csv')

In [29]:
# Rank the combinations by test R^2, highest first, and renumber the rows.
result_df = (
    result_df
    .sort_values(by='test_r2', ascending=False)
    .reset_index(drop=True)
)

In [31]:
# Round numeric columns to four decimal places.
result_df = result_df.round(decimals=4)

In [35]:
# Write the sorted, rounded results to their own file, without the index column.
result_df.to_csv(
    path_or_buf='NeuralNetwork_GridSearch_Results_Sorted.csv',
    index=False,
)