# Shallow MLP


# Preliminaries

Check for GPU

In [1]:
import tensorflow as tf

In [2]:
# Ask TensorFlow for the GPU device name once and report on it.
# Any name other than '/device:GPU:0' is reported as "no GPU".
gpu_name = tf.test.gpu_device_name()
if gpu_name == '/device:GPU:0':
  print("GPU ready: {}".format(gpu_name))
else:
  print("No GPU found")

No GPU found


Import preprocessing helper functions

In [3]:
import sys
sys.path.append("../../helper-modules")
from preprocessing_utils import read_in_data, preprocess

Read in the data to df_train, df_val and df_test

In [4]:
# Load the train / validation / test frames via the read_in_data helper
# imported from the local preprocessing_utils module.
df_train, df_val, df_test = read_in_data()

Create X_train, y_train, X_val, y_val, X_test, y_test

In [5]:
# Turn the three frames into (features, labels) pairs; "MLP" is passed as the
# first argument (presumably selecting model-specific preprocessing — confirm).
# NOTE(review): the frames are passed in the order (train, test, val) while the
# result is unpacked as (train, val, test) — confirm this matches the signature
# and return order of preprocess in preprocessing_utils.
(X_train, y_train), (X_val, y_val), (X_test, y_test) = preprocess("MLP", df_train, df_test, df_val)

# Model Building

Imports

In [6]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Activation
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from itertools import product
import time
import matplotlib.pyplot as plt
import numpy as np
from tensorflow.keras.utils import to_categorical

Enable seed setting for improved reproducibility

In [7]:
import os
import random
import numpy as np

In [8]:
def set_seed(seed=42):
  """
  Apply one seed value to every random number source this notebook uses.

  seed : integer written to the PYTHONHASHSEED environment variable and passed
         to the Python `random`, NumPy and TensorFlow global seeding functions
  """
  # The environment variable must be a string
  os.environ['PYTHONHASHSEED'] = str(seed)

  # Seed the Python, NumPy and TensorFlow global generators, in that order
  for seeder in (random.seed, np.random.seed, tf.random.set_seed):
    seeder(seed)

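Extract the number of classes (13 classes, as implied by the logged parameter counts: the single-neuron model has 1481 + 2 × 13 = 1507 parameters)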

In [9]:
# Number of distinct values in the "label" column; used as the width of the
# softmax output layer in compile_model.
# NOTE(review): this is derived from the test split, so it assumes every class
# occurs there — confirm, or derive it from the training split instead.
NUM_CLASSES = df_test["label"].nunique() 

Model design

In [10]:
def compile_model(hidden_layers_sizes=(1024,), act_function="relu", dropout_rate=0.1, l_rate=0.001, reg='l2', opt_algo='adam', input_dim=1480):
    """
    Builds and compiles an MLP classifier with the given hyperparameters.

    The network has one Dense + Dropout pair per entry of `hidden_layers_sizes`,
    followed by a softmax Dense layer with NUM_CLASSES units.

    Hyperparameter options:

    hidden_layers_sizes : tuple with the number of neurons in each hidden layer
                          (a one-element tuple gives the shallow, single-hidden-layer MLP)
    act_function : activation of the hidden layers, e.g. "relu", "sigmoid" or "tanh"
    dropout_rate : a float with the rate of dropout applied after every hidden layer
    l_rate : a float with the learning rate
    reg : kernel regularizer of the output layer only, e.g. "l2"
          (the hidden layers are not regularized)
    opt_algo : the optimization algorithm, "adam" or "sgd"
    input_dim : number of input features per sample (defaults to 1480)

    Returns the compiled Keras model (categorical cross-entropy loss, accuracy metric).

    Raises ValueError if `opt_algo` is not one of the supported optimizers.
    """
    # Reject unknown optimizers up front; previously this surfaced only later as
    # an unbound `opt` variable when compiling the model.
    if opt_algo not in ('adam', 'sgd'):
        raise ValueError(f"Unsupported opt_algo {opt_algo!r}; expected 'adam' or 'sgd'")

    model = Sequential()

    # Add hidden layers, each followed by dropout to prevent overfitting
    for i, h in enumerate(hidden_layers_sizes):
        if i == 0:
            # Only the first layer declares the input width
            model.add(Dense(h, activation=act_function, input_shape=(input_dim,)))
        else:
            model.add(Dense(h, activation=act_function))
        model.add(Dropout(dropout_rate))

    # The last layer is the classifier: one softmax unit per class
    model.add(Dense(NUM_CLASSES, activation='softmax', kernel_regularizer=reg))

    # Optimization algorithm (opt_algo was validated above)
    if opt_algo == 'adam':
        opt = tf.keras.optimizers.Adam(learning_rate=l_rate)
    else:
        opt = tf.keras.optimizers.SGD(learning_rate=l_rate)

    model.compile(loss='categorical_crossentropy',
                  optimizer=opt,
                  metrics=['accuracy'])

    return model

# Experiment: Grid Search

Defining grid search hyperparameter options

In [11]:
# Hidden-layer options: every entry is a one-element tuple, i.e. one hidden layer of that width
hidden_layers_sizes = [(width,) for width in (1, 5, 20, 81, 322, 1288)]
l_rates = [1e-2, 5e-3, 1e-3, 5e-4, 1e-4]
dropout_rates = [0.05, 0.1, 0.2, 0.5]
# Single-option axes, kept as lists so they take part in the product like the others
act_functions = ["relu"]
regs = ['l2']
opt_algos = ['adam']

# Order matters: grid_search unpacks every combination positionally in this order
all_hparams = [
    hidden_layers_sizes,
    l_rates,
    dropout_rates,
    act_functions,
    regs,
    opt_algos,
]

Conduct the grid search

In [12]:
def grid_search(starting_point=1):
  """
  Train one model per combination of the options in `all_hparams`, writing the
  validation result and model details of each model as a row of the experiment log.

  Every trained model's best-validation-accuracy epoch is saved as an h5 file.

  starting_point : 1-based number of the first grid combination to train.
                   With the default of 1 the log file is recreated with a header
                   row and the whole grid is run. A larger value skips the
                   earlier combinations and appends to the existing log, so an
                   interrupted search can be resumed with consistent model numbers.

  Returns (best_model, best_history): the model with the highest validation
  accuracy among the models trained in this call and its Keras History object,
  or (None, None) if no model was trained.

  Raises ValueError if `starting_point` is smaller than 1.
  """
  EPOCHS = 50
  BATCH_SIZE = 32
  STOPPING_PATIENCE = 5
  SEED = 42
  LOG_PATH = '../training-results/ExperimentLogs_Shallow-MLP.csv'

  if starting_point < 1:
    raise ValueError(f"starting_point must be >= 1, got {starting_point}")

  best_model = None
  best_accuracy = 0
  best_history = None

  # A fresh search starts the log with its header; a resumed search keeps the
  # rows that are already there.
  if starting_point == 1:
    with open(LOG_PATH, 'w') as log_file:
      log_file.write("ModelNumber;Timestamp;NumEpochs;ValAccuracy;NumParams;HiddenLayerSizes;LearningRate;DropoutRate;ActivationFunction;Regularization;OptimizationAlgorithm\n")

  # Skip the combinations before the starting point so that model numbers always
  # refer to the same position in the full grid.
  remaining = list(product(*all_hparams))[starting_point - 1:]

  for model_num, hparam_set in enumerate(remaining, start=starting_point):
    # Extract hyperparams for current model from grid search grid
    hidden_layers_sizes_tup, l_rate, dropout_rate, act_function, reg, opt_algo = hparam_set

    # Define hyperparameters for the current model
    hparams = {
        "hidden_layers_sizes" : hidden_layers_sizes_tup,
        "act_function" : act_function,
        "dropout_rate" : dropout_rate,
        "l_rate": l_rate,
        "reg" : reg,
        "opt_algo" : opt_algo
    }

    # Seed BEFORE the model is built: the layer weights are created inside
    # compile_model, so seeding afterwards leaves the initialization unseeded.
    set_seed(SEED)

    # Compile model and count number of parameters
    model = compile_model(**hparams)
    num_params = model.count_params()

    model_str = f"{hidden_layers_sizes_tup}; {l_rate}; {dropout_rate}; {act_function}; {reg}; {opt_algo}"
    print(f"Training model {model_num} with {num_params} params - {model_str}")

    # Prevent overfitting with early stopping. restore_best_weights makes the
    # in-memory model match the best epoch that is logged below.
    # NOTE(review): some Keras versions only restore when training is actually
    # stopped early; the h5 checkpoint always holds the best epoch.
    early_stop = EarlyStopping(monitor='val_accuracy', patience=STOPPING_PATIENCE, restore_best_weights=True)
    # For saving best val accuracy model as an h5 (for test predictions later)
    model_check = ModelCheckpoint(f"../trained-models/Shallow-MLP_{model_num}.h5", monitor='val_accuracy', mode='max', verbose=0, save_best_only=True)

    # Train model
    # Note: one reason val accuracy might be higher than train accuracy during training is because dropout affects training but not validation
    history = model.fit(X_train, y_train, epochs=EPOCHS, batch_size=BATCH_SIZE, validation_data=(X_val, y_val), callbacks=[early_stop, model_check])
    n_epochs = len(history.history['loss'])

    # Time stamp when model finished training
    timestamp = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())

    # Best validation accuracy reached in any epoch of this run
    val_accuracy = max(history.history['val_accuracy'])
    print(f"Best val accuracy: {val_accuracy}")

    # Store results in log file
    with open(LOG_PATH, 'a') as log_file:
      log_file.write(f"{model_num};{timestamp};{n_epochs};{val_accuracy};{num_params};{model_str}\n")

    if val_accuracy > best_accuracy:
      best_accuracy = val_accuracy
      best_model = model
      best_history = history

  # Return the model with the highest validation accuracy
  return best_model, best_history


In [None]:
# Long-running cell: trains one model per combination in all_hparams
# (6 * 5 * 4 = 120 with the grid defined above), writing a log row and an h5
# checkpoint for each, and keeps the model with the highest validation accuracy.
best_model, best_history = grid_search()

Training model 1 with 1507 params - (1,); 0.01; 0.05; relu; l2; adam
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Best val accuracy: 0.2742307782173157
Training model 2 with 1507 params - (1,); 0.01; 0.1; relu; l2; adam
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Best val accuracy: 0.07692307978868484
Training model 3 with 1507 params - (1,); 0.01; 0.2; relu; l2; adam
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Best val accuracy: 0.07692307978868484
Training model 4 with 1507 params - (1,); 0.01; 0.5; relu; l2; adam
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Best val accuracy: 0.07692307978868484
Training model 5 with 1507 params - (1,); 0.005; 0.05; relu; l2; adam
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Best val accuracy: 0.07692307978868484
Training model 6 with 1507 params - (1,); 0.005; 0.1; relu; l2; adam
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/5

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Best val accuracy: 0.2975480854511261
Training model 10 with 1507 params - (1,); 0.001; 0.1; relu; l2; adam
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Best val accuracy: 0.22735576331615448
Training model 11 with 1507 params - (1,); 0.001; 0.2; relu; l2; adam
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Best val accuracy: 0.22596153616905212
Training model 12 with 1507 params - (1,); 0.001; 0.5; relu; l2; adam
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Best val accuracy: 0.18081730604171753
Training model 13 with 1507 params - (1,); 0.0005; 0.05; relu; l2; adam
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Ep

Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Best val accuracy: 0.2980288565158844
Training model 14 with 1507 params - (1,); 0.0005; 0.1; relu; l2; adam
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Best val accuracy: 0.22149038314819336
Training model 15 with 1507 params - (1,); 0.0005; 0.2; relu; l2; adam
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Best val accuracy: 0.21591345965862274
Training model 16 with 1507 params - (1,); 0.0005; 0.5; relu; l2; adam
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Best val accuracy: 0.2001442313194275
Training model 17 with 1507 params - (1,); 0.0001; 0.05; relu; l2; adam
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoc

Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Best val accuracy: 0.2049519270658493
Training model 18 with 1507 params - (1,); 0.0001; 0.1; relu; l2; adam
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Best val accuracy: 0.2164423018693924
Training model 19 with 1507 params - (1,); 0.0001; 0.2; relu; l2; adam
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Best val accuracy: 0.20817308127880096
Training model 20 with 1507 params - (1,); 0.0001; 0.5; relu; l2; adam
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Best val accuracy: 0.20278845727443695
Training model 21 with 7483 params - (5,); 0.01; 0.05; relu; l2; adam
Epoch 1/50
Epo

Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Best val accuracy: 0.524134635925293
Training model 22 with 7483 params - (5,); 0.01; 0.1; relu; l2; adam
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Best val accuracy: 0.49620193243026733
Training model 23 with 7483 params - (5,); 0.01; 0.2; relu; l2; adam
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Best val accuracy: 0.48721152544021606
Training model 24 with 7483 params - (5,); 0.01; 0.5; relu; l2; adam
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Best val accuracy: 0.2740384638309479
Training model 25 with 7483 params - (5,); 0.005; 0.05; relu; l2; adam
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Best val accuracy: 0.5064423084259033
Training model 2

Epoch 7/50
Best val accuracy: 0.5017788410186768
Training model 32 with 7483 params - (5,); 0.001; 0.5; relu; l2; adam
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Best val accuracy: 0.32624998688697815
Training model 33 with 7483 params - (5,); 0.0005; 0.05; relu; l2; adam
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Best val accuracy: 0.6062980890274048
Training model 34 with 7483 params - (5,); 0.0005; 0.1; relu; l2; adam
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Best val accuracy: 0.5405288338661194
Training model 35 with 7483 params - (5,); 0.0005; 0.2; relu; l2; adam
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Best val accuracy: 0.51192307472229
Training model 36 with 7483 par

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Best val accuracy: 0.5941346287727356
Training model 38 with 7483 params - (5,); 0.0001; 0.1; relu; l2; adam
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Best val accuracy: 0.5481250286102295
Training model 39 with 7483 params - (5,); 0.0001; 0.2; relu; l2; adam
Epoch 1/

Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Best val accuracy: 0.5444711446762085
Training model 43 with 29893 params - (20,); 0.01; 0.2; relu; l2; adam
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Best val accuracy: 0.589519202709198
Training model 44 with 29893 params - (20,); 0.01; 0.5; relu; l2; adam
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Best val accuracy: 0.43850961327552795
Training model 45 with 29893 params - (20,); 0.005; 0.05; relu; l2; adam
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Best val accuracy: 0.6441346406936646
Training model 46 with 29893 params - (20,); 0.005; 0.1; relu; l2; adam
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50

Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Best val accuracy: 0.6537981033325195
Training model 47 with 29893 params - (20,); 0.005; 0.2; relu; l2; adam
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Best val accuracy: 0.6433172821998596
Training model 48 with 29893 params - (20,); 0.005; 0.5; relu; l2; adam
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Best val accuracy: 0.40149039030075073
Training model 49 with 29893 params - (20,); 0.001; 0.05; relu; l2; adam
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50


Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Best val accuracy: 0.7250000238418579
Training model 50 with 29893 params - (20,); 0.001; 0.1; relu; l2; adam
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Best val accuracy: 0.6944230794906616
Training model 51 with 29893 params - (20,); 0.001; 0.2; relu; l2; adam
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Best val accuracy: 0.6768268942832947
Training model 52 with 29893 params - (2

Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Best val accuracy: 0.7139903903007507
Training model 55 with 29893 params - (20,); 0.0005; 0.2; relu; l2; adam
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Best val accuracy: 0.6210576891899109
Training model 56 with 29893 params - (20,); 0.0005; 0.5; relu; l2; adam
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Best val accuracy: 0.44769230484962463
Training model 57 with 29893 params - (20,); 0.0001; 0.05; relu; l2; adam
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch

Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Best val accuracy: 0.640096127986908
Training model 60 with 29893 params - (20,); 0.0001; 0.5; relu; l2; adam
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Best val accuracy: 0.5211538672447205
Training model 61 with 121027 params - (81,); 0.01; 0.05; relu; l2; adam
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Best val accuracy: 0.6494230628013611
Training model 62 with 121027 params - (81,); 0.01; 0.1; relu; l2; adam
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
E

Epoch 22/50
Best val accuracy: 0.6157692074775696
Training model 65 with 121027 params - (81,); 0.005; 0.05; relu; l2; adam
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Best val accuracy: 0.7410576939582825
Training model 66 with 121027 params - (81,); 0.005; 0.1; relu; l2; adam
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Best val accuracy: 0.7322115302085876
Training model 67 with 121027 params - (81,); 0.005; 0.2; relu; l2; adam
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/

Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Best val accuracy: 0.7646634578704834
Training model 70 with 121027 params - (81,); 0.001; 0.1; relu; l2; adam
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Best val accuracy: 0.7641826868057251
Training model 71 with 121027 params - (81,); 0.001; 0.2; relu; l2; adam
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Best val accuracy: 0.7498557567596436
Training model 72 with 121027 params - (81,); 0.001; 0.5; relu; l2; adam
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoc

Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Best val accuracy: 0.7631250023841858
Training model 75 with 121027 params - (81,); 0.0005; 0.2; relu; l2; adam
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Best val accuracy: 0.75
Training model 76 with 121027 params - (81,); 0.0005; 0.5; relu; l2; adam
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19

Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
Best val accuracy: 0.7513942122459412
Training model 78 with 121027 params - (81,); 0.0001; 0.1; relu; l2; adam
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Best val accuracy: 0.7467788457870483
Training model 79 with 121027 params - (81,); 0.0001; 0.2; relu; l2; adam
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Ep

Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Best val accuracy: 0.6159615516662598
Training model 83 with 481081 params - (322,); 0.01; 0.2; relu; l2; adam
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Best val accuracy: 0.6340384483337402
Training model 84 with 481081 params - (322,); 0.01; 0.5; relu; l2; adam
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Best val accuracy: 0.5869711637496948
Training model 85 with 481081 params - (322,); 0.005; 0.05; relu; l2; adam
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Best val accuracy: 0.7550961375236511
Training model 86 with 481081 params

Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Best val accuracy: 0.7384615540504456
Training model 87 with 481081 params - (322,); 0.005; 0.2; relu; l2; adam
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Best val accuracy: 0.7453365325927734
Training model 88 with 481081 params - (322,); 0.005; 0.5; relu; l2; adam
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Best val accuracy: 0.6985576748847961
Training model 89 with 481081 params - (322,); 0.001; 0.05; relu; l2; adam
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50


Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Best val accuracy: 0.7809615135192871
Training model 90 with 481081 params - (322,); 0.001; 0.1; relu; l2; adam
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Best val accuracy: 0.7833173274993896
Training model 91 with 481081 params - (322,); 0.001; 0.2; relu; l2; adam
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Best val accuracy: 0.7839422821998596
Training model 92 with 481081 params - (322,); 0

Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Best val accuracy: 0.7861538529396057
Training model 94 with 481081 params - (322,); 0.0005; 0.1; relu; l2; adam
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Best val accuracy: 0.776394248008728
Training model 95 with 481081 params - (322,); 0.0005; 0.2; relu; l2; adam
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
E

Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50

In [None]:
# (rows, columns) of the training frame
df_train.shape


In [16]:
# (rows, columns) of the test frame
df_test.shape

(30000, 1481)

In [17]:
# (rows, columns) of the validation frame
df_val.shape

(24000, 1481)