## Uncovering Trends Between Hyperparameters and Performance Metrics for Binary Classification using Feedforward Neural Networks

How well does evolutionary algorithms evolve multilayer perceptrons for my Breast Cancer binary classification task to find a Pareto optimal set of non-dominated solutions based on multiple objectives?

In [15]:
import pickle
from pathlib import Path
import sys
import pandas as pd
import json


# Add your evolutionary-ml root folder to the Python path
sys.path.append(str(Path("../workflow/netArchSearch")))

# Now you can import directly
from solution import Solution

In [None]:
# path to .dat files
solutions_path = '../outputs/solutions.dat'
evo_final_pareto_path = '../outputs/evo_final_pareto.dat'
evo_final_pareto_validated_path = '../outputs/evo_final_pareto_validated.dat'
#pareto_historical_validated_path = '../outputs/pareto_historical_validated.dat'

# open and load the pickle file
with open(solutions_path, 'rb') as f:
    solutions = pickle.load(f)

with open(evo_final_pareto_path, 'rb') as f:
    evo_final_pareto_set = pickle.load(f)

with open(evo_final_pareto_validated_path, 'rb') as f:
    evo_final_pareto_validated_set = pickle.load(f)

# with open(pareto_historical_validated_path, 'rb') as f:
#     pareto_historical_validated_set = pickle.load(f)

1125

In [17]:
# ensure that both the files save properly (information should be the same0)
len(solutions), len(evo_final_pareto_set)

# compare keys (objective tuples)
print("Common:", len(set(solutions.keys()) & set(evo_final_pareto_set.keys())))
print("New in final:", len(set(evo_final_pareto_set.keys()) - set(solutions.keys())))

Common: 1125
New in final: 0


In [18]:
# load jsonl file of all solutions from evo process
jsonl_file = '../outputs/all_solutions.jsonl'
data = []
with open(jsonl_file, 'r') as f:
    for line in f:
        line = line.strip()
        if line:  # skip empty lines
            data.append(json.loads(line))

# flatten nested dictionaries and rename columns
df = pd.json_normalize(data, sep='_')  # hyperparams_loss_function, metrics_accuracy, etc.
df = df.rename(
    columns={col: col.replace('hyperparams_', 'hyp_').replace('metrics_', 'obj_') for col in df.columns}
)

In [19]:
# look at the entire simulated population in evolution
df

Unnamed: 0,hyp_loss_function,hyp_hidden_layer_count,hyp_units_per_hidden_layer,hyp_activation_per_hidden_layer,hyp_optimizer,hyp_epochs,hyp_batch_size,hyp_feature_count,hyp_class_count,obj_total_layers,...,obj_training_time,obj_loss,obj_accuracy,obj_false_positive_rate,obj_false_negative_rate,obj_true_positive_rate,obj_true_negative_rate,obj_f1,obj_precision,obj_auc
0,binary_focal_crossentropy,16,"[115, 86, 84, 110, 80, 23, 20, 30, 44, 40, 94,...","[tanh, relu, sigmoid, relu, tanh, sigmoid, sig...",sgd,18,64,30,2,18,...,0.507657,0.160724,0.666667,1.000000,0.000000,1.000000,0.000000,0.800000,0.666667,0.653566
1,binary_focal_crossentropy,11,"[63, 17, 100, 36, 17, 49, 36, 85, 53, 59, 38]","[relu, sigmoid, relu, relu, relu, relu, sigmoi...",rmsprop,8,16,30,2,13,...,0.721492,0.013756,0.991228,0.000000,0.013158,0.986842,1.000000,0.000000,1.000000,0.998615
2,cosine_similarity,10,"[118, 126, 83, 38, 67, 96, 68, 100, 103, 53]","[tanh, tanh, tanh, tanh, tanh, tanh, tanh, sig...",rmsprop,17,16,30,2,12,...,0.829604,-0.666667,0.666667,1.000000,0.000000,1.000000,0.000000,0.800000,0.666667,0.965028
3,binary_focal_crossentropy,2,"[45, 102]","[relu, relu]",rmsprop,20,128,30,2,4,...,0.481059,0.015444,0.982456,0.026316,0.013158,0.986842,0.973684,0.051265,0.986842,0.998615
4,binary_crossentropy,3,"[32, 31, 53]","[relu, relu, relu]",rmsprop,7,128,30,2,5,...,0.300834,0.253914,0.921053,0.157895,0.039474,0.960526,0.842105,0.269704,0.924051,0.983726
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4306,binary_crossentropy,2,"[45, 3]","[relu, tanh]",rmsprop,8,128,30,2,4,...,0.340876,0.423036,0.938596,0.131579,0.026316,0.973684,0.868421,0.230745,0.936709,0.986150
4307,binary_focal_crossentropy,3,"[32, 31, 116]","[relu, relu, relu]",rmsprop,7,128,30,2,5,...,0.381944,0.041907,0.956140,0.026316,0.052632,0.947368,0.973684,0.051264,0.986301,0.989612
4308,binary_focal_crossentropy,2,"[45, 112]","[relu, tanh]",sgd,20,128,30,2,4,...,0.400073,0.066312,0.885965,0.236842,0.052632,0.947368,0.763158,0.374026,0.888889,0.967798
4309,binary_focal_crossentropy,1,[45],[relu],sgd,20,128,30,2,3,...,0.390022,0.065872,0.938596,0.105263,0.039474,0.960526,0.894737,0.189487,0.948052,0.976108


In [20]:
# look at only the objective functions
objectives = [col for col in df.columns if 'obj' in col]
display(df[objectives])

Unnamed: 0,obj_total_layers,obj_total_nodes,obj_training_time,obj_loss,obj_accuracy,obj_false_positive_rate,obj_false_negative_rate,obj_true_positive_rate,obj_true_negative_rate,obj_f1,obj_precision,obj_auc
0,18,1115,0.507657,0.160724,0.666667,1.000000,0.000000,1.000000,0.000000,0.800000,0.666667,0.653566
1,13,585,0.721492,0.013756,0.991228,0.000000,0.013158,0.986842,1.000000,0.000000,1.000000,0.998615
2,12,884,0.829604,-0.666667,0.666667,1.000000,0.000000,1.000000,0.000000,0.800000,0.666667,0.965028
3,4,179,0.481059,0.015444,0.982456,0.026316,0.013158,0.986842,0.973684,0.051265,0.986842,0.998615
4,5,148,0.300834,0.253914,0.921053,0.157895,0.039474,0.960526,0.842105,0.269704,0.924051,0.983726
...,...,...,...,...,...,...,...,...,...,...,...,...
4306,4,80,0.340876,0.423036,0.938596,0.131579,0.026316,0.973684,0.868421,0.230745,0.936709,0.986150
4307,5,211,0.381944,0.041907,0.956140,0.026316,0.052632,0.947368,0.973684,0.051264,0.986301,0.989612
4308,4,189,0.400073,0.066312,0.885965,0.236842,0.052632,0.947368,0.763158,0.374026,0.888889,0.967798
4309,3,77,0.390022,0.065872,0.938596,0.105263,0.039474,0.960526,0.894737,0.189487,0.948052,0.976108


In [None]:
def dominates(p, q, obj_cols):
    """
    Returns True if row p dominates row q based on objectives in obj_cols.
    For minimization: p dominates q if p is better or equal in all objectives 
    and strictly better in at least one.
    """
    pscores = p[obj_cols].values
    qscores = q[obj_cols].values
    
    # p dominates q if p <= q in all objectives and p < q in at least one
    all_better_or_equal = all(pscores <= qscores)
    at_least_one_better = any(pscores < qscores)
    
    return all_better_or_equal and at_least_one_better

def remove_dominated(df, obj_cols):
    """
    Removes dominated rows from df based on objectives.
    """
    non_dominated_indices = set(df.index)

    for i, p in df.iterrows():
        if i in non_dominated_indices:
            # Only check against other non-dominated solutions
            dominated = {j for j in non_dominated_indices 
                        if i != j and dominates(p, df.loc[j], obj_cols)}
            non_dominated_indices -= dominated

    return df.loc[sorted(non_dominated_indices)]

In [None]:
# create the pareto set from the historical solutions
pareto_historical_set = remove_dominated(df, objectives)
pareto_historical_set

Unnamed: 0,hyp_loss_function,hyp_hidden_layer_count,hyp_units_per_hidden_layer,hyp_activation_per_hidden_layer,hyp_optimizer,hyp_epochs,hyp_batch_size,hyp_feature_count,hyp_class_count,obj_total_layers,...,obj_training_time,obj_loss,obj_accuracy,obj_false_positive_rate,obj_false_negative_rate,obj_true_positive_rate,obj_true_negative_rate,obj_f1,obj_precision,obj_auc
1,binary_focal_crossentropy,11,"[63, 17, 100, 36, 17, 49, 36, 85, 53, 59, 38]","[relu, sigmoid, relu, relu, relu, relu, sigmoi...",rmsprop,8,16,30,2,13,...,0.721492,0.013756,0.991228,0.000000,0.013158,0.986842,1.000000,0.000000,1.000000,0.998615
4,binary_crossentropy,3,"[32, 31, 53]","[relu, relu, relu]",rmsprop,7,128,30,2,5,...,0.300834,0.253914,0.921053,0.157895,0.039474,0.960526,0.842105,0.269704,0.924051,0.983726
14,cosine_similarity,2,"[45, 102]","[relu, relu]",rmsprop,20,128,30,2,4,...,0.452408,-0.666667,0.280702,0.315789,0.921053,0.078947,0.684211,0.324324,0.333333,0.257271
17,binary_focal_crossentropy,11,"[63, 17, 100, 36, 17, 49, 36, 85, 53, 96, 38]","[relu, sigmoid, relu, relu, relu, relu, sigmoi...",rmsprop,8,16,30,2,13,...,0.684649,0.019434,0.973684,0.078947,0.000000,1.000000,0.921053,0.145920,0.962025,0.998269
21,binary_focal_crossentropy,11,"[63, 17, 100, 36, 17, 49, 36, 85, 53, 59, 38]","[relu, sigmoid, relu, relu, relu, relu, sigmoi...",rmsprop,8,16,30,2,13,...,0.686648,0.023276,0.956140,0.131579,0.000000,1.000000,0.868421,0.230793,0.938272,0.998615
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4304,cosine_similarity,1,[45],[relu],rmsprop,8,16,30,2,3,...,0.442016,-0.666667,0.754386,0.684211,0.026316,0.973684,0.315789,0.711013,0.740000,0.809211
4307,binary_focal_crossentropy,3,"[32, 31, 116]","[relu, relu, relu]",rmsprop,7,128,30,2,5,...,0.381944,0.041907,0.956140,0.026316,0.052632,0.947368,0.973684,0.051264,0.986301,0.989612
4308,binary_focal_crossentropy,2,"[45, 112]","[relu, tanh]",sgd,20,128,30,2,4,...,0.400073,0.066312,0.885965,0.236842,0.052632,0.947368,0.763158,0.374026,0.888889,0.967798
4309,binary_focal_crossentropy,1,[45],[relu],sgd,20,128,30,2,3,...,0.390022,0.065872,0.938596,0.105263,0.039474,0.960526,0.894737,0.189487,0.948052,0.976108


In [42]:
# calculate what is the search space
df[df.obj_accuracy == df.obj_accuracy.max()]

Unnamed: 0,hyp_loss_function,hyp_hidden_layer_count,hyp_units_per_hidden_layer,hyp_activation_per_hidden_layer,hyp_optimizer,hyp_epochs,hyp_batch_size,hyp_feature_count,hyp_class_count,obj_total_layers,...,obj_training_time,obj_loss,obj_accuracy,obj_false_positive_rate,obj_false_negative_rate,obj_true_positive_rate,obj_true_negative_rate,obj_f1,obj_precision,obj_auc
260,binary_focal_crossentropy,1,[45],[relu],rmsprop,20,128,30,2,3,...,0.467447,0.020645,1.0,0.0,0.0,1.0,1.0,0.0,1.0,1.0
262,binary_focal_crossentropy,1,[45],[relu],rmsprop,20,128,30,2,3,...,0.467447,0.020645,1.0,0.0,0.0,1.0,1.0,0.0,1.0,1.0
357,binary_crossentropy,3,"[32, 40, 53]","[relu, relu, relu]",rmsprop,13,128,30,2,5,...,0.417824,0.077094,1.0,0.0,0.0,1.0,1.0,0.0,1.0,1.0
483,binary_focal_crossentropy,1,[45],[relu],rmsprop,20,128,30,2,3,...,0.467447,0.020645,1.0,0.0,0.0,1.0,1.0,0.0,1.0,1.0
634,binary_focal_crossentropy,1,[45],[relu],rmsprop,20,128,30,2,3,...,0.467447,0.020645,1.0,0.0,0.0,1.0,1.0,0.0,1.0,1.0
1026,binary_focal_crossentropy,10,"[63, 17, 100, 36, 17, 49, 36, 127, 59, 38]","[relu, sigmoid, relu, relu, relu, relu, sigmoi...",adam,8,16,30,2,12,...,1.063067,0.008093,1.0,0.0,0.0,1.0,1.0,0.0,1.0,1.0
1414,binary_focal_crossentropy,1,[45],[relu],rmsprop,20,128,30,2,3,...,0.467447,0.020645,1.0,0.0,0.0,1.0,1.0,0.0,1.0,1.0
1517,binary_focal_crossentropy,2,"[45, 12]","[relu, relu]",rmsprop,12,16,30,2,4,...,0.494472,0.008819,1.0,0.0,0.0,1.0,1.0,0.0,1.0,1.0
1556,binary_focal_crossentropy,1,[45],[relu],rmsprop,20,128,30,2,3,...,0.467447,0.020645,1.0,0.0,0.0,1.0,1.0,0.0,1.0,1.0
1654,binary_crossentropy,2,"[45, 605]","[relu, relu]",rmsprop,12,64,30,2,4,...,0.413738,0.043737,1.0,0.0,0.0,1.0,1.0,0.0,1.0,1.0


In [None]:
df.to_csv('evolution.csv', index=False)

In [None]:
# create dashboard that allows you to intialize the evolution process

# prompt user to input a data file or choose one hardcoded 

# prompt user to select objectives to consider (check and make sure the objectives make sense w the data)

# 
