# ML Cup
Grid search for model selection on the ML Cup dataset


In [1]:
from exclusiveAI.components.Validation.HoldOut import parallel_hold_out
from exclusiveAI.ConfiguratorGen import ConfiguratorGen
from exclusiveAI.Composer import Composer
from tqdm import tqdm
import pandas as pd
import numpy as np
from exclusiveAI.utils import plot_history
import os, json

#### Read ML Cup dataset

In [2]:
# Location of the pre-computed train/test split.
file_path = "MLCup/Data/training_data_split.json"
with open(file_path, 'r') as jsonfile:
    data_dict = json.load(jsonfile)

# Turn every stored list into a NumPy array, in the same order as the keys below.
training_data, training_labels, test_data, test_labels, train_idx, test_idx = (
    np.array(data_dict[key])
    for key in (
        'training_data',
        'training_labels',
        'test_data',
        'test_labels',
        'train_idx',
        'test_idx',
    )
)

In [3]:
def read_json_files(my_dir_path):
    """Gather the rows stored in every ``.json`` file of a directory into one DataFrame.

    Each file is read as a two-level mapping (outer key -> inner key -> row);
    every innermost value becomes one row of the result.

    Args:
        my_dir_path: Directory to scan. Only its direct entries are considered.

    Returns:
        A DataFrame with one row per innermost value and a fresh ``0..n-1``
        index; an empty DataFrame when no ``.json`` file is found.
    """
    rows = []
    # os.listdir returns entries in arbitrary order; sorting keeps the row
    # order (and anything derived from it) stable across runs and machines.
    for file in sorted(os.listdir(my_dir_path)):
        if not file.endswith('.json'):
            continue
        with open(os.path.join(my_dir_path, file), 'r') as f:
            json_file = json.load(f)
        for elem in json_file.values():
            rows.extend(elem.values())
    # Build the frame once instead of concatenating per file, which copies the
    # accumulated data on every iteration.
    return pd.DataFrame(rows)
# Training settings shared by the hold-out search and the retraining that follows.
batch_size = 200
epochs = 500

# Cache of the configurations returned by the hold-out search; the search is
# skipped whenever this file already exists.
configs_path = 'MLCup_models_configs.json'

if not os.path.exists(configs_path):
    dir_path = "MLCup/"

    # Rebuild the search space from the hyperparameter values found in the
    # JSON files under dir_path.
    all_json_data = read_json_files(dir_path)
    regularizations = all_json_data['regularization'].unique().tolist()
    learning_rates = all_json_data['learning_rate'].unique().tolist()
    momentums = all_json_data['momentum'].unique().tolist()
    num_of_layers = all_json_data['num_layers'].unique().tolist()
    # Each 'num_of_units' entry is itself a sequence; flatten to the distinct values.
    num_of_units = {unit1 for unit in all_json_data['num_of_units'] for unit1 in unit}
    initializers = all_json_data['initializers'].unique().tolist()
    activations = ["sigmoid", 'tanh']

    myConfigurator = ConfiguratorGen(random=False, learning_rates=learning_rates, regularizations=regularizations,
                                     loss_function=['mse'], optimizer=['sgd'],
                                     activation_functions=activations,
                                     number_of_units=num_of_units, number_of_layers=num_of_layers,
                                     momentums=momentums, initializers=initializers,
                                     input_shapes=training_data.shape,
                                     verbose=False, nesterov=True, outputs=3,
                                     callbacks=["earlystopping"], output_activation='linear', show_line=False,
                                     ).get_configs()
    print(len(myConfigurator))

    if __name__ == '__main__':
        search_results = [
            parallel_hold_out(myConfigurator, training=training_data, training_target=training_labels, epochs=epochs,
                              batch_size=batch_size, num_models=10, workers=8, number_of_initializations=3,
                              regression=True,
                              )
        ]
        # Save as json
        pd.DataFrame(search_results).to_json(configs_path)

# Always read the configurations back from the cache file, so `configs` has the
# same structure (a list of the entries stored under row '0') whether the search
# just ran or was skipped. Previously a fresh search left `configs` as a
# DataFrame, which the later `configs[1]` / `config['callbacks']` accesses do
# not handle the same way as the list loaded from JSON.
configs = []
if os.path.exists(configs_path):
    with open(configs_path, 'r') as f:
        configs = [data['0'] for data in json.load(f).values()]

In [4]:
# Column labels for the per-model summary table, in the same order as the
# fields of each record appended to `models`.
result_columns = [
    'Score', 'History_Std', 'Mee', 'Test_Score', 'Test_Mee', 'Trained_Epochs',
    'History', 'Model', 'Config', 'Num_Layers', 'Num_of_Units', 'Name',
]

models = []
# Keep only the configuration at index 1 for retraining.
configs = [configs[1]]
with tqdm(total=len(configs)) as pbar:
    for config in configs:
        # Enable the 'wandb' callback for this run (mutates the config in place).
        config['callbacks'].append('wandb')
        model = Composer(config=config).compose(regression=True)
        model.train(
            inputs=training_data,
            input_label=training_labels,
            val=test_data,
            val_labels=test_labels,
            epochs=epochs,
            batch_size=batch_size,
            name=config['model_name'],
            disable_line=True,
        )
        test_val = model.evaluate(input=test_data, input_label=test_labels, metrics=['mse', 'mee'])
        record = (
            model.get_last()['mse'],                 # Score
            np.std(np.array(model.history['mse'])),  # History_Std
            model.get_last()['mee'],                 # Mee
            test_val[0],                             # Test_Score
            test_val[1],                             # Test_Mee
            model.curr_epoch,                        # Trained_Epochs
            model.history['mee'],                    # History
            # NOTE(review): this composes a new model from the config rather
            # than storing the trained `model` - confirm this is intended.
            Composer(config=config).compose(),       # Model
            config,                                  # Config
            config['num_layers'],                    # Num_Layers
            config['num_of_units'],                  # Num_of_Units
            config['model_name'],                    # Name
        )
        models.append(record)
        pbar.update(1)

# One row per trained model, one column per field of the record tuple.
df = pd.DataFrame(models, columns=result_columns)

  0%|          | 0/1 [00:00<?, ?it/s][34m[1mwandb[0m: Currently logged in as: [33mp-magos[0m ([33mexclusiveai[0m). Use [1m`wandb login --relogin`[0m to force relogin


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
train/mae,█▃▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▂▁▁▁▁▁▁▁▁▁
train/mee,█▃▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train/mse,█▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train/step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
val/mae,█▃▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁
val/mee,█▃▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val/mse,█▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
train/mae,3.07933
train/mee,6.11987
train/mse,43.51764
train/step,102.0
val/mae,3.38622
val/mee,6.71422
val/mse,52.67233


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.01116072361118212, max=1.0)…

100%|██████████| 1/1 [00:08<00:00,  8.82s/it]


In [5]:
# Order the results by number of layers first, then by training score, test
# score and the standard deviation of the training-MSE history (all ascending).
sort_keys = ['Num_Layers', 'Score', 'Test_Score', 'History_Std']
df_sorted = df.sort_values(by=sort_keys)

# Map each model name to the history stored in its 'History' column.
histories = dict(zip(df_sorted['Name'], df_sorted['History']))
df_sorted

Unnamed: 0,Score,History_Std,Mee,Test_Score,Test_Mee,Trained_Epochs,History,Model,Config,Num_Layers,Num_of_Units,Name
0,43.517644,240.856372,6.119869,52.672333,6.714223,102,"[42.7237410058453, 29.861483755259822, 15.0815...",<exclusiveAI.components.NeuralNetwork.NeuralNe...,"{'regularization': 1e-08, 'learning_rate': 0.2...",1,[15],Model233614


In [6]:
# plot_history(histories)

# plot_history({"Model286727": histories['Model286727']})