In [1]:
import numpy as np
import matplotlib.pyplot as plt
import os
import pickle

In [2]:
# Folder that receives the selected hyper-parameters at the end of the notebook.
best_param_folder = '/home/erussek/projects/RNNs/best_hyper_params_sequential'

# makedirs(exist_ok=True) also creates missing parent directories and does not
# fail when the folder already exists, so re-running this cell is safe.
os.makedirs(best_param_folder, exist_ok=True)

best_param_dict = {}

In [19]:
def load_results(run_idx, part_name,train_seq_part, fu, model_name, d_model, sim_lr, human_lr, n_head, n_layers, results_folder='/scratch/gpfs/erussek/RNN_project/Hyper_Param_Search_Sequential'):
    """Load the pickled results of one hyper-parameter-search run.

    The file name is built by formatting every argument except ``results_folder``
    into a fixed template, so each value must format to exactly the text that
    appears in the stored file name.

    Parameters
    ----------
    run_idx : index of the run, used in the ``run_{}`` part of the file name.
    part_name, train_seq_part, fu, model_name : labels used in the file name.
    d_model, sim_lr, human_lr, n_head, n_layers : hyper-parameter values used
        in the file name.
    results_folder : directory containing the ``.pickle`` result files.
        Defaults to the project's hyper-parameter-search folder.

    Returns
    -------
    (res, param_dict)
        ``res`` is whatever object was pickled in the file. ``param_dict`` maps
        each setting name to the value passed in (``run_idx`` is not included).

    Raises
    ------
    FileNotFoundError
        If no result file exists for this combination of settings.
    """
    res_name_full = '{}_{}_{}_run_{}_model_name_{}_d_model_{}_sim_lr_{}_human_lr_{}_n_head_{}_n_layers_{}'.format(part_name,train_seq_part, fu, run_idx, model_name, d_model, sim_lr, human_lr, n_head, n_layers)
    param_dict = {'part_name': part_name, 'train_seq_part': train_seq_part, 'fu': fu, 'model_name':model_name, 'd_model':d_model, 'sim_lr':sim_lr, 'human_lr':human_lr, 'n_head':n_head, 'n_layers':n_layers}
    res_full_file_name = os.path.join(results_folder, res_name_full + '.pickle')

    # NOTE: unpickling can execute arbitrary code; only load result files that
    # were produced by this project's own training jobs.
    # The context manager closes the handle even if unpickling fails.
    with open(res_full_file_name, 'rb') as file:
        res = pickle.load(file)
    return res, param_dict

def load_results_all_runs(part_name,train_seq_part, fu, model_name, d_model, sim_lr, human_lr, n_head, n_layers, n_runs = 2):
    """Load the results of runs ``0 .. n_runs - 1`` for one hyper-parameter setting.

    Parameters
    ----------
    part_name, train_seq_part, fu, model_name, d_model, sim_lr, human_lr,
    n_head, n_layers : forwarded unchanged to ``load_results``.
    n_runs : number of runs to load; must be at least 1.

    Returns
    -------
    (results_list, param_dict)
        ``results_list`` holds one loaded result per run, in run order.
        ``param_dict`` is the settings dict returned by ``load_results``
        (it does not depend on the run index).

    Raises
    ------
    ValueError
        If ``n_runs`` is smaller than 1 (there would be nothing to return).
    """
    if n_runs < 1:
        # Without this guard the loop body never runs and `param_dict` is unbound.
        raise ValueError('n_runs must be at least 1, got {}'.format(n_runs))

    results_list = []

    for run_idx in range(n_runs):
        res, param_dict = load_results(run_idx, part_name,train_seq_part, fu, model_name, d_model, sim_lr, human_lr, n_head, n_layers)
        results_list.append(res)

    return results_list, param_dict

def get_learning_curve(part_name,train_seq_part, fu, model_name, d_model, sim_lr, human_lr, n_head, n_layers, n_runs = 2, which_loss = 'simulation_loss_results'):
    """Average one loss curve across runs for a single hyper-parameter setting.

    Loads ``n_runs`` result dicts via ``load_results_all_runs`` and stacks the
    entry named ``which_loss`` from each of them.

    Returns a 6-tuple:
        mean of the stacked curves over runs,
        standard deviation over runs (numpy default, ddof=0) divided by sqrt(n_runs),
        ``'train_sequence_number'`` of the first run,
        ``'simulation_sequence_number'`` of the first run,
        ``'human_sequence_number'`` of the first run,
        the settings dict returned by the loader.
    """
    runs, settings = load_results_all_runs(
        part_name, train_seq_part, fu, model_name, d_model,
        sim_lr, human_lr, n_head, n_layers, n_runs=n_runs,
    )

    # One row per run.
    curves = np.array([run[which_loss] for run in runs])

    curve_mean = np.mean(curves, axis=0)
    curve_sem = np.std(curves, axis=0) / np.sqrt(n_runs)

    # The sequence-number entries are taken from the first run only.
    first_run = runs[0]
    return (
        curve_mean,
        curve_sem,
        first_run['train_sequence_number'],
        first_run['simulation_sequence_number'],
        first_run['human_sequence_number'],
        settings,
    )

def get_best_params(res_losses, res_params, simulation_sequence_number, human_sequence_number):
    """Pick the hyper-parameter setting and checkpoint with the lowest loss.

    Parameters
    ----------
    res_losses : sequence of equal-length loss curves, one per setting.
        It is converted to a 2-D array (setting x position along the curve).
    res_params : sequence of settings dicts, aligned with ``res_losses``.
    simulation_sequence_number, human_sequence_number : per-setting sequences,
        indexed as ``[setting][position]``, aligned with ``res_losses``.

    Returns
    -------
    dict
        A copy of the winning settings dict with three extra keys:
        ``'best_sim_num'`` and ``'best_hum_num'`` (the entries of the two
        sequence-number inputs at the minimum) and ``'min_loss'``.
        The dicts inside ``res_params`` are left unmodified.

    Raises
    ------
    ValueError
        If ``res_losses`` is empty.
    """
    loss_arr = np.array(res_losses)

    min_val = np.min(loss_arr)

    # Convert the flat argmin back into (setting index, position along the curve).
    min_train_setting_idx, min_train_num_idx = np.unravel_index(np.argmin(loss_arr), loss_arr.shape)

    # Work on a copy so that annotating the winner does not mutate the caller's
    # entry in `res_params`.
    best_params = dict(res_params[min_train_setting_idx])

    best_params['best_sim_num'] = simulation_sequence_number[min_train_setting_idx][min_train_num_idx]
    best_params['best_hum_num'] = human_sequence_number[min_train_setting_idx][min_train_num_idx]
    best_params['min_loss'] = min_val

    return best_params



In [20]:
# --- What we search over -------------------------------------------------------
# Best hyper-parameters are found separately for every combination of training
# regime, model type, training-sequence type and input (fix-unit) representation.
part_names = ["Simulated_Only", "Human_Only", "Simulated_and_Human"]
model_names = ['LSTM', 'GRU', 'Transformer']
train_seq_parts = ['fix_only', 'fix_and_choice']
fix_unit_types = ['ID', 'all']

# --- Hyper-parameter grid ------------------------------------------------------
hidden_sizes = np.array([32, 64, 128, 256])

# Learning rates: simulated data, human data when training on it directly,
# and human data when fine-tuning.
sim_lrs = np.array([1e-4, 1e-3])
human_lrs_train = np.array([1e-4, 1e-3])
human_lrs_finetune = np.array([1e-5, 1e-4, 1e-3])

# Transformer-only settings.
transformer_attention_heads = [4]
transformer_layers = [2]

# Number of runs stored per setting.
n_runs = 2

In [5]:
# Spot check: load run 0 of the first regime / sequence type / fix-unit type /
# model with the smallest hidden size and the second simulated learning rate.
load_results(
    run_idx=0,
    part_name=part_names[0],
    train_seq_part=train_seq_parts[0],
    fu=fix_unit_types[0],
    model_name=model_names[0],
    d_model=hidden_sizes[0],
    sim_lr=sim_lrs[1],
    human_lr=0,
    n_head=0,
    n_layers=0,
)

({'simulation_loss_results': array([4.85965735, 4.83791438, 4.83987013, 4.8395671 , 4.83546928,
         4.80007529, 4.72777243, 4.70001642, 4.61235152, 4.57637447,
         4.65835556, 4.6756411 , 4.62488444, 4.57074903, 4.54084338,
         4.57110047, 4.60072294, 4.56265923, 4.53306781, 4.56171766,
         4.55471793, 4.52578311, 4.57631099, 4.53145914, 4.50021669,
         4.51612635, 4.59120116, 4.5304355 , 4.51122344, 4.50346893,
         4.49617356, 4.54740226, 4.48313949, 4.49795447, 4.49169941,
         4.50472619, 4.61922103, 4.56994079, 4.49205108, 4.51842412,
         4.52584043, 4.51278733, 4.54520623, 4.5544816 , 4.5031466 ,
         4.51331602, 4.46537223, 4.54183449, 4.48968677, 4.47456452,
         4.4782313 , 4.46404071, 4.47585484, 4.52545494, 4.49539779,
         4.47536902, 4.4698094 , 4.48235159, 4.52275106, 4.47745813,
         4.53714383, 4.50893974, 4.44910313, 4.43547544, 4.44947015,
         4.45636815, 4.4297367 , 4.46171434, 4.4251087 , 4.60634892,
       

In [26]:
# Containers for the learning curves, keyed by
# "<train_seq_part>_<regime>_<model_name>_<fix_unit_type>".
# Each key maps to a list with one entry per hyper-parameter setting.
res_params = {}
res_losses = {}
res_human_seq_nums = {}
res_sim_seq_nums = {}

for fu in fix_unit_types:
    for tsp in train_seq_parts:
        for part_name in part_names:

            # Simulated-only training gets a second key under which its
            # human-data loss is stored.
            regime_labels = [part_name]
            if part_name == 'Simulated_Only':
                regime_labels.append('Simulated_Only_Pred_Human')

            for model_name in model_names:
                for regime_label in regime_labels:
                    full_name = "{}_{}_{}_{}".format(tsp, regime_label, model_name, fu)
                    for store in (res_params, res_losses, res_human_seq_nums, res_sim_seq_nums):
                        store[full_name] = []
            
# Sweep the hyper-parameter grid for every (fix-unit type, model, training-sequence
# type, training regime) combination and store each run-averaged learning curve.
#
# The regimes differ in which learning rates are searched and which loss is stored:
#   * Simulated_Only      - vary sim_lr, human_lr = 0; store the simulation loss and,
#                           under the 'Simulated_Only_Pred_Human' key, the human loss
#                           (so best params can be chosen for either criterion).
#   * Human_Only          - sim_lr = 0, vary human_lr over human_lrs_train; human loss.
#   * Simulated_and_Human - sim_lr fixed at .001, vary human_lr over
#                           human_lrs_finetune; human loss.
for fu in fix_unit_types:
    for model_name in model_names:

        # Only the Transformer has layer / attention-head settings; every other
        # model is looked up with both set to 0.
        if model_name == 'Transformer':
            arch_grid = [(n_layers, n_head)
                         for n_layers in transformer_layers
                         for n_head in transformer_attention_heads]
        else:
            arch_grid = [(0, 0)]

        for tsp in train_seq_parts:
            for part_name in part_names:

                base_name = "{}_{}_{}_{}".format(tsp, part_name, model_name, fu)

                if part_name == 'Simulated_Only':
                    lr_grid = [(sim_lr, 0) for sim_lr in sim_lrs]
                    pred_human_name = "{}_{}_{}_{}".format(tsp, 'Simulated_Only_Pred_Human', model_name, fu)
                    targets = [(base_name, 'simulation_loss_results'),
                               (pred_human_name, 'human_loss_results')]
                elif part_name == 'Human_Only':
                    lr_grid = [(0, human_lr) for human_lr in human_lrs_train]
                    targets = [(base_name, 'human_loss_results')]
                else:
                    lr_grid = [(.001, human_lr) for human_lr in human_lrs_finetune]
                    targets = [(base_name, 'human_loss_results')]

                # loop through params that we want to max over...
                for d_model in hidden_sizes:
                    for sim_lr, human_lr in lr_grid:
                        for n_layers, n_head in arch_grid:
                            for full_name, which_loss in targets:

                                # n_runs comes from the configuration cell rather
                                # than being hard-coded here.
                                (mean_loss, sem_loss, train_sequence_number,
                                 simulation_sequence_number, human_sequence_number,
                                 this_params) = get_learning_curve(
                                    part_name, tsp, fu, model_name, d_model,
                                    sim_lr, human_lr, n_head, n_layers,
                                    n_runs = n_runs, which_loss = which_loss)

                                res_losses[full_name].append(mean_loss)
                                res_params[full_name].append(this_params)
                                res_human_seq_nums[full_name].append(human_sequence_number)
                                res_sim_seq_nums[full_name].append(simulation_sequence_number)
                            
                            
            
            
            

In [34]:
# For every stored key, pick the hyper-parameter setting (and position along the
# learning curve) with the lowest loss.
best_params_seq = {}

for fu in fix_unit_types:
    for tsp in train_seq_parts:
        for part_name in part_names:

            # Simulated-only training has a second key holding its human-data loss.
            regime_labels = [part_name]
            if part_name == 'Simulated_Only':
                regime_labels.append('Simulated_Only_Pred_Human')

            for model_name in model_names:
                for regime_label in regime_labels:
                    full_name = "{}_{}_{}_{}".format(tsp, regime_label, model_name, fu)
                    these_best_params = get_best_params(
                        res_losses[full_name],
                        res_params[full_name],
                        res_sim_seq_nums[full_name],
                        res_human_seq_nums[full_name],
                    )
                    best_params_seq[full_name] = these_best_params
                    
# Save the selected hyper-parameters.
# `best_params_seq` is the dict filled in above; the previous code pickled
# `best_param_dict`, which is never populated, so the file always held `{}`.
# The context manager closes the file even if pickling fails.
with open(os.path.join(best_param_folder, "best_hyper_params.pkl"), "wb") as f:
    pickle.dump(best_params_seq, f)