In [23]:
# import packages

import numpy as np
import os
import pickle

# create folder to store best parameters
best_param_folder = '/home/erussek/projects/RNNs/best_hyper_params_control'
# makedirs(exist_ok=True) also creates any missing parent directories and does
# not raise if the folder already exists, so this cell can be re-run safely.
os.makedirs(best_param_folder, exist_ok=True)
best_param_dict = {}

# functions to load results, build loss curves for each setting, and get the best parameters
def load_results(run_idx, part_name, fix_unit, d_model, sim_lr, human_lr,
                 results_folder = '/scratch/gpfs/erussek/RNN_project/Hyper_Param_Search_Control'):
    """Load the pickled results of one run of one hyperparameter setting.

    The file name is built from the arguments as
    '<part_name>_run_<run_idx>_fixunit_<fix_unit>_d_model_<d_model>_sim_lr_<sim_lr>_human_lr_<human_lr>.pickle'
    using str.format, so each value must format to the same text that was used
    when the file was written.

    Parameters
    ----------
    run_idx : index of the run, inserted into the file name.
    part_name, fix_unit, d_model, sim_lr, human_lr : hyperparameter values
        identifying the setting; used in the file name and echoed in the
        returned parameter dict.
    results_folder : directory containing the result pickles. Defaults to the
        hyperparameter-search folder this function originally hard-coded.

    Returns
    -------
    (res, param_dict) : the unpickled object, and a dict with keys
        'part_name', 'fu', 'd_model', 'sim_lr', 'human_lr' describing the
        setting (run_idx is not included).

    Raises
    ------
    FileNotFoundError (from open) if the result file does not exist.
    """
    res_name_full = '{}_run_{}_fixunit_{}_d_model_{}_sim_lr_{}_human_lr_{}'.format(part_name,run_idx, fix_unit, d_model, sim_lr, human_lr)
    param_dict = {'part_name': part_name, 'fu': fix_unit, 'd_model':d_model, 'sim_lr':sim_lr, 'human_lr':human_lr}
    res_full_file_name = os.path.join(results_folder, res_name_full + '.pickle')

    # NOTE: pickle.load can execute arbitrary code; only point this at result
    # files produced by your own training jobs.
    # The context manager guarantees the handle is closed even if unpickling fails.
    with open(res_full_file_name, 'rb') as file:
        res = pickle.load(file)

    return res, param_dict

def load_results_all_runs(part_name, fix_unit, d_model, sim_lr, human_lr, n_runs = 2):
    """Load the results of runs 0 .. n_runs-1 for one hyperparameter setting.

    Parameters
    ----------
    part_name, fix_unit, d_model, sim_lr, human_lr : hyperparameter values,
        passed through unchanged to load_results.
    n_runs : number of runs to load; must be at least 1.

    Returns
    -------
    (results_list, param_dict) : list with one loaded result per run (in run
        order), and the parameter dict returned by load_results for the last
        run loaded.

    Raises
    ------
    ValueError if n_runs < 1 (there would be no parameter dict to return).
    """
    if n_runs < 1:
        raise ValueError('n_runs must be at least 1, got {}'.format(n_runs))

    results_list = []

    for run_idx in range(n_runs):
        res, param_dict = load_results(run_idx, part_name, fix_unit, d_model, sim_lr, human_lr)
        results_list.append(res)

    return results_list, param_dict

def get_learning_curve(part_name, fix_unit, d_model, sim_lr, human_lr, n_runs = 2, which_loss = 'simulation_loss_results'): 
    """Average one loss curve over the runs of a hyperparameter setting.

    Loads all runs through load_results_all_runs, stacks the entry stored
    under `which_loss` from each run, and reduces over the run axis.

    Returns a 6-tuple:
        mean over runs,
        standard deviation over runs (np.std default) divided by sqrt(n_runs),
        'train_sequence_number' of the first run,
        'simulation_sequence_number' of the first run,
        'human_sequence_number' of the first run,
        the parameter dict describing the setting.
    """
    runs, param_dict = load_results_all_runs(part_name, fix_unit, d_model, sim_lr, human_lr, n_runs = n_runs)

    # one row per run
    stacked_losses = np.array([run[which_loss] for run in runs])

    mean_curve = np.mean(stacked_losses, axis=0)
    sem_curve = np.std(stacked_losses, axis=0)/np.sqrt(n_runs)

    # the sequence counters are taken from the first run only
    first_run = runs[0]

    return (
        mean_curve,
        sem_curve,
        first_run['train_sequence_number'],
        first_run['simulation_sequence_number'],
        first_run['human_sequence_number'],
        param_dict,
    )

def get_best_params(res_losses, res_params, simulation_sequence_number, human_sequence_number):
    """Find the setting and training point with the lowest loss.

    Parameters
    ----------
    res_losses : sequence of equal-length loss curves, one per setting
        (converted to a 2-D array: settings x training points).
    res_params : sequence of parameter dicts, one per setting, in the same
        order as res_losses.
    simulation_sequence_number, human_sequence_number : per-setting sequences
        indexed as [setting][training point].

    Returns
    -------
    A new dict: a copy of the winning setting's parameter dict extended with
    'best_sim_num', 'best_hum_num' (the sequence numbers at the minimum) and
    'min_loss' (the minimum loss value). The dicts in res_params are left
    untouched.

    Raises
    ------
    ValueError if res_losses is empty.
    """
    loss_arr = np.array(res_losses)

    if loss_arr.size == 0:
        raise ValueError('get_best_params received no loss values')

    min_val = np.min(loss_arr)

    # argmin works on the flattened array; convert back to (setting, point)
    min_flat_idx = np.argmin(loss_arr)
    (min_train_setting_idx,min_train_num_idx) = divmod(min_flat_idx, loss_arr.shape[1])

    # copy so the caller's parameter dict is not modified in place
    best_params = dict(res_params[min_train_setting_idx])

    best_params['best_sim_num'] = simulation_sequence_number[min_train_setting_idx][min_train_num_idx]
    best_params['best_hum_num'] = human_sequence_number[min_train_setting_idx][min_train_num_idx]

    best_params['min_loss'] = min_val

    return best_params


In [14]:
### Train on these vals... 
# Hyperparameter grid iterated over by the loading loops below. These values are
# formatted into result file names by load_results, so presumably they must be
# written exactly as they were when the result files were produced -- confirm
# before changing any literal.
hidden_sizes = np.array([32, 64, 128, 256])  # passed as d_model
sim_lrs = np.array([1e-5, 1e-4, 1e-3])  # swept for the 'Simulated_Only' part
human_lrs_train = np.array([1e-5, 1e-4, 1e-3])  # swept for the 'Human_Only' part
human_lrs_finetune = np.array([1e-5, 1e-4, 1e-3])  # swept for the 'Simulated_and_Human' part

# number of runs per setting (the loading helpers default to the same value)
n_runs = 2

part_names = ["Simulated_Only", "Human_Only", "Simulated_and_Human"]
# fixation types
fix_unit_types = ['ID', 'sum', 'prop', 'all']

In [15]:
# Load and display run 0 for the first part name, fixation type, hidden size and
# simulation learning rate, with human_lr = 0. The whole result is echoed as
# the cell output, which can be very long.
load_results(0, part_names[0], fix_unit_types[0], hidden_sizes[0], sim_lrs[0], 0)

({'simulation_loss_results': array([19.64970618, 19.52325982, 19.39861405, 19.27435809, 19.15006   ,
         19.02729899, 18.9041664 , 18.78246939, 18.65992206, 18.53822196,
         18.4168697 , 18.29504985, 18.17353207, 18.05196297, 17.93219322,
         17.80944848, 17.68758535, 17.56495202, 17.44390386, 17.3211543 ,
         17.19919235, 17.07604754, 16.95182413, 16.82764614, 16.70458877,
         16.58137888, 16.45704353, 16.33085072, 16.20578533, 16.08097106,
         15.95534521, 15.82918054, 15.70252746, 15.57616192, 15.44870043,
         15.31883192, 15.19136441, 15.0643518 , 14.9362042 , 14.80900812,
         14.68148518, 14.55343956, 14.42380214, 14.29359776, 14.16408879,
         14.03588629, 13.90632433, 13.77661347, 13.64611262, 13.51545596,
         13.38387197, 13.25406724, 13.12367445, 12.99334711, 12.86192042,
         12.73033565, 12.59992814, 12.46807903, 12.33856589, 12.2081129 ,
         12.07869273, 11.94810498, 11.81870884, 11.69162166, 11.56157124,
         11

In [26]:
# get the mean loss curve for every fixation type / part name / hidden size / learning rate
#
# Results are grouped under keys of the form "<part_name>_<fix_unit>". For the
# 'Simulated_Only' part a second group, "Simulated_Only_Pred_Human_<fix_unit>",
# holds the human loss of the same models.

res_params = {}
res_losses = {}
res_human_seq_nums = {}
res_sim_seq_nums = {}

# create an empty list for every key so the loops below can simply append
for fix_unit in fix_unit_types:
    for part_name in part_names:
        full_name = "{}_{}".format(part_name, fix_unit)
        res_params[full_name] = []
        res_losses[full_name] = []
        res_human_seq_nums[full_name] = []
        res_sim_seq_nums[full_name] = []

        if part_name == 'Simulated_Only':
            full_name = "{}_{}".format('Simulated_Only_Pred_Human', fix_unit)
            res_params[full_name] = []
            res_losses[full_name] = []
            res_human_seq_nums[full_name] = []
            res_sim_seq_nums[full_name] = []


# store all parameters and loss curves...
# NOTE: keys are built from the loop variable `fix_unit`. Using a variable left
# over from an earlier loop here would file every result under the last
# fixation type.
for fix_unit in fix_unit_types:
    for part_name in part_names:
        for d_model in hidden_sizes:

            if part_name == 'Simulated_Only':

                for sim_lr in sim_lrs:

                    # no human training for this part
                    human_lr = 0

                    # loss on simulated data
                    mean_loss, sem_loss, train_sequence_number,simulation_sequence_number, human_sequence_number, this_params  = get_learning_curve(part_name, fix_unit, d_model, sim_lr, human_lr, n_runs = n_runs, which_loss = 'simulation_loss_results')
                    full_name = "{}_{}".format(part_name, fix_unit)

                    res_losses[full_name].append(mean_loss)
                    res_params[full_name].append(this_params)
                    res_human_seq_nums[full_name].append(human_sequence_number)
                    res_sim_seq_nums[full_name].append(simulation_sequence_number)

                    # get the human loss...
                    mean_loss, sem_loss, train_sequence_number,simulation_sequence_number, human_sequence_number, this_params  = get_learning_curve(part_name, fix_unit, d_model, sim_lr, human_lr, n_runs = n_runs, which_loss = 'human_loss_results')
                    full_name = "{}_{}".format('Simulated_Only_Pred_Human', fix_unit)

                    res_losses[full_name].append(mean_loss)
                    res_params[full_name].append(this_params)
                    res_human_seq_nums[full_name].append(human_sequence_number)
                    res_sim_seq_nums[full_name].append(simulation_sequence_number)

            else: # not Sim Only

                if part_name == 'Human_Only':
                    sim_lr = 0
                    these_human_lrs = human_lrs_train

                else:
                    # single fixed simulation learning rate for this part; only
                    # the human learning rate is swept
                    sim_lr = .001
                    these_human_lrs = human_lrs_finetune

                for human_lr in these_human_lrs:
                    mean_loss, sem_loss, train_sequence_number,simulation_sequence_number, human_sequence_number, this_params  = get_learning_curve(part_name, fix_unit, d_model, sim_lr, human_lr, n_runs = n_runs, which_loss = 'human_loss_results')
                    full_name = "{}_{}".format(part_name, fix_unit)

                    res_losses[full_name].append(mean_loss)
                    res_params[full_name].append(this_params)
                    res_human_seq_nums[full_name].append(human_sequence_number)
                    res_sim_seq_nums[full_name].append(simulation_sequence_number)
                    
                
                
     
                    


In [20]:
# Pick the best hyperparameters for every "<part_name>_<fix_unit>" group and
# save them to disk.
best_params_control = {}

for fix_unit in fix_unit_types:
    for part_name in part_names:

        # key must be built from the current loop variable
        full_name = "{}_{}".format(part_name, fix_unit)

        these_best_params = get_best_params(res_losses[full_name],res_params[full_name], res_sim_seq_nums[full_name], res_human_seq_nums[full_name])
        best_params_control[full_name] = these_best_params

        if part_name == 'Simulated_Only':
            # same models, scored on the human loss
            full_name = "{}_{}".format('Simulated_Only_Pred_Human', fix_unit)
            these_best_params = get_best_params(res_losses[full_name],res_params[full_name], res_sim_seq_nums[full_name], res_human_seq_nums[full_name])
            best_params_control[full_name] = these_best_params

# save the computed best parameters; the context manager closes the file even
# if pickling fails
with open(os.path.join(best_param_folder, "best_hyper_params.pkl"),"wb") as f:
    # write the python object (dict) to pickle file
    pickle.dump(best_params_control,f)

array([5.17011201, 5.22597867, 4.98371609, 5.03888014, 5.03869043,
       4.98141988, 5.00507413, 4.99601209, 4.96871218, 4.93030947,
       5.0280496 , 4.94046591, 4.95825697, 5.14752115, 4.97359946,
       4.89250467, 4.96015716, 4.94650763, 4.92947907, 4.90327157,
       4.89500638, 4.94511461, 4.93860693, 4.91925982, 4.85530254,
       4.92782534, 4.90469499, 4.98476245, 4.87818281, 4.93400221,
       4.92712507, 4.96111102, 4.86747284, 4.88458809, 4.91325142,
       4.87967699, 4.98304057, 4.90597173, 4.82423861, 4.86324577,
       4.90131412, 4.87503123, 4.88310558, 4.92206348, 4.86459783,
       4.88361721, 4.85726032, 4.92831937, 4.84910032, 4.86584831,
       4.93678661, 4.88175744, 4.8607062 , 4.94805004, 4.87499198,
       4.88306276, 4.89981397, 4.87231516, 4.87189773, 4.86771178,
       4.87701457, 4.89482921, 4.81903666, 4.84010283, 4.84925181,
       4.90674068, 4.86107674, 4.85994268, 4.84526507, 4.9223709 ,
       4.87835872, 4.90116756, 4.9302653 , 4.83583546, 4.90743