In [1]:
import sys
import gpytorch
import numpy as np
import pandas as pd
import torch
from datetime import datetime
from scipy.stats import qmc
import itertools
from itertools import combinations_with_replacement, combinations, permutations
import copy

import bo_methods_lib
# from bo_methods_lib.bo_methods_lib.bo_functions_generic import gen_theta_set, clean_1D_arrays
from bo_methods_lib.bo_methods_lib.GPBO_Classes_New import * #Fix this later
from bo_methods_lib.bo_methods_lib.GPBO_Class_fxns import * #Fix this later
from bo_methods_lib.bo_methods_lib.analyze_data import * #Fix this later
from bo_methods_lib.bo_methods_lib.GPBO_Classes_plotters import * #Fix this later
import pympler
import pickle

from pympler import asizeof

from matplotlib import pyplot as plt

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
import signac
#Job filter: cs_name_val equal to 10 and meth_name_val in 1 through 6
meth_name_val_list = list(range(1, 7))
criteria_dict = dict(cs_name_val = 10,
                     meth_name_val = {"$in": meth_name_val_list})

#Output switches: save_csv is handed to General_Analysis, save_figs to Plotters
save_csv, save_figs = True, False

#Build the analyzer on the current signac project and wrap it in the plotting helper
project = signac.get_project()
analyzer = General_Analysis(criteria_dict, project, save_csv)
plotters = Plotters(analyzer, save_figs)

In [3]:
### Get Best Data from ep experiment
#analyzer.get_best_data() returns the best-result table and the matching list of jobs
df_best, job_list_best = analyzer.get_best_data()

#Print the id of each job; i stays bound to the last index once the loop finishes
for i, best_job in enumerate(job_list_best):
    print(best_job.id)

#Display the table as the cell output
df_best

In [None]:
def compare_muller_heat_map_hps(file_path, run_num, bo_iter, x_val_num, theta_choice, seed, gp_hp_alt, gen_meth_theta = Gen_meth_enum(1)):
    """
    Compares simulation and GP data for the Muller potential over a heat map, using a copy of the
    saved GP emulator that is retrained with the hard-coded hyperparameter settings below
    
    Parameters
    ----------
    file_path: str, The file path of the data (passed to open_file_helper)
    run_num: int, The run you want to analyze. Note, run_num 1 corresponds to index 0
    bo_iter: int, The BO iteration you want to analyze. Note, bo_iter 1 corresponds to index 0
    x_val_num: int, The number of x values to make heat maps over in each dimension of x data
    theta_choice: 1D ndarray, or None, the theta_value to evaluate the heat map at. If None, the theta generated by the simulator (using seed) is kept
    seed: int, the seed assigned to the simulator before the heat map data is generated
    gp_hp_alt: Currently unused. The code that applied it as an alternate length scale is commented out
    gen_meth_theta: Gen_meth_enum, Currently has no effect: it is only read after being overwritten from the saved results when the heat map parameter data must be rebuilt
    
    Returns
    -------
    test_mesh: ndarray, meshgrid of x values to generate the heat map 
    all_z_data: list of ndarray, [y_sim, gp_mean, gp_var] evaluated on test_mesh, each reshaped to (x_val_num, x_val_num) and transposed
    theta_value: ndarray, the parameter set evaluated
    param_info_dict: dict, with keys "true", "min_sse", "max_ei", "train", "names", and "idcs"
    
    Notes
    -----
    The simulator stored in the loaded results has its seed attribute overwritten. If the loaded run has
    no heat_map_data_dict, one is created and stored on the loaded (in memory) results object.
    """
    #Convert the 1-based run and iteration numbers to 0-based indices
    run_idx = run_num - 1
    iter_idx = bo_iter - 1
    loaded_results = open_file_helper(file_path)
    run_results = loaded_results[run_idx]
    
    #Get exp_data and the emulator saved for this iteration
    exp_data = run_results.exp_data_class
    saved_emulator = run_results.list_gp_emulator_class[iter_idx]
    
    #Work on a deep copy so the saved emulator is left unchanged
    gp_emulator = copy.deepcopy(saved_emulator)
    #Drop trailing training rows so train_data has as many rows as feature_train_data
    #(the > 0 guard matters: slicing with [:-0] would return an empty array)
    length_diff = len(gp_emulator.train_data.y_vals) - len(gp_emulator.feature_train_data)
    if length_diff > 0:
        gp_emulator.train_data.y_vals = gp_emulator.train_data.y_vals[:-length_diff]
        gp_emulator.train_data.theta_vals = gp_emulator.train_data.theta_vals[:-length_diff]
        gp_emulator.train_data.x_vals = gp_emulator.train_data.x_vals[:-length_diff]
    
    #Set new hp param info
    gp_emulator.lenscl = None
    gp_emulator.outputscl = 1000
    gp_emulator.kernel = Kernel_enum(1)
    gp_emulator.retrain_GP = 25
    
    #Set and train the new GP model, then report the kernel
    gp_model_new = gp_emulator.set_gp_model()
    gp_emulator.train_gp(gp_model_new)     
    print(gp_model_new.kernel_)
        
    #Change gp hps if applicable (disabled, which is why gp_hp_alt is unused)
#     if gp_hp_alt is not None:
#         gp_emulator.fit_gp_model.kernel_.k1.k2.length_scale = gp_hp_alt
#         gp_emulator.fit_gp_model.kernel.k1.k2.length_scale = gp_emulator.fit_gp_model.kernel_.k1.k2.length_scale
    
    #Rebuild the problem objects from the saved run
    simulator = run_results.simulator_class
    sep_fact = run_results.configuration["Separation Factor"]
    method = GPBO_Methods(Method_name_enum(run_results.configuration["Method Name Enum Value"]))
    
    enum_ep = Ep_enum(run_results.configuration["Exploration Bias Method Value"])
    ep_at_iter = run_results.results_df["Exploration Bias"].iloc[iter_idx]
    ep_bias = Exploration_Bias(None, ep_at_iter, enum_ep, None, None, None, None, None, None, None)
    
    #Use the normalized true parameters when the selected run was normalized
    if run_results.configuration["Normalize"] == True:
        theta_true = simulator.theta_true_norm
    else:
        theta_true = simulator.theta_true
        
    theta_obj_min = run_results.results_df["Theta Min Obj Cum."].iloc[iter_idx]
    theta_ei_max = run_results.results_df["Theta Max EI"].iloc[iter_idx]
    #Training thetas come from the saved emulator, not the trimmed copy
    train_theta = saved_emulator.train_data.theta_vals
    
    #Create the heat map parameter data if it was not saved with the run
    if run_results.heat_map_data_dict is None:
        cs_params, method, gen_meth_theta = get_driver_dependencies_from_results(loaded_results, run_idx)
        driver = GPBO_Driver(cs_params, method, simulator, exp_data, gp_emulator.gp_sim_data, gp_emulator.gp_sim_data, gp_emulator.gp_val_data, gp_emulator.gp_val_data, gp_emulator, ep_bias, gen_meth_theta)
        run_results.heat_map_data_dict = driver.create_heat_map_param_data()
    
    #The first key of the heat map data holds the parameter names to plot
    param_names = list(run_results.heat_map_data_dict.keys())[0]
    idcs_to_plot = [simulator.theta_true_names.index(name) for name in param_names]
    
    #Generate simulation data for x given 1 theta
    simulator.seed = seed
    sim_data_x = simulator.gen_sim_data(1, x_val_num, Gen_meth_enum(1), Gen_meth_enum(2), sep_fact, False)
    if theta_choice is not None:
        #Overwrite the generated theta and recompute y for the requested theta
        sim_data_x.theta_vals[:] = theta_choice
        sim_data_x.y_vals = simulator.gen_y_data(sim_data_x, 0, 0)
    
    theta_value = sim_data_x.theta_vals[0]
    featurized_sim_x_data = gp_emulator.featurize_data(sim_data_x)

    sim_data_x.gp_mean, sim_data_x.gp_var = gp_emulator.eval_gp_mean_var_misc(sim_data_x, featurized_sim_x_data)
    
    #Create a meshgrid of the x values
    test_mesh = sim_data_x.x_vals.reshape(x_val_num, x_val_num,-1).T

    #Reshape the simulated and GP values onto the same grid
    y_sim = sim_data_x.y_vals.reshape(x_val_num, x_val_num).T
    gp_mean = sim_data_x.gp_mean.reshape(x_val_num, x_val_num).T
    gp_var = sim_data_x.gp_var.reshape(x_val_num, x_val_num).T
    
    #Exponentiate the GP outputs for non-emulator methods whose objective enum value is 2
    if method.emulator == False and method.obj.value ==2:
        gp_mean = np.exp(gp_mean)
        gp_var = np.exp(gp_var)

    #Set param info
    param_info_dict = {"true":theta_true, "min_sse":theta_obj_min, "max_ei":theta_ei_max, "train":train_theta,
                                "names":param_names, "idcs":idcs_to_plot} 
    all_z_data = [y_sim, gp_mean, gp_var]
    
    return test_mesh,  all_z_data, theta_value, param_info_dict

In [None]:
#Plot gp comparison for one of the best jobs
#job_idx selects the same position in job_list_best and df_best, so the results file,
#run number, and BO iteration all belong to the same job (no reliance on a leftover loop index)
job_idx = 0
analyzer.save_csv = False
file_path = job_list_best[job_idx].fn("BO_Results.gz")
test_mesh,  all_z_data, theta_value, param_info_dict = compare_muller_heat_map_hps(file_path, df_best["Run Number"].iloc[job_idx], 
                                                                                   df_best["BO Iter"].iloc[job_idx], 
                                                                                   5, None, 1, None, 
                                                                                   gen_meth_theta = Gen_meth_enum(1))
levels = [100, 100, 100]
#Title text for the evaluated theta. The plot call below passes title = None, so it is not shown
title = "Theta = " + str(theta_value)
z_titles = ["sse_sim", "sse_mean", "sse_var"]
plotters.plot_nlr_heat_maps(test_mesh, all_z_data, z_titles, levels, param_info_dict, False, title = None)

In [None]:
# #Make Heat maps for all pairs
# z_choices = ["sse_sim", "sse_mean", "sse_var", "ei"]
# levels = [100,100,100,100]

# #Loop over best jobs
# for i in range(len(job_list_best)):   
#     #Get jobs, runs, and iters to examine
#     job = job_list_best[i]
#     run_num = df_best["Run Number"].iloc[i]
#     bo_iter = df_best["BO Iter"].iloc[i]
    
#     #Back out number of parameters
#     string_val = df_best["Theta Min Obj"].iloc[0]
#     try:
#         numbers = [float(num) for num in string_val.replace('[', '').replace(']', '').split()]
#     except:
#         numbers = [float(num) for num in string_val]
        
#     #Create list of parameter pair combinations
#     dim_theta = len(np.array(numbers).reshape(-1, 1))
#     dim_list = np.linspace(0, dim_theta-1, dim_theta)
#     pairs = len((list(combinations(dim_list, 2))))
    
#     #Loop over parameter pairs
#     for pair in range(pairs):
#         plotters.plot_hms_gp_compare(job, run_num, bo_iter, pair, z_choices, levels)