In [None]:
# Setup to analyse an MDP Playground experiment.
# An MDPP_Analysis instance provides the load_data() and plot_*() methods called in the cells below.
from mdp_playground.analysis import MDPP_Analysis

# Set the following to True to save PDFs of plots that you generate below.
# The flag is passed as the save_fig argument of the mdpp_analysis.plot_* calls.
save_fig = False

In [None]:
# Data loading
mdpp_analysis = MDPP_Analysis()

# Load the data of one or more experiments.
#
# experiments = { <exp_name_1>: <dir_name_1>, ... }
# For each experiment (also works in case of a single experiment):
#   - dir_name: the location where the CSV files from running an experiment were saved
#   - exp_name: the name that was given to the experiment when running it, i.e., with the -e option
#
# NOTE(review): the directories below are absolute paths on the original author's machine;
# replace them with your own locations before running.
experiments = {
    # Other experiments that were analysed with this notebook (kept for reference):
    # "dqn_p_r_noises": "/home/rajanr/spider_plots_data_discrete/",
    # "a3c_p_r_noises": "/home/rajanr/spider_plots_data_discrete/",
    # "dqn_qbert_del": "/home/rajanr/mdpp_8780992/",
    # "rainbow_qbert_del": "/home/rajanr/mdpp_8815604",
    # "a3c_qbert_del": "/home/rajanr/mdpp_3214031",
    "rainbow_hydra_0": "/home/rajanr/mdpp_9485031_temp/",
}

# Remember to set load_eval=False in case evaluation stats were not recorded and only training
# stats were recorded, otherwise there will be errors in loading the data in this cell.
# list_exp_data is indexed per experiment further below (list_exp_data[0], list_exp_data[i]).
list_exp_data = mdpp_analysis.load_data(experiments, load_eval=True, exp_type='random')

In [None]:
# 1-D plots (plot_type="agent"): reward after 20k timesteps when varying a single meta-feature.
# Training data, with std dev across the runs. The second call passes use_aucs=True.
train_agent_kwargs = {"plot_type": "agent"}
mdpp_analysis.plot_1d_dimensions(list_exp_data, save_fig, **train_agent_kwargs)
mdpp_analysis.plot_1d_dimensions(list_exp_data, save_fig, use_aucs=True, **train_agent_kwargs)

In [None]:
# 1-D plots (plot_type="agent") on the evaluation data (train=False), with std dev across the runs.
# The second call passes use_aucs=True.
eval_agent_kwargs = {"train": False, "plot_type": "agent"}
mdpp_analysis.plot_1d_dimensions(list_exp_data, save_fig, **eval_agent_kwargs)
mdpp_analysis.plot_1d_dimensions(list_exp_data, save_fig, use_aucs=True, **eval_agent_kwargs)

In [None]:
# 1-D plots (plot_type="metric"): reward after 20k timesteps when varying a single meta-feature.
# Training data, with std dev across the runs. The second call passes use_aucs=True.
train_metric_kwargs = {"plot_type": "metric"}
mdpp_analysis.plot_1d_dimensions(list_exp_data, save_fig, **train_metric_kwargs)
mdpp_analysis.plot_1d_dimensions(list_exp_data, save_fig, use_aucs=True, **train_metric_kwargs)

In [None]:
# 1-D plots (plot_type="metric") on the evaluation data (train=False), with std dev across the runs.
# The second call passes use_aucs=True.
eval_metric_kwargs = {"train": False, "plot_type": "metric"}
mdpp_analysis.plot_1d_dimensions(list_exp_data, save_fig, **eval_metric_kwargs)
mdpp_analysis.plot_1d_dimensions(list_exp_data, save_fig, use_aucs=True, **eval_metric_kwargs)

In [None]:
# This and the next cell repeat the per-agent 1-D training/evaluation plots above, but with
# metric_num=-1, i.e. they plot episode mean lengths instead of episode reward.
mdpp_analysis.plot_1d_dimensions(list_exp_data, save_fig, metric_num=-1, plot_type = "agent")

In [None]:
# Evaluation counterpart (train=False) of the previous cell: per-agent 1-D plot with metric_num=-1.
mdpp_analysis.plot_1d_dimensions(list_exp_data, save_fig, train=False, metric_num=-1, plot_type = "agent")

In [None]:
# 2-D heatmap plots across 10 runs: Training runs: with std dev across the runs.
# Known issue: the x and y axis tick labels are not set correctly even though they are passed
# to matplotlib. If you find the cause in the plotting code, please suggest a correction.
mdpp_analysis.plot_2d_heatmap(list_exp_data, save_fig)

In [None]:
# 2-D heatmap plots across 10 runs: Evaluation runs (train=False): with std dev across the runs.
mdpp_analysis.plot_2d_heatmap(list_exp_data, save_fig, train=False)

In [None]:
# Plot learning curves on the training data: each curve corresponds to a different seed for the agent.
mdpp_analysis.plot_learning_curves(list_exp_data, save_fig)

In [None]:
# Plot learning curves on the evaluation data (train=False): each curve corresponds to a
# different seed for the agent.
mdpp_analysis.plot_learning_curves(list_exp_data, save_fig, train=False)

In [None]:
# Weights handed to the radar plot, keyed by meta-feature name (values for continuous_experiments).
# NOTE(review): presumably one weight per value of the meta-feature - confirm against plot_radar.
weights = {
    'reward_noise': [.2, .2, .2, .2, .2, .0, .0, .0],
    'action_loss_weight': [.33, .33, .33, .0, .0, .0],
}

# Plot radar(spider) plot: Training: across different meta-features
mdpp_analysis.plot_radar(list_exp_data, save_fig=save_fig, weights=weights)


In [None]:
# Plot radar(spider) plot: Evaluation (train=False): across different meta-features.
# NOTE(review): no weights argument is passed here, unlike the training radar plot - confirm
# that this is intended.
mdpp_analysis.plot_radar(list_exp_data, save_fig=save_fig, train=False)

In [None]:
# Pull the individual result containers of the first loaded experiment into notebook-level
# names so that the scratch cells below can inspect them directly.
# The eval_* keys are presumably only present when the data was loaded with load_eval=True -
# TODO confirm; a missing key raises KeyError here.
exp_data = list_exp_data[0]
train_stats = exp_data['train_stats']
eval_stats = exp_data['eval_stats']
train_curves = exp_data['train_curves']
eval_curves = exp_data['eval_curves']
train_aucs = exp_data['train_aucs']
eval_aucs = exp_data['eval_aucs']

In [None]:
import matplotlib.pyplot as plt

# Prepending -1 makes frc[k] + 1 the first row of the k-th slice of train_curves.
frc = [-1] + mdpp_analysis.final_rows_for_a_config
print(len(frc), frc, type(train_curves))

# Draw column -2 of train_curves for 5 consecutive slices, starting at slice j.
j = 20
for i in range(5):
    first_row = frc[j + i] + 1
    # NOTE(review): the slice end is exclusive, so row frc[j + i + 1] itself is not plotted -
    # confirm that dropping it is intended.
    end_row = frc[j + i + 1]
    plt.plot(train_curves[first_row:end_row, -2])
# plt.ylim([200, 250])
plt.grid()

In [None]:
# Scratch: dump the training AUCs of the first experiment (prints the whole container).
print(train_aucs)

In [None]:
import numpy as np
import matplotlib.pyplot as plt

stats_data = train_stats
metric_num = -2

# Sub-select the metric written in position metric_num of the last axis ("the axis of the
# different metrics that were written"). After this slice the axis of #seeds becomes axis=-1
# (before the slice it was -2), so mean and std dev are taken across seeds.
metric_values = stats_data[..., metric_num]
mean_data_ = np.mean(metric_values, axis=-1)
std_dev_ = np.std(metric_values, axis=-1)

# Drop the length-1 axes so that what remains can be drawn as a bar chart.
to_plot_ = np.squeeze(mean_data_)
to_plot_std_ = np.squeeze(std_dev_)

# fig_width = len(self.tick_labels[0])
fig_width = 5
plt.figure(figsize=(fig_width, 1.5))

print(to_plot_.shape)
bar_positions = list(range(to_plot_.shape[0]))
plt.bar(bar_positions, to_plot_, yerr=to_plot_std_)
plt.ylim([-5, 0])
plt.grid()
# plt.bar(self.tick_labels[0], to_plot_[:, 0], yerr=to_plot_std_[:, 0])

In [None]:
# Scratch cell: inspect shapes and build re-ordered copies of the stats and the curves.
print(train_stats.shape, eval_stats.shape, train_curves.shape, eval_curves.shape)
print(train_stats[:,0:4,:,:])

# ts_copy: every entry along axis 1 moves one position to the right and the entry at index 4
# is placed at index 0 (a rotation by one if axis 1 has exactly 5 entries - TODO confirm).
ts_copy = train_stats.copy()
ts_copy[:,1:,:,:] = train_stats[:,:-1,:,:]
ts_copy[:,0,:,:] = train_stats[:,4,:,:]

# tc_copy: the rows of train_curves rotated by 14955 rows (the last 14955 rows move to the front).
# NOTE(review): 14955 is a hard-coded row count; presumably the number of curve rows belonging
# to one value of the re-ordered dimension - verify before reusing with other data.
tc_copy = train_curves.copy()
tc_copy[14955:,:] = train_curves[:-14955,:]
tc_copy[:14955,:] = train_curves[-14955:,:]
# The matching re-ordering of the labels is left commented out:
# mdpp_analysis.tick_labels[0][1:5], mdpp_analysis.tick_labels[0][0] = mdpp_analysis.tick_labels[0][0:4], mdpp_analysis.tick_labels[0][4]
# mdpp_analysis.dims_values[1][1:5], mdpp_analysis.dims_values[1][0] = mdpp_analysis.dims_values[1][0:4], mdpp_analysis.dims_values[1][4]
print(ts_copy)
# Debugging output: attributes of the analysis object and its label/config bookkeeping.
print(dir(mdpp_analysis))
print(mdpp_analysis.metric_names, mdpp_analysis.tick_labels)
print(mdpp_analysis.dims_values[1], mdpp_analysis.config_names, mdpp_analysis.dims_varied)
print(train_curves[1,:])

In [None]:
# some more analysis (for tune HPs)
import os
import sys
import numpy as np
from scipy.stats import spearmanr as spm
from scipy.stats import pearsonr as prs
import matplotlib.pyplot as plt

# Import the experiment's config module and enumerate the configurations it defines.
# NOTE(review): exp_name is not assigned anywhere in this notebook; it must be set to the
# experiment's name (the config file name without extension, presumably) before running this cell.
dir_name_config = 'experiments/'
file_ = dir_name_config + exp_name

# Directory part of file_, made absolute, so that the config can be imported as a module.
config_file_path = os.path.abspath('/'.join(file_.split('/')[:-1]))
# print(file_.split('/')[:-1])
print("config_file_path:", config_file_path)
sys.path.insert(1, config_file_path) #hack
import importlib
# The module name is the last path component of file_.
config = importlib.import_module(file_.split('/')[-1], package=None)
print("Number of seeds for environment:", config.num_seeds)

# Collect the list of possible values of every varied option, skipping any key containing 'seed'.
value_tuples = []
for config_type, config_dict in config.var_configs.items():
    for key in config_dict:
        if 'seed' in key:
            print("Found seed axis:", key)
            pass
        else:
            assert type(config.var_configs[config_type][key]) == list, "var_config should be a dict of dicts with lists as the leaf values to allow each configuration option to take multiple possible values"
            value_tuples.append(config.var_configs[config_type][key])
print("value_tuples", value_tuples)

# Every combination of the collected values; analysis() below indexes into this list with
# positions of the flattened per-config means.
import itertools
cartesian_product_configs = list(itertools.product(*value_tuples))
print("Total number of configs. to run:", len(cartesian_product_configs))
print("Varying dims in mdpp_analysis.axis_labels (will have dummy_seed in there as 1st dim and may not show actual last varying dim because mdpp_analysis assumes last varying dim is seed (and ignores last config_name) and here seed is always the 1st dim):", mdpp_analysis.axis_labels)
# import itertools
# cartesian_product_configs = list(itertools.product(*config_vals))

# Take the first axis of train_stats with more than one entry as the seed axis.
# NOTE(review): if no axis has size > 1, dummy_seeds_axis stays unbound and the print below
# raises NameError (or silently reuses a value left over from an earlier run of this cell).
for i in range(len(train_stats.shape)):
    if train_stats.shape[i] > 1:
        dummy_seeds_axis = i
        break
# The second printed value is the number of dimensions of train_stats, not its shape.
print("dummy_seeds_axis, train_stats.shape:", dummy_seeds_axis, len(train_stats.shape))

# dummy_seeds_axis = -1

def analysis(train_stats):
    """Rank configurations by one metric, print the rankings and plot a bar chart.

    The metric at position -2 of the last axis of ``train_stats`` is reduced over the
    notebook-level ``dummy_seeds_axis`` (mean and std dev), and both results are flattened.
    Configurations are ranked by the mean and by (mean - std); the top and bottom 3 of the
    latter ranking are looked up in the notebook-level ``cartesian_product_configs``.

    Args:
        train_stats: array of recorded stats (training or evaluation stats are both passed
            in by the caller).

    Returns:
        Tuple ``(flattened_mean, flattened_mean - flattened_std)``. Both arrays are in
        configuration order, i.e. NOT sorted.

    Raises:
        IndexError: if there are fewer than 3 configurations.
    """
    metric_values = train_stats[..., -2]
    mean_data_ = np.mean(metric_values, axis=dummy_seeds_axis)
    std_data_ = np.std(metric_values, axis=dummy_seeds_axis)
    print("Mean shape (after slice), Sliced shape:", mean_data_.shape, metric_values.shape)

    flattened_mean = np.ravel(mean_data_)
    flattened_std = np.ravel(std_data_)
    mean_minus_std = flattened_mean - flattened_std

    # Rankings, best first.
    ranks = np.argsort(flattened_mean)[::-1]
    print('sort of indices:\n', ranks)
    ranks_with_std = np.argsort(mean_minus_std)[::-1]
    print('sort of indices (with std taken into account):\n', ranks_with_std)
    print('sort of values:\n', np.sort(flattened_mean)[::-1])
    print('sort of values (with std taken into account):\n', np.sort(mean_minus_std)[::-1])

    # ranks_with_std is best-first: its head holds the top configs, its tail the bottom ones.
    print("TOP 3 configs (with std taken into account):")
    for place in range(3):
        print(cartesian_product_configs[ranks_with_std[place]])
    print("\nBOTTOM 3 configs (with std taken into account):")
    for place in range(1, 4):
        print(cartesian_product_configs[ranks_with_std[-place]])

    plt.figure(figsize=(30, 1.5))
    plt.bar(list(range(len(flattened_mean))), flattened_mean, yerr=flattened_std)
    plt.show()
    return flattened_mean, mean_minus_std

# Run the analysis on the training and on the evaluation stats.
# Despite their names, the returned arrays are in configuration order (analysis() returns the
# flattened means, not the sorted values), so each Spearman correlation below pairs up the
# values of the same configuration.
sorted_vals_t, sorted_vals_with_std_t = analysis(train_stats)
sorted_vals_e, sorted_vals_with_std_e = analysis(eval_stats)

for first_vals, second_vals in (
    (sorted_vals_t, sorted_vals_e),
    (sorted_vals_t, sorted_vals_with_std_t),
    (sorted_vals_with_std_t, sorted_vals_with_std_e),
    (sorted_vals_e, sorted_vals_with_std_e),
):
    print(spm(first_vals, second_vals))

In [None]:
# Scratch: the varied configuration options next to the shape of the training stats.
print(config.var_configs, train_stats.shape, sep="\n")

In [None]:
# Scratch: display the analysis object's config names and config counts as the cell output.
mdpp_analysis.config_names, mdpp_analysis.config_counts

In [None]:
import numpy as np


def _mean_over_axis_2(values):
    """Return the mean of ``values`` taken over axis 2."""
    return np.mean(values, axis=2)


# Scratch: means over axis 2 of the training stats and AUCs.
print(_mean_over_axis_2(train_stats))
print(_mean_over_axis_2(train_aucs))

# Sanity check of spearmanr on two identical rankings.
from scipy import stats
print(stats.spearmanr([4,3,2,1,0], [4,3,2,1,0]))

# Same means for the evaluation stats and AUCs, followed by the raw stats.
print(_mean_over_axis_2(eval_stats))
print(_mean_over_axis_2(eval_aucs))
print(train_stats)
print(eval_stats)

In [None]:
# Setup to analyse an MDP Playground experiment.
# This repeats the first cell of the notebook, so the cells from here on can be run on a
# fresh kernel without running the cells above.
from mdp_playground.analysis import MDPP_Analysis

# Set the following to True to save PDFs of plots that you generate below
save_fig = False

In [None]:
# %%capture out1
mdpp_analysis = MDPP_Analysis()

# One experiment per environment config, named rainbow_hydra_0 ... rainbow_hydra_<num_env_configs - 1>;
# all of them are read from the same directory.
# (Previously used single entry: "rainbow_hydra_0": "/home/rajanr/mdpp_9485031_temp/")
num_env_configs = 1000
experiments = {
    'rainbow_hydra_' + str(i): "/home/rajanr/mdpp_9485031/"
    for i in range(num_env_configs)
}

# Remember to set load_eval=False in case evaluation stats were not recorded and only training stats were recorded, otherwise there will be errors in loading the data in this cell.
list_exp_data_reward_scales = mdpp_analysis.load_data(experiments, load_eval=False, exp_type='random')

In [None]:
# Scratch: look at the training stats loaded for the second experiment (index 1).
# print(len(list_exp_data))
# del list_exp_data[259]
print(list_exp_data_reward_scales[1]['train_stats'].to_numpy())

In [None]:
# Save configs in list_exp_data_ (hacky variable name)
# NOTE(review): list_exp_data_ is not assigned anywhere in this notebook; it has to hold the
# data loaded together with the configs before this cell is run.
import pickle
import os.path

# The configs get their own file: this is the name the loading cell below reads into
# list_exp_data_with_configs. (Writing to the reward-scales file name here would make the next
# cell skip saving the reward scales with "File already exists!".)
pik = "mdpp_hydra_configs_pickle.dat"

if not os.path.exists(pik):
    # Serialise before opening the file, so that a failure (e.g. an undefined variable or an
    # unpicklable object) cannot leave an empty file behind that blocks later saves.
    pickled_configs = pickle.dumps(list_exp_data_)
    with open(pik, "wb") as f:
        f.write(pickled_configs)
    print("Saved file.")
else:
    print("File already exists!")


In [None]:
# Save the data held in list_exp_data_reward_scales; an existing file is never overwritten.
import pickle
import os.path

pik = "mdpp_hydra_reward_scales_pickle.dat"

if os.path.exists(pik):
    print("File already exists!")
else:
    with open(pik, "wb") as f:
        pickle.dump(list_exp_data_reward_scales, f)
    print("Saved file.")


In [None]:
# Save performance metrics in list_exp_data (usual filename); an existing file is never overwritten.
import pickle
import os.path

pik = "mdpp_hydra_pickle.dat"

if os.path.exists(pik):
    print("File already exists!")
else:
    with open(pik, "wb") as f:
        pickle.dump(list_exp_data, f)
    print("Saved file.")
    

In [None]:
# Load above 3 saved pickles
import pickle

pik = "mdpp_hydra_pickle.dat"
# pik = "mdpp_hydra_pickle_train.dat"
pik_conf = "mdpp_hydra_configs_pickle.dat"
pik_reward_scales = "mdpp_hydra_reward_scales_pickle.dat"


def _load_pickle(path):
    """Return the object unpickled from the file at ``path``."""
    # CAUTION: unpickling can execute arbitrary code; only load files that you created yourself.
    with open(path, "rb") as pickle_file:
        return pickle.load(pickle_file)


list_exp_data = _load_pickle(pik)
list_exp_data_with_configs = _load_pickle(pik_conf)
list_exp_data_reward_scales = _load_pickle(pik_reward_scales)

# print(list_exp_data)

# Entry 259 is dropped from the performance data and from the reward scales, but not from the
# data with configs. NOTE(review): the reason for excluding it is not recorded in this notebook.
del list_exp_data[259]
# del list_exp_data_with_configs[259]
del list_exp_data_reward_scales[259]

In [None]:
# Scratch: which keys does the data of one experiment contain, and how many entries were loaded?
first_exp = list_exp_data[0]
contents = list(first_exp)
print(contents)
# print(first_exp['train_stats'])
print(len(first_exp['dims_values']))

print(len(list_exp_data_with_configs))
# print(list_exp_data_with_configs[0])

print(len(list_exp_data_reward_scales))
print(list_exp_data_reward_scales[0]['train_stats'])

In [None]:
# scratch: training reward of experiment i divided by its reward scale, once on the loaded
# objects directly and once on their NumPy arrays.
# print(list_exp_data_with_configs[0]['train_stats'])
i = 0
reward_means = list_exp_data[i]['train_stats']['episode_reward_mean']
reward_scales = list_exp_data_reward_scales[i]['train_stats']
print(reward_means / reward_scales)
# ['reward_scale'] not needed since it's not a DataFrame anymore I think
print(reward_means.to_numpy() / reward_scales.to_numpy())

In [None]:
num_env_configs = 999
num_agent_configs = 996

# Shape of the training stats of every env config; print the index of each env whose number of
# rows differs from num_agent_configs, then the average number of rows and columns.
stats_shapes = [list_exp_data[i]['train_stats'].shape for i in range(num_env_configs)]
rows = [shape[0] for shape in stats_shapes]
cols = [shape[1] for shape in stats_shapes]
for i, row_count in enumerate(rows):
    if row_count != num_agent_configs:
        print(i)
print(sum(rows)/num_env_configs, sum(cols)/num_env_configs)

In [None]:
# Build lists of top configs for train, eval, etc. below
# Collect performances of all agents on all envs for train, eval, etc.
# Calculate Pearson and Spearman corr. coeffs. for all combinations of train, eval, etc.

import itertools
from collections import Counter

import numpy as np
from scipy.stats import pearsonr as prs
from scipy.stats import spearmanr as spm

num_env_configs = 999
num_agent_configs = 996
normalise_rewards = True

perf_sets = ['train', 'eval', 'train_auc', 'eval_auc']
print("env x agent grid size:", num_env_configs, num_agent_configs)

# Per performance set: index of the best / worst agent config on every env, and the full
# (env x agent) grid of performances.
top_configs = {perf_set: [] for perf_set in perf_sets}
top_configs_mins = {perf_set: [] for perf_set in perf_sets}
perfs_all_envs = {
    perf_set: np.zeros(shape=(num_env_configs, num_agent_configs))
    for perf_set in perf_sets
}

# One list of per-env correlation coefficients for every pair of performance sets,
# keyed "<set_a> and <set_b>".
corr_sets = ['train', 'eval', 'train_auc', 'eval_auc']
corr_combos = list(itertools.combinations(corr_sets, 2))
corrs = {first + ' and ' + second: [] for first, second in corr_combos}
corrs_spm = {first + ' and ' + second: [] for first, second in corr_combos}

# Key of list_exp_data[i] that holds the data of each performance set.
stats_key_for = {
    'train': 'train_stats',
    'eval': 'eval_stats',
    'train_auc': 'train_aucs',
    'eval_auc': 'eval_aucs',
}

for i in range(num_env_configs):
    env_data = list_exp_data[i]
    # Copies, so that the in-place normalisation below does not modify the loaded data.
    perfs = {
        perf_set: env_data[stats_key_for[perf_set]]['episode_reward_mean'].to_numpy().copy()
        for perf_set in perf_sets
    }

    if normalise_rewards:
        # All four sets are divided by the same values: the 'train_stats' entry of the
        # reward scales of env i.
        reward_scale = list_exp_data_reward_scales[i]['train_stats'].to_numpy()
        for perf_set in perf_sets:
            perfs[perf_set] /= reward_scale

    for perf_set in perf_sets:
        env_perfs = perfs[perf_set]
        top_configs[perf_set].append(np.argmax(env_perfs))
        top_configs_mins[perf_set].append(np.argmin(env_perfs))
        perfs_all_envs[perf_set][i, :] = env_perfs

    for first, second in corr_combos:
        combo_key = first + ' and ' + second
        # [0] picks the correlation coefficient out of the returned result.
        corrs[combo_key].append(prs(perfs[first], perfs[second])[0])
        corrs_spm[combo_key].append(spm(perfs[first], perfs[second])[0])




In [None]:
# Build Hydra portfolios and analyse

%matplotlib inline
# %matplotlib notebook

top_perfs_on_envs = [0.0] * num_env_configs
top_norm_perfs_on_envs = [0.0] * num_env_configs

# Containers filled per performance set by the portfolio-building loop below.
sum_over_maxes, sum_across_envs = {}, {}
hydra_perfs, portfolio = {}, {}
hydra_perfs_mins, portfolio_mins = {}, {}

num_portfolio_configs = 10

# For every env (row), the agent performances sorted in descending order
# (negate, sort ascending, negate back).
sorted_agents = {
    perf_set: -np.sort(-perfs_all_envs[perf_set], axis=1)
    for perf_set in perf_sets
}


# Greedy portfolio construction, done separately for every performance set.
# perfs_all_envs[perf_set] is an (env x agent) grid. The portfolio starts with the agent config
# that has the best sum over all envs; each further member is the agent config whose summed
# improvement over the current per-env portfolio performance is largest. A mirrored "mins"
# portfolio is built from the worst configs in the same way.
for perf_set in perf_sets: # ['train']
    # How often each agent config is the best / the worst one on an env.
    counts = Counter(top_configs[perf_set])
    counts_mins = Counter(top_configs_mins[perf_set])
    # print("Top configs on " + perf_set + ":", counts.most_common(num_portfolio_configs), len(counts))
    print("Frequency of being top agent config on an env:" + perf_set + ":", counts, len(counts))
    # mins:
    print("\nFrequency of being bottom agent config on an env:" + perf_set + ":", counts_mins, len(counts_mins))

    # Per-agent total over all envs, and the column sums of the per-env descending sort
    # (element 0 is therefore the sum of the per-env best performances).
    sum_across_envs[perf_set] = np.sum(perfs_all_envs[perf_set], axis=0)
    sum_over_maxes[perf_set] = np.sum(sorted_agents[perf_set], axis=0)
    best_conf_index = np.argmax(sum_across_envs[perf_set])
    # Starts as a one-element list holding the per-env performances of the first member; the
    # first "list + ndarray" addition in the loop below turns it into an ndarray.
    hydra_perfs[perf_set] = [perfs_all_envs[perf_set][:, best_conf_index]]
    print("\nFirst portfolio member:", best_conf_index)
    portfolio[perf_set] = [best_conf_index]
    # First entry: total performance of the first member; later entries: the improvement
    # contributed by each added member.
    port_perfs = [np.sum(hydra_perfs[perf_set])]
    num_places_improvementss = [perfs_all_envs[perf_set].size]
    print("Current portfolio perf.:", port_perfs[0])
    # print("hydra_perfs[perf_set]", len(hydra_perfs[perf_set]), hydra_perfs[perf_set])
    # print("Sums:", sum_across_envs[perf_set])
    
    # Hydra using mins
    worst_conf_index = np.argmin(sum_across_envs[perf_set])
    hydra_perfs_mins[perf_set] = [perfs_all_envs[perf_set][:, worst_conf_index]]
    portfolio_mins[perf_set] = [worst_conf_index]
    port_perfs_mins = [np.sum(hydra_perfs_mins[perf_set])]
    num_places_improvementss_mins = [perfs_all_envs[perf_set].size]
    
    for i in range(num_portfolio_configs - 1):
        # Column vector of the current portfolio's per-env performance, broadcast against the grid.
        curr_portfolio_perfs = np.array(hydra_perfs[perf_set]).reshape(-1, 1)
        improvements = perfs_all_envs[perf_set] - curr_portfolio_perfs
        improvements = np.clip(improvements, a_min=0.0, a_max=None) # Only improve where current portfolio is not better
        improvements_per_agent = np.sum(improvements, axis=0)
        # Number of (env, agent) cells in which an agent beats the current portfolio.
        num_places_improvements = np.count_nonzero(improvements)
        num_places_improvementss.append(num_places_improvements)
        best_conf_index = np.argmax(improvements_per_agent)
        port_perfs.append(np.max(improvements_per_agent))
        portfolio[perf_set].append(best_conf_index)
        # Per env, the portfolio performance rises by the (clipped) improvement of the new member.
        hydra_perfs[perf_set] = hydra_perfs[perf_set] + improvements[:, best_conf_index]
    
        # Hydra using mins
        curr_portfolio_perfs_mins = np.array(hydra_perfs_mins[perf_set]).reshape(-1, 1)
        improvements_mins = perfs_all_envs[perf_set] - curr_portfolio_perfs_mins
        # Mirror image of the clip above: keep only the cells where an agent is worse than
        # the current "mins" portfolio.
        improvements_mins = np.clip(improvements_mins, a_min=None, a_max=0.0) # Only improve where current portfolio is not better
        improvements_per_agent_mins = np.sum(improvements_mins, axis=0)
        num_places_improvements_mins = np.count_nonzero(improvements_mins)
        num_places_improvementss_mins.append(num_places_improvements_mins)
        worst_conf_index = np.argmin(improvements_per_agent_mins)
        port_perfs_mins.append(np.min(improvements_per_agent_mins))
        portfolio_mins[perf_set].append(worst_conf_index)
        hydra_perfs_mins[perf_set] = hydra_perfs_mins[perf_set] + improvements_mins[:, worst_conf_index]



    print("Final portfolio:", portfolio[perf_set])
    print("Final portfolio perf.:", np.sum(hydra_perfs[perf_set]))
    # Oracle: the best agent config is picked separately on every env.
    print("Oracle perf.:", sum_over_maxes[perf_set][0])
    print("Final portfolio mins:", portfolio_mins[perf_set])

    import matplotlib.pyplot as plt

    # Per portfolio-building iteration: performance gained and number of improved grid cells
    # (log-scaled y axis).
    plt.plot(port_perfs, label="Perf. improvements")
    # plt.show()

    plt.plot(num_places_improvementss, label="No. of config improvements")
    plt.legend()
    plt.xlabel('Portfolio building iter.')
    plt.ylabel('Reward or number of configs.')
    plt.yscale('log')
    plt.grid(which='both')
    plt.show()

    # The same plot for the "mins" portfolio is left disabled:
#     print(port_perfs_mins, sum(port_perfs_mins))
#     print(num_places_improvementss_mins)
#     plt.plot(port_perfs_mins, label="Perf. improvements min")
#     plt.plot(num_places_improvementss_mins, label="No. of config improvements min")
#     plt.legend()
#     plt.show()

    
    # "Sum over maxes": column sums of the per-env sorted grid.
    # "Max over sums": the per-agent totals sorted in descending order.
    max_over_sums = -np.sort(-sum_across_envs[perf_set])
    plt.plot(sum_over_maxes[perf_set], label="Sum over maxes")
    plt.plot(max_over_sums, label="Max over sums")
    plt.legend()
    plt.grid(which='both')
    plt.show()
    
    
# for i in range(num_portfolio_configs):
    
#     for j in range(1, num_agent_configs):
#         for k in range(num_env_configs):
#             if hydra_perfs[perf_set][k] < perfs_all_envs[perf_set][k, j]:
                
    
    
# print(np.max(perfs_train_all_envs, axis=1))
# print(perfs_all_envs)
# Largest and smallest per-env correlation (Pearson, then Spearman) for every pair of
# performance sets.
for combo in corr_combos:
    combo_key = combo[0] + ' and ' + combo[1]
    pearson_corrs = corrs[combo_key]
    spearman_corrs = corrs_spm[combo_key]
    # print("Corr. on " + combo_key, pearson_corrs)
    print("Max (across envs) corr. on " + combo_key, max(pearson_corrs))
    print("Min corr. on " + combo_key, min(pearson_corrs))
    print("Max spm corr. on " + combo_key, max(spearman_corrs))
    print("Min spm corr. on " + combo_key, min(spearman_corrs))
    
#     corrs[combo[0] + ' and ' + combo[1]]

# for i in range(num_env_configs):
#     corrs[combo[0] + ' and ' + combo[1]]


In [None]:
# Spearman correlation of agent configs on 100 random pairs of envs
import random

random.seed(0)  # fixed seed: the same env pairs are drawn on every run

# Pair sampling adapted from https://stackoverflow.com/a/48581219/11063709
# n is taken from the performance grid itself, so that every sampled env index is a valid row
# (a fixed count larger than the grid would raise IndexError further below).
n = perfs_all_envs[perf_sets[0]].shape[0]
A = list(range(n))
k = 2    # envs per sample (a pair)
m = 100  # number of distinct pairs to draw

# Without enough distinct pairs the rejection loop below would never terminate.
if m > n * (n - 1) // 2:
    raise ValueError("Cannot draw %d distinct pairs from %d envs" % (m, n))

# Rejection sampling: sorting each pair makes (a, b) and (b, a) count as the same sample.
samples = set()
tries = 0
while len(samples) < m:
    samples.add(tuple(sorted(random.sample(A, k))))
    tries += 1

samples = list(samples)
# print(samples)
# print(tries)

corrs_spm_agents_on_envs = {}
for perf_set in perf_sets:
    corrs_spm_agents_on_envs[perf_set] = []

print("Spearman correlation of agent configs on 100 random pairs of envs:")
print("Mean, std, max, min")
for perf_set in perf_sets:
    for env_0, env_1 in samples:
        # Rows of the grid: performances of all agent configs on one env.
        env_0_perfs = perfs_all_envs[perf_set][env_0, :]
        env_1_perfs = perfs_all_envs[perf_set][env_1, :]

        # [0] picks the correlation coefficient out of the returned result.
        corr_spm = spm(env_0_perfs, env_1_perfs)[0]
        corrs_spm_agents_on_envs[perf_set].append(corr_spm)

    pair_corrs = corrs_spm_agents_on_envs[perf_set]
    print(perf_set, np.mean(pair_corrs), np.std(pair_corrs), np.max(pair_corrs), np.min(pair_corrs))

In [None]:
# Agent configs are read from the first entry only (env 0).
configs = list_exp_data_with_configs[0]['train_stats']

# Not needed since it's only on env 0:
# if normalise_rewards:
#     configs['episode_reward_mean'] /= list_exp_data_with_configs[0]['train_stats']['reward_scale'].to_numpy()

num_cols_b = -11 # agent config begins at -11
num_cols_e = -3 # agent config ends at -3
agent_config_cols = slice(num_cols_b, num_cols_e)

# Hard-coded row indices of a top and a bottom portfolio.
# NOTE(review): presumably copied from an earlier run of the portfolio-building cell on
# normalised eval AUCs - verify before relying on them.
portfolio_eval_auc_normalised = [634, 287, 488, 542, 570, 123, 471, 177, 465, 848]
portfolio_eval_auc_normalised_mins = [149, 864, 869, 478, 470, 306, 218, 735, 113, 349]

agent_conf_eval_auc_normalised = configs.iloc[portfolio_eval_auc_normalised, agent_config_cols]
print("portfolio_eval_auc_normalised\n", agent_conf_eval_auc_normalised)
print("\nMean:", agent_conf_eval_auc_normalised.mean(axis=0))

agent_conf_eval_auc_normalised_mins = configs.iloc[portfolio_eval_auc_normalised_mins, agent_config_cols]
# print("portfolio_eval_auc_normalised_mins\n", agent_conf_eval_auc_normalised_mins)
print("\nMean:", agent_conf_eval_auc_normalised_mins.mean(axis=0))

# The same lookup for the portfolios built above, one per performance set.
for perf_set in perf_sets: # ['train']
    print("\nperf_set:", perf_set)
    agent_conf = configs.iloc[portfolio[perf_set], agent_config_cols]
    print(agent_conf)
    print("\nMean:", agent_conf.mean(axis=0))
#     print("\nCorr.:", agent_conf.corr(method='pearson'))
#     print(portfolio[perf_set])

# agent_conf still refers to the last performance set of the loop here.
buffer_sizes = agent_conf['buffer_size']
print(type(buffer_sizes), buffer_sizes.dtype)

In [None]:
# Save top and bottom Hydra configs to disk (one dict per portfolio member); an existing file
# is never overwritten.
import pickle
import os.path

agent_confs_top = agent_conf_eval_auc_normalised.to_dict(orient='records')
agent_confs_bottom = agent_conf_eval_auc_normalised_mins.to_dict(orient='records')

# print(agent_confs_top)
# print(agent_confs_bottom)

pik = "mdpp_hydra_agent_configs_pickle.dat"

if os.path.exists(pik):
    print("File already exists!")
else:
    with open(pik, "wb") as f:
        pickle.dump([agent_confs_top, agent_confs_bottom], f)
    print("Saved file.")

# Scratch: alternative representations of the bottom configs. Only the value of the last
# expression is displayed as the cell output.
agent_conf_eval_auc_normalised_mins.to_numpy().tolist()
agent_conf_eval_auc_normalised_mins.to_dict()
# agent_conf_eval_auc_normalised.to_records(index=False).tolist() + \
# agent_conf_eval_auc_normalised_mins.to_records(index=False).tolist()

In [None]:
# Load the top and bottom agent configs back from the pickle written by the previous cell.
# CAUTION: unpickling can execute arbitrary code; only load files that you created yourself.
import pickle
pik = "mdpp_hydra_agent_configs_pickle.dat"

with open(pik, "rb") as f:
    # The file holds a two-element list: [agent_confs_top, agent_confs_bottom].
    agent_confs_top, agent_confs_bottom = pickle.load(f)

print(agent_confs_top, agent_confs_bottom)

In [None]:
# Performances of all agent configs on env 0 for the four performance sets.
perfs_train = list_exp_data[0]['train_stats']['episode_reward_mean'].to_numpy()
perfs_eval = list_exp_data[0]['eval_stats']['episode_reward_mean'].to_numpy()
perfs_train_auc = list_exp_data[0]['train_aucs']['episode_reward_mean'].to_numpy()
perfs_eval_auc = list_exp_data[0]['eval_aucs']['episode_reward_mean'].to_numpy()

# Pearson correlation between every pair of performance sets.
from scipy.stats import pearsonr as prs
from scipy.stats import spearmanr as spm
print("Corr.:", prs(perfs_train, perfs_eval))
print("Corr.:", prs(perfs_train, perfs_train_auc))
print("Corr.:", prs(perfs_eval, perfs_eval_auc))
print("Corr.:", prs(perfs_train, perfs_eval_auc))
print("Corr.:", prs(perfs_train_auc, perfs_eval_auc))
print("Corr.:", prs(perfs_eval, perfs_train_auc))

# print(perfs_train)
# print(perfs_eval)
import matplotlib.pyplot as plt
import numpy as np
plt.plot(perfs_train)
# plt.show()
plt.plot(perfs_eval)
plt.show()

# Indices of the 10 best agent configs, in ascending order of performance (best one last).
# The slice must be [-10:]; [-10:-1] would return only 9 indices and drop the best config.
top_10_train = np.argsort(perfs_train)[-10:]
top_10_eval = np.argsort(perfs_eval)[-10:]
print(top_10_train)
# Index this cell's own training performances (not a variable left over from another cell).
print(perfs_train[top_10_train])
print(top_10_eval)
print(perfs_eval[top_10_eval])
print(list_exp_data[0]['train_stats'].iloc[top_10_train, :])
print(list_exp_data[0]['eval_stats'].iloc[top_10_eval, :])


print(type(list_exp_data[0]['train_stats']))
print(perfs_train.shape)
print(perfs_eval.shape)
print(list_exp_data[0]['train_stats'].columns)
# list_exp_data[0]['train_stats'].groupby(['col_a', 'col_b']).ngroups