In [None]:

# Put the kernel's current working directory on the import path so that
# modules located there can be imported by the cells below.
# NOTE(review): the variable name suggests the working directory is the
# notebook's own directory -- confirm for your launch setup.
import os
import sys

# os.getcwd() yields the same directory string as the `%pwd` magic, but is
# plain Python, so this cell also works outside of IPython.
notebook_path = os.getcwd()

# Guard the append: re-running this cell must not pile up duplicate entries.
if notebook_path not in sys.path:
    sys.path.append(notebook_path)


In [None]:

import os
import glob

import pickle
import pprint
import gym
import copy
import pickle
import random
import warnings

import numpy as np
import pandas as pd
import itertools as it
import matplotlib.pyplot as plt
from tqdm.notebook import tqdm

from pprint import pprint
from datetime import datetime

from sklearn.mixture import GaussianMixture, BayesianGaussianMixture

from puddle_world.envs import *
from explicit_env.soln import value_iteration, q_from_v, OptimalPolicy, policy_evaluation
from unimodal_irl.utils import empirical_feature_expectations

from multimodal_irl import bv_em_maxent, mixture_ll_maxent, responsibilty_matrix_maxent
from multimodal_irl.metrics import *

from unimodal_irl import sw_maxent_irl
from unimodal_irl.utils import pad_terminal_mdp
from unimodal_irl.metrics import ile_evd

from experiments.pw_exp import (
    TransitionType,
    NumGTModes,
    ExperimentConfig,
    get_experiment_fixed_inputs,
    get_experiment_train_inputs,
    get_experiment_test_inputs,
    get_experiment_outputs
) 


In [None]:

# Load the experimental static inputs
config = ExperimentConfig(TransitionType.DETERMINISTIC, NumGTModes.TWO)
fixed_inputs = get_experiment_fixed_inputs(config)

# Load the experimental results; the file name is derived from the config so
# that the CSV read here matches the configuration chosen above.
result_filename = f"CanonicalPuddleWorld-{config.transition_type.value}-{config.num_gt_modes.value}mode-experiments.csv"
df = pd.read_csv(result_filename, index_col=False)

# Discard the "Unnamed: 0" column (presumably a stray index written by an
# earlier `to_csv` call -- verify). `errors="ignore"` keeps this cell working
# when the CSV does not contain that column, where `del` raised a KeyError.
df = df.drop(columns=["Unnamed: 0"], errors="ignore")

# Add columns for evaluation metrics, initialised to NaN until they are
# filled in. The tuple order fixes the column order in the frame.
for metric_column in (
    "Negative Log Likelihood (Test Set)",
    "Negative Log Likelihood (Train Set)",
    "Normalized Information Distance (Test Set)",
    "Normalized Information Distance (Train Set)",
    "Adjusted Normalized Information Distance (Test Set)",
    "Adjusted Normalized Information Distance (Train Set)",
    "Min Cost Flow ILE",
    "Min Cost Flow EVD",
):
    df[metric_column] = np.nan

df


In [None]:


# Retained from the original cell; nothing in the visible cells reads it.
vals = []

# Evaluate every experiment row of `df` and write the metrics back in place.
for row_pos in tqdm(range(len(df))):
    exp = df.iloc[row_pos]
    # `.iloc` is positional whereas `.at` is label-based. Resolve the label
    # explicitly so the writes below hit the row that was just read, even if
    # `df` does not carry the default 0..n-1 index.
    row_label = df.index[row_pos]

    # Get training and test inputs
    train_inputs = get_experiment_train_inputs(fixed_inputs, exp)
    test_inputs = get_experiment_test_inputs(fixed_inputs, exp)

    # Get experiment outputs
    # NOTE(review): only `test_inputs` is passed here, yet `outputs` is also
    # read for "responsibility_matrix_train" below -- confirm the helper
    # provides the train-set matrix on its own.
    outputs = get_experiment_outputs(fixed_inputs, exp, test_inputs)

    env_noreward = fixed_inputs["environment_noreward"]
    learned_mode_weights = outputs["mode_weights_learned"]
    learned_state_rewards = outputs["state_reward_parameters_learned"]

    # Log-likelihood and clustering performance of the learned model, first
    # on the training data and then on the testing data.
    for split, split_inputs in (("Train", train_inputs), ("Test", test_inputs)):
        learned_resp = outputs[f"responsibility_matrix_{split.lower()}"]
        gt_resp = split_inputs["responsibility_matrix_gt"]

        df.at[row_label, f"Negative Log Likelihood ({split} Set)"] = -1.0 * mixture_ll_maxent(
            env_noreward,
            split_inputs["rollouts"],
            learned_mode_weights,
            learned_state_rewards,
        )
        df.at[row_label, f"Normalized Information Distance ({split} Set)"] = (
            normalized_information_distance(learned_resp, gt_resp)
        )
        df.at[row_label, f"Adjusted Normalized Information Distance ({split} Set)"] = (
            adjusted_normalized_information_distance(learned_resp, gt_resp)
        )

    # Build error matrices: entry [i, j] holds the ILE / EVD pair returned by
    # `ile_evd` for learned mode i measured against ground-truth mode j.
    num_learned_modes = train_inputs["num_learned_modes"]
    num_gt_modes = train_inputs["num_gt_modes"]
    ile_mat = np.zeros((num_learned_modes, num_gt_modes))
    evd_mat = np.zeros_like(ile_mat)
    for learned_mode_idx in range(num_learned_modes):
        # Work on a deep copy so the shared reward-free environment is never
        # modified when the learned rewards are attached.
        env_learned = copy.deepcopy(env_noreward)
        env_learned._state_rewards = learned_state_rewards[learned_mode_idx]
        for gt_mode_idx in range(num_gt_modes):
            ile_mat[learned_mode_idx, gt_mode_idx], evd_mat[learned_mode_idx, gt_mode_idx] = ile_evd(
                fixed_inputs["environments"][gt_mode_idx],
                env_learned,
                optimal_policy_value=fixed_inputs["mode_optimal_policy_state_value_functions"][gt_mode_idx],
            )

    # Reduce each error matrix to a single score. Only the first element of
    # the returned pair is stored; the second is bound to a throwaway name
    # rather than `_`, which IPython uses for the last cell output.
    for metric_column, error_mat in (
        ("Min Cost Flow ILE", ile_mat),
        ("Min Cost Flow EVD", evd_mat),
    ):
        df.at[row_label, metric_column], _unused = min_cost_flow_error_metric(
            learned_mode_weights,
            train_inputs["mode_weights_gt"],
            error_mat,
        )


In [None]:

# Persist the frame (now including the metric columns) next to the input
# file: the ".csv" in the input name is swapped for "-metrics.csv", and the
# index is left out of the written file.
metrics_suffix = "-metrics.csv"
filename_out = result_filename.replace(".csv", metrics_suffix)
df.to_csv(
    filename_out,
    index=False,
)
