In [None]:

import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

from scipy.stats import norm
from IPython.display import display


In [None]:

def mean_ci(values, confidence_level=0.9, ddof=0):
    """Compute the mean and a symmetric normal-approximation confidence interval

    The half-width of the interval is ``z * std / sqrt(n)``, where ``z`` is the
    two-sided standard-normal quantile for ``confidence_level``, ``std`` is the
    standard deviation of ``values`` and ``n`` is ``len(values)``.

    Args:
        values (list): List of float (any sized sequence numpy can reduce,
            e.g. a numpy array or pandas Series)
        confidence_level (float): Confidence level, e.g. 0.9 for a 90% interval
        ddof (int): Delta degrees of freedom forwarded to ``np.std``. The
            default of 0 (population standard deviation) preserves the
            historical behaviour of this function; pass 1 to use the sample
            standard deviation instead, which gives a slightly wider interval.

    Returns:
        (float): Lower bound of the confidence interval
        (float): Mean value
        (float): Upper bound of the confidence interval
    """
    mean = np.mean(values)
    std = np.std(values, ddof=ddof)
    num_repeats = len(values)

    # Two-sided interval: split the leftover probability mass evenly between
    # both tails and look up the matching standard-normal quantile
    alpha = 1.0 - confidence_level
    z_factor = norm.ppf(1.0 - alpha / 2.0)

    # Half-width of the interval (z times the standard error of the mean)
    half_width = z_factor * std / np.sqrt(num_repeats)

    return mean - half_width, mean, mean + half_width
    

def plot_suite(df, confidence_level=0.9):
    """Plot a grid of metric curves (mean and confidence band) for an experiment suite

    The figure has one row per distinct "Num Learned Clusters" value and one
    column per metric. Within each panel the X axis is the number of
    demonstrations ("Num Rollouts") and there is one curve per
    "Initialisation" value, distinguished by colour and marker. Each point is
    the mean over the matching rows of ``df`` and the shaded band is the
    symmetric confidence interval returned by ``mean_ci``.

    Side effects: the seaborn plotting context is switched to 'poster', and the
    bottom-right panel is left as the current pyplot axes, so a subsequent
    ``plt.legend()`` call attaches to that panel.

    Args:
        df (pandas.DataFrame): Experiment results. Must contain the columns
            "Initialisation", "Num Learned Clusters", "Num Rollouts",
            "Num GT Clusters", "Iterations", "Runtime (s)",
            "Negative Log Likelihood",
            "Adjusted Normalized Information Distance" and "Min Cost Flow ILE"
        confidence_level (float): Confidence level passed to ``mean_ci``

    Returns:
        (matplotlib.figure.Figure): The figure that was created
        (numpy.ndarray): 2D array of axes, indexed as [row][column]

    Raises:
        ValueError: If a metric to plot has no entry in the metric table
    """

    # Metrics to plot, left to right (one column of panels each)
    metric_values = [
        "Iterations",
        "Runtime",
        "NLL",
        "aNID",
        "MCFILE",
        #"MILE"
    ]

    # How each metric is drawn: source column, panel title and optional
    # y-axis tweaks. NLL is plotted as stored in the data frame (it is not
    # divided by the number of demonstrations).
    metric_specs = {
        "Iterations": dict(
            column="Iterations",
            title="Iterations",
        ),
        "Runtime": dict(
            column="Runtime (s)",
            # Raw string so the LaTeX backslash is not read as an escape sequence
            title=r"$\log_{10}$ Runtime in seconds",
            log_y=True,
        ),
        "NLL": dict(
            column="Negative Log Likelihood",
            title="Negative Log Likelihood",
        ),
        "aNID": dict(
            column="Adjusted Normalized Information Distance",
            title="Normalized Information Distance\n(Adjusted for chance)",
            ylim=(-0.05, 1.05),
        ),
        "MCFILE": dict(
            column="Min Cost Flow ILE",
            title="Min Cost Flow Inverse Learning Error",
        ),
        "MILE": dict(
            column="Mean ILE",
            title="Mean Inverse Learning Error",
        ),
    }
    unknown_metrics = [m for m in metric_values if m not in metric_specs]
    if unknown_metrics:
        raise ValueError(f"Unknown metric(s): {unknown_metrics}")

    # Legend names for the known initialisation methods; anything else falls
    # back to its raw name. Markers cycle if there are more methods than markers.
    legend_names = {
        "uniform": 'Uniform',
        "kmeans": 'K-Means',
        "gmm": 'GMM',
    }
    markers = ['o', '^', 'D']

    init_method_values = sorted(df["Initialisation"].unique(), reverse=True)
    num_learned_modes_values = sorted(df["Num Learned Clusters"].unique().astype(int))
    num_rollouts_values = sorted(df["Num Rollouts"].unique())

    sns.set_context('poster')
    fig, axes = plt.subplots(
        len(num_learned_modes_values),
        len(metric_values),
        sharex=True,
        # Always get a 2D array back, even when there is a single row or column
        squeeze=False,
        figsize=(7 * len(metric_values), 6 * len(num_learned_modes_values)),
        dpi=300,
        gridspec_kw=dict(wspace=0.15, hspace=0.1)
    )
    last_row_idx = len(num_learned_modes_values) - 1

    # Sweep different metrics (horizontal plot identity)
    for metric_idx, metric in enumerate(metric_values):
        spec = metric_specs[metric]

        # Sweep number learned modes (y plot location)
        for learned_modes_idx, num_learned_modes in enumerate(num_learned_modes_values):

            ax = axes[learned_modes_idx][metric_idx]
            # Keep pyplot's notion of the current axes in sync for callers that
            # use the pyplot interface (e.g. plt.legend()) after this function
            plt.sca(ax)

            # Per-panel axis configuration, done once before any data is drawn
            if spec.get("log_y"):
                ax.set_yscale("log")
            if "ylim" in spec:
                ax.set_ylim(*spec["ylim"])
            if learned_modes_idx == 0:
                # Column titles only on the first row
                ax.set_title(spec["title"], pad=10)

            modes_rows = df[df["Num Learned Clusters"] == num_learned_modes]

            # Sweep initialization methods (one curve each)
            for init_idx, init_method in enumerate(init_method_values):
                color_str = f"C{init_idx}"
                legend_str = legend_names.get(init_method, str(init_method))
                marker_style = markers[init_idx % len(markers)]

                init_rows = modes_rows[modes_rows["Initialisation"] == init_method]

                # Sweep number of rollouts (x-axis)
                means = []
                half_widths = []
                for num_rollouts in num_rollouts_values:
                    # Slice this experiment
                    exp = init_rows[init_rows["Num Rollouts"] == num_rollouts]
                    _, ybar, yhigh = mean_ci(
                        exp[spec["column"]],
                        confidence_level=confidence_level
                    )
                    means.append(ybar)
                    half_widths.append(yhigh - ybar)

                x = np.array(num_rollouts_values)
                y = np.array(means)
                yerr = np.array(half_widths)

                ax.plot(
                    x,
                    y,
                    color=color_str,
                    label=legend_str,
                    marker=marker_style
                )
                ax.fill_between(
                    x,
                    y - yerr,
                    y + yerr,
                    color=color_str,
                    alpha=0.2
                )

            # One tick per evaluated number of demonstrations
            ax.set_xticks(np.array(num_rollouts_values))
            if learned_modes_idx == last_row_idx:
                ax.set_xlabel("Number of demonstrations")

            if metric_idx == 0:
                # Row labels only on the first column
                row_label = f"{num_learned_modes} learned mode{'s' if num_learned_modes > 1 else ''}"
                # Compare against the first ground-truth cluster count found
                # among the rows for this number of learned modes
                gt_values = modes_rows["Num GT Clusters"].unique()
                if len(gt_values) > 0:
                    num_gt_modes = gt_values[0]
                    if num_learned_modes < num_gt_modes:
                        row_label += "\n(under-clustering)"
                    elif num_learned_modes > num_gt_modes:
                        row_label += "\n(over-clustering)"
                ax.set_ylabel(row_label, labelpad=10)

            # Tick labels on the right so they do not collide with the row
            # labels on the left; hide the tick marks themselves
            ax.yaxis.tick_right()
            ax.tick_params(axis='y', length=0)

    return fig, axes


In [None]:
    
# Plot the stochastic 2-mode PuddleWorld experiment suite and save it as a PDF
# next to the metrics CSV
confidence_level = 0.90
df_filename = "CanonicalPuddleWorld-stochastic-2mode-experiments-metrics.csv"

df = pd.read_csv(df_filename)

sns.set()
plot_suite(df, confidence_level=confidence_level)
# The legend attaches to the current pyplot axes, i.e. the last panel that
# plot_suite() drew into
plt.legend(loc="upper right")
plt.suptitle(
    # ".0%" rounds to the nearest percent; int(level * 100) would truncate
    # values whose float product lands just below an integer (0.57 -> 56)
    f"Stochastic PuddleWorld (2 GT modes) - Means and ± {confidence_level:.0%} CIs",
    y=0.95
)

plt.savefig(df_filename.replace(".csv", ".pdf"), bbox_inches='tight', pad_inches=0)
plt.show()
plt.close()


In [None]:
    
# confidence_level = 0.9
# df_filename = "NChain-experiments-metrics.csv"

# df = pd.read_csv(df_filename)

# plot_suite(df, confidence_level=confidence_level)
# plt.legend(loc="upper right")
# plt.suptitle(
#     f"NChain (2 GT modes) - Means and ± {int(confidence_level * 100)}% CIs",
#     y=0.94
# )

# plt.savefig(df_filename.replace(".csv", ".pdf"), bbox_inches='tight', pad_inches=0)
# plt.show()
# plt.close()
