In [None]:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.ticker import ScalarFormatter, FuncFormatter, MaxNLocator
import gymnasium as gym
import math
import mlflow
from mlflow.tracking import MlflowClient
import optuna
import os

from algorithms import *
from model_functions import *
from policy_utils import *
from experiment_result_utils import *
from constants import *
# Seed NumPy's legacy global RNG so np.random draws made in this notebook are repeatable.
# NOTE(review): `constants` is used as a module name here, but the cell above only does
# `from constants import *`; the name resolves only if one of the star imports happens to
# re-export it — confirm, or add an explicit `import constants`.
np.random.seed(constants.SEEDS[3])


In [None]:
# --- Experiment selection -------------------------------------------------
# nS is only referenced by the commented-out RiverSwim experiment name below.
nS = 20
# nrows and num_bins are interpolated into the FrozenLake experiment name.
nrows = 20
num_bins = 10
experiment_name = f"FrozenLake_{nrows}x{nrows}_{num_bins}"
# Alternative experiment; uncomment to switch (it overrides the name above).
#experiment_name = f"RiverSwim{nS}"

# Point MLflow at the tracking server before resolving the experiment.
mlflow.set_tracking_uri(MLFLOW_URI)
# get_or_create_experiment is a project helper; presumably it returns the MLflow
# experiment id for `experiment_name`, creating the experiment if missing — verify.
experiment_id = get_or_create_experiment(experiment_name)
# Make this experiment the active one for subsequent MLflow calls.
mlflow.set_experiment(experiment_name)


In [None]:
# Load the stored results for the selected experiment via a project helper.
# plot_experiment_results() reads each entry's "tests_rewards" and "label" keys;
# presumably one entry per parent MLflow run — verify against get_parent_artifacts.
results = get_parent_artifacts(experiment_id=experiment_id)

In [None]:
from matplotlib.ticker import ScalarFormatter, FuncFormatter

In [None]:
def plot_experiment_results(results, title=None, figsize=(8, 5), 
                            reduce:bool=False, extend:bool=False,
                            conf_int:bool=False, x_scale:int=500):
    """Plot the mean test return of every experiment result on one shared axis.

    Closes all open matplotlib figures, creates a new one, draws one curve per
    entry of ``results`` and displays it with ``plt.show()``.

    Args:
        results: Sequence of dict-like entries. Each entry is read through the
            keys ``"tests_rewards"`` and ``"label"``. ``"tests_rewards"`` is
            averaged along axis 0 and its length is used as the sample count,
            so it is assumed to be laid out as (runs, evaluation points) —
            TODO confirm against the producer of these results.
        title: Axes title; defaults to ``"Average Return"`` when ``None``.
        figsize: Figure size forwarded to ``plt.subplots``.
        reduce: When true, every curve (and its band) is truncated to the
            shortest curve length found across ``results`` (measured on the
            first row of each ``"tests_rewards"``).
        extend: Currently unused; kept so existing callers keep working.
        conf_int: When true, shade ``mean ± 1.96 * std / sqrt(n)`` around each
            curve (normal-approximation 95% band, population std).
        x_scale: Factor applied to the evaluation index to obtain the x
            coordinate (presumably the number of episodes between tests).

    Returns:
        The ``matplotlib`` figure that was drawn.
    """
    plt.close('all')
    fig, ax = plt.subplots(figsize=figsize)
    title = "Average Return" if title is None else title

    # The common length is only needed when truncating, so do not touch the
    # reward arrays (or fail on an empty ``results``) unless ``reduce`` is set.
    reduced_len = None
    if reduce:
        reduced_len = min((len(r["tests_rewards"][0]) for r in results), default=0)

    for i, result in enumerate(results):
        rewards = result["tests_rewards"]
        label = result["label"]
        # Wrap the colour index like the marker/line-style indices below so
        # plotting more results than there are colours does not raise.
        color = COLORS[i % len(COLORS)]

        avg_rewards = np.average(rewards, axis=0)
        if reduce:
            avg_rewards = avg_rewards[:reduced_len]

        x_values = np.arange(len(avg_rewards)) * x_scale
        ax.plot(x_values, avg_rewards, label=label, c=color,
                marker=MARKERS[i % len(MARKERS)],
                markevery=MARKER_FREQUENCY[i % len(MARKER_FREQUENCY)],
                linestyle=LINE_STYLES[i % len(LINE_STYLES)],
                )

        if conf_int:
            std_dev = np.std(rewards, axis=0)
            if reduce:
                std_dev = std_dev[:reduced_len]

            # Standard error of the mean across the rows of ``rewards``.
            n_samples = len(rewards)
            std_err = std_dev / np.sqrt(n_samples)
            ci = 1.96  # z-value for a two-sided 95% normal interval
            upper_bound = avg_rewards + ci * std_err
            lower_bound = avg_rewards - ci * std_err
            # Draw on the explicit axes rather than pyplot's implicit current axes.
            ax.fill_between(x_values, lower_bound, upper_bound, color=color, alpha=0.2)

    ax.legend(loc="lower right", framealpha=1.)
    ax.set_title(title)
    ax.set_xlabel('Million Episodes')
    ax.set_ylabel('Avg Return')
    ax.grid()
    plt.show()
    return fig


In [None]:
# Plot every loaded result: curves truncated to the shortest common length
# (reduce=True) with a shaded mean ± 1.96·SE band (conf_int=True).
# The returned Figure is kept in `fig`.
fig = plot_experiment_results(results, reduce=True, conf_int=True)
