In [None]:
import numpy as np
import matplotlib.pyplot as plt
import gymnasium as gym
import math
import mlflow
from mlflow.tracking import MlflowClient
import optuna
import os

from TMDP import TMDP
from algorithms import *
from model_functions import *
from policy_utils import *
from experiment_result_utils import *
from constants import *

from RiverSwim import *
from CurriculumQ import CurriculumQ

In [None]:
# RiverSwim environment and experiment configuration.
nS = 100        # number of states; also the length of `mu` and `xi` below
nA = 2          # presumably the number of actions (not used in the visible cells)
small = 5e-3    # forwarded to RiverSwim(small=...)
large = 1.      # forwarded to RiverSwim(large=...)
gamma = 0.999   # forwarded to TMDP(gamma=...) and to compute_eps_model

uniform_restart = False
num_runs = 2    # number of seeded runs requested from run_experiments(...)

# `mu` is handed to RiverSwim together with nS: uniform over all states when
# uniform_restart is set, otherwise all of the mass sits on state index 1.
mu = np.zeros(nS)
if uniform_restart:
    mu[:] = 1 / nS
else:
    mu[1] = 1

# `xi` is a uniform vector over the nS states; it is passed to TMDP.
xi = np.full(nS, 1 / nS)

# Training / evaluation budget.
episodes = 15_000_000
checkpoint_step = 500
test_episodes = 10_000

# Options forwarded to CurriculumQ.train.
param_decay = True
debug = False
lam = 1

# Accumulators that run_experiment appends to (one entry per run).
experiment_results = []
tests_returns = []

In [None]:
# CurriculumQ hyperparameters.
model_lr = 0.15869281717397965   # first positional argument of CurriculumQ.train
batch_size = 20
exp_rate = 0.4
tau = 0.6                        # used for TMDP(tau=...), tmdp.update_tau and eps_model

# eps_model is derived from gamma, tau and a tenth of the episode budget.
eps_model = compute_eps_model(gamma, tau, episodes / 10)

In [None]:

# Names derived from the configuration above.
run_name = f"CurrQ_{uniform_restart}"
label = run_name.partition("_")[0]   # text before the first underscore
experiment_name = f"RiverSwim_{nS}_{uniform_restart}"
save_path = f"results/{experiment_name}/run_{run_name}"   # local fallback location

# Point MLflow at the tracking server and select the experiment for this setup.
mlflow.set_tracking_uri(MLFLOW_URI)
experiment_id = get_or_create_experiment(experiment_name)
mlflow.set_experiment(experiment_name)

In [None]:
def run_experiment(index, seed, run_name):
    """Train and evaluate one seeded CurriculumQ run inside a nested MLflow run.

    Reads the notebook-level configuration (nS, mu, xi, tau, gamma, model_lr,
    batch_size, lam, exp_rate, episodes, eps_model, param_decay, debug,
    checkpoint_step, test_episodes).

    Args:
        index: Position of this run; used as the suffix of the nested MLflow
            run name and stored in the result dictionary.
        seed: Seed handed to set_policy_seed, RiverSwim and TMDP.
        run_name: Name of the parent run; the nested run is "<run_name>_<index>".

    Returns:
        None. The test returns are appended to the global `tests_returns` and
        a result dictionary is appended to the global `experiment_results`.
    """
    child_run_name = "{}_{}".format(run_name, index)

    with mlflow.start_run(run_name=child_run_name, nested=True):
        # Seeding comes first, before the environment and the TMDP are built.
        set_policy_seed(seed)

        # Environment-specific part.
        river_swim = RiverSwim(nS, mu, small=small, large=large, seed=seed)

        # Environment-independent part.
        tmdp = TMDP(river_swim, xi, tau=tau, gamma=gamma, seed=seed)
        tmdp.update_tau(tau)
        learner = CurriculumQ(tmdp, checkpoint_step=checkpoint_step)

        train_options = dict(
            batch_size=batch_size,
            lam=lam,
            exp_rate=exp_rate,
            episodes=episodes,
            eps_model=eps_model,
            param_decay=param_decay,
            debug=debug,
        )
        learner.train(model_lr, **train_options)

        # Mean of the last 10 reward records, scaled down by the batch size.
        last_records = learner.reward_records[-10:]
        avg_return = np.average(last_records) / batch_size
        mlflow.log_metric("Avg Return", avg_return)

        test_policies_return = test_Q_policies(tmdp, learner.Qs, test_episodes)

        tests_returns.append(test_policies_return)
        experiment_results.append({
            "Qs": learner.Qs,
            "taus": learner.taus,
            "reward_records": learner.reward_records,
            "test_policies_return": test_policies_return,
            "index": index,
        })
       


In [None]:
def run_experiments(num_runs=10):
    """Run `num_runs` seeded experiments under one parent MLflow run and store the results.

    Each run i uses `constants.SEEDS[i]` and is executed by `run_experiment`,
    which appends to the global `tests_returns` / `experiment_results` lists.
    Afterwards the collected test returns are saved through `save_to_mlflow`
    and an average-return figure is logged; if either MLflow call raises, the
    artifact is written under the global `save_path` instead.

    NOTE(review): this function is defined again in the following cell; that
    later definition overrides this one. One of the two should be removed.

    Args:
        num_runs: Number of seeded runs to execute. Must be between 1 and
            len(constants.SEEDS).

    Raises:
        ValueError: If `num_runs` is outside the supported range. Checked up
            front so a bad value fails before any training is started.
    """
    # Imported locally on purpose: the notebook only uses star-imports, which do
    # not bind the module names `constants` and `time` themselves.
    import os
    import time
    import constants

    available_seeds = len(constants.SEEDS)
    if not 1 <= num_runs <= available_seeds:
        raise ValueError(
            f"num_runs must be between 1 and {available_seeds} "
            f"(the number of entries in constants.SEEDS), got {num_runs}"
        )

    # Start from empty accumulators so that re-running this cell does not mix in
    # results from an earlier (possibly interrupted) invocation.
    tests_returns.clear()
    experiment_results.clear()

    with mlflow.start_run(run_name=run_name):
        seeds = [constants.SEEDS[i] for i in range(num_runs)]
        for i, seed in enumerate(seeds):
            run_experiment(i, seed, run_name)

        experiment_dict = {
            "tests_returns": tests_returns,
            "num_runs": num_runs,
            "label": label,
            "checkpoint_step": checkpoint_step,
        }
        mlflow.set_tags(tags={
            # "seed" keeps its previous meaning (seed of the last run);
            # "seeds" records every seed that was actually used.
            "seed": seed,
            "seeds": ", ".join(str(s) for s in seeds),
            "tau": tau,
            "gamma": gamma,
            "checkpoint_step": checkpoint_step,
            "test_episodes": test_episodes,
            "uniform_restart": uniform_restart,
            "episodes": episodes,
            "model_lr": model_lr,
            "batch_size": batch_size,
            "lam": lam,
            "eps_model": eps_model,
            "exp_rate": exp_rate,
        })

        try:
            save_to_mlflow(experiment_dict)
        except Exception as e:
            # Best effort: keep the results even if the tracking server is unavailable.
            print(e)
            print("Something went wrong saving the experiment results to MLFlow.")
            print("Saving locally instead.")
            time.sleep(5)
            save(save_path, experiment_dict)

        # The full run name is used in the title; slicing it with [:-3] cut the
        # name in the middle of the restart flag (e.g. "CurrQ_Fa").
        rewards_fig = plot_avg_test_return(tests_returns, f"{run_name} Avg Return on {num_runs} runs")
        try:
            mlflow.log_figure(figure=rewards_fig, artifact_file="reward_image.png")
        except Exception as e:
            print(e)
            print("Something went wrong saving the figure to MLFlow.")
            print("Saving locally instead.")
            time.sleep(5)
            # The directory may not exist yet if the results themselves were
            # stored in MLflow successfully.
            os.makedirs(save_path, exist_ok=True)
            rewards_fig.savefig(os.path.join(save_path, "reward_image.png"))

In [None]:
def run_experiments(num_runs=10):
    """Run `num_runs` seeded experiments under one parent MLflow run and store the results.

    Each run i uses `constants.SEEDS[i]` and is executed by `run_experiment`,
    which appends to the global `tests_returns` / `experiment_results` lists.
    Afterwards the collected test returns are saved through `save_to_mlflow`
    and an average-return figure is logged; if either MLflow call raises, the
    artifact is written under the global `save_path` instead.

    NOTE(review): another definition of run_experiments appears in the
    preceding cell; this later definition is the one in effect. One of the two
    should be removed.

    Args:
        num_runs: Number of seeded runs to execute. Must be between 1 and
            len(constants.SEEDS).

    Raises:
        ValueError: If `num_runs` is outside the supported range. Checked up
            front so a bad value fails before any training is started.
    """
    # Imported locally on purpose: the notebook only uses star-imports, which do
    # not bind the module names `constants` and `time` themselves.
    import os
    import time
    import constants

    available_seeds = len(constants.SEEDS)
    if not 1 <= num_runs <= available_seeds:
        raise ValueError(
            f"num_runs must be between 1 and {available_seeds} "
            f"(the number of entries in constants.SEEDS), got {num_runs}"
        )

    # Start from empty accumulators so that re-running this cell does not mix in
    # results from an earlier (possibly interrupted) invocation.
    tests_returns.clear()
    experiment_results.clear()

    with mlflow.start_run(run_name=run_name):
        seeds = [constants.SEEDS[i] for i in range(num_runs)]
        for i, seed in enumerate(seeds):
            run_experiment(i, seed, run_name)

        experiment_dict = {
            "tests_returns": tests_returns,
            "num_runs": num_runs,
            "label": label,
            "checkpoint_step": checkpoint_step,
        }
        mlflow.set_tags(tags={
            # "seed" keeps its previous meaning (seed of the last run);
            # "seeds" records every seed that was actually used.
            "seed": seed,
            "seeds": ", ".join(str(s) for s in seeds),
            "tau": tau,
            "gamma": gamma,
            "checkpoint_step": checkpoint_step,
            "test_episodes": test_episodes,
            "uniform_restart": uniform_restart,
            "episodes": episodes,
            "model_lr": model_lr,
            "batch_size": batch_size,
            "lam": lam,
            "eps_model": eps_model,
            "exp_rate": exp_rate,
        })

        try:
            save_to_mlflow(experiment_dict)
        except Exception as e:
            # Best effort: keep the results even if the tracking server is unavailable.
            print(e)
            print("Something went wrong saving the experiment results to MLFlow.")
            print("Saving locally instead.")
            time.sleep(5)
            save(save_path, experiment_dict)

        # The full run name is used in the title; slicing it with [:-3] cut the
        # name in the middle of the restart flag (e.g. "CurrQ_Fa").
        rewards_fig = plot_avg_test_return(tests_returns, f"{run_name} Avg Return on {num_runs} runs")
        try:
            mlflow.log_figure(figure=rewards_fig, artifact_file="reward_image.png")
        except Exception as e:
            print(e)
            print("Something went wrong saving the figure to MLFlow.")
            print("Saving locally instead.")
            time.sleep(5)
            # The directory may not exist yet if the results themselves were
            # stored in MLflow successfully.
            os.makedirs(save_path, exist_ok=True)
            rewards_fig.savefig(os.path.join(save_path, "reward_image.png"))

In [None]:
# Launch the experiment: one parent MLflow run containing `num_runs` nested,
# individually seeded runs (see run_experiments / run_experiment above).
run_experiments(num_runs=num_runs)