Run a test that replicates the general outline of the class handouts. This example uses Sarsa($\lambda$) on Lunar Lander.

In [None]:
import gymnasium as gym

from src.LambdaSARSA import LambdaSARSA

# Size of the test run.
NUM_REPLICATIONS = 5
NUM_EPISODES = 200

# Create the Lunar Lander environment and hand it to the Sarsa(lambda) learner.
env = gym.make('LunarLander-v2')
lam_sarsa = LambdaSARSA(env)

lam_sarsa.train(num_replications=NUM_REPLICATIONS, num_episodes=NUM_EPISODES)

# The value returned by get_results() is not used in this cell.
lam_sarsa.get_results()
lam_sarsa.show_results()

Run an LHS experiment of the type shown in the handouts. This example uses one-step Sarsa on Mountain Car.

In [None]:
import gymnasium as gym

from src.LHS_functions import LHS_Experiment
from src.SARSA import SARSA

env = gym.make('MountainCar-v0')

# Feature names passed to LHS_Experiment. Expands to
# ["alpha_a", "alpha_b", "eps_a", "eps_b"] (same order as written out by hand).
features = [
    f"{stem}_{suffix}"
    for stem in ("alpha", "eps")
    for suffix in ("a", "b")
]

# Experiment over the SARSA class (passed as a class, not an instance).
sarsa_experiment = LHS_Experiment(SARSA, env, features)

# Run the experiment first, then the export / plotting / ANOVA steps in the
# same order as before; anova() stays last so it remains the cell's output.
sarsa_experiment.parallel_lhs()
sarsa_experiment.export_results()
sarsa_experiment.plot_results()
sarsa_experiment.plot_param_comparison()
sarsa_experiment.anova()

Example sweep using the wandb API.

*NOTE* This requires logging in to the wandb API. Log in with the CLI login command first;
after that the sweep can run in a notebook, but I recommend running the contents of the cell below as a
separate *script*.py, since that is less risky if the kernel crashes.

Don't forget to update params

In [None]:
import wandb
from LambdaSARSA import LambdaSARSA

# (min, max) search range for each swept hyperparameter. Every entry is
# expanded below into a wandb 'uniform' distribution spec.
_uniform_ranges = {
    'alpha_a': (0, 1),
    'alpha_b': (0, 1),
    'eps_a': (0, 1),
    'eps_b': (0, 1),
    'lam_a': (0, 1),
    'lam_b': (0, 1),
    'clip': (5, 15),
}

# Random-search sweep that maximizes the "Score" value logged by main().
sweep_config = {
    "method": "random",
    "name": "sweep",
    "metric": {"goal": "maximize", "name": "Score"},
    'parameters': {
        param: {'distribution': 'uniform', 'min': low, 'max': high}
        for param, (low, high) in _uniform_ranges.items()
    },
}

# Register the sweep under the wandb project; the returned id is what
# wandb.agent() is given at the end of this cell.
WANDB_PROJECT = "DSOR646-LambdaSARSA"
sweep_id = wandb.sweep(sweep=sweep_config, project=WANDB_PROJECT)

# A single module-level environment, read by main() on every run.
import gymnasium as gym
env = gym.make('LunarLander-v2')

def main():
    """Run one sweep trial and report its metrics to wandb.

    Starts a wandb run, builds a ``LambdaSARSA`` learner on the module-level
    ``env`` from the hyperparameters in the run's config, trains it, and logs
    the seven values returned by ``get_results()`` together with a composite
    ``"Score"`` -- the metric name ``sweep_config`` asks the sweep to maximize.

    Takes no arguments and returns nothing; it is meant to be passed as the
    ``function`` argument of ``wandb.agent``.
    """
    # Use the run handle directly (rather than the wandb.config / wandb.log
    # module globals) and let the context manager finish the run even when
    # training or result collection raises.
    with wandb.init() as run:
        config = run.config
        model = LambdaSARSA(
            env,
            alpha_a=config.alpha_a,
            alpha_b=config.alpha_b,
            eps_a=config.eps_a,
            eps_b=config.eps_b,
            lam_a=config.lam_a,
            lam_b=config.lam_b,
            clip=config.clip,
        )
        model.train(num_replications=30, num_episodes=300)

        # The seventh value is logged under "time"; it is bound to a different
        # local name so the stdlib module name `time` is not shadowed here.
        (maxETDR, maxETDRhw, meanMaxTestEETDR, maxTestHW,
         meanAULC, hwAULC, time_metric) = model.get_results()

        # Composite objective: 60% weight on (meanMaxTestEETDR - maxTestHW),
        # 40% on (meanAULC - hwAULC).
        # NOTE(review): the "HW"/"hw" values look like confidence-interval
        # half-widths, making each term a lower bound -- confirm against
        # LambdaSARSA.get_results().
        alg_score = 0.6 * (meanMaxTestEETDR - maxTestHW) + 0.4 * (meanAULC - hwAULC)

        run.log({
            "maxETDR": maxETDR,
            "maxETDRhw": maxETDRhw,
            "meanMaxTestEETDR": meanMaxTestEETDR,
            "maxTestHW": maxTestHW,
            "meanAULC": meanAULC,
            "hwAULC": hwAULC,
            "time": time_metric,
            "Score": alg_score,
        })

# Start the sweep job: the agent invokes main() for each run it executes,
# stopping after SWEEP_RUN_COUNT runs.
SWEEP_RUN_COUNT = 5
wandb.agent(sweep_id, function=main, count=SWEEP_RUN_COUNT)