In [None]:
import os
import sys
# Make the project's top-level packages (`algorithms`, `experiments`) importable.
# Two candidate roots are registered:
#   * the directory above the current working directory, and
#   * the `~/github/qhack_2023/` directory.
# `os.path.expanduser` is required for the second one: Python does not expand
# "~" in `sys.path` entries, so the raw string would never resolve to the
# home directory.
candidate_roots = [
    os.path.abspath(os.path.join('..')),
    os.path.expanduser("~/github/qhack_2023/"),
]
for module_path in candidate_roots:
    # Appending (not inserting) keeps installed packages ahead of the project
    # directories; the membership check keeps the cell idempotent on re-run.
    if module_path not in sys.path:
        sys.path.append(module_path)

In [None]:
import os

import gym
import numpy as np
import matplotlib.pyplot as plt

from stable_baselines3 import DQN, PPO
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common import results_plotter
from stable_baselines3.common.results_plotter import load_results, ts2xy


from algorithms.monitor_callback import SaveOnBestTrainingRewardCallback
from experiments.learning_curve import moving_average, plot_results

In [None]:
# Picking the winning parameters.
# `model_parameters` is unpacked into the model constructor as keyword
# arguments. The commented-out dictionaries are earlier candidates kept for
# reference; the active empty dict means no overrides are passed.

# Candidate 1 (looks like DQN settings, given `learning_starts` -- confirm):
# model_parameters = {
#     'learning_rate': 0.7,
#     'learning_starts': 50_000,
#     'gamma': 0.99,
#     'max_grad_norm': 10
# }
# Candidate 2 (looks like PPO settings, given `n_steps` -- confirm):
# model_parameters = {
#     'learning_rate': 2.1114943100278494e-05, 
#     'n_steps': 2**8, 
#     'gamma': 0.00035659217767805687
# }
model_parameters = {}

# Algorithm class to train; swap the comment to switch.
# MODEL = DQN
MODEL = PPO

# Human-readable labels used in plot titles, the saved image name and the
# upload commit message.
# `algo` is derived from MODEL so switching the class above cannot leave a
# stale label behind.
algo = MODEL.__name__
# Must describe the environment selected by ENV_ID ("FrozenLake-v1"); the
# previous "Mountain Car" label mislabeled every plot and the upload.
environment = "Frozen Lake"

In [None]:
# Directory that receives the training logs; created on first use.
log_dir = "/tmp/gym/"
os.makedirs(log_dir, exist_ok=True)

# Gym environment to train on (alternative kept for quick switching).
ENV_ID = "FrozenLake-v1"
#ENV_ID = "MountainCar-v0"

# Build the environment and wrap it in a Monitor in a single expression.
# Logs will be saved in log_dir/monitor.csv
env = Monitor(gym.make(ENV_ID), log_dir)

In [None]:
# Callback that checks the training reward every 1000 steps, using log_dir.
callback = SaveOnBestTrainingRewardCallback(
    check_freq=1000,
    log_dir=log_dir,
)

# Instantiate the selected algorithm with an MLP policy on the monitored env;
# any overrides in model_parameters are forwarded as keyword arguments.
model = MODEL(
    policy="MlpPolicy",
    env=env,
    **model_parameters,
)

# Run training for ten million timesteps with the callback attached.
model.learn(
    total_timesteps=10_000_000,
    callback=callback,
)

In [None]:
# Helper from the library: reward curve over the logged training run.
# The second argument caps how many timesteps are plotted. Training above runs
# for int(1e7) steps, so the former 1e5 cap silently dropped ~99% of the curve;
# passing None disables the cap and plots everything found in log_dir.
results_plotter.plot_results([log_dir], None, results_plotter.X_TIMESTEPS, f"{algo} {environment}")

In [None]:
from stable_baselines3.common.evaluation import evaluate_policy

# Separate environment instance for evaluation (not wrapped in a Monitor).
eval_env = gym.make(ENV_ID)

# Evaluate over 200 episodes with deterministic actions.
# The call is the last expression so its result is shown as the cell output.
evaluate_policy(
    model,
    eval_env,
    n_eval_episodes=200,
    deterministic=True,
)

In [None]:
# Smoothed learning curve from the logs in log_dir, via the project helper.
# The file-name fragment is computed first so the f-string below does not
# nest double quotes (a SyntaxError before Python 3.12).
environment_slug = environment.lower().replace(" ", "_")
plot_results(
    log_dir,
    title=f"{algo} {environment} Learning Curve Smoothed",
    # presumably the location where the helper saves the figure -- confirm
    path_save=f"../images/{algo.lower()}_learning_curve_{environment_slug}.png",
)

In [None]:
import gym

from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common.env_util import make_vec_env
from huggingface_sb3 import package_to_hub

## Define a repo_id
# Hugging Face account (user or organization) that will own the repository.
# It is read from the HF_USERNAME environment variable so the notebook runs
# from a fresh kernel without a hard-coded account name.
username = os.environ.get("HF_USERNAME")
if not username:
    raise RuntimeError(
        "Set the HF_USERNAME environment variable to your Hugging Face "
        "user or organization name before uploading the model."
    )

# ENV_ID is the identifier the environment was created with; the names below
# are "<algo>-<ENV_ID>" and "<username>/<algo>-<ENV_ID>".
model_name = f"{algo.lower()}-{ENV_ID}"
repo_id = f"{username}/{model_name}"

commit_message = f"Upload trained {algo} {environment}"

# Create the evaluation env (a single-environment DummyVecEnv)
eval_env = DummyVecEnv([lambda: gym.make(ENV_ID)])

# Package the trained model and push it to the Hub
package_to_hub(model=model, # Our trained model
               model_name=model_name, # The name of our trained model 
               model_architecture=algo, # The model architecture we used (the `algo` label)
               env_id=ENV_ID, # Name of the environment
               eval_env=eval_env, # Evaluation Environment
               repo_id=repo_id, # id of the model repository from the Hugging Face Hub (repo_id = {organization}/{repo_name} for instance ThomasSimonini/ppo-LunarLander-v2
               commit_message=commit_message)
