In [None]:
if 'google.colab' in str(get_ipython()):
    !pip install -r https://raw.githubusercontent.com/abbbe/eye-on-stick/main/requirements.txt
    !git clone https://github.com/abbbe/eye-on-stick
    %cd eye-on-stick

In [None]:
%load_ext autoreload
%autoreload 2

import numpy as np

import os, urllib, time
# Select a SQLite file as the mlflow tracking store; this is set before mlflow is
# imported and before the client below is created.
os.environ["MLFLOW_TRACKING_URI"] = "sqlite:///mlruns/db.sqlite"
import mlflow, git
# Shared client; save_and_register_model uses it to create model versions.
mlflow_client = mlflow.tracking.MlflowClient()

from stable_baselines.common.cmd_util import make_vec_env
from stable_baselines.common.vec_env import VecNormalize
from stable_baselines import PPO2, SAC

import matplotlib.pyplot as plt
%matplotlib inline

from IPython import display

from lib import eos
from lib.eos import EyeOnStickEnv

In [None]:
# Build a "<branch>/<short-sha>" label for the current checkout, prefixed with '*'
# when the working tree has uncommitted changes.
with git.Repo() as repo:
    head_sha = repo.git.rev_parse(repo.head.object.hexsha, short=4)
    # active_branch raises TypeError on a detached HEAD, so fall back to the
    # label git itself uses in that situation
    branch_name = 'HEAD' if repo.head.is_detached else repo.active_branch.name
    git_info = f'{branch_name}/{head_sha}'
    if repo.is_dirty():
        git_info = f'*{git_info}'

In [None]:
from lib.run import run_env_nsteps

def log_metrics(metrics, step):
    """
    Record every entry of the ``metrics`` dict as an mlflow metric.

    metrics -- mapping of metric name to value
    step    -- step value attached to each logged metric (passed through as is)
    """
    for metric_name in metrics:
        metric_value = metrics[metric_name]
        mlflow.log_metric(key=metric_name, value=metric_value, step=step)

def save_and_register_model(model, saved_models_dir, era, model_name, mlflow_run):
    """
    Save ``model`` under ``saved_models_dir`` and register it as a new mlflow model version.

    model            -- object exposing ``save(path)``
    saved_models_dir -- directory receiving one saved model per era
    era              -- era identifier, used as the base name of the saved model
    model_name       -- name of the registered model in the mlflow model registry
    mlflow_run       -- mlflow run the new version is linked to (``info.run_id`` is read)

    Returns the value returned by ``mlflow_client.create_model_version``.
    Re-raises any MlflowException from creating the registered model, except
    the one signalling that it already exists.
    """
    # save the trained models, each era separately
    # NOTE(review): the registered source is the path as passed to model.save();
    # the saver may append a file extension of its own - confirm when loading
    model_fname = f'{saved_models_dir}/{era}'
    model.save(model_fname)

    # create_model_version requires the registered model to exist, so create it
    # on first use and tolerate "already exists" on every later call
    try:
        mlflow_client.create_registered_model(model_name)
    except mlflow.exceptions.MlflowException as exc:
        if getattr(exc, 'error_code', None) != 'RESOURCE_ALREADY_EXISTS':
            raise

    # register the trained model
    return mlflow_client.create_model_version(name=model_name, source=model_fname, run_id=mlflow_run.info.run_id)

In [None]:
def learn_and_run(n_joints, params, gym_policy_class=SAC, gym_model_name='MlpPolicy', name=None, display=None):
    """
    Train an agent on the eye-on-stick environment era by era, tracking everything in mlflow.

    1. Instantiates a vectorised EyeOnStickEnv (N_ENVS copies) with n_joints and params,
       and a model built from gym_policy_class / gym_model_name.
    2. Opens a parent mlflow run, logs the parameters and creates the "tensorboard_log"
       and "saved_models" directories inside the run's artifact directory.
    3. For each of N_ERAS eras, inside a nested child run:
       a. resets the environment and trains the model with model.learn(N_LEARN_EPOCHS);
       b. saves the model and registers it as a new mlflow model version;
       c. steps through the environment for N_STEPS steps via run_env_nsteps and logs
          the returned metrics with step=era.
    4. Logs the metrics of the last era to the parent run.

    n_joints         -- passed to EyeOnStickEnv
    params           -- dict passed to EyeOnStickEnv; every item is also logged as an mlflow param
    gym_policy_class -- model class, called as gym_policy_class(gym_model_name, env, tensorboard_log=...)
    gym_model_name   -- policy name handed to gym_policy_class
    name             -- optional suffix appended to the mlflow run name (not to the model name)
    display          -- forwarded unchanged to run_env_nsteps; shadows any module-level `display`

    Returns None. The environment is closed even when training or logging raises.
    """
    # `import urllib` alone does not guarantee that these submodules are loaded
    import urllib.parse
    import urllib.request

    env = make_vec_env(lambda: EyeOnStickEnv(n_joints, params), n_envs=N_ENVS)
    #env = VecNormalize(env)

    try:
        run_name = f'eos.{n_joints}J'
        model_name = run_name  # the registered model name never carries the optional suffix
        if name is not None:
            run_name += f' {name}'

        # create new mlflow run which will become a parent of per-era runs
        with mlflow.start_run(run_name=run_name):
            # log gym params
            mlflow.log_param("gym_policy_class", gym_policy_class.__name__)
            mlflow.log_param("gym_model_name", gym_model_name)
            for key, value in params.items():
                mlflow.log_param(key, value)

            # arrange tensorboard logs; exist_ok=False makes a pre-existing directory an error
            mlflow_artifacts_dir = urllib.request.url2pathname(urllib.parse.urlparse(mlflow.get_artifact_uri()).path)
            tensorboard_logdir = os.path.join(mlflow_artifacts_dir, "tensorboard_log")
            os.makedirs(tensorboard_logdir, exist_ok=False)

            # create gym model and directory to save it
            model = gym_policy_class(gym_model_name, env, tensorboard_log=tensorboard_logdir)
            saved_models_dir = os.path.join(mlflow_artifacts_dir, "saved_models")
            os.makedirs(saved_models_dir, exist_ok=False)

            ## run eras loop
            metrics = None
            for era in range(N_ERAS):
                child_run_name = f'era={era}'

                with mlflow.start_run(run_name=child_run_name, nested=True) as child_run:
                    # train first, then evaluate the freshly trained model
                    env.reset()
                    model.learn(N_LEARN_EPOCHS)
                    registered_model = save_and_register_model(model, saved_models_dir, era, model_name, child_run)
                    # the version may come back as a string; metrics must be numeric
                    mlflow.log_metric("model_version", int(registered_model.version))

                    metrics = run_env_nsteps(env, model, N_STEPS, display=display)
                    log_metrics(metrics, step=era)

            # log the metrics of the last era to the parent run
            if metrics:
                log_metrics(metrics, step=None)
    finally:
        # release the environment on both the success and the failure path
        env.close()

In [None]:
# Training schedule read by learn_and_run.
# We run N_ERAS eras (one nested mlflow run each). In every era the model is first
# trained via model.learn(N_LEARN_EPOCHS), then the environment is stepped for
# N_STEPS steps and the resulting metrics are reported to mlflow.
N_ERAS = 25 # number of eras
N_STEPS = 1000 # steps per era passed to run_env_nsteps
N_LEARN_EPOCHS = 10000 # argument of model.learn(); NOTE(review): stable_baselines names it total_timesteps, not epochs - confirm
N_ENVS = 1 # n_envs passed to make_vec_env

In [None]:
#params = {
#       'REWARD_AIM_WEIGHT': 1,
#       'REWARD_LEVEL_WEIGHT': 0,
#       'REWARD_ACTION_WEIGHT': 0,
#       'GEAR_FUNC_NOISE': 0
#}

In [None]:
# Train and evaluate a 3-joint agent with an empty params dict.
# `display` is the IPython.display module imported at the top; learn_and_run
# forwards it to run_env_nsteps. The run is named 'eos.3J 1651'.
learn_and_run(n_joints=3, params={}, name='1651', display=display)

#for round in range(N_RUN_ROUNDS):
#    run(n_joints=4, policy_class=SAC, model_name='MlpPolicy', name=f'{git_info} {round+1}/{N_RUN_ROUNDS}')