In [1]:
from src.environment import NormalMoveGatedEnv
from src.experiment import Experiment
from src.nn import BetaModel, Model, GeneralModel
from src.rl import Agent, Trajectory

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import math

%matplotlib agg
# cmd -> cd .\notebooks\
# cmd -> mlflow ui

In [2]:
# --- Experiment identity (used to create / look up the MLflow experiment) ---
experiment_name = 'Gated Envs'
# Free-text note attached to the experiment; kept in Portuguese because it is
# stored verbatim as the MLflow experiment description.
experiment_description = 'Comparação entre uso de n modelos com o uso de um modelo generalista em um mesmo ambiente com portas'

# --- Sweep size ---
# num_runs: how many independent top-level runs are launched.
# num_episodes: how many episodes each agent plays inside one run.
num_runs, num_episodes = 5, 1

In [3]:
import mlflow
from mlflow import MlflowException

# Look the experiment up first and create it only when it does not exist yet.
# This keeps the cell idempotent across "Restart & Run All" without using a
# broad `except MlflowException` as control flow, which would also hide
# unrelated MLflow failures behind a confusing secondary error.
exp = mlflow.get_experiment_by_name(experiment_name)
if exp is None:
    exp_id = mlflow.create_experiment(
        experiment_name,
        tags={'mlflow.note.content': experiment_description},
    )
    exp = mlflow.get_experiment(exp_id)
else:
    exp_id = exp.experiment_id

print('experiment_name:', experiment_name)
print('experiment_id:', exp_id)

experiment_name: Gated Envs
experiment_id: 902287542146906075


In [4]:
# NOTE(review): `start` and `goals` are not referenced by `env_params` below,
# which carries its own (different) 'start' and 'goals' literals. They are kept
# in case another cell reads them -- confirm and remove if unused.
start = ([0.,-9.9],[9.9,-5.])
goals = [([-9.,8.],[-8.,9.])]

# Keyword arguments forwarded to NormalMoveGatedEnv(**env_params).
# Each region is written as a pair of 2-element lists -- presumably two
# opposite corners of an axis-aligned rectangle; verify against the env class.
env_params = {
    'walls': [([-8.,-1.],[10., 1.]), ([-1.,-8.],[1., 8.])],
    'goals': [([2.,2.],[3.,3.])],
    'start': ([7.,-9.9],[9.9,-7.]),
    'gates': [([-1.,-10.],[1., -8.]), ([-10.,-1.],[-8., 1.]), ([-1.,8.],[1., 10.])],
    'buttons': [([2.,-3.],[3.,-2.]), ([-3.,-3.],[-2.,-2.]), ([-3.,2.],[-2.,3.])],
    # True when s[0] and s[1] have strictly opposite signs
    # (s[0] > 0 > s[1] or s[0] < 0 < s[1]).
    'beta': lambda s: (s[0]>0 and s[1]<0) or (s[0]<0 and s[1]>0)
}

# Keyword arguments shared by every Agent built in this notebook.
model_params = {
    "lr":1e-6, 
    'momentum':.9,
    'td_model_steps': 10, 
    'memory_size': 0, 
    'max_plan_size': 100, 
}
# Agent-specific keyword arguments, merged with `model_params` when an Agent
# is constructed. The two variants differ in the model class they select; the
# first one additionally passes k=5 (presumably the number of models -- verify
# against Agent).
k_model_params = dict(k=5, model=Model)
general_params = dict(model=GeneralModel)

In [5]:
def gen_env():
    """Build the gated environment and record it in the active MLflow run.

    Logs `env_params` as run parameters, instantiates `NormalMoveGatedEnv`
    from them, and stores a rendering of the environment as the artifact
    "env.png".

    Returns:
        The freshly constructed NormalMoveGatedEnv instance.
    """
    mlflow.log_params(env_params)
    env = NormalMoveGatedEnv(**env_params)

    env_fig = env.plot().get_figure()
    try:
        mlflow.log_figure(env_fig, "env.png")
    finally:
        # pyplot keeps every figure alive until it is explicitly closed;
        # without this each call leaks one figure for the kernel's lifetime.
        plt.close(env_fig)
    return env

def _log_figure_and_close(fig, artifact_file):
    """Log `fig` to the active MLflow run, then release it if it is a matplotlib figure."""
    try:
        mlflow.log_figure(fig, artifact_file)
    finally:
        # Only matplotlib figures are registered with pyplot and need closing;
        # anything else that log_figure accepts is left untouched.
        if isinstance(fig, plt.Figure):
            plt.close(fig)


def run(model, params, episode_size_limit=1000):
    """Run one episode with `model` and log its results to the active MLflow run.

    Args:
        model: Agent-like object exposing reset(), episode(size_limit=...),
            plot(...), and the attributes `plans`, `trajectory.run` and `lls`.
        params: Agent-specific parameters, logged alongside `model_params`.
        episode_size_limit: Forwarded to model.episode as `size_limit`.

    Logged artifacts: "probs.png", "params.png" and one "plans/plan-NNN.png"
    per entry in `model.plans`. Logged metrics: "Trajectory Size" (first
    dimension of `model.trajectory.run`) and "Final Likelyhood" (last entry
    of `model.lls`).
    """
    model.reset()
    mlflow.log_params(model_params)
    mlflow.log_params(params)
    model.episode(size_limit=episode_size_limit)

    _log_figure_and_close(model.plot(kind='probs'), "probs.png")
    _log_figure_and_close(model.plot(), "params.png")
    # Plans are addressed by index; the plan object itself is not needed here.
    for plan_id in range(len(model.plans)):
        _log_figure_and_close(
            model.plot(kind='plan', plan=plan_id),
            f"plans/plan-{plan_id:03d}.png",
        )

    mlflow.log_metric("Trajectory Size", model.trajectory.run.shape[0])
    # NOTE(review): "Likelyhood" is misspelled, but the key is kept as-is so the
    # metric stays comparable with runs already logged under this name.
    mlflow.log_metric("Final Likelyhood", model.lls[-1])

In [6]:
# Run hierarchy in MLflow:
#   run "<i>"  ->  nested "epi-<e>"  ->  nested "K-Models" / "General"
# Both agents share the same environment instance within a top-level run.
for i in range(num_runs):
    with mlflow.start_run(experiment_id=exp_id, run_name=f'{i}'):
        env = gen_env()
        k_models = Agent(env, **k_model_params, **model_params)
        general = Agent(env, **general_params, **model_params)
        # (nested run name, agent, agent-specific params), in execution order.
        contenders = (
            ('K-Models', k_models, k_model_params),
            ('General', general, general_params),
        )
        for e in range(num_episodes):
            with mlflow.start_run(experiment_id=exp_id, run_name=f'epi-{e}', nested=True):
                for label, agent, agent_params in contenders:
                    with mlflow.start_run(experiment_id=exp_id, run_name=label, nested=True):
                        mlflow.log_param("run", i)
                        mlflow.log_param("episode", e)
                        # Re-render the environment for every nested run and
                        # close the figure right away so figures do not pile
                        # up in pyplot across runs and episodes.
                        env_fig = env.plot().get_figure()
                        try:
                            mlflow.log_figure(env_fig, "env.png")
                        finally:
                            plt.close(env_fig)
                        run(agent, agent_params)

  fig, ax = plt.subplots(figsize=(5, 5))


-------Erro no Treinamento-----------
Parametros estimados para cada estado.
