In [11]:
from tqdm import tqdm
from omegaconf import  OmegaConf
from gymnasium.spaces.utils import flatdim
from pathlib import Path

from corerl.agent.factory import init_agent
from corerl.environment.factory import init_environment
from corerl.state_constructor.factory import init_state_constructor
from corerl.interaction.factory import init_interaction
from corerl.utils.evaluator import Evaluator
from hydra import compose, initialize

import corerl.utils.freezer as fr

In [12]:
def prepare_save_dir(cfg):
    """Create the output directory for this run and write the config into it.

    The directory is ``<save_path>/<exp_name>/param-<param>/seed-<seed>``,
    with every component read from ``cfg.experiment``. Missing parent
    directories are created and an already existing directory is reused.
    The full ``cfg`` is written to ``config.yaml`` inside that directory via
    ``OmegaConf.save``.

    Args:
        cfg: Config object exposing ``experiment.save_path``,
            ``experiment.exp_name``, ``experiment.param`` and
            ``experiment.seed``.

    Returns:
        pathlib.Path: The run directory.
    """
    exp = cfg.experiment
    run_dir = (
        Path(exp.save_path)
        / exp.exp_name
        / f'param-{exp.param!s}'
        / f'seed-{exp.seed!s}'
    )
    run_dir.mkdir(parents=True, exist_ok=True)

    config_file = run_dir / "config.yaml"
    with config_file.open("w") as f:
        OmegaConf.save(cfg, f)

    return run_dir

def update_pbar(pbar, stats):
    """Show the current statistics as the progress bar's description.

    Each entry is rendered as ``key : value``. Float values are shown with
    one decimal place; every other value uses its default string form.
    Entries are joined with ``', '`` so the separator is the same for all
    value types and there is no dangling separator after the last entry.

    Args:
        pbar: Object with a ``set_description(str)`` method (e.g. a tqdm bar).
        stats: Mapping from statistic name to its current value.
    """
    parts = []
    for k, v in stats.items():
        if isinstance(v, float):
            parts.append('{key} : {val:.1f}'.format(key=k, val=v))
        else:
            parts.append('{key} : {val}'.format(key=k, val=v))
    # Join once at the end instead of appending a per-entry suffix, which
    # previously differed between float and non-float values.
    pbar.set_description(', '.join(parts))

In [13]:
# Build the run configuration with Hydra's compose API: inside the
# initialize() context, load the config named "config" using "config/" as
# the config path.
with initialize(version_base=None, config_path="config/"):
    cfg = compose(config_name="config")

In [14]:
# Build the experiment components, each from its own sub-section of the config.
env = init_environment(cfg.env)
sc = init_state_constructor(cfg.state_constructor, env) # only give part of the config file that is needed
# The interaction object wraps the environment and state constructor; the
# training loops step through it rather than through `env` directly.
interaction = init_interaction(cfg.interaction, env, sc)
# Flattened size of the environment's action space.
action_dim = flatdim(env.action_space)

In [15]:
# Reset the environment to obtain the first state; `info` is not used in the
# cells shown here.
state, info = env.reset()
state_dim = sc.get_state_dim(state)  # gets state_dim dynamically
# The agent is sized from the state dimension reported by the state
# constructor and the flattened action dimension.
agent = init_agent(cfg.agent, state_dim, action_dim)

In [16]:
# Evaluator that receives every transition in the training loop and supplies
# the statistics shown on the progress bar.
evaluator = Evaluator(cfg.evaluator)

In [18]:
# Main training loop: run for a fixed number of steps taken from the config.
# NOTE(review): the first `state` used here is the value returned by
# env.reset() in an earlier cell, while every later state is whatever
# interaction.step() returns — confirm both are in the same representation.
max_steps = cfg.experiment.max_steps
pbar = tqdm(range(max_steps))
for _ in pbar:
    action = agent.get_action(state)
    # `env_info` is unpacked but not used in this loop.
    next_state, reward, done, truncate, env_info = interaction.step(action)
    transition = (state, action, reward, next_state, done, truncate)
    # Store the transition, then run one agent update per step.
    agent.update_buffer(transition)
    agent.update()
    # No reset is issued here when `done`/`truncate` is set.
    # presumably interaction.step() handles episode boundaries — TODO confirm
    state = next_state
    
    # for logging
    evaluator.update(transition)
    stats = evaluator.get_stats()
    update_pbar(pbar, stats)

num_episodes : 32 avg_reward : 1.0, avg_return : 1.0, avg_reward (100) : n/a avg_return (100) : n/a :   1%|          | 32/5000 [00:08<22:26,  3.69it/s]


KeyboardInterrupt: 

# Freezer Demo


In [20]:
# Create the run directory (this also writes config.yaml into it), then
# initialize the freezer with a 'logs' path inside that directory.
save_path = prepare_save_dir(cfg)
fr.init_freezer(save_path / 'logs')

In [21]:
# Minimal usage: store a value under a key, then call save().
# presumably save() persists the freezer's current contents under the path
# given to init_freezer — TODO confirm
fr.freezer['Test'] = 'Something!'
fr.freezer.save()

In [22]:
# Same training loop as above, but logging each transition through the
# freezer instead of the evaluator. The environment is not reset in this
# cell, so the loop continues from the `state` left by the previous loop.
max_steps = cfg.experiment.max_steps
pbar = tqdm(range(max_steps))
for _ in pbar:
    action = agent.get_action(state)
    # `env_info` is unpacked but not used in this loop.
    next_state, reward, done, truncate, env_info = interaction.step(action)
    transition = (state, action, reward, next_state, done, truncate)
    # Store the transition, then run one agent update per step.
    agent.update_buffer(transition)
    agent.update()
    state = next_state
    
    # freezer example
    # Per step: put the transition in the freezer, save it, then call
    # increment() and clear().
    # presumably increment() advances the freezer's step counter so each
    # save is kept separately — TODO confirm
    fr.freezer['transition'] = transition
    fr.freezer.save()
    fr.freezer.increment()
    fr.freezer.clear()  # Optionally clearing the log

  1%|          | 34/5000 [00:09<23:31,  3.52it/s]


KeyboardInterrupt: 