In [None]:
import json
from IPython.display import display, Javascript
from luxai_s3.wrappers import LuxAIS3GymEnv, RecordEpisode

def render_episode(episode: RecordEpisode) -> None:
    """Show a recorded episode in the hosted Lux AI S3 visualizer.

    The episode is serialized with ``episode.serialize_episode_data()`` and
    dumped as compact JSON. A JavaScript snippet is then displayed that:

    * creates an ``<iframe>`` pointing at ``https://s3vis.lux-ai.org/#/kaggle``,
    * posts the JSON payload to the iframe once it has loaded, and
    * keeps the iframe height at ``320 + 0.3 * width`` pixels via a
      ``ResizeObserver``.

    Args:
        episode: Recorder wrapper whose ``serialize_episode_data()`` result is
            JSON-serializable.

    Note:
        The snippet appends the iframe to ``element``, which is expected to be
        supplied by the notebook front end for ``Javascript`` outputs
        (NOTE(review): confirm the target front end provides it).
    """
    payload = json.dumps(episode.serialize_episode_data(), separators=(",", ":"))

    # Doubled braces are literal JavaScript braces; only {payload} is interpolated.
    script = f"""
var iframe = document.createElement('iframe');
iframe.src = 'https://s3vis.lux-ai.org/#/kaggle';
iframe.width = '100%';
iframe.scrolling = 'no';

iframe.addEventListener('load', event => {{
    event.target.contentWindow.postMessage({payload}, 'https://s3vis.lux-ai.org');
}});

new ResizeObserver(entries => {{
    for (const entry of entries) {{
        entry.target.height = `${{Math.round(320 + 0.3 * entry.contentRect.width)}}px`;
    }}
}}).observe(iframe);

element.append(iframe);
    """
    display(Javascript(script))

In [None]:
def evaluate_agents(agent_1_cls, agent_2_cls, seed=42, games_to_play=3, replay_save_dir="replays"):
    """Play two agent classes against each other and render each finished game.

    Args:
        agent_1_cls: Class instantiated as ``agent_1_cls("player_0", env_cfg)``.
            Instances must expose a ``player`` attribute and an
            ``act(step=..., obs=...)`` method.
        agent_2_cls: Same contract, instantiated for ``"player_1"``.
        seed: Seed passed to the initial ``env.reset`` call.
        games_to_play: Number of games to run.
        replay_save_dir: Directory passed to ``RecordEpisode`` as ``save_dir``.

    The environment is always closed, even when an agent or the environment
    raises mid-game, so resources are released and the recorder's
    ``save_on_close`` hook still runs. The exception itself propagates.
    """
    env = RecordEpisode(
        LuxAIS3GymEnv(numpy_output=True), save_on_close=True, save_on_reset=True, save_dir=replay_save_dir
    )

    try:
        # Seeded reset up front; every game below then starts from its own
        # unseeded reset, exactly as before.
        obs, info = env.reset(seed=seed)
        for i in range(games_to_play):
            # obs: {p0:..., p1: ...}
            obs, info = env.reset()
            env_cfg = info["params"]  # only contains observable game parameters
            player_0 = agent_1_cls("player_0", env_cfg)
            player_1 = agent_2_cls("player_1", env_cfg)

            # main game loop
            game_done = False
            step = 0
            print(f"Running game {i}")
            while not game_done:
                # actions: {p0:..., p1: ...}
                actions = {
                    agent.player: agent.act(step=step, obs=obs[agent.player])
                    for agent in (player_0, player_1)
                }
                obs, reward, terminated, truncated, info = env.step(actions)
                # info["state"] is the environment state object, you can inspect/play around
                # with it to e.g. print unobservable game data that agents can't see
                dones = {k: terminated[k] | truncated[k] for k in terminated}
                # The game ends as soon as either player is done.
                game_done = bool(dones["player_0"] or dones["player_1"])
                step += 1
            render_episode(env)
    finally:
        env.close()  # free up resources and save final replay, even on error

In [None]:
from kits.python.agent import Agent
from luxai_s3.wrappers import SingleAgentWrapper

In [None]:
# evaluate_agents(Agent, Agent)  # uncomment to run the kit Agent against itself

In [None]:
from stable_baselines3 import PPO
# Load a saved PPO model; the path is relative, so it resolves against the
# notebook's current working directory. No live code visible in this notebook
# calls `rl_agent` yet.
rl_agent = PPO.load('../rl/models/ppo_baseline.bin')

In [None]:
from rl.wrappers import RLWrapper

In [None]:
def evaluate_single_agents(seed=42, games_to_play=10, replay_save_dir="logs/replays"):
    """Run games through the single-agent ``RLWrapper`` with random actions.

    Each game is rendered with ``render_episode`` once it terminates or is
    truncated.

    Args:
        seed: Seed passed to the initial ``env.reset`` call.
        games_to_play: Number of games to run.
        replay_save_dir: Directory passed to ``RecordEpisode`` as ``save_dir``.

    The environment is always closed, even when a step raises, so resources
    are released and the recorder's ``save_on_close`` hook still runs. The
    exception itself propagates.
    """
    env = RLWrapper(
        RecordEpisode(
            LuxAIS3GymEnv(numpy_output=True),
            save_on_close=True, save_on_reset=True, save_dir=replay_save_dir
        )
    )

    try:
        # Seeded reset up front; every game below then starts from its own
        # unseeded reset, exactly as before.
        obs, info = env.reset(seed=seed)
        for i in range(games_to_play):
            obs, info = env.reset()

            # main game loop
            print(f"Running game {i}")
            game_done = False
            while not game_done:
                # Random policy. To evaluate a trained model instead, swap this
                # line for e.g. `action, _ = rl_agent.predict(obs)`.
                action = env.action_space.sample()

                obs, reward, terminated, truncated, info = env.step(action)
                # info["state"] is the environment state object, you can inspect/play around
                # with it to e.g. print unobservable game data that agents can't see
                game_done = terminated or truncated
            # render_episode needs the RecordEpisode layer, which sits one level
            # below the RLWrapper.
            render_episode(env.env)
    finally:
        env.close()  # free up resources and save final replay, even on error

# Run the single-agent evaluation with its defaults
# (seed=42, 10 games, replays written under "logs/replays").
evaluate_single_agents()