## Training a CartPole agent using PPO

In [None]:
import ray
from ray.tune import run

In [None]:
# Start Ray in this notebook process before launching the Tune run below.
ray.init()

In [None]:
# Train a PPO agent on CartPole-v0 with the PyTorch backend through Tune.
# The run stops once the mean episode reward reaches 195.0, and a checkpoint
# is written at the end so the trained policy can be restored afterwards.
result = run(
    "PPO",
    name="cartpole",
    config={
        "env": "CartPole-v0",
        "framework": "torch",
    },
    stop={"episode_reward_mean": 195.0},
    checkpoint_at_end=True,
    local_dir="/tmp/ray-results",
    verbose=1,
)

In [None]:
# Select the trial with the highest mean episode reward and keep the path of
# its latest checkpoint. `mode="max"` is passed explicitly: newer Ray Tune
# releases default `mode` to None and raise a ValueError when no mode was
# given to either `tune.run()` or `get_best_trial()`, while older releases
# already defaulted to "max", so this is safe on both.
best_trial = result.get_best_trial("episode_reward_mean", mode="max")
checkpoint_path = best_trial.checkpoint.value

In [None]:
# Shut Ray down now that the checkpoint path has been captured; the generated
# config module further below calls ray.init() on its own.
ray.shutdown()

## Collect a rollout

In [None]:
# Run the `rllib rollout` CLI against the trained PPO checkpoint: 5 episodes,
# rendering disabled, output written to /tmp/cartpole.ray_rollout.
# IPython expands $checkpoint_path from the Python variable assigned earlier.
# NOTE(review): --use-shelve presumably selects a shelve-backed output file
# -- confirm against the installed RLlib version.
!rllib rollout \
--run PPO \
--use-shelve \
--no-render \
--episodes 5 \
--out /tmp/cartpole.ray_rollout \
$checkpoint_path

## Calculate attributions

In [None]:
# Source text of a standalone Python config module for `rld`, built as an
# f-string. The only interpolated field is {checkpoint_path}, which is baked
# into the generated get_model() as a raw-string path literal.
#
# The generated module:
#   * reads params.json from checkpoint_path.parents[1] (the directory above
#     the one holding the checkpoint file) and uses it as the trainer config;
#   * starts Ray, builds a PPOTrainer, restores it from the checkpoint, wraps
#     the policy's model in RayModelWrapper, then shuts Ray down;
#   * defines baseline_builder, which returns an all-zeros array shaped like
#     the observation;
#   * exposes a module-level `config` object built from the pieces above.
# Note that get_model() runs at import time of the generated module.
config_content = f"""
import json
from pathlib import Path

import numpy as np
import ray
from ray.rllib.agents.ppo import PPOTrainer

from rld.attributation import AttributationTarget, AttributationNormalizationMode
from rld.config import Config
from rld.model import Model, RayModelWrapper
from rld.typing import ObsLike


def get_model() -> Model:
    checkpoint_path = Path(r"{checkpoint_path}")
    params_path = checkpoint_path.parents[1] / "params.json"
    with open(params_path) as f:
        params = json.load(f)
    ray.init()
    trainer = PPOTrainer(config=params)
    trainer.restore(str(checkpoint_path))
    model = RayModelWrapper(trainer.get_policy().model)
    ray.shutdown()
    return model


def baseline_builder(obs: ObsLike):
    return np.zeros_like(obs)


model = get_model()


config = Config(
    model=model,
    baseline=baseline_builder,
    target=AttributationTarget.ALL,
    normalize_sign=AttributationNormalizationMode.POSITIVE,
)

"""
# Write the generated module to the path that the `rld attribute` cell passes
# on its command line.
with open("/tmp/cartpole_config.py", "w") as f:
    f.write(config_content)

In [None]:
# Run `rld attribute` with the generated config module and the collected
# rollout file as inputs; the result is written to /tmp/cartpole.rld.
# NOTE(review): --rllib presumably marks the rollout as RLlib-formatted --
# confirm against the rld CLI help.
!rld attribute \
--rllib \
--out /tmp/cartpole.rld \
/tmp/cartpole_config.py \
/tmp/cartpole.ray_rollout

## Visualize results

In [None]:
# Start the rld viewer on the file produced by the `rld attribute` step.
# NOTE(review): `--viewer cartpole` presumably selects a CartPole-specific
# viewer -- confirm against the rld CLI help.
!rld start --viewer cartpole /tmp/cartpole.rld