## Agent Behavior Visualization

This notebook visualizes agent behavior after training.
We compare the behavior of four agents:
- agent without training
- agent trained on default environment
- agent trained on changed environment after 1st phase of training
- agent trained on changed environment after 2nd phase of training

In [2]:
from __future__ import annotations
import random
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import torch
import torch.nn as nn
from torch.distributions.normal import Normal
from stable_baselines3 import PPO, TD3
import time
import gymnasium as gym
import pickle
import warnings
warnings.filterwarnings("ignore")


# Default matplotlib figure size applied to every plot created in this notebook.
plt.rcParams["figure.figsize"] = (10, 5)

### Default Env no training

In [3]:
# Baseline: roll out a freshly initialised (untrained) PPO policy in the
# Humanoid environment and watch it in the interactive viewer.
env_test = gym.make("Humanoid-v4", render_mode="human", healthy_z_range=(0.9, 2.0))
wrapped_env_test = gym.wrappers.RecordEpisodeStatistics(env_test)
seed = 42
n_episodes = 10

try:
    agent = PPO('MlpPolicy', wrapped_env_test, verbose=0)

    for episode in range(n_episodes):
        # Seed only the first reset. Re-seeding on every reset restarts the
        # environment RNG, so every episode would begin from the same state.
        obs, info = wrapped_env_test.reset(seed=seed if episode == 0 else None)
        done = False

        while not done:
            action, _states = agent.predict(obs)
            # With render_mode="human" the viewer is refreshed as part of
            # step(), so no explicit render() call is needed.
            obs, reward, terminated, truncated, info = wrapped_env_test.step(action)
            done = terminated or truncated
finally:
    # Always release the viewer window, even if the rollout is interrupted.
    wrapped_env_test.close()

### Default Env Agent 1

In [12]:
# Roll out the saved PPO checkpoint "agent_default_1" in the Humanoid
# environment and watch it in the interactive viewer.
env_test = gym.make("Humanoid-v4", render_mode="human", healthy_z_range=(0.9, 2.0))
wrapped_env_test = gym.wrappers.RecordEpisodeStatistics(env_test)
seed = 42
n_episodes = 10

try:
    # Loading inside the try block ensures the environment is still closed
    # if the checkpoint file is missing or incompatible.
    agent = PPO.load("agents//agent_default_1", env=wrapped_env_test)

    for episode in range(n_episodes):
        # Seed only the first reset. Re-seeding on every reset restarts the
        # environment RNG, so every episode would begin from the same state.
        obs, info = wrapped_env_test.reset(seed=seed if episode == 0 else None)
        done = False

        while not done:
            action, _states = agent.predict(obs)
            # With render_mode="human" the viewer is refreshed as part of
            # step(), so no explicit render() call is needed.
            obs, reward, terminated, truncated, info = wrapped_env_test.step(action)
            done = terminated or truncated
finally:
    # Always release the viewer window, even if the rollout is interrupted.
    wrapped_env_test.close()

### Changed Env 1st Phase of Training

In [13]:
# Roll out the saved PPO checkpoint "agent_changed_1_1" and watch it in the
# interactive viewer.
# NOTE(review): the environment below is built with the same arguments as the
# default-environment cells — confirm whether the changed-environment
# configuration should be used here instead.
env_test = gym.make("Humanoid-v4", render_mode="human", healthy_z_range=(0.9, 2.0))
wrapped_env_test = gym.wrappers.RecordEpisodeStatistics(env_test)
seed = 42
n_episodes = 10

try:
    # Loading inside the try block ensures the environment is still closed
    # if the checkpoint file is missing or incompatible.
    agent = PPO.load("agents//agent_changed_1_1", env=wrapped_env_test)

    for episode in range(n_episodes):
        # Seed only the first reset. Re-seeding on every reset restarts the
        # environment RNG, so every episode would begin from the same state.
        obs, info = wrapped_env_test.reset(seed=seed if episode == 0 else None)
        done = False

        while not done:
            action, _states = agent.predict(obs)
            # With render_mode="human" the viewer is refreshed as part of
            # step(), so no explicit render() call is needed.
            obs, reward, terminated, truncated, info = wrapped_env_test.step(action)
            done = terminated or truncated
finally:
    # Always release the viewer window, even if the rollout is interrupted.
    wrapped_env_test.close()

### Changed Env 2nd Phase of Training

In [14]:
# Roll out the saved PPO checkpoint "agent_changed_2_1" and watch it in the
# interactive viewer.
# NOTE(review): the environment below is built with the same arguments as the
# default-environment cells — confirm whether the changed-environment
# configuration should be used here instead.
env_test = gym.make("Humanoid-v4", render_mode="human", healthy_z_range=(0.9, 2.0))
wrapped_env_test = gym.wrappers.RecordEpisodeStatistics(env_test)
seed = 42
n_episodes = 10

try:
    # Loading inside the try block ensures the environment is still closed
    # if the checkpoint file is missing or incompatible.
    agent = PPO.load("agents//agent_changed_2_1", env=wrapped_env_test)

    for episode in range(n_episodes):
        # Seed only the first reset. Re-seeding on every reset restarts the
        # environment RNG, so every episode would begin from the same state.
        obs, info = wrapped_env_test.reset(seed=seed if episode == 0 else None)
        done = False

        while not done:
            action, _states = agent.predict(obs)
            # With render_mode="human" the viewer is refreshed as part of
            # step(), so no explicit render() call is needed.
            obs, reward, terminated, truncated, info = wrapped_env_test.step(action)
            done = terminated or truncated
finally:
    # Always release the viewer window, even if the rollout is interrupted.
    wrapped_env_test.close()