# Performance Benchmarking: PID vs RL Controller

This notebook benchmarks the PID and RL controllers on the `OT2Env` environment.
We evaluate each controller over multiple episodes and compare performance on key metrics:
- Reward
- Steps
- Success Rate

We visualize results using **line plots, bar charts, and boxplots**.

In [3]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from stable_baselines3 import PPO
from pid_controller import PIDController
from ot2_gym_wrapper_2 import OT2Env

In [None]:
# Configuration
EPISODES = 20
PID_GAINS = (0.5, 0.01, 0.1)  # Example PID gains
RL_MODEL_PATH = "models/0jfld8sq/final_model.zip"
RESULTS_CSV = "benchmark_results.csv"

In [11]:
def evaluate_pid(env, pid, episodes=20):
    rewards, steps, success = [], [], []
    for _ in range(episodes):
        obs, _ = env.reset()
        pid.set_target(env.goal_position)
        done, ep_reward, t = False, 0, 0
        while not done:
            action = pid.update(env.get_current_position())
            obs, reward, terminated, truncated, _ = env.step(action)
            done = terminated or truncated
            ep_reward += reward
            t += 1
        rewards.append(ep_reward)
        steps.append(t)
        success.append(terminated)
    return rewards, steps, success

def evaluate_rl(env, model, episodes=20):
    rewards, steps, success = [], [], []
    for _ in range(episodes):
        obs, _ = env.reset()
        done, ep_reward, t = False, 0, 0
        while not done:
            action, _ = model.predict(obs, deterministic=True)
            obs, reward, terminated, truncated, _ = env.step(action)
            done = terminated or truncated
            ep_reward += reward
            t += 1
        rewards.append(ep_reward)
        steps.append(t)
        success.append(env.success)
    return rewards, steps, success

In [12]:
# Initialize environment
env = OT2Env()

# PID Benchmark
pid = PIDController(*PID_GAINS)
pid_rewards, pid_steps, pid_success = evaluate_pid(env, pid, EPISODES)

# RL Benchmark
model = PPO.load(RL_MODEL_PATH, env=env, device='cpu')
rl_rewards, rl_steps, rl_success = evaluate_rl(env, model, EPISODES)

# Collect results
df = pd.DataFrame({
    'controller': ['PID'] * EPISODES + ['RL'] * EPISODES,
    'reward': pid_rewards + rl_rewards,
    'steps': pid_steps + rl_steps,
    'success': pid_success + rl_success,
})
df.to_csv(RESULTS_CSV, index=False)
df.head()

Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.


AttributeError: 'OT2Env' object has no attribute 'success'

In [None]:
# --- Line Plots ---
plt.figure(figsize=(8,5))
plt.plot(range(EPISODES), pid_rewards, label='PID')
plt.plot(range(EPISODES), rl_rewards, label='RL')
plt.xlabel('Episode')
plt.ylabel('Reward')
plt.title('Reward per Episode')
plt.legend()
plt.show()

plt.figure(figsize=(8,5))
plt.plot(range(EPISODES), pid_steps, label='PID')
plt.plot(range(EPISODES), rl_steps, label='RL')
plt.xlabel('Episode')
plt.ylabel('Steps')
plt.title('Steps per Episode')
plt.legend()
plt.show()

In [None]:
# --- Bar Chart: Success Rate ---
success_rate = df.groupby('controller')['success'].mean()
success_rate.plot(kind='bar', ylabel='Success Rate', title='Controller Success Rate', rot=0)
plt.show()

In [None]:
# --- Boxplots ---
df.boxplot(column='reward', by='controller', grid=False)
plt.title('Reward Distribution')
plt.suptitle('')
plt.show()

df.boxplot(column='steps', by='controller', grid=False)
plt.title('Steps Distribution')
plt.suptitle('')
plt.show()

In [None]:
# --- Summary Statistics ---

summary = df.groupby('controller').agg(
    reward_mean=('reward', 'mean'),
    reward_std=('reward', 'std'),
    steps_mean=('steps', 'mean'),
    steps_std=('steps', 'std'),
    success_rate=('success', 'mean')
)

# Display with rounded values for readability
summary.round(2)