## **Comparison Notebook between a PID controller and the RL (PPO) Policy Network on the SinusoidLaneEnv**

In [1]:
import os, torch
import numpy as np
from collections import deque
from networks import ActorNet
from custom_env import SinusoidLaneEnv
from ppo import evaluate

### PID Controller Implementation

For a PID controller:

$u_t = f_p(e_t) + f_i(e_t) + f_d(e_t)$

Where:
$e_t = y_t - \hat{y}_t$

$f_p(e_t) = k_p \cdot e_t$

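$f_i(e_t) = k_i \cdot \left(\sum_{j=t-h+1}^{t} e_j\right) \cdot dt$, where $h$ is the integral horizon (the number of most recent errors that are summed)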

$f_d(e_t) = k_d \cdot \frac {e_t - e_{t-1}} {dt}$

$u_t$ is the estimated control input at time $t$, $e_t$ is the error at time $t$, $y_t$ is the setpoint at time $t$ and $\hat{y}_t$ is the output from the estimated control input at time $t$



In [2]:
class PID:
    """Discrete PID controller with a windowed (finite-horizon) integral term.

    The control signal is ``u = kp * e + ki * sum(recent errors) * dt
    + kd * (e - previous e) / dt``, where "recent errors" are the last
    ``integral_horizon`` errors, including the current one.

    The controller is stateful: ``integral`` and ``derivative`` each update
    the stored error history when called, so call ``pid`` exactly once per
    time step (or call ``reset`` before starting a new episode).

    Args:
        kp: Proportional gain.
        ki: Integral gain.
        kd: Derivative gain.
        dt: Time (or distance) step between consecutive errors; must be > 0.
        integral_horizon: Maximum number of past errors kept for the
            integral term.

    Raises:
        ValueError: If ``dt`` is not strictly positive (this includes NaN).
    """

    def __init__(
            self, 
            kp: float, 
            ki: float, 
            kd: float, 
            dt: float, 
            integral_horizon: int=10):

        # The derivative term divides by dt. With NumPy scalars a zero step
        # would silently produce inf/NaN controls instead of failing, so the
        # step is rejected up front.
        if not dt > 0:
            raise ValueError(f"dt must be a positive number, got {dt!r}")

        self.kp = kp
        self.ki = ki
        self.kd = kd
        self.dt = dt
        # Bounded buffer: once full, appending drops the oldest error.
        self.__past_errors = deque(maxlen=integral_horizon)
        # The previous error starts at 0, so the first derivative term is
        # kd * e / dt.
        self.__last_e = 0

    def reset(self) -> None:
        """Forget all stored errors so the controller can be reused."""
        self.__past_errors.clear()
        self.__last_e = 0

    def proportional(self, e: float):
        """Return the proportional term ``kp * e`` (does not touch state)."""
        return self.kp * e
    
    def integral(self, e: float):
        """Record ``e`` and return ``ki`` times the windowed error sum times ``dt``."""
        self.__past_errors.append(e)
        ie = np.sum(self.__past_errors) * self.dt
        return self.ki * ie
    
    def derivative(self, e: float):
        """Return ``kd * (e - previous e) / dt`` and store ``e`` as the previous error."""
        de = (e - self.__last_e) / self.dt
        self.__last_e = e
        return self.kd * de
    
    def pid(self, e: float):
        """Advance the controller by one step and return the control signal.

        Args:
            e: Current error (setpoint minus measured output).

        Returns:
            The sum of the proportional, integral and derivative terms.
        """
        p = self.proportional(e)
        i = self.integral(e)
        d = self.derivative(e)
        u =  p + i + d
        return u

### **PID Controller Evaluation**

In [3]:
# Roll out one episode of SinusoidLaneEnv under PID control and report the
# accumulated reward.
SEED = 0
rng = np.random.default_rng(SEED)  # seeded so the initial action is reproducible

env = SinusoidLaneEnv()

# Controller gains; the controller step is taken from the environment's dx.
kp = 2
ki = 1
kd = 1
dt = env.dx

# The first action is random because the setpoint is only read from the
# `info` returned by env.step, i.e. after one step has been taken.
u = rng.standard_normal(1)
done = False

# NOTE(review): only the initial action is seeded here; if env.reset() is
# itself stochastic the reward can still vary between runs. Confirm against
# SinusoidLaneEnv.
state, info = env.reset()
controller = PID(kp, ki, kd, dt, integral_horizon=20)
total_reward = 0

try:
    while not done:
        state, reward, terminate, truncate, info = env.step(u)
        env.render()
        done = terminate or truncate
        total_reward += reward

        # Error is the mean difference between the setpoint and the state.
        setpoint = info["setpoint"]
        e = (setpoint - state).mean()

        # The environment is stepped with a 1-element array, so wrap the
        # scalar control signal.
        u = controller.pid(e)
        u = np.array([u])
finally:
    # Close the environment even if a step or render call fails.
    env.close()

print(f"PID controller reward: {total_reward}")


PID controller reward: 105.55127744306726


### **RL (PPO) Policy Evaluation**

In [4]:
# Load the saved PPO policy network and evaluate it on SinusoidLaneEnv.
# NOTE(review): hidden_size presumably has to match the value used when the
# checkpoint was trained, since the state dict is loaded into this network.
hidden_size = 128

# This environment instance is only used to read the space dimensions.
train_env = SinusoidLaneEnv()
# Despite their names, these hold the first dimension (an int) of each space.
# The public `shape` property is used instead of the private `_shape` field.
obs_space = train_env.observation_space.shape[0]
action_space = train_env.action_space.shape[0]

policy_agent = ActorNet(obs_space, action_space, hidden_size)

# NOTE(review): torch.load unpickles the file, so only load checkpoints from
# a trusted source (consider weights_only=True if the installed torch
# version supports it).
checkpoint_path = os.path.join("params", "policy.pth.tar")
policy_agent.load_state_dict(torch.load(checkpoint_path, map_location="cpu"))
policy_agent.eval()  # switch the network to evaluation mode

test_env = SinusoidLaneEnv()
total_reward, _ = evaluate(test_env, policy_agent, render_env=True, close_env=True)
print(f"RL (PPO) policy reward: {total_reward}")


RL (PPO) controller reward: 121.19168768178076
