# <span style="color:#2E86C1;">Extended Evaluation of DDPG, SAC and PPO</span>

## <span style="color:#E74C3C;">Notebook Purpose</span>

This notebook is designed to visualize key aspects of the trained actor-critic algorithms:

- <span style="color:#E74C3C;"><b>Trajectory Analysis</b></span>  
  - A <span style="color:#8E44AD;">visual representation of trajectories</span> generated by the three actor-critic algorithms.
  - A plot visualising the <span style="color:#8E44AD;">azimuth/elevation alignment</span> of the agent with the target as well as the <span style="color:#8E44AD;">normalised distance to the target</span> over time.
  - A plot visualising the <span style="color:#8E44AD;">smoothness of control inputs</span> (i.e., the change of the L2-norm of the angular position between consecutive timesteps).
  

- <span style="color:#E74C3C;"><b>Termination Flags Distribution</b></span>  
  - Analyze the reasons for episode termination, such as:  
    - Flying <span style="color:#8E44AD;">out of bounds</span> of the flight dome `<out_of_bounds>`  
    - <span style="color:#8E44AD;">Navigating to the target successfully</span> `<env_complete>`  
    - <span style="color:#8E44AD;">Crashing</span> into the ground `<collision>`  
    - Becoming <span style="color:#8E44AD;">unstable due to exceeding angular position</span> thresholds along any of the 3 dimensions `<unstable>`
    

- <span style="color:#E74C3C;"><b>Waypoint Success Visualization</b></span>  
  - Identify which waypoints the agents successfully navigate to.


<span style="color:#E67E22; font-size:18px; font-weight:bold;">
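⚠ WARNING: Executing the code below will DELETE the existing "angular_control" and "thrust_control" visualization directories inside `save_dir` (e.g. the "VisualsBA" directory, or the current working directory if `save_dir` is empty)!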
</span>  

- If you want to **rerun the visualizations without deleting previous results**, change the variable **`save_dir`** to a different directory.
- Double-check before running to **avoid unintentional data loss**.


## <span style="color:#27AE60;">Imports:</span>

In [1]:
import numpy as np
from tqdm import tqdm
from stable_baselines3 import PPO, DDPG, SAC
import gymnasium as gym
import os 
import shutil

from Evaluation.vis_model import aggregate_eval, plot_multiple_eval, plotly_vector_field, id_nav_failures, plot_flags

## <span style="color:#27AE60;">Load best models for angular- and thrust-control:</span>

In [2]:
num_eps = 2_000  # The number of episodes to evaluate each configuration on
env_id = "SingleWaypointQuadXEnv-v0"

# Load the best trained models (one angular-control and one thrust-control model per algorithm).
# NOTE(review): `deterministic=True` is forwarded to `load()` as an extra keyword argument. In
# Stable-Baselines3, action determinism is normally selected per call via
# `model.predict(..., deterministic=True)` -- confirm that the evaluation helpers do so.
ddpg_angular_path = "../../models/angular_control/ddpg_angular_best"
ddpg_thrust_path = "../../models/thrust_control/ddpg_thrust_best"
ddpg_angular = DDPG.load(ddpg_angular_path, deterministic=True)
ddpg_thrust = DDPG.load(ddpg_thrust_path, deterministic=True)

ppo_angular_path = "../../models/angular_control/ppo_angular_best"
ppo_thrust_path = "../../models/thrust_control/ppo_thrust_best"
ppo_angular = PPO.load(ppo_angular_path, deterministic=True)
ppo_thrust = PPO.load(ppo_thrust_path, deterministic=True)

sac_angular_path = "../../models/angular_control/sac_angular_best"
sac_thrust_path = "../../models/thrust_control/sac_thrust_best"
sac_angular = SAC.load(sac_angular_path, deterministic=True)
sac_thrust = SAC.load(sac_thrust_path, deterministic=True)

# All models; `model_names[i]` labels `models[i]` and follows "<algorithm>_<control mode>".
models = [ddpg_angular, ddpg_thrust, ppo_angular, ppo_thrust, sac_angular, sac_thrust]
model_names = ["ddpg_angular", "ddpg_thrust", "ppo_angular", "ppo_thrust", "sac_angular", "sac_thrust"]

# Root directory for all generated figures, e.g. "../../VisualsBA/".
save_dir = ""  # Current directory
# Later cells build paths as `save_dir + "<sub path>"`, so a non-empty value must end with a path
# separator. os.path.join(x, "") appends one only when it is missing and leaves "" untouched.
save_dir = os.path.join(save_dir, "")

# (Re)create the subdirectories that hold trajectories and termination flags.
# WARNING: any existing content of these directories is deleted.
directories = ["angular_control", "thrust_control"]
for directory in directories:
    target_dir = os.path.join(save_dir, directory)
    if os.path.exists(target_dir):
        shutil.rmtree(target_dir)
    os.makedirs(target_dir)

# Filled by the plotting cell: per-model termination flags, together with the mean rewards and
# episode lengths of successful navigations.
complete_term_flags = {}

## <span style="color:#27AE60;">Generate plots:</span>

In [None]:
# Only a tenth of the evaluation budget is plotted episode by episode; the full `num_eps`
# budget is used for the termination statistics at the end of each iteration.
num_plot_eps = num_eps // 10

for model_name, model in zip(model_names, models):
    # Names follow "<algorithm>_<control mode>", e.g. "ddpg_angular".
    algo_name, ctrl_mode = model_name.split("_")

    # Thrust-control models are evaluated with flight_mode=-1, all others with flight_mode=1.
    # NOTE(review): `env` is never closed in this cell; consider `env.close()` at the end of the
    # iteration once it is confirmed that the evaluation helpers do not close it themselves.
    env = gym.make(env_id, render_mode=None, reward_shift=0.75, flight_mode=-1 if "thrust" in model_name else 1)
    result, _, _ = aggregate_eval(model, env, num_plot_eps, render=False, include_waypoints=True)

    trajectory_root = save_dir + f"{ctrl_mode}_control/trajectories/{algo_name}/"

    for ep in tqdm(range(num_plot_eps)):
        # Sort the figures by whether the last step of the episode reported `env_complete`.
        outcome = "successful" if result["env_complete"][ep][-1] else "failed"
        path = trajectory_root + f"{outcome}/"

        # 3D trajectory with velocity vectors and the target waypoint(s).
        linear_positions = result["linear_position"][ep]
        linear_velocities = result["linear_velocity"][ep]
        target_vector = result["waypoints"][ep]

        plotly_vector_field(linear_positions, linear_velocities, target_vector, size=40.0, save_path=path + f'{ep}_{algo_name}_traj')

        # Smoothness of the control inputs over the episode.
        smoothness_result = {
            "smoothness": [result["smoothness"][ep]]
        }

        plot_multiple_eval(smoothness_result, average=False, title="Smoothness of control inputs over time", save_path=path + f'{ep}_{algo_name}_smoothness')

        # Min-max normalise the distance to [0, 1] so it shares an axis with the angles.
        # A constant distance (e.g. a single-step episode) has zero range; dividing by it would
        # produce NaN/inf values, so such an episode is plotted as all zeros instead.
        distance = np.asarray(result["distance_to_target"][ep], dtype=float)
        distance_min = np.min(distance)
        distance_range = np.max(distance) - distance_min
        if distance_range > 0:
            distance = (distance - distance_min) / distance_range
        else:
            distance = np.zeros_like(distance)

        angles_result = {
            "azimuth": [result["azimuth_angle"][ep]],
            "elevation": [result["elevation_angle"][ep]],
            "distance": [distance]
        }

        plot_multiple_eval(angles_result, average=False, title="Azimuth and Elevation Angles over time", save_path=path + f'{ep}_{algo_name}_angles')

    # Attach the environment to the model before collecting the termination statistics.
    model.set_env(env)

    term_flags, mean_episode_length, mean_rewards = id_nav_failures(model, num_eps=num_eps, save_path=save_dir + f"{ctrl_mode}_control/trajectories/error_vis/{algo_name}/")

    # Store the flags together with the reward / episode-length statistics under the model name.
    complete_term_flags[model_name] = term_flags
    complete_term_flags[model_name]["rewards"] = mean_rewards
    complete_term_flags[model_name]["num_steps"] = mean_episode_length

## <span style="color:#27AE60;">Save termination flags:</span>

In [4]:
# Plot the collected termination flags, one figure per control mode, into the
# matching output directory.
fig_thrust, fig_angular = plot_flags(
    term_flags=complete_term_flags,
    save_path_angular=save_dir + "angular_control/",
    save_path_thrust=save_dir + "thrust_control/",
)

Figures saved at angular_control/term_flags_angular.pdf
Figures saved at thrust_control/term_flags_thrust.pdf


## <span style="color:#27AE60;">Print overall performance:</span>

In [None]:
# Print one summary block per evaluated model.
for model_key, flags in complete_term_flags.items():
    print("############################################")
    print(f'Model: {model_key}')
    # Share of the `num_eps` evaluation episodes that ended with `env_complete`.
    print(f'Successful episodes: {flags["env_complete"] / num_eps}')
    print(f'Rewards for successful episodes: {np.mean(flags["rewards"])}')
    # Steps are converted to seconds: the refresh rate of the simulator is 30.0 Hz.
    print(f'Average episode length(s): {np.mean(flags["num_steps"]) / 30.0}')