In [7]:
import numpy as np
import gymnasium as gym
from gymnasium.wrappers import GrayscaleObservation, ResizeObservation, RecordEpisodeStatistics, RecordVideo, TimeLimit
from stable_baselines3 import DQN, PPO
from stable_baselines3.common.vec_env import DummyVecEnv, SubprocVecEnv
from stable_baselines3.common.callbacks import EvalCallback, CheckpointCallback
import os
import gc
from eval import *
from custom_cr import EnhancedCarRacing

### 3.1. Task Specific Metrics

### 3.2. Robustness and Adaptability

1. Function Definition:
   The function `evaluate_robustness` takes a trained model, an environment, and some parameters for evaluation (number of episodes, noise standard deviation, and perturbation probability).

2. Results Structure:
   It initializes a dictionary `results` to store rewards for three different robustness scenarios: observation noise, environment perturbations, and different initial states.

3. Robustness to Observation Noise:
   - It runs `num_episodes` episodes with added Gaussian noise to the observations.
   - For each step, it adds noise to the observation before predicting an action.
   - It accumulates the total reward for each episode and stores it in `noise_rewards`.

4. Robustness to Environment Perturbations:
   - It runs another set of episodes, this time applying random perturbations to the environment.
   - With probability `perturbation_prob`, it adds uniform random noise to the observation.
   - It accumulates the total reward for each episode and stores it in `perturbation_rewards`.

5. Robustness to Different Initial States:
   - It runs a final set of episodes, each time resetting the environment with random initial conditions.
   - It uses a custom method `set_random_initial_conditions()` (which should be implemented in the environment) to vary the starting state.
   - It accumulates the total reward for each episode and stores it in `initial_state_rewards`.

6. Results Computation and Output:
   - For each robustness scenario, it calculates and prints the mean and standard deviation of the rewards.
   - Finally, it returns the `results` dictionary containing all the collected rewards.

This function is designed to evaluate how well the trained model performs under different types of perturbations and variations, which is crucial for assessing the robustness and generalization capabilities of the reinforcement learning agent.

In [None]:
def _run_episode(model, env, transform_obs=None):
    """
    Run a single episode and return its total (undiscounted) reward.

    Args:
        model: Object exposing `.predict(obs, deterministic=True)` whose first
            return element is the action.
        env: Environment following the Gymnasium API, i.e. `reset()` returns
            `(obs, info)` and `step()` returns
            `(obs, reward, terminated, truncated, info)`.
        transform_obs (callable, optional): Applied to every observation right
            before it is handed to the policy. The environment itself is not
            modified. Defaults to None (policy sees the raw observation).

    Returns:
        The sum of the rewards collected until the episode terminates or is
        truncated.
    """
    obs, _ = env.reset()
    total_reward = 0
    done = False
    while not done:
        policy_obs = obs if transform_obs is None else transform_obs(obs)
        action = model.predict(policy_obs, deterministic=True)[0]
        obs, reward, terminated, truncated, _ = env.step(action)
        total_reward += reward
        # An episode ends on termination OR truncation (e.g. a time limit);
        # checking only `terminated` can loop forever on truncated episodes.
        done = terminated or truncated
    return total_reward


def _find_env_hook(env, name):
    """Return the callable `name` from `env` or `env.unwrapped`, else None."""
    hook = getattr(env, name, None)
    if hook is None:
        # Wrappers do not necessarily forward custom attributes, so also look
        # at the innermost environment when one is exposed.
        hook = getattr(getattr(env, "unwrapped", None), name, None)
    return hook if callable(hook) else None


def evaluate_robustness(model, env, num_episodes=10, noise_std=0.1, perturbation_prob=0.1):
    """
    Evaluate the robustness of a trained model under various challenging conditions.

    Three scenarios are run one after another, `num_episodes` episodes each:

    1. Observation noise: zero-mean Gaussian noise is added to every
       observation before the policy sees it.
    2. Random perturbations: with probability `perturbation_prob` per step,
       uniform noise in [-0.5, 0.5) is added to the observation before the
       policy sees it.
    3. Different initial states: `set_random_initial_conditions()` is invoked
       on the environment before each episode when the environment provides
       it; otherwise a warning is issued and only `reset()` is used.

    The mean and standard deviation of the rewards of each scenario are
    printed before returning.

    Args:
        model (BaseAlgorithm): The trained model to evaluate. Should support `.predict()` for action selection.
        env (gym.Env): The environment in which the model will be tested.
        num_episodes (int, optional): The number of episodes to run for each robustness scenario. Defaults to 10.
        noise_std (float, optional): Standard deviation of Gaussian noise added to observations. Defaults to 0.1.
        perturbation_prob (float, optional): Probability of applying random perturbations to observations. Defaults to 0.1.

    Returns:
        dict: A dictionary with keys:
            - "noise_rewards": List of total rewards for episodes with noisy observations.
            - "perturbation_rewards": List of total rewards for episodes with random perturbations.
            - "initial_state_rewards": List of total rewards for episodes starting from diverse initial states.
        Each list includes rewards from `num_episodes` episodes.
    """
    import warnings

    results = {
        "noise_rewards": [],
        "perturbation_rewards": [],
        "initial_state_rewards": []
    }

    # NOTE(review): both noise magnitudes are expressed in raw observation
    # units; for observations on a 0-255 pixel scale they may be negligible.
    # Confirm the intended scale.
    def add_gaussian_noise(obs):
        return obs + np.random.normal(0, noise_std, obs.shape)

    def maybe_perturb(obs):
        if np.random.random() < perturbation_prob:
            return obs + np.random.uniform(-0.5, 0.5, obs.shape)
        return obs

    # Evaluate robustness to observation noise
    for _ in range(num_episodes):
        results["noise_rewards"].append(_run_episode(model, env, add_gaussian_noise))

    # Evaluate robustness to random perturbations. The perturbation is applied
    # BEFORE the action is chosen; perturbing afterwards has no effect because
    # the observation is replaced by the result of `env.step`.
    for _ in range(num_episodes):
        total_reward = _run_episode(model, env, maybe_perturb)
        print(f"Perturbation episode: Total Reward = {total_reward}")
        results["perturbation_rewards"].append(total_reward)

    # Evaluate robustness across different initial states
    randomize = _find_env_hook(env, "set_random_initial_conditions")
    if randomize is None and num_episodes > 0:
        warnings.warn(
            "Environment does not provide set_random_initial_conditions(); "
            "initial states will only vary as much as env.reset() varies them.",
            stacklevel=2,
        )
    for _ in range(num_episodes):
        if randomize is not None:
            env.reset()
            # Set custom initial conditions (e.g., random car position).
            # NOTE(review): the episode starts with another reset() below, as
            # in the original code; whether the randomized conditions survive
            # that reset depends on the environment implementation - confirm.
            randomize()
        total_reward = _run_episode(model, env)
        print(f"Initial State episode: Total Reward = {total_reward}")
        results["initial_state_rewards"].append(total_reward)

    # Report mean and standard deviation for all scenarios
    for key, episode_rewards in results.items():
        rewards = np.array(episode_rewards)
        print(f"{key}: Mean = {rewards.mean()}, Std Dev = {rewards.std()}")
    return results


#### 3.2.1 Baseline - DQN

In [14]:
# Load the saved baseline DQN model from the best-model directory.
# NOTE(review): the recorded output of this cell shows
# "Exception: code expected at most 16 arguments, got 18" twice. Presumably
# pickled objects inside the archive were saved under a different Python
# version than the one loading them - confirm, and consider passing
# `custom_objects` to `load` to override them.
# NOTE(review): `allow_pickle` does not appear to be a documented parameter of
# `DQN.load` - confirm that it has any effect here.
best_model_baseline_dqn = DQN.load('./best_model/best_model_2.1.1.zip', allow_pickle=True)

Exception: code expected at most 16 arguments, got 18
Exception: code expected at most 16 arguments, got 18


In [9]:
# Create the stock CarRacing-v3 environment with `continuous=False`.
# NOTE(review): evaluate_robustness calls `set_random_initial_conditions()` on
# the environment; nothing here shows that the stock environment provides that
# method - confirm, or pass the custom environment instead.
env = gym.make("CarRacing-v3", continuous=False)

In [None]:
# Run all three robustness scenarios (10 episodes each) for the baseline DQN
# model; per-scenario mean and standard deviation are printed by the function.
robustness_results_baseline_dqn = evaluate_robustness(best_model_baseline_dqn, env, num_episodes=10)

In [None]:
# Display the raw per-episode reward lists returned for each scenario.
robustness_results_baseline_dqn

#### 3.2.2 Baseline - PPO

#### 3.2.3 Custom Environment - DQN

#### 3.2.4 Custom Environment - PPO