In [1]:
import gym
from stable_baselines3 import A2C
from stable_baselines3 import PPO
from stable_baselines3.ppo.policies import CnnPolicy
from stable_baselines3.ppo.policies import MlpPolicy
from utils.adversary_env import AdversaryEnv
from stable_baselines3.common.evaluation import evaluate_policy
from stable_baselines3.common.monitor import Monitor

# Restore the pre-trained A2C agent from disk; this is the model the adversary is run against below
target_model = A2C.load(path="../a2c/model/lunarlander_v2_a2c_3M_to_11M")




In [9]:
# Baseline: score the target model with no attack applied, over 100 evaluation episodes.
# The env is wrapped with Monitor - for info on why, check the evaluate_policy function.
eval_env = Monitor(gym.make('LunarLander-v2'))
try:
    mean_reward, std_reward = evaluate_policy(target_model, eval_env, n_eval_episodes=100)
finally:
    # Release the environment even if evaluation raises, so re-running the cell does not leak envs
    eval_env.close()
print(f"mean_reward: {mean_reward:.2f} +/- {std_reward:.2f}")

mean_reward: 246.88 +/- 69.14


In [3]:
# Create adversary with adversary env (disabled: the adversary and its env are created in the "Load adversary" cell instead)
# adversary_env = AdversaryEnv("LunarLander-v2", target_model)
# adversary = PPO(MlpPolicy, adversary_env, verbose=1)

In [4]:
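# Train adversary (disabled: a saved 50000-timestep adversary is loaded in the "Load adversary" cell; uncomment to retrain)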
# log_dir = "./out/training-log/"
# log_path = log_dir + f"adversary-lunarlander-v2/"
# save_dir = "./models/adversary-lunarlander-v2/"

# timesteps = 50000

# adversary.learn(
#         total_timesteps=timesteps
#         )

# # Save adversary
# adversary.save(save_dir + f"adversary_lunarlander_v2_{timesteps}")

In [5]:
# Load the saved PPO adversary and build the adversary environment around the target model
save_dir = "./models/adversary-lunarlander-v2/"
log_dir = "./out/training-log/"
log_path = f"{log_dir}adversary-lunarlander-v2/"

# The file name matches the 50000-timestep run saved by the (commented-out) training cell
adversary = PPO.load(f"{save_dir}adversary_lunarlander_v2_50000")
adversary_env = AdversaryEnv('LunarLander-v2', target_model)

In [8]:
import numpy as np
from uniform_attack import UniformAttack

# Evaluate uniform adversarial attack
#
# Runs `n_episodes` independent attack episodes against `target_model`, using
# `adversary` as the attack policy, and reports the mean of three per-episode
# statistics read from the UniformAttack object after `perform_attack()`:
# `reward_total`, `perturbation_total` and `frames_count`.

# Tunable settings for this experiment
n_episodes = 100
epsilon = 0.25  # passed straight to UniformAttack; presumably the perturbation budget - confirm in uniform_attack.py

all_episodes_rewards = []
all_episodes_perturbation = []
all_episodes_timesteps = []

for episode in range(n_episodes):
    # Run attack on a2c model, using a fresh environment for every episode
    env = gym.make("LunarLander-v2")
    try:
        ua = UniformAttack(env=env, model=target_model, attack=adversary, epsilon=epsilon)
        ua.perform_attack()
        all_episodes_rewards.append(ua.reward_total)
        all_episodes_perturbation.append(ua.perturbation_total)
        all_episodes_timesteps.append(ua.frames_count)
        ua.reset_attack()
    finally:
        # Close the per-episode env so the loop does not leave n_episodes open environments behind
        env.close()

print(f"Mean reward achieved over {n_episodes} episodes: {np.mean(all_episodes_rewards):.2f}")
print(f"Mean perturbation applied over {n_episodes} episodes: {np.mean(all_episodes_perturbation):.2f}")
print(f"Mean number of timesteps over {n_episodes} episodes: {np.mean(all_episodes_timesteps):.2f}")

Mean reward achieved over 100 episodes: -98.00
Mean perturbation applied over 100 episodes: 20.41
Mean number of timesteps over 100 episodes: 82.44


### Todo

- Run the uniform attack with different epsilon values
- Implement other attack strategies (e.g. a strategically timed attack)