#### Build

In [17]:
# One-time environment setup (shell commands, run outside this notebook):
#   1. Upload the GitHub zip file and unzip it.
#   2. Create a Python 3.8 env:         conda create --name cse297 python=3.8.10
#   3. Activate the environment:        source activate cse297
#   4. Install ipykernel in the env:    pip install ipykernel
#   5. Register the kernel w/ Jupyter:  python -m ipykernel install --user --name=capstone
#   6. Change to the package dir:       cd CybORG-Competitive/CybORG/
#   7. Install the package in the env:  pip install -e .
#   8. Select the 'capstone' kernel (the conda env is 'cse297'; the kernel name is 'capstone').
# Confirm the running kernel reports a Python 3.8 interpreter (the env above pins 3.8.10).
import sys
print(sys.version)

3.8.11 (default, Aug  6 2021, 09:57:55) [MSC v.1916 64 bit (AMD64)]


#### Import

In [14]:
import ray
import os, sys, shutil, time
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from time import sleep

# Silence noisy library diagnostics so the per-batch output printed by later cells stays readable.
import warnings
warnings.filterwarnings("ignore", category=DeprecationWarning)
warnings.simplefilter(action='ignore', category=FutureWarning)
import logging
# Suppress every log record of severity WARNING or lower, for all loggers in this process.
logging.disable(logging.WARNING)
# Set before TensorFlow is imported below so its native (C++) logging picks the value up;
# level '3' hides INFO, WARNING and ERROR messages from that layer.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

import tensorflow as tf
# Module-level logger; not used by any cell visible in this notebook.
logger = logging.getLogger(__name__)

#### Verify GPU

In [15]:
# Ask TensorFlow for the GPUs it can see and switch each one to on-demand
# memory growth. If the list is empty the loop is a no-op.
physical_devices = tf.config.list_physical_devices('GPU')
for gpu in physical_devices:
    tf.config.experimental.set_memory_growth(gpu, True)

#### Train Competitive Red Agent

In [None]:
# Load proper agent
from environments import build_blue_agent, build_red_agent, sample

tolerance = 3 # number of batches without improvement before ending training
generations = 100


def _train_until_plateau(agent, label, checkpoint_dir, patience, negate=False, vf_label="VF_loss"):
    """Train ``agent`` one batch at a time until its mean episode reward stops improving.

    The first batch is only reported. From the second batch on, every new best
    reward is checkpointed into ``checkpoint_dir`` and the checkpoint location is
    written to ``<checkpoint_dir>/checkpoint_path``. Training stops at the
    ``patience``-th consecutive batch that fails to improve.

    Args:
        agent: object exposing ``train()`` and ``save(checkpoint_dir=...)`` as used below.
        label: name shown in the progress line (e.g. "Red").
        checkpoint_dir: directory handed to ``agent.save``.
        patience: number of consecutive non-improving batches that ends training.
        negate: when True the displayed/returned score is ``-episode_reward_mean``
            (the blue agent is reported this way), so lower is better for the agent.
        vf_label: text used for the value-loss column of the progress line.

    Returns:
        ``(best_score, best_checkpoint)`` where ``best_score`` uses the same sign
        convention as the printed score.

    Raises:
        RuntimeError: if training ended without a single checkpoint being saved
            (only possible when the reported reward never compares greater than
            ``-inf``, e.g. it is NaN).
    """
    # Start from -inf rather than 0 so the first eligible batch always produces a
    # checkpoint; a 0 baseline left nothing to restore when rewards stayed <= 0.
    best_reward = float('-inf')
    best_checkpoint = None
    remaining = patience
    batch = 0 # batches of training completed
    while True:
        batch += 1
        result = agent.train()
        reward = result["sampler_results"]["episode_reward_mean"]
        learner_stats = result['info']['learner']['default_policy']['learner_stats']
        entropy = learner_stats['entropy']
        vf_loss = learner_stats['vf_loss']
        score = -reward if negate else reward
        print(f"Batch {batch} -- {label} Score: {score:0.2f}    Entropy: {entropy:0.2f}    {vf_label}: {vf_loss:0.2f}")
        if batch == 1:
            # the first batch is reported but never considered for checkpointing
            continue
        if reward > best_reward:
            best_reward = reward
            remaining = patience
            best_checkpoint = agent.save(checkpoint_dir=checkpoint_dir)
            with open(f"{checkpoint_dir}/checkpoint_path", "w") as path_file:
                path_file.write(best_checkpoint)
        elif remaining > 1:
            remaining -= 1
        else:
            # agent is no longer improving
            break

    if best_checkpoint is None:
        raise RuntimeError(f"{label} agent finished training without producing a checkpoint in {checkpoint_dir}")
    best_score = -best_reward if negate else best_reward
    return best_score, best_checkpoint


# Create Initial Policies
ray.init(ignore_reinit_error=True, log_to_driver=False)
blue = build_blue_agent(opponent=True)
red = build_red_agent()

blue_scores = []
red_scores = []

print()
print("+--------------------------------+")
print("| Red Competitive Training Start |")
print("+--------------------------------+")
print()

for g in range(1, generations+1):

    # Size the banner border from the title so it lines up for any number of digits.
    title = f"| Generation {g} |"
    border = '+' + '-'*(len(title) - 2) + '+'
    print(border)
    print(title)
    print(border)
    print()

    # Every generation starts the red agent from the generation-0 checkpoint.
    red.restore("./policies/red_competitive_pool/competitive_red_0/checkpoint_000000")
    red_best, red_checkpoint = _train_until_plateau(
        red, "Red", f"./policies/red_competitive_pool/competitive_red_{g}", tolerance)
    # keep the best-scoring red checkpoint as this generation's agent
    red_scores.append(red_best)
    red.restore(red_checkpoint)
    print(red_checkpoint)

    pool_size = g
    with open("./policies/red_competitive_pool/pool_size", "w") as pool_file:
        pool_file.write(str(pool_size))
    print()

    # Likewise, the blue opponent is retrained from its generation-0 checkpoint.
    blue.restore("./policies/blue_opponent_pool/opponent_blue_0/checkpoint_000000")
    blue_best, blue_checkpoint = _train_until_plateau(
        blue, "Blue", f"./policies/blue_opponent_pool/opponent_blue_{g}", tolerance,
        negate=True, vf_label="VF Loss")
    # keep the lowest-scoring (best for blue) checkpoint as this generation's opponent
    blue_scores.append(blue_best)
    blue.restore(blue_checkpoint)
    print(blue_checkpoint)

    with open("./policies/blue_opponent_pool/pool_size", "w") as pool_file:
        pool_file.write(str(pool_size))
    print()

    print(f'Blue Scores so far {["%.2f" % i for i in blue_scores]}')
    print(f'Red Scores so far {["%.2f" % i for i in red_scores]}')
    print()

    print(f'-------- Sample Game for Generation {g} --------')
    sample(red, blue, verbose=True, show_policy=True)
    print()

#### Evaluate Red MinMax Performance

In [None]:
# Min-max evaluation: play every red agent against every blue opponent and keep,
# for each red agent, the lowest score it obtained. The red agent whose lowest
# score is highest is recorded as the competitive red policy.
r_min = [float('inf')]*(generations+1) # red agent minimum scores (index 0 is unused)
r_min_op = [0]*(generations+1) # id of the blue opponent that held the red agent to its minimum score
# Start from -inf (not 0) so a winner is still selected when every minimum is <= 0.
r_best_score = float('-inf')
r_best_id = 0

# evaluate existing pool of agents
print('Evaluating Agents...')

# The blue checkpoint locations do not depend on the red agent: read each file once.
blue_restore_paths = {}
for b in range(generations,0,-1):
    with open(f"./policies/blue_opponent_pool/opponent_blue_{b}/checkpoint_path", "r") as path_file:
        blue_restore_paths[b] = path_file.read()

# iteration through red agents
for r in range(generations,0,-1):
    with open(f"./policies/red_competitive_pool/competitive_red_{r}/checkpoint_path", "r") as path_file:
        red_restore_path = path_file.read()
    red.restore(red_restore_path)

    # iterate through blue opponents
    for b in range(generations,0,-1):
        blue_restore_path = blue_restore_paths[b]
        blue.restore(blue_restore_path)

        score = sample(red, blue, games=50)
        if score < r_min[r]:
            r_min[r] = score
            r_min_op[r] = b

    print(f'Red Agent {r} expects a minimum of {r_min[r]:0.2f} points, against Blue Opponent {r_min_op[r]}.')
    if r_min[r] > r_best_score:
        r_best_score = r_min[r]
        r_best_id = r


print()
print(f'Top performing Red Agent is generation {r_best_id}')

# Persist the winning checkpoint location for later use.
with open(f"./policies/red_competitive_pool/competitive_red_{r_best_id}/checkpoint_path", "r") as path_file:
    red_competitive_path = path_file.read()
with open("./policies/competitive_red_policy", "w") as path_file:
    path_file.write(red_competitive_path)

ray.shutdown()

#### Observe Exploitability of each Red Policy

In [None]:
# Exploitability of each Red Policy: how far its minimum expected score falls
# short of the best minimum expected score (entry 0 of r_min is skipped).
r_exp = [r_best_score - worst_case for worst_case in r_min[1:]]
print(r_exp)

#### Plot Red Training Scores

In [None]:
# Scores During Training
# Generations are numbered from 1, matching the ids used for the saved policies.
id_plot = list(range(1, len(red_scores)+1))
data_plot = pd.DataFrame({"Generation":id_plot, "Training Score":red_scores, "Blue Scores":blue_scores})
plt.figure()
sns.lineplot(x = "Generation", y = "Training Score", data=data_plot, color='red', label="Red Agent Training Score")
sns.lineplot(x = "Generation", y = "Blue Scores", data=data_plot, color='blue', label="Blue Opponent Score")
plt.show()

# Minmax Evaluation
# r_min[0] is a placeholder, so r_min[1:] holds generations 1..len(r_min)-1. The axis is
# built from the plotted data itself so it is 1-based and always the same length.
id_plot = list(range(1, len(r_min)))
data_plot = pd.DataFrame({"Generation":id_plot, "Min Expected Score":r_min[1:]})
plt.figure()
sns.regplot(x = "Generation", y = "Min Expected Score", data=data_plot, color='red', scatter_kws={'s':5}, label="Red Policy Min Expected Score")
plt.legend()
plt.show()

# Exploitability
# r_exp has one entry per generation, starting at generation 1.
id_plot = list(range(1, len(r_exp)+1))
data_plot = pd.DataFrame({"Generation":id_plot, "Exploitability":r_exp})
plt.figure()
sns.regplot(x = "Generation", y = "Exploitability", data=data_plot, color='red', scatter_kws={'s':5}, label="RedPolicy Exploitability")
plt.legend()
plt.show()

In [None]:
# NOTE(review): this cell repeats the "Evaluate Red MinMax Performance" cell verbatim.
# That cell ends with ray.shutdown(), so running this one afterwards in the same
# session may fail — confirm whether it is still needed or can be deleted.
#
# Min-max evaluation: play every red agent against every blue opponent and keep,
# for each red agent, the lowest score it obtained. The red agent whose lowest
# score is highest is recorded as the competitive red policy.
r_min = [float('inf')]*(generations+1) # red agent minimum scores (index 0 is unused)
r_min_op = [0]*(generations+1) # id of the blue opponent that held the red agent to its minimum score
# Start from -inf (not 0) so a winner is still selected when every minimum is <= 0.
r_best_score = float('-inf')
r_best_id = 0

# evaluate existing pool of agents
print('Evaluating Agents...')

# The blue checkpoint locations do not depend on the red agent: read each file once.
blue_restore_paths = {}
for b in range(generations,0,-1):
    with open(f"./policies/blue_opponent_pool/opponent_blue_{b}/checkpoint_path", "r") as path_file:
        blue_restore_paths[b] = path_file.read()

# iteration through red agents
for r in range(generations,0,-1):
    with open(f"./policies/red_competitive_pool/competitive_red_{r}/checkpoint_path", "r") as path_file:
        red_restore_path = path_file.read()
    red.restore(red_restore_path)

    # iterate through blue opponents
    for b in range(generations,0,-1):
        blue_restore_path = blue_restore_paths[b]
        blue.restore(blue_restore_path)

        score = sample(red, blue, games=50)
        if score < r_min[r]:
            r_min[r] = score
            r_min_op[r] = b

    print(f'Red Agent {r} expects a minimum of {r_min[r]:0.2f} points, against Blue Opponent {r_min_op[r]}.')
    if r_min[r] > r_best_score:
        r_best_score = r_min[r]
        r_best_id = r


print()
print(f'Top performing Red Agent is generation {r_best_id}')

# Persist the winning checkpoint location for later use.
with open(f"./policies/red_competitive_pool/competitive_red_{r_best_id}/checkpoint_path", "r") as path_file:
    red_competitive_path = path_file.read()
with open("./policies/competitive_red_policy", "w") as path_file:
    path_file.write(red_competitive_path)

ray.shutdown()

In [None]:
# NOTE(review): this cell repeats the "Evaluate Red MinMax Performance" cell verbatim.
# That cell ends with ray.shutdown(), so running this one afterwards in the same
# session may fail — confirm whether it is still needed or can be deleted.
#
# Min-max evaluation: play every red agent against every blue opponent and keep,
# for each red agent, the lowest score it obtained. The red agent whose lowest
# score is highest is recorded as the competitive red policy.
r_min = [float('inf')]*(generations+1) # red agent minimum scores (index 0 is unused)
r_min_op = [0]*(generations+1) # id of the blue opponent that held the red agent to its minimum score
# Start from -inf (not 0) so a winner is still selected when every minimum is <= 0.
r_best_score = float('-inf')
r_best_id = 0

# evaluate existing pool of agents
print('Evaluating Agents...')

# The blue checkpoint locations do not depend on the red agent: read each file once.
blue_restore_paths = {}
for b in range(generations,0,-1):
    with open(f"./policies/blue_opponent_pool/opponent_blue_{b}/checkpoint_path", "r") as path_file:
        blue_restore_paths[b] = path_file.read()

# iteration through red agents
for r in range(generations,0,-1):
    with open(f"./policies/red_competitive_pool/competitive_red_{r}/checkpoint_path", "r") as path_file:
        red_restore_path = path_file.read()
    red.restore(red_restore_path)

    # iterate through blue opponents
    for b in range(generations,0,-1):
        blue_restore_path = blue_restore_paths[b]
        blue.restore(blue_restore_path)

        score = sample(red, blue, games=50)
        if score < r_min[r]:
            r_min[r] = score
            r_min_op[r] = b

    print(f'Red Agent {r} expects a minimum of {r_min[r]:0.2f} points, against Blue Opponent {r_min_op[r]}.')
    if r_min[r] > r_best_score:
        r_best_score = r_min[r]
        r_best_id = r


print()
print(f'Top performing Red Agent is generation {r_best_id}')

# Persist the winning checkpoint location for later use.
with open(f"./policies/red_competitive_pool/competitive_red_{r_best_id}/checkpoint_path", "r") as path_file:
    red_competitive_path = path_file.read()
with open("./policies/competitive_red_policy", "w") as path_file:
    path_file.write(red_competitive_path)

ray.shutdown()

In [11]:
# NOTE(review): same computation as the "Observe Exploitability of each Red Policy" cell.
# The saved output of this copy is a NameError on r_min, i.e. it was last run in a
# session where the evaluation cell had not been executed.
#
# Exploitability of each Red Policy: gap between the best minimum expected score
# and that policy's own minimum expected score (entry 0 of r_min is skipped).
r_exp = [r_best_score - policy_min for policy_min in r_min[1:]]
print(r_exp)

NameError: name 'r_min' is not defined

In [12]:
# NOTE(review): same plots as the "Plot Red Training Scores" cell. The saved output of
# this copy is a NameError on red_scores, i.e. it was last run in a session where the
# training cell had not been executed.

# Scores During Training
# Generations are numbered from 1, matching the ids used for the saved policies.
id_plot = list(range(1, len(red_scores)+1))
data_plot = pd.DataFrame({"Generation":id_plot, "Training Score":red_scores, "Blue Scores":blue_scores})
plt.figure()
sns.lineplot(x = "Generation", y = "Training Score", data=data_plot, color='red', label="Red Agent Training Score")
sns.lineplot(x = "Generation", y = "Blue Scores", data=data_plot, color='blue', label="Blue Opponent Score")
plt.show()

# Minmax Evaluation
# r_min[0] is a placeholder, so r_min[1:] holds generations 1..len(r_min)-1. The axis is
# built from the plotted data itself so it is 1-based and always the same length.
id_plot = list(range(1, len(r_min)))
data_plot = pd.DataFrame({"Generation":id_plot, "Min Expected Score":r_min[1:]})
plt.figure()
sns.regplot(x = "Generation", y = "Min Expected Score", data=data_plot, color='red', scatter_kws={'s':5}, label="Red Policy Min Expected Score")
plt.legend()
plt.show()

# Exploitability
# r_exp has one entry per generation, starting at generation 1.
id_plot = list(range(1, len(r_exp)+1))
data_plot = pd.DataFrame({"Generation":id_plot, "Exploitability":r_exp})
plt.figure()
sns.regplot(x = "Generation", y = "Exploitability", data=data_plot, color='red', scatter_kws={'s':5}, label="RedPolicy Exploitability")
plt.legend()
plt.show()

NameError: name 'red_scores' is not defined