# Plot results for Aintelope
Run these blocks for all tests, then scroll to the title you're interested in:


In [4]:
%%capture
import pandas as pd
from matplotlib import pyplot as plt
import numpy as np
from matplotlib import cm
import matplotlib.collections as mcoll
import matplotlib.path as mpath

from torch import Tensor

import dateutil.parser as dparser
import sys
import os

import seaborn as sns

from aintelope.training.lightning_trainer import DQNLightning
from aintelope.agents.memory import ReplayBuffer
import aintelope.agents
from aintelope.agents import get_agent_class
#from aintelope.agents.inference_agent import InferenceAgent
from aintelope.environments.savanna_gym import SavannaGymEnv
from omegaconf import DictConfig, OmegaConf
from hydra import initialize, initialize_config_module, initialize_config_dir, compose
import hydra
from hydra.core import global_hydra

In [5]:
# The repository root is two directory levels above the notebook's working
# directory.
root_dir = os.path.dirname(os.path.dirname(os.getcwd()))

# Make the repository importable; skip the append when the entry already
# exists so re-running the cell does not add duplicates.
if sys.path.count(root_dir) == 0:
    sys.path.append(root_dir)

In [19]:
# Every entry under <root>/outputs/ is treated as one recorded training run.
outputs_dir = f"{root_dir}/outputs/"
available_records = os.listdir(outputs_dir)
print("Existing training runs", available_records)

# Full paths of the runs, ordered by modification time (oldest first).
dirs = sorted(
    (os.path.join(outputs_dir, record) for record in available_records),
    key=os.path.getmtime,
)

# Drop any Hydra instance left over from a previous execution of this cell
# before initialising it again.
global_hydra.GlobalHydra.instance().clear()
# Config name is resolved relative to config_path below
# (alternative kept for reference: OmegaConf.load on
#  root_dir+'/aintelope/config/config_experiment.yaml').
conf_dir = 'config_experiment.yaml'
with initialize(version_base=None, config_path="../config"):
    # Unmodified experiment config.
    cfg = compose(config_name=conf_dir, overrides=[])
    # Baseline: plain q_agent without target instincts.
    cfg_base = compose(
        config_name=conf_dir,
        overrides=["hparams.agent_id=q_agent","hparams.agent_params.target_instincts=[]"],
    )
    # Instinct agent using the 'smell' instinct.
    cfg_inst = compose(
        config_name=conf_dir,
        overrides=["hparams.agent_id=instinct_agent","hparams.agent_params.target_instincts=['smell']"],
    )

print(cfg_inst)
print(cfg)
# Everything below needs at least one recorded run.
assert dirs, "No trainings have been run! make run-training* first"

Existing training runs []
{'timestamp': '${now:%Y%m%d%H%M%S}', 'experiment_name': 'hunger', 'experiment_dir': 'outputs/${experiment_name}_${timestamp}/', 'trainer_params': {'resume_from_checkpoint': False, 'num_workers': 4, 'max_epochs': 10, 'checkpoint': '${experiment_dir}/checkpoints/', 'device': 'cpu', 'verbose': False}, 'hparams': {'batch_size': 16, 'lr': 0.001, 'env': 'savanna-gym-v2', 'env_entry_point': 'aintelope.environments.savanna_gym:SavannaGymEnv', 'env_type': 'gym', 'model': 'dqn', 'agent_id': 'instinct_agent', 'gamma': 0.99, 'sync_rate': 10, 'replay_size': 99, 'warm_start_size': 100, 'eps_last_frame': 1000, 'eps_start': 1.0, 'eps_end': 0.01, 'episode_length': 1010, 'warm_start_steps': 100, 'log_figures_every_n_epochs': 5, 'every_n_epochs': 3, 'env_params': {'num_iters': 1000, 'map_min': 0, 'map_max': 5, 'render_mode': None, 'render_map_max': 5, 'amount_agents': 1, 'amount_grass_patches': 1, 'amount_water_holes': 0}, 'agent_params': {'target_instincts': ['smell']}}}
{'time

AssertionError: No trainings have been run! make run-training* first

# Old plots

In [None]:
# `dirs` is sorted by modification time in ascending order, so the most
# recently modified run is the LAST element (index 0 would be the oldest run).
latest_exp_dir = dirs[-1]
print(latest_exp_dir)
# Fuzzy-parse a date/time out of the run's path and show it.
print(dparser.parse(latest_exp_dir,fuzzy=True))
# Recorded memory of that run, one CSV row per stored record.
df = pd.read_csv(latest_exp_dir+"/memory_records.csv")
print(df.columns)

In [None]:
%%capture
# WIP, plot what the agent sees (needs changes to InferenceAgent etc.)
# Check action values per location. Now expected reward for moving into
# location, but could also be eating in any location, or of course mapping
# where the food/agents are.
#
# (previously: cfg = OmegaConf.load(conf_dir_base))

# Build the environment from the experiment config; reset() doubles as its
# initialisation.
env = SavannaGymEnv(env_params=cfg.hparams.env_params)
env.reset()

# Restore the network from the last checkpoint of the selected run.
checkpoint_file = f"{latest_exp_dir}/checkpoints/last.ckpt"
model = DQNLightning.load_from_checkpoint(checkpoint_file)
# Evaluation mode: disables randomness, dropout, etc.
model.eval()

In [None]:
from collections import namedtuple

# Field names of a recorded state: the agent first, then one entry per grass
# patch and per water hole configured in the environment.
n_grass = env.metadata["amount_grass_patches"]
n_water = env.metadata["amount_water_holes"]
keys = ["agent_coords"]
keys += [f"grass_patch_{i}" for i in range(n_grass)]
keys += [f"water_hole_{i}" for i in range(n_water)]
StateTuple = namedtuple("StateTuple", keys)

# Agent trajectory.
x, y = [], []
# Positions and descriptions of recorded instinct events.
event_x, event_y, event_type = [], [], []
# Position of the first grass patch at every record.
food_x, food_y = [], []
# Reserved for water holes; nothing is appended to these yet.
water_x, water_y = [], []

for _, record in df.iterrows():
    # NOTE(review): eval() executes whatever text is stored in the CSV --
    # only use this on memory_records.csv files produced by trusted runs.
    state = eval(record['state'])

    agent_pos = state[0]
    x.append(agent_pos[0])
    y.append(agent_pos[1])

    # TODO(refactor): only the first grass patch (state[1]) is collected;
    # further patches (state[2], ...) are ignored.
    grass_pos = state[1]
    food_x.append(grass_pos[0])
    food_y.append(grass_pos[1])

    # '[]' is the stored text of an empty event list.
    if record['instinct_events'] == '[]':
        continue
    event_x.append(x[-1])
    event_y.append(y[-1])
    event_type.append(record['instinct_events'])

In [None]:
# Agent trajectory, one row per parsed memory record.
agent_df = pd.DataFrame({'x': x, 'y': y})
print(agent_df.head(), len(agent_df))

# Position of the first grass patch for each parsed memory record.
food_df = pd.DataFrame({'x': food_x, 'y': food_y})
print(food_df.head(), len(food_df))

# Water holes are not collected yet:
#water_df = pd.DataFrame(data={'x':water_x, 'y':water_y})
#print(water_df.head(), len(water_df))

In [None]:
# Records whose `instinct_events` entry was not the empty list '[]', with the
# agent position at that record.
event_df = pd.DataFrame({'x': event_x, 'y': event_y, 'event_type': event_type})
print(len(event_df), event_df.head(), sep="\n")

In [None]:
# Agent path: red line with a dot at every recorded position.
path_x, path_y = agent_df['x'], agent_df['y']
plt.plot(path_x, path_y, '.r-')
# Food positions as large green dots.
grass_x, grass_y = food_df['x'], food_df['y']
plt.plot(grass_x, grass_y, '.g', markersize=15)
#plt.plot(water_df['x'], water_df['y'], '.b', markersize=15)
plt.show()

In [None]:
# Plot the `reward` column of the loaded memory records against the
# DataFrame index, i.e. the reward received over time.
df['reward'].plot()


In [None]:
# Draw the agent's path as consecutive two-point segments whose colour follows
# the colormap, so the direction of travel over time is visible.
color_map = 'autumn' # starts yellow, goes orange, then red
n_points = len(agent_df)
# A path with N points has N-1 segments (none for an empty/one-point path).
n_segments = max(n_points - 1, 0)

fig = plt.figure()
ax1 = fig.add_subplot(111)
# Named `cmap` so the `cm` module imported from matplotlib is not overwritten.
cmap = plt.get_cmap(color_map)
if n_segments:
    # One colour per segment; skipped when there is nothing to draw so the
    # property cycle is never given an empty colour list.
    ax1.set_prop_cycle('color', cmap(np.linspace(0, 1, n_segments, endpoint=False)))
# Each segment is drawn exactly once (the path used to be redrawn ten times
# with identical colours, which only added duplicate artists).
for i in range(n_segments):
    ax1.plot(agent_df['x'].iloc[i:i+2], agent_df['y'].iloc[i:i+2])
ax1.plot(food_df['x'], food_df['y'], '.g', markersize=15)
#plt.plot(water_df['x'], water_df['y'], '.b', markersize=15)
plt.show()

# Valuemaps for actions
Note: these maps are newly generated and do not correspond to the plots above, because the environment is randomly regenerated.

In [None]:
%%capture
# WIP, plot what the agent sees (needs changes to InferenceAgent etc.)
# Check action values per location. Now expected reward for moving into
# location, but could also be eating in any location, or of course mapping
# where the food/agents are.
#
# (previously: cfg = OmegaConf.load(conf_dir))

# Fresh environment built from the experiment config; reset() also
# initialises it.
env_settings = cfg.hparams.env_params
env = SavannaGymEnv(env_params=env_settings)
env.reset()

# Network weights come from the last checkpoint of the selected run.
last_checkpoint = latest_exp_dir + "/checkpoints/last.ckpt"
model = DQNLightning.load_from_checkpoint(last_checkpoint)
# Switch to evaluation mode (no randomness, dropout, etc.).
model.eval()

In [None]:
# Move the agent into each square, ask the model for its value of every
# action, and write each action's value into the square that action leads to.
# `valuemap` has map_max+2 cells per side and one channel per action (4).
valuemap = np.zeros((env.metadata['map_max']+2,env.metadata['map_max']+2,4))
agent = env.agents[0]

# (dx, dy) offset of each action index.
ACTION_MAP = np.array([[0, 1], [1, 0], [0, -1], [-1, 0]]) # This is a copy from savanna.py, should be an accessible param
for x in range(0, env.metadata['map_max']):
    for y in range(0, env.metadata['map_max']):
        # Skip squares occupied by a grass patch or a water hole.
        if (env.grass_patches == [x,y]).all(1).any():
            continue
        if (env.water_holes == [x,y]).all(1).any():
            continue
        env.set_agent_position(agent, np.array([x,y]))
        observation = env.observe(agent)
        #print(env.agent_states[agent])
        # One value per action for the current observation.
        action_vals = model(Tensor(observation)).detach().numpy()
        offset = ACTION_MAP
        for action in range(len(ACTION_MAP)):
            # Target square of this action. For x == 0 or y == 0 the target
            # index can be -1, which NumPy resolves to the LAST row/column.
            # NOTE(review): the array is padded by 2 per side and the heatmap
            # cell below crops with [1:-1], but the indices here are not
            # shifted by +1 -- confirm the intended alignment.
            x_ = offset[action][0]+x
            y_ = offset[action][1]+y
            valuemap[x_,y_,action] = action_vals[action]
            
# Collapse the action axis into a mean over the 4 channels; channels that were
# never written stay 0 and are included in that mean.
valuemap = np.sum(valuemap,2)/len(ACTION_MAP)

#print(valuemap)

In [None]:
# Heatmap of the averaged action values, with the outermost row and column on
# every side of `valuemap` cropped off.
sns.heatmap(valuemap[1:-1,1:-1])

In [None]:
# env.render() isn't working at the moment, so the map is drawn by hand:
# a map_max x map_max grid with 2.0 on grass patches and 4.0 on water holes.
side = env.metadata['map_max']
maps = np.zeros((side, side))
for patch in env.grass_patches:
    print(patch[0])
    maps[int(patch[0]), int(patch[1])] = 2.0
for hole in env.water_holes:
    maps[int(hole[0]), int(hole[1])] = 4.0
sns.heatmap(maps)
# Legend as noted by the author: RED = food, LIGHT = water.

In [None]:
# Spot check: place the agent on square (2, 2) and print the model's value for
# each action from there.
probe_position = np.array([2,2])
env.set_agent_position(agent, probe_position)
observation = env.observe(agent)
action_vals = model(Tensor(observation)).detach().numpy()
print(action_vals)

# Model performance plots
These plots are produced without exploration (e.g. epsilon-greedy), so exploration does not act as a confounder.

Train the model N times, then change n_latest to this N and run the block.

In [None]:
# Evaluate one model without exploration: the agent is restarted from each of
# four start positions and plays a fixed number of steps from there, while the
# per-step rewards and scores are accumulated.
def testrun(model,env,cfg):
    """Play `model` from four start positions and total the outcomes.

    The agent class is selected through ``cfg.hparams.agent_id`` and given a
    new ``ReplayBuffer`` of size ``cfg.hparams.replay_size``. For every start
    position the agent is reset, placed on that position and stepped 20 times
    with epsilon 0.0 on the CPU.

    Returns:
        A tuple ``(sum of step scores, sum of step rewards, agent)``.
    """
    make_agent = get_agent_class(cfg.hparams.agent_id)
    agent = make_agent(
        env,
        model,
        ReplayBuffer(cfg.hparams.replay_size),
        0,
        cfg.hparams.agent_params,
    )
    epsilon = 0.0
    device = "cpu"
    steps_per_start = 20

    # Start positions built from 0 and map_max, to test each model from
    # several places on the map.
    start_pos = [
        [0, 0],
        [env.metadata['map_max'], env.metadata['map_max']],
        [env.metadata['map_max'], 0],
        [0, env.metadata['map_max']],
    ]

    scores = []
    rewards = []
    for position in start_pos:
        agent.reset() # !!!
        # NOTE(review): other cells pass env.agents[0] here, this passes the
        # agent object itself -- confirm which one set_agent_position expects.
        env.set_agent_position(agent, np.array(position))
        for _ in range(steps_per_start):
            reward, done, score = agent.play_step(model, epsilon, device) # !!!
            rewards.append(reward)
            scores.append(score)

    return sum(scores), sum(rewards), agent


In [None]:
%%capture
# Load the models from checkpoints and run them through test envs to measure their performance
# without training parameters (usually exploration, like epsilon). statistical significance per run
def calc_results_per_model(test_dirs, test_cfg):
    """Evaluate every checkpoint of every experiment directory with `testrun`.

    Args:
        test_dirs: experiment directories, each containing a ``checkpoints/``
            folder. One directory is one run (one column of the results).
        test_cfg: config used to build the test environment and the agent.

    Returns:
        ``(scores, rewards, agent)``. Both arrays have shape
        ``[n_checkpoints, n_runs]`` with checkpoints ordered by modification
        time. ``agent`` is the agent of the last evaluation, or ``None`` when
        no checkpoint was evaluated.

    Raises:
        ValueError: if `test_dirs` is empty.
    """
    test_dirs = list(test_dirs)
    if not test_dirs:
        # e.g. dirs[1:2] when only a single training run exists.
        raise ValueError(
            "calc_results_per_model needs at least one experiment directory"
        )

    # Checkpoint paths of every run, oldest first.
    models = []
    for exp_dir in test_dirs:
        ckpt_dir = exp_dir+"/checkpoints/"
        runs_dir = [os.path.join(ckpt_dir, m) for m in os.listdir(ckpt_dir)]
        runs_dir.sort(key=os.path.getmtime)
        models.append(runs_dir)

    # Runs may hold different numbers of checkpoints; only the epochs present
    # in every run are compared so indexing never runs past a shorter run.
    n_epochs = min(len(run) for run in models)

    env = SavannaGymEnv(env_params=test_cfg.hparams.env_params)
    scores = np.zeros([n_epochs, len(models)])
    rewards = np.zeros([n_epochs, len(models)])
    agent = None  # stays None when there is no checkpoint to evaluate
    for i, run in enumerate(models): # different runs for statistical significance
        for j, checkpoint in enumerate(run[:n_epochs]): # model as epochs progress
            model = DQNLightning.load_from_checkpoint(checkpoint)
            model.eval()
            # run model
            result, reward, agent = testrun(model, env, test_cfg)
            rewards[j,i] = reward
            scores[j,i] = result
    return scores, rewards, agent

In [None]:
%%capture
# Choose which recorded runs are evaluated as baseline and which as the
# instinct ('smell') agent: the first entry of `dirs` is used as the baseline
# and the second as the instinct run. Adjust the slices if the runs were
# produced in a different order.
base_dirs, inst_dirs = dirs[:1], dirs[1:2]
b_scores, b_rewards, b_agent = calc_results_per_model(base_dirs, cfg_base)
i_scores, i_rewards, i_agent = calc_results_per_model(inst_dirs, cfg_inst)

In [None]:
# Sanity check: show the baseline agent object and the `target_instincts`
# attribute of the instinct agent.
print(b_agent,i_agent.target_instincts)

In [None]:
# Plot results: summed test reward per checkpoint (epoch) for the baseline and
# the instinct agent. The reward arrays are [n_checkpoints, n_runs], so every
# run becomes one line.
reward_series = (
    (b_rewards, 'r', 'baseline'),
    (i_rewards, 'g', 'instinct'),
)
for run_rewards, colour, series_label in reward_series:
    # Each series gets its own x-axis: the two experiments need not have the
    # same number of checkpoints.
    epochs = np.arange(run_rewards.shape[0])
    lines = plt.plot(epochs, run_rewards, color=colour)
    # Label only the first line of a series so the legend shows one entry per
    # agent type instead of one per run.
    if lines:
        lines[0].set_label(series_label)

# (The stray boxplot calls were removed: `data`/`labels` are only defined by
# the boxplot demo cell further down and contain unrelated fake data.)

plt.xlabel("Epoch")
plt.ylabel("Reward")
plt.title("Reward comparison")
plt.legend()
plt.show()

In [None]:
# TODO: add statistical-significance box plots
# TODO: figure out what the rewards should be from the agent
# TODO: figure out what the score should be
# TODO: do 10 epochs
# https://matplotlib.org/3.1.1/gallery/statistics/boxplot.html

In [None]:
import matplotlib.pyplot as plt
import numpy as np

# Boxplot style gallery on fake data: a reference for the statistical plots
# planned above, not a plot of experiment results.

# fake data
np.random.seed(19680801)
data = np.random.lognormal(size=(37, 4), mean=1.5, sigma=1.75)
labels = list('ABCD')
fs = 10  # fontsize

tufte_title = 'Tufte Style \n(showbox=False,\nshowcaps=False)'

# (panel title, extra boxplot options), listed in row-major grid order.
panels = [
    ('Default', {}),
    ('showmeans=True', {'showmeans': True}),
    ('showmeans=True,\nmeanline=True', {'showmeans': True, 'meanline': True}),
    (tufte_title, {'showbox': False, 'showcaps': False}),
    ('notch=True,\nbootstrap=10000', {'notch': True, 'bootstrap': 10000}),
    ('showfliers=False', {'showfliers': False}),
]

fig, axs = plt.subplots(nrows=2, ncols=3, figsize=(6, 6), sharey=True)
for panel_ax, (panel_title, panel_options) in zip(axs.flat, panels):
    panel_ax.boxplot(data, labels=labels, **panel_options)
    panel_ax.set_title(panel_title, fontsize=fs)

# Log-scaled y-axis without tick labels on every panel.
for ax in axs.flat:
    ax.set_yscale('log')
    ax.set_yticklabels([])

fig.subplots_adjust(hspace=0.4)
plt.show()
