# DQN notebook

In [None]:
%load_ext autoreload
%autoreload 2

import numpy as np
import neat
import matplotlib.pyplot as plt
from matplotlib import cm

import pickle
import multimodal_mazes
from tqdm import tqdm

import itertools
import torch
import torch.nn as nn
import torch.optim as optim

## Ideas 
* Try Optuna for hyperparameter tuning: https://colab.research.google.com/github/araffin/tools-for-robotic-rl-icra2022/blob/main/notebooks/optuna_lab.ipynb#scrollTo=E0yEokTDxhrC

## Working

In [None]:
# Enumerate every 0/1 combination of the 7 flags (2**7 rows, one per combination).
wm_flags = np.array([combo for combo in itertools.product((0, 1), repeat=7)])

# Build a DQN agent using the first combination (all flags set to 0).
agnt = multimodal_mazes.AgentDQN(
    location=[5, 5],
    channels=[1, 1],
    sensor_noise_scale=0.05,
    n_hidden_units=8,
    wm_flags=wm_flags[0],
)
# Leave the agent as the cell's last expression so the notebook displays it.
agnt

In [None]:
# Create an 11x11 track maze generator with two channels.
maze = multimodal_mazes.TrackMaze(
    size=11,
    n_channels=2,
)

# Training is switched off in this cell; uncomment the line below to fit the
# agent's policy on these mazes.
# agnt.generate_policy(maze, n_steps=6)

# Generate 60000 mazes with no noise and no gaps.
maze.generate(
    number=60000,
    noise_scale=0.0,
    gaps=0,
)

In [None]:
# Show one generated maze (number 100) as an image, taking index 1 on the
# last axis (presumably the second sensory channel - confirm against TrackMaze).
example_maze = maze.mazes[100]
plt.imshow(example_maze[:, :, 1])

In [None]:
# Run the current agent through a single maze and plot the path it took.
n = 0  # index of the maze to test on

time, path = multimodal_mazes.maze_trial(
    mz=maze.mazes[n],
    mz_start_loc=maze.start_locations[n],
    mz_goal_loc=maze.goal_locations[n],
    channels=[1, 1],
    sensor_noise_scale=0.05,
    drop_connect_p=0.0,
    n_steps=6,
    agnt=agnt,
)
print(time, path)

# Draw the recorded path on top of the same maze.
multimodal_mazes.plot_path(
    path,
    mz=maze.mazes[n],
    mz_goal_loc=maze.goal_locations[n],
    n_steps=6,
)

In [None]:
# Evaluate the current agent's fitness on a fresh batch of 1000 mazes.
maze = multimodal_mazes.TrackMaze(size=11, n_channels=2)
maze.generate(number=1000, noise_scale=0.0, gaps=0)

# genome/config are passed as None: the agent is supplied directly via `agnt`.
fitness = multimodal_mazes.eval_fitness(
    genome=None,
    config=None,
    channels=[1, 1],
    sensor_noise_scale=0.05,
    drop_connect_p=0.0,
    maze=maze,
    n_steps=6,
    agnt=agnt,
)
print(fitness)

In [None]:
# Plot the gradient norms stored on the agent.
gradient_norms = agnt.gradient_norms
plt.plot(gradient_norms)

In [None]:
# Visualise the agent's output-to-output weight matrix as a heat map.
# detach() gives a tensor that is not tracked by autograd, which is what
# imshow needs.
output_weights = agnt.output_to_output.weight.detach()
plt.imshow(output_weights)
plt.colorbar()

## Fitness vs noise

In [None]:
# Fitness vs noise
# Train one agent per architecture, then evaluate each trained agent on a
# separate set of mazes at every sensor-noise level in `noises`.
noises = np.linspace(start=0.0, stop=0.5, num=21)
training_noise = 0.05  # sensor noise used while training; also marked on the plot

# Small set
# wm_flags = np.array([[0,0,0,0,0,0,0], [0,0,0,0,0,0,0], [0,0,0,1,0,0,0],[0,0,1,0,0,0,0]])
# colors = ["xkcd:gray", [0.0, 0.0, 0.0, 0.5], [0.039, 0.73, 0.71, 1], list(np.array([24, 156, 196, 255]) / 255)]

# Full set
# All 2**7 flag combinations, with the all-zero combination duplicated at the
# front so that row 1 can be trained with a different hidden-layer size below.
wm_flags = np.array(list(itertools.product([0, 1], repeat=7)))
wm_flags = np.vstack((wm_flags[0], wm_flags))
# matplotlib.cm.get_cmap was removed in Matplotlib 3.9; pyplot.get_cmap returns
# the same resampled colormap (one colour per architecture).
colors = plt.get_cmap("plasma", len(wm_flags)).colors.tolist()

results = np.zeros((len(noises), len(wm_flags)))  # noises x architectures

# Generate mazes (separate sets for training and testing)
maze = multimodal_mazes.TrackMaze(size=11, n_channels=2)
maze.generate(number=100, noise_scale=0.0, gaps=0)

maze_test = multimodal_mazes.TrackMaze(size=11, n_channels=2)
maze_test.generate(number=100, noise_scale=0.0, gaps=0)

# Run
for b, wm_flag in enumerate(tqdm(wm_flags)):

    # Control architecture: the duplicated all-zero row (index 1) gets 34
    # hidden units instead of 8 (presumably to match the parameter count of
    # the larger architectures - confirm).
    n_hidden_units = 34 if b == 1 else 8

    # Train
    agnt = multimodal_mazes.AgentDQN(
        location=[5, 5],
        channels=[1, 1],
        sensor_noise_scale=training_noise,
        n_hidden_units=n_hidden_units,
        wm_flags=wm_flag,
    )
    agnt.generate_policy(maze, n_steps=6)

    # Test the trained agent at every noise level
    for a, noise in enumerate(noises):
        results[a, b] = multimodal_mazes.eval_fitness(
            genome=None,
            config=None,
            channels=[1, 1],
            sensor_noise_scale=noise,
            drop_connect_p=0.0,
            maze=maze_test,
            n_steps=6,
            agnt=agnt,
        )

# Plotting
plt.plot([training_noise, training_noise], [0, 1], ':', color='k', alpha=0.5, label='Training noise')
for b, wm_flag in enumerate(wm_flags):
    plt.plot(noises, results[:, b], color=colors[b], label=wm_flag)

plt.ylim([0, 1.05])
plt.ylabel('Fitness')
plt.xlabel('Sensor noise')
plt.legend()

In [None]:
# Fitness vs noise AUC
# Integrate each architecture's fitness curve over the noise axis and plot the
# areas in ascending order.

# np.trapz was deprecated in NumPy 2.0 in favour of np.trapezoid; use whichever
# this NumPy version provides.
trapezoid = getattr(np, "trapezoid", None) or np.trapz

auc = trapezoid(y=results.T, x=noises, axis=1)  # one value per architecture
idxs = np.argsort(auc)  # architecture indices, lowest AUC first

fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(5*3,5), sharex=False, sharey=True)
for b, idx in enumerate(idxs):
    # One stem per architecture, placed at its rank b; marker and stem in black.
    ml, sl, _ = plt.stem(b, auc[idx])
    ml.set_color('k')
    sl.set_color('k')
# plt.xticks(range(len(wm_flags)), policies, rotation='vertical')
plt.ylabel('AUC');

## Loading

In [None]:
import os

# Load saved results: one directory per repeat, one pickle file per architecture.
paths = [f'../Results/test{n}/' for n in range(38, 48)]
print(paths)

n_architectures = 129
n_flags = 7

# Arrays start as NaN so that architectures/repeats without a file stay NaN.
noises = np.linspace(start=0.0, stop=0.5, num=21) # noises,
results = np.full((len(noises), n_architectures, len(paths)), np.nan) # noises x architectures x repeats
wm_flags = np.full((n_architectures, n_flags, len(paths)), np.nan) # architectures x flags x repeats
n_parameters = np.full((n_architectures, len(paths)), np.nan) # architectures x repeats
auc = np.full((n_architectures, len(paths)), np.nan) # architectures x repeats

# np.trapz was deprecated in NumPy 2.0 in favour of np.trapezoid; use whichever
# this NumPy version provides.
trapezoid = getattr(np, "trapezoid", None) or np.trapz

for a, path in enumerate(tqdm(paths)):

    # Load data
    for f in os.listdir(path):
        if not f.endswith(".pickle"):
            continue

        # NOTE: pickle.load can execute arbitrary code - only load result
        # files that you produced yourself or otherwise trust.
        with open(os.path.join(path, f), 'rb') as file:
            agnt = pickle.load(file)

        # The file name (without extension) is used as the architecture index.
        idx = int(os.path.splitext(f)[0])

        results[:, idx, a] = agnt.results
        wm_flags[idx, :, a] = agnt.wm_flags
        n_parameters[idx, a] = agnt.n_parameters
        auc[idx, a] = trapezoid(y=agnt.results, x=noises)

In [None]:
# AUC vs number of parameters, with a quadratic trend line.

# Data: flatten architectures x repeats into one entry per network
x = n_parameters.reshape(-1) # networks,
y = auc.reshape(-1) # networks,

# Keep only networks where both values are present; a NaN left in either
# array would make np.polyfit return NaN coefficients.
valid = ~(np.isnan(x) | np.isnan(y))
x = x[valid]
y = y[valid]

# Sort by x so the fitted curve is drawn left to right
idx = np.argsort(x)

# Poly fit
curve = np.poly1d(np.polyfit(x[idx], y[idx], deg=2))
plt.plot(x[idx], curve(x[idx]), color='g')

plt.scatter(x, y, color='k', marker='.', alpha=0.25)

plt.xlabel('Number of parameters')
plt.ylabel('AUC')

In [None]:
# For every architecture, take the maximum over repeats (ignoring NaN) of the
# parameter count and of the AUC, and scatter one against the other.
max_n_parameters = np.nanmax(n_parameters, axis=1)
max_auc = np.nanmax(auc, axis=1)
plt.scatter(max_n_parameters, max_auc, color='k', marker='.', alpha=0.25)

In [None]:
# Fitness vs noise AUC
# Top panel: per-architecture AUC range (min to max over repeats), sorted by
# the maximum. Bottom panel: the flags of each architecture in the same order.
from matplotlib.patches import Rectangle

n_architectures = auc.shape[0]

# Architectures to highlight. Colours follow the "Fitness vs noise" plot in
# this notebook: architecture 1 is purple, the last architecture (128) orange.
interest = [1, 128]
i_cols = ['xkcd:purple', 'xkcd:orange']
wm_f_labels = ['L0', 'L1', 'L2', 'S0', 'S1', 'B0', 'B1']

# Reduce over repeats once, instead of recomputing inside the loops below
auc_min = np.nanmin(auc, axis=1)
auc_max = np.nanmax(auc, axis=1)
idxs = np.argsort(auc_max)  # architecture indices, lowest maximum AUC first
flag_alphas = np.nanmax(wm_flags, axis=2)[idxs]  # sorted architectures x flags

positions = np.arange(n_architectures)
fig, ax = plt.subplots(nrows=2, ncols=1, figsize=(5*3,5), sharex=True, sharey=False)

# One vertical line per architecture, spanning its min-max AUC
plt.sca(ax[0])
plt.plot([positions, positions], [auc_min[idxs], auc_max[idxs]], color='k', alpha=0.25);
plt.ylabel('AUC');
plt.xticks([])

# One row of dots per flag; the flag value is used as the dot's opacity
plt.sca(ax[1])
for i in range(len(wm_f_labels)):
    plt.scatter(positions, np.ones(n_architectures) * i, c=[(0, 0, 0, alpha) for alpha in flag_alphas[:, i]], s=5)
plt.yticks(range(len(wm_f_labels)), wm_f_labels)
plt.xlabel('Architectures')

for a, i in enumerate(interest):
    # Position of architecture i along the sorted x-axis
    rank = np.where(idxs == i)[0][0]

    plt.sca(ax[0])
    plt.plot([rank, rank], [np.nanmin(auc[i]), np.nanmax(auc[i])], color=i_cols[a])

    ax[1].add_patch(Rectangle((rank - 0.25, -0.95), 0.5, 7.05, color=i_cols[a], alpha=0.5))

In [None]:
# Fitness vs noise
# Draw every network's fitness curve faintly and highlight three architectures.
plt.plot([0.05, 0.05], [0,1], ':', color='k', alpha=0.5, label='Training noise')

# Empty line that only provides the legend entry for the background curves
plt.plot([], [], 'k', alpha=0.1, label='All architectures')

# `results` is noises x architectures, or noises x architectures x repeats when
# loaded from disk. plt.plot rejects arrays above 2D, so flatten every trailing
# axis into columns (one curve per network); a 2D array is left unchanged.
plt.plot(noises, results.reshape(len(noises), -1), 'k', alpha=0.1);

highlights = [
    (results[:, idxs[-1]], 'xkcd:dark seafoam', 'Most robust'),
    (results[:, -1], 'xkcd:orange', 'Fully recurrent'),
    (results[:, 1], 'xkcd:purple', 'Feedforward (L)'),
]
for curves, color, label in highlights:
    lines = plt.plot(noises, curves, color=color)
    # With repeats there is one line per repeat; label only the first so the
    # legend shows each group once.
    lines[0].set_label(label)

plt.ylim([0, 1.05])
plt.ylabel('Fitness')
plt.xlabel('Sensor noise')
plt.legend()