# DQN notebook

In [1]:
%load_ext autoreload
%autoreload 2

import numpy as np
import neat
import matplotlib.pyplot as plt
from matplotlib import cm

import pickle
import multimodal_mazes
from tqdm import tqdm

import itertools
import torch
import torch.nn as nn
import torch.optim as optim

## Ideas 
* Hyperparameter tuning with Optuna (tutorial notebook): https://colab.research.google.com/github/araffin/tools-for-robotic-rl-icra2022/blob/main/notebooks/optuna_lab.ipynb#scrollTo=E0yEokTDxhrC

## Working

In [None]:
# Flag vector of length 7: start from all zeros (the first of the 2**7 binary
# patterns) and switch on index 2.
wm_flags = np.zeros(7, dtype=int)
wm_flags[2] = 1

# Build the DQN agent with this flag vector.
agnt = multimodal_mazes.AgentDQN(
    location=[5, 5],
    channels=[1, 1],
    sensor_noise_scale=0.05,
    n_hidden_units=8,
    wm_flags=wm_flags,
)

# Show the agent's `parameters` attribute as the cell output.
agnt.parameters

In [None]:
# Settings for the batch of mazes the agent is trained on.
train_generate_kwargs = dict(number=60_000, noise_scale=0.0, gaps=1)

maze = multimodal_mazes.TrackMaze(size=11, n_channels=2)
maze.generate(**train_generate_kwargs)

# Train the agent on the generated mazes.
agnt.generate_policy(maze, n_steps=6)

In [None]:
# Index of the maze to run and plot.
n = 0

# Pull out the pieces of maze `n` once so the trial and the plot use the same ones.
trial_maze = maze.mazes[n]
trial_start = maze.start_locations[n]
trial_goal = maze.goal_locations[n]

# Run a single trial of the agent on this maze and report what it returned.
time, path = multimodal_mazes.maze_trial(
    mz=trial_maze,
    mz_start_loc=trial_start,
    mz_goal_loc=trial_goal,
    channels=[1, 1],
    sensor_noise_scale=0.05,
    drop_connect_p=0.0,
    n_steps=6,
    agnt=agnt,
)
print(time, path)

# Draw the returned path on the same maze.
multimodal_mazes.plot_path(path, mz=trial_maze, mz_goal_loc=trial_goal, n_steps=6)

In [None]:
# NOTE: this rebinds `maze` to a new set of 1000 mazes used for evaluation.
maze = multimodal_mazes.TrackMaze(size=11, n_channels=2)
maze.generate(number=1000, noise_scale=0.0, gaps=1)

# Score the agent on these mazes; `genome` and `config` are passed as None here.
fitness = multimodal_mazes.eval_fitness(
    genome=None,
    config=None,
    channels=[1, 1],
    sensor_noise_scale=0.05,
    drop_connect_p=0.0,
    maze=maze,
    n_steps=6,
    agnt=agnt,
)
print(fitness)

In [None]:
# Plot the agent's `gradient_norms` on the current axes.
ax = plt.gca()
ax.plot(agnt.gradient_norms)

In [None]:
# Show the detached `output_to_output` weights as an image, with a colour scale.
weights = agnt.output_to_output.weight.detach()
im = plt.imshow(weights)
plt.colorbar(im)

## Fitness vs noise

In [None]:
# Fitness vs noise
# Sensor-noise levels at which every trained agent is evaluated (21 values in [0, 0.5]).
noises = np.linspace(start=0.0, stop=0.5, num=21)

# Enumerate all 2**7 binary flag patterns, then keep only the first (all zeros)
# and the last (all ones).
wm_flags = np.array(list(itertools.product([0,1], repeat=7)))
# Alternative selection: the all-zero pattern plus every pattern with exactly one flag set.
# wm_flags = wm_flags[np.append([0], np.where(np.sum(wm_flags,1) == 1))]
wm_flags = wm_flags[[0,-1]]

# One RGBA colour per flag pattern, evenly spaced along "plasma".
# `matplotlib.cm.get_cmap` was deprecated in Matplotlib 3.7 and removed in 3.9, so
# sample the colormap directly; this gives the same colours as the old
# `cm.get_cmap("plasma", n).colors`.
colors = plt.get_cmap("plasma")(np.linspace(0, 1, len(wm_flags))).tolist()

# Alternative: three hand-picked patterns with hand-picked colours.
# wm_flags = np.array([[0,0,0,0,0,0,0],[0,0,0,1,0,0,0],[0,0,1,0,0,0,0]])
# colors = ["xkcd:gray", [0.039, 0.73, 0.71, 1], list(np.array([24, 156, 196, 255]) / 255)]

# results[a, b] holds the fitness at noises[a] for the agent trained with wm_flags[b].
results = np.zeros((len(noises), len(wm_flags)))

# Generate mazes
# Both sets share the same maze geometry and generation settings; only the
# number of mazes differs.
track_kwargs = dict(size=11, n_channels=2)
generate_kwargs = dict(noise_scale=0.0, gaps=1)

# Mazes used for training.
maze = multimodal_mazes.TrackMaze(**track_kwargs)
maze.generate(number=60_000, **generate_kwargs)

# Separate, smaller set used for testing.
maze_test = multimodal_mazes.TrackMaze(**track_kwargs)
maze_test.generate(number=1_000, **generate_kwargs)

# Run
# Scalar agent settings shared by every flag pattern.
agent_kwargs = dict(sensor_noise_scale=0.05, n_hidden_units=8)

for b, wm_flag in enumerate(tqdm(wm_flags)):

    # Train a fresh agent for this flag pattern.
    agnt = multimodal_mazes.AgentDQN(
        location=[5, 5],
        channels=[1, 1],
        wm_flags=wm_flag,
        **agent_kwargs,
    )
    agnt.generate_policy(maze, n_steps=6)

    # Test the trained agent at every noise level; one column of `results` per pattern.
    for a, noise in enumerate(noises):
        score = multimodal_mazes.eval_fitness(
            genome=None,
            config=None,
            channels=[1, 1],
            sensor_noise_scale=noise,
            drop_connect_p=0.0,
            maze=maze_test,
            n_steps=6,
            agnt=agnt,
        )
        results[a, b] = score

# Plotting
ax = plt.gca()

# Dotted vertical marker at the sensor noise used when constructing the agents (0.05).
ax.plot([0.05, 0.05], [0, 1], ':', color='k', alpha=0.5, label='Training noise')

# One fitness-vs-noise curve per flag pattern, labelled with the pattern itself.
for b, wm_flag in enumerate(wm_flags):
    ax.plot(noises, results[:, b], color=colors[b], label=wm_flag)

ax.set_ylim([0, 1.05])
ax.set_ylabel('Fitness')
ax.set_xlabel('Sensor noise')
ax.legend()