## Background

Attractor networks offer a degree of neural plausibility beyond that of the standard drift-diffusion model
for binary decision making ([Wang 2008](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC2710297/)). These nonlinear models represent competing populations
of neurons that work toward a decision, and they extend easily to $n$-alternative decision paradigms.
One limitation of these models is that they cannot learn from experience. Learning,
however, can be addressed through techniques developed in reinforcement learning ([Sutton and Barto
2018](https://drive.google.com/file/d/1xeUDVGWGUUv1-ccUMAZHJLej2C7aAFWY/view)).

Previous efforts to integrate learning and decision making include [Pedersen et al. 2016](https://www.ncbi.nlm.nih.gov/pubmed/27966103), who focused on integrating learning with the drift-diffusion model, and [Dunovan and Verstynen 2016](https://www.frontiersin.org/articles/10.3389/fnins.2016.00106), who introduced the **Believer-Skeptic** model.

For this project, I propose an algorithm that combines Q-learning with attractor networks to
simulate the dynamics of decision making in a learning environment.

## Problem

## Model

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import gym 
import torch
from torch import nn
import seaborn as sns
import pandas as pd
import os
import copy
import sys
from collections import namedtuple, deque


%matplotlib inline

In [27]:
class QNetwork(nn.Module):
    """Q-value network for a gym-style environment with a discrete action space.

    The input width is read from ``env.observation_space.shape[0]`` and the
    number of outputs from ``env.action_space.n``. Three variants are
    supported through ``dqn_algo``: ``'vanilla'`` and ``'double'`` use a
    single network, while ``'dueling'`` adds a second network whose output is
    combined with the first in :meth:`getQValues`.

    Parameters
    ----------
    env : environment exposing ``observation_space.shape`` and ``action_space.n``.
    n_hidden_layers : number of hidden layers; overridden by ``len(args)``
        when extra positional arguments are supplied.
    n_hidden_nodes : stored on the instance for the network-building helpers.
    learning_rate : learning rate handed to the Adam optimizer.
    bias : stored on the instance for the network-building helpers.
    activation_function : name of the activation; stored lower-cased.
    dqn_algo : one of ``'vanilla'``, ``'double'``, ``'dueling'``
        (case-insensitive).
    device : device the networks and input tensors are placed on.
    *args : custom layer definition, forwarded to ``getLayersAndNodes``.
    **kwargs : accepted for call-site compatibility; not read by this class.

    Raises
    ------
    AssertionError
        If ``dqn_algo`` is not one of the recognized variants.
    """

    def __init__(self, env, n_hidden_layers=1,
                 n_hidden_nodes=4, learning_rate=0.001, bias=False,
                 activation_function='relu', dqn_algo='vanilla',
                 device='cpu', *args, **kwargs):
        super().__init__()

        self.env = env
        algo_list = ['vanilla', 'double', 'dueling']
        self.dqn_algo = dqn_algo.lower()
        assert self.dqn_algo in algo_list, \
            "dqn_algo {} not recognized, provide one of: {}.".format(dqn_algo, algo_list)

        self.n_inputs = self.env.observation_space.shape[0]
        self.n_outputs = self.env.action_space.n
        # Allow custom layer definition: one positional argument per layer.
        if len(args) > 0:
            self.n_hidden_layers = len(args)
        else:
            self.n_hidden_layers = n_hidden_layers
        self.n_hidden_nodes = n_hidden_nodes
        self.bias = bias
        self.actions = np.arange(self.n_outputs)
        self.learning_rate = learning_rate
        self.activation_function = activation_function.lower()
        self.device = device

        # Build network. getLayersAndNodes / buildNetwork / xavierInit are
        # helpers defined outside this class.
        layer_list = getLayersAndNodes(self, args)
        self.layers = buildNetwork(self, layer_list)
        self.net = nn.Sequential(self.layers)
        self.net.apply(xavierInit)

        # Use the normalized name here: the raw argument may differ in case
        # from the value that passed validation above (e.g. 'Dueling').
        if self.dqn_algo == 'dueling':
            dueling_list = getLayersAndNodes(self, args)
            # Last entry set to 1 -- presumably the output width, giving a
            # scalar state-value head; confirm against getLayersAndNodes.
            dueling_list[-1] = 1
            self.dueling_layers = buildNetwork(self, dueling_list)
            self.dueling_net = nn.Sequential(self.dueling_layers)
            self.dueling_net.apply(xavierInit)

        # Move every registered sub-network to the requested device so the
        # weights live where getQValues() places its input tensor. This also
        # covers spellings such as 'cuda:0' or a torch.device instance.
        self.to(self.device)

        self.optimizer = torch.optim.Adam(self.parameters(),
                                          lr=self.learning_rate)

    def getQValues(self, state):
        """Return the Q-values for ``state`` (one value per action).

        ``state`` is first passed through ``flattenDict`` and then converted
        to a float tensor on ``self.device``. For the dueling variant the two
        network outputs are combined as ``V + A - mean(A)``, with the mean
        taken over the last (action) axis so that each state in a batch gets
        its own baseline.

        Raises
        ------
        TypeError
            If the flattened state cannot be converted to a ``FloatTensor``.
        """
        state = flattenDict(state)
        try:
            state_t = torch.FloatTensor(state).to(device=self.device)
        except TypeError as err:
            # Fail here with the offending value instead of printing and
            # continuing with an undefined tensor.
            raise TypeError(
                "could not convert state to a FloatTensor: {!r}".format(state)
            ) from err

        advantage = self.net(state_t)
        if self.dqn_algo != 'dueling':
            # 'vanilla' and 'double' share the same forward pass.
            return advantage

        value = self.dueling_net(state_t)
        return value + advantage - advantage.mean(dim=-1, keepdim=True)

    def getAction(self, state):
        """Return ``(max Q-value, greedy action index)`` for ``state``.

        Both tensors are detached from the autograd graph.
        """
        qval, action = torch.max(self.getQValues(state), dim=-1)
        return qval.detach(), action.detach()

In [28]:
# Read the saved run configuration; the file has a 'parameters' column
# (names) and a 'value' column (settings).
_parameters = pd.read_csv(
    'project/data/CartPole-v0/dqn/20181128_1928/parameters.txt'
)
# Reshape to a single-row frame with one column per parameter name, so a
# setting can be looked up as parameters[name][0].
parameters = pd.DataFrame(
    data=_parameters['value'].to_numpy().reshape(1, -1),
    columns=_parameters['parameters'],
)

In [29]:
# Rebuild the environment and network from the saved run configuration.
env = gym.make(parameters['env'][0])
# The constructor's keywords are n_hidden_layers / n_hidden_nodes; any other
# keyword (such as hl= / hn=) lands in **kwargs and is ignored.
# Loaded values are strings, so the bias flag is parsed by its text:
# bool() of any non-empty string, including 'False', is True.
net = QNetwork(env,
               n_hidden_layers=int(parameters['hl'][0]),
               n_hidden_nodes=int(parameters['hn'][0]),
               learning_rate=float(parameters['lr'][0]),
               bias=str(parameters['bias'][0]).strip().lower() in ('true', '1', 'yes'),
               activation_function=parameters['actFunc'][0],
               dqn_algo=parameters['dqnAlgo'][0])

NameError: name 'getLayersAndNodes' is not defined

In [24]:
# Inspect the Python type of the loaded 'hl' entry before casting it.
type(parameters['hl'][0])

str

## Results

## Conclusions