In [1]:
import numpy as np
import matplotlib.pyplot as plt

In [2]:
class Maze(object):
    """Geometry of a T-shaped maze made of two axis-aligned rectangles.

    The vertical "staff" and the horizontal "bar" overlap where they meet.
    The target and pickup areas are sub-rectangles at the two ends of the bar.
    Every interval is stored as [lower, upper] and is tested with strict
    inequalities, so a point exactly on a boundary counts as outside.
    """

    # extent of the vertical staff
    staff_y = [0,60]
    staff_x = [0,10]
    # extent of the horizontal bar
    bar_y = [50,60]
    bar_x = [-50,60]
    # target area (left end of the bar)
    target_x = [-50,-30]
    target_y = [50,60]
    # pickup area (right end of the bar)
    pickup_x = [40,60]
    pickup_y = [50,60]

    def __init__(self):
        pass

    @staticmethod
    def _inside(value, interval):
        """Return True if value lies strictly between interval[0] and interval[1]."""
        # bool() keeps the result a plain Python bool even for numpy scalars
        return bool(interval[0] < value < interval[1])

    def valid_x(self,x):
        """Test an x coordinate against the horizontal extent of bar and staff.
        input:
            x - x coordinate to test
        returns:
            inBar_x, inStaff_x - booleans, one per region
        """
        return self._inside(x, self.bar_x), self._inside(x, self.staff_x)

    def valid_y(self,y):
        """Test a y coordinate against the vertical extent of bar and staff.
        input:
            y - y coordinate to test
        returns:
            inBar_y, inStaff_y - booleans, one per region
        """
        return self._inside(y, self.bar_y), self._inside(y, self.staff_y)

    def rough_location(self,position):
        """Determine in which region(s) of the maze a position lies.
        input:
            position - indexable (x, y)
        returns:
            inBar, inStaff - both are True in the overlap of bar and staff,
            both are False outside of the maze
        """
        inBar_x, inStaff_x = self.valid_x(position[0])
        inBar_y, inStaff_y = self.valid_y(position[1])
        return (inBar_x and inBar_y), (inStaff_x and inStaff_y)

    def in_reward(self,position):
        """Return True if position (x, y) lies inside the target rectangle.

        Both coordinates are checked against target_x / target_y, so points
        outside of the maze are never reported as being in the target area.
        """
        return (self._inside(position[0], self.target_x)
                and self._inside(position[1], self.target_y))

    def in_pickup(self,position):
        """Return True if position (x, y) lies inside the pickup rectangle.

        Both coordinates are checked against pickup_x / pickup_y, so points
        outside of the maze are never reported as being in the pickup area.
        """
        return (self._inside(position[0], self.pickup_x)
                and self._inside(position[1], self.pickup_y))
    
    
class Agent(object):
    """Agent that moves through a Maze using an epsilon-greedy policy.

    The position is encoded by Gaussian place cells (input layer). A weight
    matrix maps the input layer onto one output unit per movement direction;
    the greedy action is the direction with the largest output.
    """

    def __init__(self, pos = [5,0], eps = .1):
        """
        input:
            pos - starting position (x, y); it is copied into a numpy array
            eps - probability of choosing a random direction instead of the greedy one
        """
        self.position = np.array(pos)
        self.maze = Maze()
        # region flags of the current position, kept up to date by valid_move
        self.inBar = False
        self.inStaff = True
        # becomes True once the agent has been inside the pickup area (see move)
        self.alpha = False
        # becomes True once the agent is in the target area while alpha is True
        self.rewarded = False
        self.placeCells = self.make_places()
        self.placeFieldSize = 5
        self.numberIn = len(self.placeCells.T)
        self.numberOut = 4 #given as value to start with
        # one row of place cell activations per value of alpha
        self.inputLayer = np.zeros((2,self.numberIn))
        self.outputLayer = np.zeros(self.numberOut)
        self.weights = np.random.random(size=(len(self.outputLayer),len(self.inputLayer.flatten())))
        # numberOut evenly spaced angles in [0, 2*pi)
        self.directions = 2*np.pi*np.arange(0,self.numberOut)/self.numberOut
        self.eps = eps


    def get_direction(self):
        """Return an angle drawn uniformly from [0, 2*pi)."""
        ###this is gonna change
        direction = 2*np.pi*np.random.random()
        return direction

    def make_places(self, offset = 2.5, seperation = 5):
        """Create the centers of the place cell activity.
        input:
            offset - how far from the lower edge of a region the first place cell should be
            seperation - the distance between adjacent centers measured in one dimension
        returns:
            2 x #(place cells) array that stores the centers (row 0: x, row 1: y)
        For both the bar and the staff of the T maze a meshgrid is created that
        starts at the lower bounds of the respective region. The staff grid stops
        below the bar, so the two grids do not overlap.
        The offset is only taken into account on one side.
        """
        # bar: covers the whole horizontal extent, including the negative x arm
        bar_xs = np.arange(self.maze.bar_x[0]+offset,self.maze.bar_x[-1],seperation)
        bar_ys = np.arange(self.maze.bar_y[0]+offset,self.maze.bar_y[-1],seperation)
        XX, YY = np.meshgrid(bar_xs,bar_ys)
        bargrid = np.vstack((XX.flatten(),YY.flatten()))
        # staff: only up to the lower edge of the bar to avoid duplicate centers
        staff_xs = np.arange(self.maze.staff_x[0]+offset,self.maze.staff_x[-1],seperation)
        staff_ys = np.arange(self.maze.staff_y[0]+offset,self.maze.bar_y[0],seperation)
        XX, YY = np.meshgrid(staff_xs,staff_ys)
        staffgrid = np.vstack((XX.flatten(),YY.flatten()))
        return np.hstack((bargrid,staffgrid))

    def input_layer(self):
        """Compute the activation of the input layer for the current position and state and update the variable.

        Row i of the input layer carries the Gaussian place cell activity if
        bool(i) equals alpha and is zero otherwise.
        """
        # the place cell activity does not depend on the row, so compute it once
        squared_distance = np.sum((self.placeCells.T-self.position)**2,axis=1)
        place_activity = np.exp(-squared_distance/(2*self.placeFieldSize**2))
        activation = np.zeros(np.shape(self.inputLayer))
        for i in range(len(self.inputLayer)):
            kronecker = bool(i)==self.alpha
            activation[i] = kronecker*place_activity
        self.inputLayer = activation

    def output_layer(self):
        """Update the output layer as weights times the flattened input layer."""
        self.outputLayer = np.dot(self.weights,self.inputLayer.flatten())

    def choose_direction(self):
        """Pick a movement direction epsilon-greedily.

        With probability eps a random entry of directions is returned, otherwise
        both layers are updated and the direction with the largest output is returned.
        """
        if np.random.random()<self.eps:
            return self.directions[np.random.randint(self.numberOut)]
        self.input_layer()
        self.output_layer()
        return self.directions[np.argmax(self.outputLayer)]

    def valid_move(self,new_position):
        """Check whether the agent may move to new_position.

        The move is valid if new_position lies inside the bar or the staff and
        at least one of the two region flags stays the same, which rejects
        jumps between the staff-only and the bar-only part of the maze.
        On success the region flags inBar / inStaff are updated.
        input:
            new_position - candidate position (x, y)
        returns:
            True if the move is allowed, False otherwise
        """
        inBar, inStaff = self.maze.rough_location(new_position)
        if not (self.inBar==inBar or self.inStaff==inStaff):
            return False
        if not (inBar or inStaff):
            return False
        self.inBar = inBar
        self.inStaff = inStaff
        return True

    def move(self):
        """Take one step and update the pickup / reward state.

        Direction (choose_direction) and distance (normal with mean 3 and
        standard deviation 1.5) are redrawn until the resulting position is valid.
        Afterwards alpha is set once the pickup area is entered; if alpha was
        already True, rewarded is set when the agent is in the target area.
        """
        valid = False
        while not valid:
            direction = self.choose_direction()
            distance = np.random.normal(loc=3,scale=1.5)
            step = np.array([np.cos(direction)*distance,np.sin(direction)*distance])
            new_position = self.position+step
            valid = self.valid_move(new_position)
        self.position = new_position

        if not self.alpha:
            self.alpha = self.maze.in_pickup(self.position)
        elif self.maze.in_reward(self.position):
            self.rewarded = True
                
                
def simulate(max_steps=None):
    """Run one episode with a fresh agent and plot the visited positions.
    input:
        max_steps - optional upper limit for the number of moves; with the
                    default None the episode runs until the agent is rewarded
    returns:
        pickupsteps - number of moves after which alpha became True
                      (None if that did not happen within max_steps)
        k - total number of moves made
    """
    mouse = Agent()
    positions = []
    pickupsteps = None
    k=0
    while not mouse.rewarded and (max_steps is None or k<max_steps):
        positions.append(mouse.position)
        mouse.move()
        # count the move before reporting so that both step counts
        # (pickup and target) refer to the number of moves made
        k+=1
        if mouse.alpha and pickupsteps is None:
            pickupsteps = k
            print('found reward after %.d steps'%(k))
        if k%1000==0:
            print(k)
    # include the position reached by the last move in the trajectory
    positions.append(mouse.position)
    print('')
    if mouse.rewarded:
        print('made it to target area after %.d steps'%(k))
    else:
        print('stopped after %.d steps without reaching the target area'%(k))
    positions = np.asarray(positions)
    fig, ax = plt.subplots()
    ax.scatter(positions[:,0],positions[:,1],marker='.',linewidths=0)
    ax.set_aspect('equal')
    ax.set_xlabel('x')
    ax.set_ylabel('y')
    plt.show()
    return pickupsteps, k

In [4]:
# Create an agent with the default start position and exploration rate for interactive inspection.
mouse = Agent()

In [407]:
# Take a single validated step, then show the direction the policy picks from the new position.
# Note that choose_direction() draws fresh random numbers, so the displayed value can differ between runs.
mouse.move()
mouse.choose_direction()


3.14159265359
3.14159265359
1.57079632679


3.1415926535897931

In [5]:
# Run one full episode and display (pickupsteps, k).
# Called without arguments, this only returns once the agent's rewarded flag is set.
simulate()

found reward after 688 steps
1000
2000
3000
4000
5000
6000
7000
8000
9000
10000
11000
12000
13000
14000
15000
16000
17000
18000
19000
20000
21000
22000
23000
24000
25000
26000
27000
28000
29000
30000
31000
32000
33000
34000
35000
36000
37000
38000
39000
40000
41000
42000
43000
44000
45000
46000
47000


KeyboardInterrupt: 