## RDD learning of XOR function

In [1]:
# NOTE(review): %pylab populates the interactive namespace from numpy and
# matplotlib (see the message printed by this cell), which hides where names
# come from. numpy and pyplot are also imported explicitly further down, so
# `%matplotlib inline` would presumably be enough -- confirm before switching,
# since the next line relies on the `pylab` name.
%pylab inline
# Default size for figures drawn in this notebook.
pylab.rcParams['figure.figsize'] = (10, 6)

#%matplotlib inline

import matplotlib
import numpy as np
import matplotlib.pyplot as plt
import numpy.random as rand
import pandas as pd
import seaborn as sns
from matplotlib import animation, rc
from IPython.display import HTML

from lib.mu_w_2_20 import mu_mean
from lib.lif import LIF, ParamsLIF, LIF_3layer

Populating the interactive namespace from numpy and matplotlib


In [2]:
import numpy as np
import numpy.random as rand
import pandas as pd

from lib.mu_w_2_20 import mu_mean
from lib.lif import LIF, ParamsLIF, LIF_3layer

In [3]:
#--- Simulation / learning schedule ---
beta_dim = 3               #Dimension of learnt vector
dt = 0.001                 #Simulation timestep
tsim = 500                 #Total simulation time
DeltaT = 100               #Number of timebins over which learning rule is applied
Tsim = DeltaT*dt           #Block sim time
#The block counts below are float quotients truncated to int. A quotient that
#lands a hair under a whole number (e.g. 0.3/0.1 == 2.9999999999999996) would
#silently lose a block, so a tiny tolerance is added before truncating.
#Genuinely fractional quotients are still rounded down, as before.
T = int((tsim/dt)/DeltaT + 1e-9)   #Total number of learning blocks
tinput = 1                 #Time for input to be held fixed
Tinput = int(tinput/Tsim + 1e-9)   #Number of blocks for input to be held fixed

#--- Network / neuron parameters ---
c = 0.01                   #Correlation coefficient
n = 10                     #Number of neurons
q = 1                      #Number of output neurons
sigma = 10                 #Their noise level
mu = 1                     #Spiking threshold
tau = 1                    #Neuron timescale
eta = 1e-1                 #Cost gradient learning rate (RDD)
epsilon = 5e4              #Weight learning rate (RDD)
p = .5                     #Learning window
tau_s = 0.20               #Output filter timescale

#--- Cost function and weight bounds ---
#NOTE(review): LIF_3layer is constructed elsewhere with its own alpha = 50;
#it is not clear from this notebook whether that is related to this alpha.
alpha = 200                #Cost of spiking when shouldn't
beta = -300                #Reward for spiking when should
wmax = 20                  #Max weight for hidden layer
wmin = 2                   #Min weight for hidden layer
umax = 20                  #Max weight for output layer
umin = 2                   #Min weight for output layer

In [4]:
#Exponential output filter: 2000 samples of exp(-t/tau_s) on [0, 1],
#normalised to unit sum. `ds` is its first (largest) sample.
#NOTE(review): linspace(0, 1, 2000) has a sample spacing of 1/1999, while
#convolve_online lays the kernel down one sample per simulation bin. If a bin
#is meant to be dt long, the effective decay time differs from tau_s -- confirm.
t_filter = np.linspace(0, 1, 2000)
exp_filter = np.exp(-t_filter / tau_s)
exp_filter /= exp_filter.sum()
ds = exp_filter[0]

#Build the three-layer LIF network; each simulate() call covers one block (Tsim).
params = ParamsLIF(n=n, tau=tau, sigma=sigma, c=c)
lif = LIF_3layer(params, tau_s=tau_s, t=Tsim, t_total=tsim, alpha=50)

def xor(x):
    """Return whether the first two entries of ``x`` differ.

    x: any indexable with at least two elements (here a binary input pair).
    Returns the result of ``x[0] != x[1]``, i.e. the exclusive-or for 0/1 inputs.
    """
    first = x[0]
    second = x[1]
    return first != second

def convolve_online(s, h, kernel, t_offset):
    """Add a copy of ``kernel`` into ``s`` for every nonzero entry of ``h``.

    s:        1-D float array, modified in place (the running filtered signal).
    h:        array whose nonzero positions along its first axis mark events.
    kernel:   1-D array added starting at each event position.
    t_offset: index in ``s`` that corresponds to ``h[0]``.

    Kernel copies that run past the end of ``s`` are truncated; events that
    start at or beyond the end of ``s`` are ignored.

    NOTE(review): only the first-axis indices of the nonzero entries are used,
    so a 2-D ``h`` (e.g. shape (1, n_bins)) would be read by row, not by time
    bin -- confirm the shape returned by the simulator.
    """
    n_total = s.shape[0]
    for idx in np.nonzero(h)[0]:
        st = t_offset + idx
        if st >= n_total:
            #Previously this produced a negative length, so kernel[0:ln]
            #became a non-empty slice added to an empty one (ValueError).
            continue
        en = min(n_total, st + kernel.shape[0])
        s[st:en] += kernel[0:en - st]

In [5]:
#Setup neurons
#Learnt vectors (one beta_dim-vector per row) for the output row and the n
#hidden rows, each paired with an accumulator for the pending update.
Vo = np.zeros((1, beta_dim))
dVo = np.zeros_like(Vo)
Vh = np.zeros((n, beta_dim))
dVh = np.zeros_like(Vh)

#The four binary input patterns presented to the network
inputs = [[0, 0], [0, 1], [1, 0], [1, 1]]
nI = len(inputs)

#mean_cost = np.zeros(T)
#mean_output = np.zeros(T)

def _history(arr):
    """Allocate a zeroed (rows, cols, T) array to record `arr` once per learning block."""
    return np.zeros((arr.shape[0], arr.shape[1], T))

#Per-block snapshots of the weights and of the learnt vectors
Whist = _history(lif.W)
Uhist = _history(lif.U)
Vohist = _history(Vo)
Vhhist = _history(Vh)

#Index of the input pattern shown in each block
x_inputs_idx = np.zeros(T)

#Flags used by the learning loop: set after a sample just below threshold,
#cleared after the matching sample just above threshold
bt_o = False
bt_h = [False for _ in range(n)]

#Filtered output signal, one entry per simulation timebin
s_rdd = np.zeros(int(tsim/dt))

## Learning with RDD

In [None]:
def _rdd_step(u_peak, V, dV, below, cost):
    """Apply one block of the RDD update to a single neuron.

    u_peak: maximum over the block of the neuron's `u` trace from lif.simulate.
    V, dV:  1-D views (length beta_dim) into the learnt-vector and accumulator
            arrays; both are modified in place.
    below:  True when a just-below-threshold sample has been accumulated and
            the matching just-above-threshold sample has not yet arrived.
    cost:   scalar cost for this block.

    Returns the updated value of `below`.
    """
    #Only the first component of the learnt vector is updated
    ahat = np.zeros(beta_dim)
    ahat[0] = 1
    if (u_peak > mu - p) and (u_peak < mu):
        #Just below threshold: accumulate, and wait for a supra-threshold sample
        if not below:
            dV += (np.dot(V, ahat) + cost)*ahat
            below = True
    elif (u_peak < mu + p) and (u_peak >= mu):
        #Only do the update when firing...
        if below:
            dV += (np.dot(V, ahat) - cost)*ahat
            V -= eta*dV
            dV[:] = 0
            below = False
    return below

#Guard against Tinput == 0 (tinput shorter than one learning block)
blocks_per_input = max(Tinput, 1)
n_bins = s_rdd.shape[0]

#This is the total time
for j in range(T):
    #Cycle through inputs, holding each for Tinput blocks. Floor division keeps
    #the list index an integer (true division yields a float on Python 3).
    input_idx = (j // blocks_per_input) % nI
    x_input = np.array(inputs[input_idx])
    x_inputs_idx[j] = input_idx

    #Simulate LIF for RDD
    print("t = %d"%j)
    (vh_full, hh, vo_full, ho, uh_full, uo_full) = lif.simulate(x_input)

    #Compute cost function
    t_offset = j*DeltaT
    convolve_online(s_rdd, ho, exp_filter, t_offset)
    #NOTE(review): the config comments describe alpha as the cost of spiking
    #when the output should NOT spike and beta as the reward when it should,
    #yet alpha is selected when xor(x_input) is true. Left unchanged; confirm
    #the intended sign convention.
    dcost = alpha if xor(x_input) else beta
    #Filtered output at the end of this block. On the final block
    #t_offset + DeltaT equals len(s_rdd), so clamp to the last valid bin.
    cost_idx = min(t_offset + DeltaT, n_bins - 1)
    cost = dcost*s_rdd[cost_idx]
    #cost = dcost*np.mean(s_rdd[t_offset:t_offset+DeltaT])

    #Peak of each neuron's `u` trace over the block, shapes (n, 1) and (q, 1)
    uh = np.max(uh_full.reshape((n, 1, DeltaT)), 2)
    uo = np.max(uo_full.reshape((q, 1, DeltaT)), 2)

    #Update the hidden layer
    for k in range(n):
        bt_h[k] = _rdd_step(uh[k,0], Vh[k,:], dVh[k,:], bt_h[k], cost)

        #Update weights according to W for RDD
        #Need to know the relation between mean input, sigma, and firing rate
        #(weight update currently disabled; W is only clipped to [wmin, wmax])
        #lif.W[k,:] += epsilon*Vh[k,0]*mu_mean_xor_hidden(lif.W[k,:], x_input)
        lif.W[k,:] = np.maximum(np.minimum(lif.W[k,:], wmax), wmin)
        Whist[k,:,j] = lif.W[k,:]

    #Update the output layer
    bt_o = _rdd_step(uo[0,0], Vo[0,:], dVo[0,:], bt_o, cost)

    #Update weights according to U for RDD
    #(weight update currently disabled; U is only clipped to [umin, umax])
    #lif.U += epsilon*Vo[0,0]*mu_mean_xor_output(lif.U, lif.W, x_input)
    lif.U = np.maximum(np.minimum(lif.U, umax), umin)
    Uhist[:,:,j] = lif.U
    Vohist[:,:,j] = Vo
    Vhhist[:,:,j] = Vh