# Primary Value and Learned Value (PVLV) Learning Algorithm

In [None]:
import dotdot
import leabra
import graphs

In [None]:
# Lower and upper activity thresholds used by the PV_filter test in trial().
theta_min = 0.20
theta_max = 0.80

### Layers

In [None]:
# layers whose activities are set directly (see trial())
timing   = leabra.Layer(5)
stimulus = leabra.Layer(1)
PVe      = leabra.Layer(1)

# single-unit layers that all share the same k=1 layer spec
lspec = leabra.LayerSpec(k=1)
PVi, LVe, LVi, DA = (leabra.Layer(1, spec=lspec) for _ in range(4))

### Connections

In [None]:
# One connection spec per target layer (PVi, LVe, LVi). All three request a
# 'full' projection and the 'delta' learning rule; LVe and LVi differ only in
# the learning rate passed. PVi_spec passes no lrate, so whatever default
# leabra.ConnectionSpec defines applies until the FIXME below is resolved.
PVi_spec = leabra.ConnectionSpec(proj='full', lrule='delta') # FIXME: find lrate
LVe_spec = leabra.ConnectionSpec(proj='full', lrule='delta', lrate=0.05)
LVi_spec = leabra.ConnectionSpec(proj='full', lrule='delta', lrate=0.001)

# stimulus -> PVi, LVe, LVi
# (each connection reuses the spec of its target layer)
conn_stim_PVi = leabra.Connection(stimulus, PVi, spec=PVi_spec)
conn_stim_LVe = leabra.Connection(stimulus, LVe, spec=LVe_spec)
conn_stim_LVi = leabra.Connection(stimulus, LVi, spec=LVi_spec)

# timing -> PVi, LVe, LVi
# (same three specs, shared with the stimulus connections above)
conn_time_PVi = leabra.Connection(timing,   PVi, spec=PVi_spec)
conn_time_LVe = leabra.Connection(timing,   LVe, spec=LVe_spec)
conn_time_LVi = leabra.Connection(timing,   LVi, spec=LVi_spec)

# "magic" connections: these are NOT leabra.Connection objects. Their effect
# is applied by hand inside trial() (clamping, learning gate, DA computation):
# * PVe -> PVi
# * PVi -> LVe
# * PVi -> LVi
# * PVi, LVe, LVi -> DA


### Network

In [None]:
# Assemble the network from the three layers PVi, LVe and LVi and the six
# stimulus/timing connections. The timing, stimulus, PVe and DA layers are not
# listed here; trial() sets their activities directly.
# NOTE(review): presumably the library reaches the sending layers through the
# connections themselves -- confirm that omitting them from `layers` is intended.
network = leabra.Network(layers=[PVi, LVe, LVi], 
                         connections=[conn_stim_PVi, conn_stim_LVe, conn_stim_LVi,
                                      conn_time_PVi, conn_time_LVe, conn_time_LVi])

### Inputs

In [None]:
def trial():
    """Run one five-step trial and return the values logged at each step.

    For every step the function activates a single timing unit, presents
    that step's stimulus and reward, settles the network and closes the
    minus phase, then sets PVe, PVi, LVe and LVi to the reward value and
    closes the plus phase. The PVi connections always learn; the LVe/LVi
    connections learn only when the PV filter is on. Finally the DA layer
    is set from the minus-phase differences.

    Returns:
        dict mapping 'PVe', 'PVi', 'LVe', 'LVi' and 'DA' to lists holding
        one value per step: the unit's `act_m` for the first four keys and
        the unit's `act` for 'DA'.
    """
    record = {name: [] for name in ('PVe', 'PVi', 'LVe', 'LVi', 'DA')}

    # one entry per settling period (the original note says 15 cycles each;
    # the cycle count is decided by the library and is not visible here)
    stimulus_steps = [[0.5], [1.0], [1.0], [1.0], [0.5]]  # stimulus inputs
    reward_steps = [[0.5], [0.5], [0.5], [1.0], [0.5]]    # reward inputs

    for step, (stim, reward) in enumerate(zip(stimulus_steps, reward_steps)):
        # inputs: clear the timing layer, then switch on this step's unit
        timing.set_activities([0.0] * 5)
        timing.units[step].act = 1.0
        stimulus.set_activities(stim)
        PVe.set_activities(reward)

        # minus phase
        network.settle()
        network.end_minus_phase()
        # PVe is not one of the network's layers, so its minus-phase value
        # is written by hand
        PVe.units[0].act_m = reward[0]

        # plus phase: every value layer is set to the reward
        for layer in (PVe, PVi, LVe, LVi):
            layer.set_activities(reward)
        network.end_plus_phase()

        # PV filter (eq. A9): on when either activity leaves [theta_min, theta_max]
        # NOTE(review): PVi was set to the reward just above, so both values
        # read here may be identical -- confirm that the plus-phase (rather
        # than minus-phase) PVi activity is what the filter should see.
        pv_values = (PVi.activities[0], PVe.activities[0])
        pv_filter = any(v < theta_min or v > theta_max for v in pv_values)

        # learning: PVi connections always, LV connections only when filtered in
        learners = [conn_stim_PVi, conn_time_PVi]
        if pv_filter:
            learners += [conn_stim_LVe, conn_time_LVe,
                         conn_stim_LVi, conn_time_LVi]
        for conn in learners:
            conn.learn()

        # dopamine (eq. A14): LV difference, plus the PV difference when the
        # filter is on
        lv_delta = LVe.units[0].act_m - LVi.units[0].act_m
        pv_delta = PVe.units[0].act_m - PVi.units[0].act_m
        gated_pv = pv_delta if pv_filter else 0.0
        DA.set_activities([lv_delta + gated_pv])

        # logging
        for name, layer in (('PVe', PVe), ('PVi', PVi),
                            ('LVe', LVe), ('LVi', LVi)):
            record[name].append(layer.units[0].act_m)
        record['DA'].append(DA.units[0].act)

    return record

In [None]:
# Run 50 trials in sequence. `logs` is rebound on every iteration, so after
# the loop it holds only the logs returned by the last trial.
for i in range(50):
    logs = trial()

In [None]:
# Pass the current `logs` dict to the project's graphs helper
# (presumably plots the logged traces -- confirm in the graphs module).
graphs.dopamine(logs)