In [8]:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Implements the supervised gradient-based learning algorithm specified by
Reyes-Galaviz et al. (2017).

"""

import numpy as np

# Layer sizes of the network: inputs -> hidden nodes -> output node.
input_count, hidden_count, output_count = 8, 5, 1

# Step size applied to every gradient update.
eta = 0.8

# Record pairs are read from disk; `data2` is a copy in which NaNs are
# replaced by zeros (np.nan_to_num also maps infinities to large finite
# values), so the arithmetic below never sees NaN.
data = np.load("record_pairs.npy")
data2 = np.nan_to_num(data)

# Quality thresholds
tau_1, tau_2 = 0.4, 0.7

# Model 2 setup presented by Reyes-Galaviz et al.: an MLP with 8 inputs and
# 5 hidden nodes. Hidden nodes aggregate the values obtained by running each
# of 8 comparison functions over 5 data fields (date, ISBN, title, creator,
# contributor).
# `r` holds the input-to-hidden weights, `w` the hidden-to-output weights;
# both start as standard-normal random draws (r is drawn first).
r = np.random.randn(input_count, hidden_count)
w = np.random.randn(hidden_count, output_count)

# Sigmoid activation function
def activate(a):
    """Apply the logistic sigmoid ``1 / (1 + exp(-a))`` element-wise.

    The value is derived from ``exp(-|a|)``, which never exceeds 1, so
    large-magnitude negative inputs cannot overflow ``np.exp`` (the naive
    form evaluates ``exp(+large)`` and triggers a RuntimeWarning).

    Parameters
    ----------
    a : scalar or array_like
        Net input(s) to squash.

    Returns
    -------
    numpy scalar or ndarray
        Sigmoid of ``a``; arrays keep their shape, scalars stay scalars.
    """
    a = np.asarray(a)
    decay = np.exp(-np.abs(a))  # always within [0, 1]: cannot overflow
    # a >= 0: 1 / (1 + e^-a);  a < 0: e^a / (1 + e^a) -- algebraically equal.
    result = np.where(a >= 0, 1 / (1 + decay), decay / (1 + decay))
    # Indexing with an empty tuple unwraps 0-d results to a scalar and
    # leaves n-d arrays untouched.
    return result[()]

# Cost function Q and its partial derivatives for the output layer
def Q_tau_1(s):
    """Return the sum of the squared entries of ``s``.

    ``Q`` uses this cost when the record pair's label cell equals 0.
    """
    squared = s ** 2
    return np.sum(squared)

def Q_tau_2(s):
    """Return the sum of the squared distances of the entries of ``s`` from 1.

    ``Q`` uses this cost when the record pair's label cell is not 0.
    """
    shortfall = 1 - s
    return np.sum(shortfall ** 2)

def Q(d, s, tau_1, tau_2):
    """Return the cost of network output ``s`` for record pair ``d``.

    The pair's label is the single cell at row 5, column 1 of ``d``.
    A label of 0 yields ``sum(s**2)``; any other label yields
    ``sum((1 - s)**2)``.

    Parameters
    ----------
    d : ndarray
        2-D record-pair array that contains the label at ``d[5, 1]``.
    s : ndarray or scalar
        Activation(s) of the output node.
    tau_1, tau_2 : float
        Quality thresholds. Kept in the signature for callers, but not used
        by this cost.
        NOTE(review): confirm whether the thresholds should enter the cost.

    Raises
    ------
    IndexError
        If ``d`` has no ``[5, 1]`` cell. Reading the label as a scalar makes
        a malformed record fail loudly; the former ``d[5:6, 1:2] == 0`` test
        produced an empty array in that case, whose truth value is not a
        reliable branch condition.
    """
    label = d[5, 1]
    if label == 0:
        return Q_tau_1(s)
    return Q_tau_2(s)

def dQ_ds(d, s, tau_1, tau_2):
    """Return the derivative of the cost ``Q`` with respect to the output ``s``.

    The pair's label is the single cell at row 5, column 1 of ``d``.
    A label of 0 gives ``2 * s`` (derivative of ``s**2``); any other label
    gives ``2 * (s - 1)`` (derivative of ``(1 - s)**2``).

    Parameters
    ----------
    d : ndarray
        2-D record-pair array that contains the label at ``d[5, 1]``.
    s : ndarray or scalar
        Activation(s) of the output node.
    tau_1, tau_2 : float
        Quality thresholds. Kept in the signature for callers, but not used
        by this derivative.
        NOTE(review): confirm whether the thresholds should enter the cost.

    Raises
    ------
    IndexError
        If ``d`` has no ``[5, 1]`` cell. Reading the label as a scalar makes
        a malformed record fail loudly; the former ``d[5:6, 1:2] == 0`` test
        produced an empty array in that case, whose truth value is not a
        reliable branch condition.
    """
    label = d[5, 1]
    if label == 0:
        return 2 * s
    return 2 * (s - 1)

def ds_dl(s):
    """Return ``s * (1 - s)``: the sigmoid's slope written in terms of its output ``s``."""
    complement = 1 - s
    return s * complement

# Partial derivatives for hidden layer
def dy_dp(y):
    """Return ``y * (1 - y)``: the slope of the hidden-layer sigmoid at activation ``y``."""
    one_minus_y = 1 - y
    return one_minus_y * y

# Training epochs.
# NOTE(review): this comment used to say "150 iterations", but the loop makes
# a single pass over the data; raise the range to train for longer.
for i in range(1):

    # Online (per-sample) gradient descent: weights change after every record.
    for u in data2:

        # Rows 0-4 of a record feed the network; the label sits at u[5, 1].
        fields = u[:5, :]

        # Net input of hidden layer
        p = np.dot(fields, r)

        # Activation of hidden layer
        y = activate(p)

        # Net input of output node
        l = np.dot(y, w)

        # Activation of output node
        s = activate(l)

        # Chain of partial derivatives for Model 2.
        # dQ/dl = dQ/ds * ds/dl is shared by both weight gradients, so it is
        # evaluated once per sample.
        dQ_dl = dQ_ds(u, s, tau_1, tau_2) * ds_dl(s)
        dl_dw = y.T
        dl_dy = w.T
        dp_dr = fields.T

        # Back propagate
        # dQ/dw = dl/dw . dQ/dl
        dQ_dw = np.dot(dl_dw, dQ_dl)
        # dQ/dr = dp/dr . (dQ/dl * dl/dy * dy/dp); uses w from before its update
        dQ_dr = np.dot(dp_dr, dQ_dl * dl_dy * dy_dp(y))

        r -= dQ_dr * eta
        w -= dQ_dw * eta

        print(s, u[5:6, 1:2])
        



[[ 0.5177212 ]
 [ 0.53337066]
 [ 0.5301632 ]
 [ 0.52826543]
 [ 0.52897289]] [[ 0.]]
[[ 0.51389611]
 [ 0.53000428]
 [ 0.51846994]
 [ 0.53000428]
 [ 0.53000428]] [[ 0.]]
[[ 0.51204562]
 [ 0.52754106]
 [ 0.52233601]
 [ 0.52754106]
 [ 0.52754106]] [[ 0.]]
[[ 0.52548058]
 [ 0.52548058]
 [ 0.52129338]
 [ 0.52548058]
 [ 0.52548058]] [[ 0.]]
[[ 0.50930798]
 [ 0.50930798]
 [ 0.50936388]
 [ 0.52350625]
 [ 0.52350625]] [[ 1.]]
[[ 0.52464205]
 [ 0.52464205]
 [ 0.52434609]
 [ 0.54017202]
 [ 0.54017202]] [[ 1.]]
[[ 0.60962141]
 [ 0.60962141]
 [ 0.5998908 ]
 [ 0.59316151]
 [ 0.59316151]] [[ 1.]]
[[ 0.72145268]
 [ 0.72145268]
 [ 0.72056301]
 [ 0.68669116]
 [ 0.68669116]] [[ 1.]]
[[ 0.72075893]
 [ 0.72359086]
 [ 0.72285622]
 [ 0.69243261]
 [ 0.69243261]] [[ 1.]]
[[ 0.72250614]
 [ 0.72494375]
 [ 0.72431542]
 [ 0.6965252 ]
 [ 0.6965252 ]] [[ 1.]]
[[ 0.72585588]
 [ 0.72585588]
 [ 0.72530312]
 [ 0.69956343]
 [ 0.69956343]] [[ 1.]]
[[ 0.72455411]
 [ 0.70189162]
 [ 0.72525863]
 [ 0.72629187]
 [ 0.70189162]] 