In [6]:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Implements the supervised gradient-based learning algorithm specified by
Reyes-Galaviz et al. (2017).

"""

import numpy as np

# Layer sizes of the network: inputs, hidden nodes, output nodes
input_count = 8 
hidden_count = 5 
output_count = 1

# Step size: the training loop subtracts eta times each gradient from r and w
eta = 0.8

# Training records. np.nan_to_num replaces NaN with 0 and infinities with very
# large finite numbers -- presumably so missing comparison values do not
# propagate NaN through the network (TODO confirm intent).
# Each record is later indexed as u[:5, :] (network input) and u[5:6, 1:2]
# (compared against 0 to choose the loss branch).
data = np.load("record_pairs.npy")
data2 = np.nan_to_num(data)

# Quality thresholds
# NOTE(review): these are passed to Q() and dQ_ds(), but neither function
# reads them in this file.
tau_1 = 0.4
tau_2 = 0.7

# The MLP model has 8 inputs and 5 nodes in the hidden layer. It follows 
# the Model 2 setup presented by Reyes-Galaviz et al. Hidden nodes aggregate
# values calculated by running each of 8 comparison functions over 5 data
# fields (date, ISBN, title, creator, contributor)
# r holds the input-to-hidden weights and w the hidden-to-output weights;
# both start as standard-normal samples (unseeded, so runs are not repeatable).
r = np.random.randn(input_count, hidden_count)
w = np.random.randn(hidden_count, output_count)

# Sigmoid activation function
def activate(a):
    """Apply the logistic sigmoid ``1 / (1 + exp(-a))`` element-wise.

    The exponential is only ever evaluated at ``-|a|``, which lies in
    ``(0, 1]``, so it cannot overflow. The naive form computes ``exp(-a)``
    directly and overflows (RuntimeWarning, or FloatingPointError under
    ``np.errstate(over="raise")``) once ``a`` is a large negative number.

    Parameters
    ----------
    a : scalar or array_like
        Net input(s) of a layer.

    Returns
    -------
    NumPy scalar (for scalar input) or ndarray of the same shape as ``a``,
    with every value in ``[0, 1]``.
    """
    a = np.asarray(a)
    decay = np.exp(-np.abs(a))
    # For a >= 0 this is the textbook form; for a < 0 it is the algebraically
    # equal exp(a) / (1 + exp(a)), which avoids the huge intermediate.
    out = np.where(a >= 0, 1 / (1 + decay), decay / (1 + decay))
    # [()] unwraps a 0-d result to a scalar and leaves n-d arrays untouched.
    return out[()]

# Performance index Q and its partial derivatives for the output layer
def Q_tau_1(s):
    """Return the sum of the squared entries of ``s``.

    ``Q`` uses this term when the record's label entry equals 0, so the
    value grows as the outputs move away from 0.
    """
    squared = s * s
    return np.sum(squared)

def Q_tau_2(s):
    """Return the sum of the squared entries of ``1 - s``.

    ``Q`` uses this term when the record's label entry is not 0, so the
    value grows as the outputs move away from 1.
    """
    shortfall = 1 - s
    return np.sum(shortfall * shortfall)

def Q(d, s, tau_1, tau_2):
    """Evaluate the performance index for one record.

    The entry ``d[5:6, 1:2]`` selects the term: ``Q_tau_1(s)`` when it
    equals 0, ``Q_tau_2(s)`` otherwise.

    ``tau_1`` and ``tau_2`` are accepted but not read by this function.
    """
    label_is_zero = d[5:6, 1:2] == 0
    return Q_tau_1(s) if label_is_zero else Q_tau_2(s)

def dQ_ds(d, s, tau_1, tau_2):
    """Return the derivative of the performance index with respect to ``s``.

    The entry ``d[5:6, 1:2]`` selects the branch: ``2 * s`` when it equals
    0, ``2 * (s - 1)`` otherwise.

    ``tau_1`` and ``tau_2`` are accepted but not read by this function.
    """
    label_is_zero = d[5:6, 1:2] == 0
    if not label_is_zero:
        return 2 * (s - 1)
    return 2 * s

def ds_dl(s):
    """Return ``s * (1 - s)``, the sigmoid derivative written in terms of
    the sigmoid's own output ``s``."""
    complement = 1 - s
    return s * complement

# Partial derivatives for hidden layer
def dy_dp(y):
    """Return ``y * (1 - y)``, the sigmoid derivative written in terms of
    the hidden activations ``y``."""
    complement = 1 - y
    return y * complement

# Number of passes over the training data.
# NOTE(review): the comment that used to sit here promised 150 iterations
# while the loop ran exactly once. The single pass is kept so the output
# does not change; raise epoch_count to train for longer.
epoch_count = 1

for i in range(epoch_count):

    for u in data2:

        # Rows 0-4 of a record are fed to the network; the entry at
        # u[5:6, 1:2] is what dQ_ds compares against 0.
        features = u[:5, :]

        # Net input of hidden layer
        p = np.dot(features, r)

        # Activation of hidden layer
        y = activate(p)

        # Net input of output node
        l = np.dot(y, w)

        # Activation of output node
        s = activate(l)

        # Chain of partial derivatives for Model 2.
        # dQ_dl is computed once and reused. It used to be read without ever
        # being assigned, which is a NameError on a fresh interpreter.
        # The factors dQ_ds(...), ds_dl(...) and dy_dp(...) are not stored
        # under those names because that would shadow the functions.
        dQ_dl = dQ_ds(u, s, tau_1, tau_2) * ds_dl(s)
        dl_dw = y.T
        dl_dy = w.T
        dQ_dy = dQ_dl * dl_dy
        dp_dr = features.T

        # Back propagate
        dQ_dw = np.dot(dl_dw, dQ_dl)
        dQ_dr = np.dot(dp_dr, dQ_dy * dy_dp(y))

        # Both gradients are computed before either weight matrix changes;
        # dl_dy is a view of w, so w must not be updated any earlier.
        r -= dQ_dr * eta
        w -= dQ_dw * eta

        print(np.sum(ds_dl(s)), u[5:6, 1:2])
        



0.96042710569 [[ 0.]]
0.689329399014 [[ 0.]]
0.592949005904 [[ 0.]]
0.595730561199 [[ 0.]]
0.325755033983 [[ 1.]]
0.607501529846 [[ 1.]]
1.18359586965 [[ 1.]]
0.754880427307 [[ 1.]]
0.604133281499 [[ 1.]]
0.513369905241 [[ 1.]]
0.438007308491 [[ 1.]]
0.423798010243 [[ 1.]]
0.331971913109 [[ 1.]]
0.317166017291 [[ 1.]]
0.327809259275 [[ 1.]]
0.377954840389 [[ 0.]]
0.664247786671 [[ 0.]]
1.17694952985 [[ 0.]]
0.908083708273 [[ 0.]]
0.673624501148 [[ 0.]]
0.592987021345 [[ 0.]]
0.508452342554 [[ 0.]]
0.470070757899 [[ 0.]]
0.434067486949 [[ 0.]]
0.401743957887 [[ 0.]]
0.367233333349 [[ 0.]]
0.354152038701 [[ 0.]]
0.335977176739 [[ 0.]]
0.327755905816 [[ 0.]]
0.294741751495 [[ 0.]]
0.299933982464 [[ 0.]]
0.286423825623 [[ 0.]]
0.278591874664 [[ 0.]]
0.260694204953 [[ 0.]]
0.244352718365 [[ 0.]]
0.237181073596 [[ 0.]]
0.247524940715 [[ 0.]]
0.244910952857 [[ 0.]]
0.222926101175 [[ 0.]]
0.215340319357 [[ 0.]]
0.22271839947 [[ 0.]]
0.222153811647 [[ 0.]]
0.220540274431 [[ 0.]]
0.204539181569 