In [15]:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Implements the supervised gradient-based learning algorithm specified by
Reyes-Galaviz et al. (2017).

"""

import numpy as np

# Layer sizes. They fix the weight shapes below: r is
# (input_count, hidden_count) and w is (hidden_count, output_count).
input_count = 8 
hidden_count = 5 
output_count = 1

# Step size: both weight gradients are multiplied by eta before being
# subtracted from r and w in the training loop.
eta = 0.8

# Training records. np.nan_to_num replaces NaN entries with 0 (and infinities
# with large finite values), so the training loop only sees finite numbers.
# The loop reads each record u as u[:5, :] (network input) and u[5:6, 1:2]
# (the 0/1 value that selects the branch in Q and dQ_ds).
# NOTE(review): the exact on-disk layout of record_pairs.npy is not visible
# here -- confirm it matches that indexing.
data = np.load("record_pairs.npy")
data2 = np.nan_to_num(data)

# Quality thresholds
# NOTE(review): these are passed to Q and dQ_ds, but neither function body
# reads them.
tau_1 = 0.4
tau_2 = 0.7

# The MLP model has 8 inputs and 5 nodes in the hidden layer. It follows 
# the Model 2 setup presented by Reyes-Galaviz et al. Hidden nodes aggregate
# values calculated by running each of 8 comparison functions over 5 data
# fields (date, ISBN, title, creator, contributor)
# r holds the input-to-hidden weights and w the hidden-to-output weights.
# Both are drawn from a standard normal distribution; no seed is set in the
# visible code, so initial weights differ between runs.
r = np.random.randn(input_count, hidden_count)
w = np.random.randn(hidden_count, output_count)

# Sigmoid activation function
def activate(a):
    """Return the logistic sigmoid 1 / (1 + exp(-a)) of ``a``, element-wise.

    The value is built from exp(-|a|), which never exceeds 1, so large
    negative inputs cannot overflow ``np.exp``. The direct formula does
    overflow there and emits a RuntimeWarning before settling on 0.

    Parameters
    ----------
    a : scalar or array_like
        Net input(s) to squash into the interval [0, 1].

    Returns
    -------
    numpy scalar or ndarray
        Sigmoid of ``a``; a scalar for scalar input, otherwise an array with
        the same shape as ``a``.
    """
    a = np.asarray(a)
    # exp(-|a|) lies in [0, 1], so both branches below stay finite.
    decay = np.exp(-np.abs(a))
    # a >= 0: 1 / (1 + e^-a);  a < 0: e^a / (1 + e^a) -- the same function.
    result = np.where(a >= 0, 1 / (1 + decay), decay / (1 + decay))
    # Indexing with () unwraps a 0-d result to a scalar and leaves arrays as is.
    return result[()]

# Performance index Q and its partial derivatives for the output layer
def Q_tau_1(s):
    """Return the sum of the squared output activations in ``s``.

    Q uses this value when the record's label entry equals 0, so the index
    grows as the outputs move away from 0.
    """
    squared = s * s
    return np.sum(squared)

def Q_tau_2(s):
    """Return the sum of the squared distances of the outputs ``s`` from 1.

    Q uses this value when the record's label entry is not 0, so the index
    grows as the outputs move away from 1.
    """
    shortfall = 1 - s
    return np.sum(shortfall * shortfall)

def Q(d, s, tau_1, tau_2):
    """Evaluate the performance index for one record.

    Parameters
    ----------
    d : ndarray
        Record whose entry at row 5, column 1 selects the branch
        (presumably the match / non-match label -- confirm against the data).
    s : ndarray
        Output activation(s) of the network for this record.
    tau_1, tau_2 : float
        Quality thresholds; accepted for interface compatibility but not
        read by this function.

    Returns
    -------
    Q_tau_1(s) when the label entry equals 0, otherwise Q_tau_2(s).
    """
    # The slice keeps a 1x1 array; its comparison is used as a single truth value.
    label = d[5:6, 1:2]
    return Q_tau_1(s) if label == 0 else Q_tau_2(s)

def dQ_ds(d, s, tau_1, tau_2):
    """Return the derivative of the performance index with respect to ``s``.

    Parameters
    ----------
    d : ndarray
        Record whose entry at row 5, column 1 selects the branch
        (presumably the match / non-match label -- confirm against the data).
    s : ndarray
        Output activation(s) of the network for this record.
    tau_1, tau_2 : float
        Quality thresholds; accepted for interface compatibility but not
        read by this function.

    Returns
    -------
    ``2 * s`` when the label entry equals 0 (derivative of sum(s**2)),
    otherwise ``2 * (s - 1)`` (derivative of sum((1 - s)**2)).
    """
    # The slice keeps a 1x1 array; its comparison is used as a single truth value.
    label = d[5:6, 1:2]
    if label != 0:
        return 2 * (s - 1)
    return 2 * s

def ds_dl(s):
    """Return ``s * (1 - s)``: the sigmoid's derivative with respect to its
    net input, expressed through the activation ``s`` itself."""
    complement = 1 - s
    return s * complement

# Partial derivatives for hidden layer
def dy_dp(y):
    """Return ``y * (1 - y)``: the hidden-layer sigmoid's derivative with
    respect to its net input, expressed through the activation ``y``."""
    return (1 - y) * y

# Training passes over the data set, updating r and w after every record.
# NOTE(review): this loop was previously described as "150 iterations", but
# the bound is 1, i.e. a single pass -- confirm the intended epoch count.
for i in range(1):

    for u in data2:

        # Net input of hidden layer (rows 0-4 of the record are the input)
        p = np.dot(u[:5, :], r)

        # Activation of hidden layer
        y = activate(p)

        # Net input of output node
        l = np.dot(y, w)

        # Activation of output node
        s = activate(l)

        # Chain of partial derivatives for Model 2.
        # dQ/dl feeds both weight gradients, so it is evaluated only once.
        dQ_dl = dQ_ds(u, s, tau_1, tau_2) * ds_dl(s)
        dl_dw = y.T
        dl_dy = w.T
        dp_dr = u[:5, :].T

        # Back propagate. Both gradients are formed before either weight
        # matrix is modified, because dl_dy is a view of w and the updates
        # below happen in place.
        dQ_dw = np.dot(dl_dw, dQ_dl)
        dQ_dr = np.dot(dp_dr, dQ_dl * dl_dy * dy_dp(y))

        r -= dQ_dr * eta
        w -= dQ_dw * eta

        # Report the output activation next to the record's label entry.
        print(s, u[5:6, 1:2])
        



1.60191980112 [[ 0.]]
1.00998991768 [[ 0.]]
0.789575576728 [[ 0.]]
0.578850477806 [[ 0.]]
0.8033833004 [[ 1.]]
1.66614307161 [[ 1.]]
2.40748805633 [[ 1.]]
3.260725509 [[ 1.]]
3.85033363115 [[ 1.]]
4.10632056523 [[ 1.]]
4.22641119026 [[ 1.]]
4.36597109565 [[ 1.]]
4.4560706356 [[ 1.]]
4.49159460916 [[ 1.]]
4.48259965215 [[ 1.]]
4.47882329673 [[ 0.]]
4.00567618361 [[ 0.]]
3.01688334733 [[ 0.]]
1.56228608563 [[ 0.]]
1.14833726837 [[ 0.]]
0.792424436446 [[ 0.]]
0.711624167528 [[ 0.]]
0.597248416438 [[ 0.]]
0.651585303081 [[ 0.]]
0.472999538125 [[ 0.]]
0.562503039553 [[ 0.]]
0.509911377478 [[ 0.]]
0.35714433494 [[ 0.]]
0.332073830146 [[ 0.]]
0.332938429057 [[ 0.]]
0.301794734218 [[ 0.]]
0.292890086938 [[ 0.]]
0.279715439383 [[ 0.]]
0.367146477257 [[ 0.]]
0.268583595408 [[ 0.]]
0.343918160311 [[ 0.]]
0.31027575835 [[ 0.]]
0.221084444639 [[ 0.]]
0.223333465777 [[ 0.]]
0.219513647042 [[ 0.]]
0.207713997709 [[ 0.]]
0.266435135929 [[ 0.]]
0.246584274011 [[ 0.]]
0.187970210639 [[ 0.]]
0.2341071562