# Rumelhart and Todd network (1993)

### Ethan Blackwood
### September 28, 2020

**Goal**: Simulate the Rumelhart & Todd connectionist semantic memory network shown in Rogers & McClelland (2008)
Figure 1, and replicate the results in Figure 3 regarding the similarity of internal item representations over time.

In [1]:
%matplotlib widget
%config IPCompleter.greedy=True

import itertools
import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
from scipy.cluster import hierarchy

import ptree

First, build the tree that contains all our inputs and outputs.

In [2]:
# The data set is tiny, so we can afford double precision everywhere.
# (torch.set_default_dtype replaces the deprecated set_default_tensor_type.)
torch.set_default_dtype(torch.float64)

rumeltree = ptree.from_xml('rumeltree.xml')

# Convert to lists so items, relations, and attributes each have a fixed order
# for the rest of the session.
# NOTE(review): if ptree returns sets here, this order can differ between
# sessions -- confirm, and consider sorting if reproducibility matters.
items = list(rumeltree['items'])
relations = list(rumeltree['relations'])
attributes = list(rumeltree['attributes'])

# Inputs: every (item, relation) pair, each side encoded as a 1 x N one-hot row.
item_vecs = torch.eye(len(items)).split(1)
rel_vecs = torch.eye(len(relations)).split(1)
xs = list(itertools.product(item_vecs, rel_vecs))

# Targets: one multi-hot row over attributes per input pair.
y = torch.zeros((len(xs), len(attributes)))

# Enumerate the same product that built `xs`, so row k of `y` is guaranteed to
# describe xs[k] without any manual index arithmetic.
for row, (item_name, rel_name) in enumerate(itertools.product(items, relations)):
    # get attributes to associate
    my_attrs = rumeltree['nodes'][item_name].get_related_attributes(rel_name)
    attr_inds = np.isin(attributes, list(my_attrs))
    y[row, attr_inds] = 1

ys = y.split(1)

# prepare for MultiLabelMarginLoss: each target row lists the indices of the
# active attributes first and is padded with -1 up to the number of attributes.
y_inds = torch.full((len(ys), len(attributes)), -1, dtype=torch.long)
for i, y_vec in enumerate(ys):
    y_ind = y_vec.nonzero(as_tuple=True)[1]
    y_inds[i, :len(y_ind)] = y_ind

y_inds = y_inds.split(1)

# Show the three vocabularies, then a few randomly chosen training patterns.
for heading, names in (('Items: ', items),
                       ('Relations: ', relations),
                       ('Attributes: ', attributes)):
    print(heading, names)
print()
print('Some examples:')

# Unseeded generator, so the sampled examples differ from run to run.
rng = np.random.default_rng()

for k in rng.choice(len(xs), size=4, replace=False):
    item_onehot, rel_onehot = xs[k]

    # The position of the single 1 in each one-hot row is the name's index.
    item = items[int(item_onehot.argmax())]
    relation = relations[int(rel_onehot.argmax())]

    # Entries of -1 in the target index row are padding, not attributes.
    attrs = [attributes[j] for j in y_inds[k][0] if j >= 0]

    print(f'{item} {relation}: {", ".join(attrs) if len(attrs) > 0 else "<nothing>"}')


Items:  ['sunfish', 'daisy', 'oak', 'pine', 'rose', 'canary', 'robin', 'salmon']
Relations:  ['can', 'is', 'has', 'ISA']
Attributes:  ['plant', 'tree', 'grow', 'wings', 'oak', 'fly', 'pretty', 'canary', 'organism', 'swim', 'move', 'living', 'scales', 'daisy', 'pine', 'leaves', 'flower', 'sing', 'petals', 'robin', 'big', 'skin', 'yellow', 'red', 'bird', 'bark', 'feathers', 'animal', 'roots', 'sunfish', 'branches', 'rose', 'fish', 'gills', 'salmon', 'green']

Some examples:
daisy has: leaves, petals, roots
robin can: grow, fly, move
daisy is: pretty, living, yellow
rose is: pretty, living, red


Now build the network and training function.

In [21]:
class RumelNet(nn.Module):
    """Feed-forward network mapping an (item, relation) pair to attributes.

    The item input feeds a representation layer; that layer and the relation
    input both feed a shared hidden layer, which drives the attribute outputs.
    Every layer uses a sigmoid activation.

    Args:
        n_items: size of the one-hot item input.
        n_relations: size of the one-hot relation input.
        n_attributes: number of attribute output units.
        rep_size: number of units in the item representation layer.
        hidden_size: number of units in the shared hidden layer.
    """

    def __init__(self, n_items, n_relations, n_attributes, rep_size=8, hidden_size=15):
        super().__init__()

        self.n_items = n_items
        self.n_relations = n_relations
        self.n_attributes = n_attributes
        self.rep_size = rep_size
        self.hidden_size = hidden_size

        # define layers
        self.item_to_rep = nn.Linear(n_items, rep_size)
        self.rep_to_hidden = nn.Linear(rep_size, hidden_size)
        self.rel_to_hidden = nn.Linear(n_relations, hidden_size)
        self.hidden_to_attr = nn.Linear(hidden_size, n_attributes)

        # make weights start small: shrink the default initialisation by 100x
        # so every unit starts close to sigmoid(0) = 0.5
        with torch.no_grad():
            for layer in (self.item_to_rep, self.rep_to_hidden,
                          self.rel_to_hidden, self.hidden_to_attr):
                layer.weight /= 100
                layer.bias /= 100

    def forward(self, x):
        """Return attribute activations in (0, 1) for `x`.

        Args:
            x: an `(item, relation)` pair of tensors whose last dimensions are
                `n_items` and `n_relations` respectively.
        """
        # split into item and relation
        item, relation = x

        # flow inputs through network
        rep = torch.sigmoid(self.item_to_rep(item))
        # the hidden layer sums the contributions of both input pathways
        hidden = torch.sigmoid(self.rep_to_hidden(rep) + self.rel_to_hidden(relation))
        attr = torch.sigmoid(self.hidden_to_attr(hidden))
        return attr

In [22]:
def train_network(net, optimizer, num_epochs=200, snap_freq=20):
    """Train `net` one pattern at a time, recording item representations.

    Uses the notebook-level globals `xs` (input pairs), `ys` (multi-hot
    targets), `y_inds` (targets as -1-padded index rows), `item_vecs`
    (one-hot item probes) and `rng` (NumPy generator used for shuffling).

    Args:
        net: network exposing `n_items`, an `item_to_rep` linear layer, and a
            forward pass that accepts an element of `xs`.
        optimizer: optimizer over the parameters of `net`.
        num_epochs: number of passes over all patterns.
        snap_freq: take a representation snapshot every `snap_freq` epochs.

    Returns:
        Array of shape (n_snaps, n_items, n_rep), where
        n_snaps = ceil(num_epochs / snap_freq). Snapshot k is taken *before*
        the training of epoch k * snap_freq, so the last snapshot does not
        include the final epochs of training.

    Raises:
        ValueError: if `snap_freq` is smaller than 1.
    """
    if snap_freq < 1:
        raise ValueError(f'snap_freq must be a positive integer, got {snap_freq}')

    # Snapshots happen at epochs 0, snap_freq, 2*snap_freq, ... < num_epochs,
    # which is ceil(num_epochs / snap_freq) of them. Floor division would
    # under-allocate whenever num_epochs is not a multiple of snap_freq.
    n_snaps = max(0, -(-num_epochs // snap_freq))
    n_items = net.n_items
    n_rep = net.item_to_rep.out_features

    # Holds snapshots of input representation layer after probing with each item
    rep_snapshots = np.empty((n_snaps, n_items, n_rep))

    # NOTE(review): MultiLabelMarginLoss is a margin loss over the output
    # scores, while the accuracy below thresholds the sigmoid outputs at 0.5;
    # the two need not move together -- confirm this is the intended objective.
    criterion = nn.MultiLabelMarginLoss()

    for epoch in range(num_epochs):
        # collect snapshot
        if epoch % snap_freq == 0:
            k_snap = epoch // snap_freq

            with torch.no_grad():
                for k_item, item in enumerate(item_vecs):
                    act = torch.sigmoid(net.item_to_rep(item))
                    # flatten so a 1 x n_rep row fits the n_rep-long slot
                    rep_snapshots[k_snap, k_item, :] = act.reshape(-1).numpy()

        running_loss = 0.0
        running_accuracy = 0.0

        # visit the patterns in a fresh random order every epoch
        for ind in rng.permutation(len(xs)):
            # train
            optimizer.zero_grad()
            outputs = net(xs[ind])
            loss = criterion(outputs, y_inds[ind])
            loss.backward()
            optimizer.step()

            with torch.no_grad():
                running_loss += loss.item()
                # fraction of attribute units on the correct side of 0.5
                accuracy = torch.mean(((outputs > 0.5).to(torch.double) == ys[ind]).to(torch.double))
                running_accuracy += accuracy.item()

        print(f'Epoch {epoch} end: mean loss = {running_loss / len(xs):.3f}, mean accuracy = {running_accuracy / len(xs):.3f}')

    return rep_snapshots

Moment of truth: time to run it.

In [23]:
# Build a network sized to the vocabulary and train it with plain SGD,
# keeping the representation snapshots collected along the way.
net = RumelNet(n_items=len(items),
               n_relations=len(relations),
               n_attributes=len(attributes))
optimizer = torch.optim.SGD(params=net.parameters(), lr=0.1)
rep_snapshots = train_network(net, optimizer)

Epoch 0 end: mean loss = 2.625, mean accuracy = 0.648
Epoch 1 end: mean loss = 2.516, mean accuracy = 0.674
Epoch 2 end: mean loss = 2.427, mean accuracy = 0.712
Epoch 3 end: mean loss = 2.356, mean accuracy = 0.706
Epoch 4 end: mean loss = 2.298, mean accuracy = 0.706
Epoch 5 end: mean loss = 2.248, mean accuracy = 0.706
Epoch 6 end: mean loss = 2.205, mean accuracy = 0.706
Epoch 7 end: mean loss = 2.167, mean accuracy = 0.706
Epoch 8 end: mean loss = 2.134, mean accuracy = 0.706
Epoch 9 end: mean loss = 2.106, mean accuracy = 0.706
Epoch 10 end: mean loss = 2.081, mean accuracy = 0.706
Epoch 11 end: mean loss = 2.060, mean accuracy = 0.706
Epoch 12 end: mean loss = 2.041, mean accuracy = 0.706
Epoch 13 end: mean loss = 2.025, mean accuracy = 0.706
Epoch 14 end: mean loss = 2.010, mean accuracy = 0.706
Epoch 15 end: mean loss = 1.998, mean accuracy = 0.706
Epoch 16 end: mean loss = 1.987, mean accuracy = 0.706
Epoch 17 end: mean loss = 1.978, mean accuracy = 0.706
Epoch 18 end: mean l

Epoch 155 end: mean loss = 1.882, mean accuracy = 0.706
Epoch 156 end: mean loss = 1.882, mean accuracy = 0.706
Epoch 157 end: mean loss = 1.882, mean accuracy = 0.706
Epoch 158 end: mean loss = 1.882, mean accuracy = 0.706
Epoch 159 end: mean loss = 1.882, mean accuracy = 0.706
Epoch 160 end: mean loss = 1.882, mean accuracy = 0.706
Epoch 161 end: mean loss = 1.882, mean accuracy = 0.706
Epoch 162 end: mean loss = 1.882, mean accuracy = 0.706
Epoch 163 end: mean loss = 1.882, mean accuracy = 0.706
Epoch 164 end: mean loss = 1.881, mean accuracy = 0.706
Epoch 165 end: mean loss = 1.881, mean accuracy = 0.706
Epoch 166 end: mean loss = 1.881, mean accuracy = 0.706
Epoch 167 end: mean loss = 1.881, mean accuracy = 0.706
Epoch 168 end: mean loss = 1.881, mean accuracy = 0.706
Epoch 169 end: mean loss = 1.881, mean accuracy = 0.706
Epoch 170 end: mean loss = 1.881, mean accuracy = 0.706
Epoch 171 end: mean loss = 1.881, mean accuracy = 0.706
Epoch 172 end: mean loss = 1.881, mean accuracy 

In [24]:
# Cluster the items by their representation-layer activations in the last
# recorded snapshot. train_network takes each snapshot before that epoch's
# training, so this is not the fully trained state.
last_snapshot = rep_snapshots[-1]
z = hierarchy.linkage(last_snapshot, method='median')

# Draw the resulting tree on a new figure, labelling leaves with item names.
plt.figure()
hierarchy.dendrogram(z, labels=items)
plt.show()

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [25]:
# Test trained net: compare its output with the target for a single probe.
ind = 31
probe = xs[ind]

# Turn each one-hot input row into a boolean mask and look up the name it marks.
item_vec, rel_vec = (t.squeeze().numpy() == 1 for t in probe)
item = np.array(items)[item_vec][0]
relation = np.array(relations)[rel_vec][0]

# No gradients are needed for a forward pass used only for plotting.
with torch.no_grad():
    actual = net(probe).squeeze().numpy()
expected = ys[ind].squeeze().numpy()

# Paired horizontal bars per attribute: positive height puts the network's
# output on one side of the tick, negative height puts the target on the other.
positions = range(len(attributes))
fig, ax = plt.subplots(figsize=(8, 15))
h1 = ax.barh(positions, actual,
             align='edge', height=0.4, tick_label=attributes)
h2 = ax.barh(positions, expected,
             align='edge', height=-0.4, tick_label=attributes)
ax.legend([h1, h2], ['Actual', 'Expected'])
ax.set_title(f'{item} {relation}...', size='x-large')
fig.show()

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …