# Rumelhart and Todd network (1993)

### Ethan Blackwood
### September 28, 2020

**Goal**: Simulate the Rumelhart & Todd connectionist semantic memory network shown in Rogers & McClelland (2008)
Figure 1, and replicate the results in Figure 3 regarding the similarity of internal item representations over time.

In [2]:
%matplotlib widget
%config IPCompleter.greedy=True

import numpy as np
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors
import torch
import torch.nn as nn
from scipy.cluster import hierarchy
from sklearn.manifold import MDS, TSNE
from sklearn.decomposition import PCA

import ptree

First, build the tree that contains all our inputs and outputs.

In [3]:
# can afford to use doubles for this
torch.set_default_tensor_type(torch.DoubleTensor)

rumeltree = ptree.from_xml('rumeltree.xml')

# Convert to lists so we have a canonical order for items, relations, and attributes.
items = list(rumeltree['items'])
relations = list(rumeltree['relations'])
attributes = list(rumeltree['attributes'])

# Now make our inputs and outputs.
item_vecs = torch.eye(len(items)).split(1)
rel_vecs = torch.eye(len(relations)).split(1)
xs = torch.cat([torch.cat((item, rel), dim=1)
                  for item in item_vecs for rel in rel_vecs], dim=0)

x_item = xs[:, :len(items)]
x_rel = xs[:, len(items):]

y = torch.zeros((len(xs), len(attributes)))

for kI in range(len(items)):
    for kR in range(len(relations)):

        # get attributes to associate
        my_attrs = rumeltree['nodes'][items[kI]].get_related_attributes(relations[kR])
        attr_inds = np.isin(attributes, list(my_attrs))
        y[kI*len(relations) + kR, attr_inds] = 1

print('Items: ', items)
print('Relations: ', relations)
print('Attributes: ', attributes)
print()
print('Some examples:')
rng = np.random.default_rng()

print('x_item shape: ', x_item.shape)
print('x_rel shape: ', x_rel.shape)
print('y shape: ', y.shape)

for k in rng.choice(len(xs), size=4, replace=False):
    item_hot = x_item[k].numpy().nonzero()[0]
    item = items[item_hot[0]]
    rel_hot = x_rel[k].numpy().nonzero()[0]
    relation = relations[rel_hot[0]]
    
    attrs_hot = y[k].numpy().nonzero()[0]
    attrs = [attributes[i] for i in attrs_hot]
    
    print(f'{item} {relation}: {", ".join(attrs) if len(attrs) > 0 else "<nothing>"}')

Items:  ['oak', 'rose', 'robin', 'sunfish', 'pine', 'canary', 'salmon', 'daisy']
Relations:  ['is', 'has', 'ISA', 'can']
Attributes:  ['flower', 'move', 'branches', 'wings', 'fish', 'tree', 'yellow', 'grow', 'organism', 'scales', 'rose', 'pretty', 'leaves', 'pine', 'animal', 'feathers', 'canary', 'petals', 'big', 'bird', 'bark', 'swim', 'green', 'sing', 'living', 'roots', 'plant', 'gills', 'oak', 'skin', 'red', 'robin', 'sunfish', 'salmon', 'daisy', 'fly']

Some examples:
x_item shape:  torch.Size([32, 8])
x_rel shape:  torch.Size([32, 4])
y shape:  torch.Size([32, 36])
canary has: wings, feathers, skin
salmon ISA: fish, organism, animal, salmon
robin has: wings, feathers, skin
daisy has: leaves, petals, roots


Now build the network and training function.

In [4]:
class RumelNet(nn.Module):
    def __init__(self, n_items, n_relations, n_attributes):
        super(RumelNet, self).__init__()
        
        self.n_items = n_items
        self.n_relations = n_relations
        self.n_attributes = n_attributes
        
        rep_size = 8
        hidden_size = 15
        
        # define layers
        self.item_to_rep = nn.Linear(n_items, rep_size)
        self.rep_to_hidden = nn.Linear(rep_size, hidden_size)
        self.rel_to_hidden = nn.Linear(n_relations, hidden_size, bias=False) # only need one hidden layer bias
        self.hidden_to_attr = nn.Linear(hidden_size, n_attributes)
        
        # make weights/biases start small
        with torch.no_grad():
            for p in self.parameters():
                nn.init.normal_(p.data, std=0.01)
                #nn.init.uniform_(p.data, a=-0.01, b=0.01)

    def forward(self, item, relation):
        rep = torch.sigmoid(self.item_to_rep(item))
        hidden = torch.sigmoid(self.rep_to_hidden(rep) + self.rel_to_hidden(relation))
        attr = torch.sigmoid(self.hidden_to_attr(hidden))
        return attr

In [5]:
def train_network(net, optimizer, num_epochs=200, snap_freq=20, batch_size=4, scheduler=None):   
    n_snaps = num_epochs // snap_freq
    n_items = net.n_items
    n_rep = net.item_to_rep.out_features
    
    # Holds snapshots of input representation layer after probing with each item
    rep_snapshots = np.ndarray((n_snaps, n_items, n_rep))
    
    criterion = nn.BCELoss(reduction='sum')
    
    n_batches = (len(xs)-1) // batch_size + 1
    
    for epoch in range(num_epochs):
        # collect snapshot
        if epoch % snap_freq == 0:
            k_snap = epoch // snap_freq
            
            with torch.no_grad():
                for k_item, item in enumerate(item_vecs):
                    act = torch.sigmoid(net.item_to_rep(item))
                    rep_snapshots[k_snap, k_item, :] = act

        running_loss = 0.0
        running_accuracy = 0.0

        order = rng.permutation(len(xs))
        for k_batch in range(n_batches):
            # train
            batch_inds = order[k_batch*batch_size:(k_batch+1)*batch_size] 
            
            outputs = net(x_item[batch_inds], x_rel[batch_inds])
            loss = criterion(outputs, y[batch_inds])
            loss.backward()
            optimizer.step()
            optimizer.zero_grad()
            
            with torch.no_grad():
                running_loss += loss.item() * len(batch_inds)
                accuracy = torch.mean(((outputs > 0.5).to(torch.double) == y[batch_inds]).to(torch.double))
                running_accuracy += accuracy.item() * len(batch_inds)
        
        if epoch % snap_freq == 0:
            print(f'Epoch {epoch} end: mean loss = {running_loss / len(xs):.3f}, mean accuracy = {running_accuracy / len(xs):.3f}')
            
        if scheduler is not None:
            scheduler.step()
        
    return rep_snapshots

Moment of truth, time to run it

In [6]:
net = RumelNet(len(items), len(relations), len(attributes))
optimizer = torch.optim.SGD(net.parameters(), lr=0.005)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=1000, gamma=0.95)

rep_snapshots = train_network(net, optimizer, batch_size=4, snap_freq=1000, num_epochs=30000, scheduler=scheduler)

Epoch 0 end: mean loss = 92.102, mean accuracy = 0.860
Epoch 1000 end: mean loss = 18.740, mean accuracy = 0.940
Epoch 2000 end: mean loss = 17.339, mean accuracy = 0.944
Epoch 3000 end: mean loss = 12.986, mean accuracy = 0.959
Epoch 4000 end: mean loss = 10.274, mean accuracy = 0.964
Epoch 5000 end: mean loss = 8.777, mean accuracy = 0.977
Epoch 6000 end: mean loss = 7.012, mean accuracy = 0.983
Epoch 7000 end: mean loss = 5.729, mean accuracy = 0.985
Epoch 8000 end: mean loss = 4.876, mean accuracy = 0.984
Epoch 9000 end: mean loss = 4.374, mean accuracy = 0.985
Epoch 10000 end: mean loss = 4.059, mean accuracy = 0.985
Epoch 11000 end: mean loss = 3.840, mean accuracy = 0.987
Epoch 12000 end: mean loss = 3.678, mean accuracy = 0.989
Epoch 13000 end: mean loss = 3.540, mean accuracy = 0.990
Epoch 14000 end: mean loss = 3.410, mean accuracy = 0.990
Epoch 15000 end: mean loss = 3.292, mean accuracy = 0.990
Epoch 16000 end: mean loss = 3.188, mean accuracy = 0.990
Epoch 17000 end: mean 

In [7]:
z = hierarchy.linkage(rep_snapshots[-1])
plt.figure()
hierarchy.dendrogram(z, labels=items)
plt.show()

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [8]:
# Test trained net
with torch.no_grad():
    ind = 5
    
    item_vec = x_item[ind]
    rel_vec = x_rel[ind]

    item = items[item_vec.numpy().nonzero()[0].item()]
    relation = relations[rel_vec.numpy().nonzero()[0].item()]
    
    fig, ax = plt.subplots(figsize=(8, 12))
    y_test = net(item_vec, rel_vec);
    h1 = ax.barh(range(len(attributes)), y_test.squeeze().numpy(),
                 align='edge', height=0.4, tick_label=attributes)
    h2 = ax.barh(range(len(attributes)), y[ind].squeeze().numpy(),
                 align='edge', height=-0.4, tick_label=attributes)
    ax.legend([h1, h2], ['Actual', 'Expected'])
    ax.set_title(f'{item} {relation}...', size='x-large')
    fig.show()

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [9]:
# Try making MDS plot from snapshots (Figure 4 in Rogers/McClelland)

embedding = MDS(n_components=2)
n_snaps, n_items, n_rep = rep_snapshots.shape
all_reprs = rep_snapshots.reshape((n_snaps * n_items, n_rep))

reprs_embedded = embedding.fit_transform(all_reprs)
reprs_embedded = reprs_embedded.reshape((n_snaps, n_items, 2))

fig, ax = plt.subplots()
for kI, col in zip(range(n_items), mcolors.TABLEAU_COLORS):
    ax.plot(reprs_embedded[:, kI, 0], reprs_embedded[:, kI, 1], '.-',
            label=items[kI], markersize=4, color=col, linewidth=0.5)
    
for kI, col in zip(range(n_items), mcolors.TABLEAU_COLORS):
    ax.plot(reprs_embedded[0, kI, 0], reprs_embedded[0, kI, 1], 'g.',
           markersize=14)
    ax.plot(reprs_embedded[0, kI, 0], reprs_embedded[0, kI, 1], '.',
           markersize=8, color=col)
    ax.plot(reprs_embedded[-1, kI, 0], reprs_embedded[-1, kI, 1], 'r.',
           markersize=14)
    ax.plot(reprs_embedded[-1, kI, 0], reprs_embedded[-1, kI, 1], '.',
           markersize=8, color=col)
    
ax.legend()
plt.show()

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [10]:
# Same but with PCA

embedding = PCA(n_components=2)
n_snaps, n_items, n_rep = rep_snapshots.shape
all_reprs = rep_snapshots.reshape((n_snaps * n_items, n_rep))

reprs_embedded = embedding.fit_transform(all_reprs)
reprs_embedded = reprs_embedded.reshape((n_snaps, n_items, 2))

fig, ax = plt.subplots()
for kI, col in zip(range(n_items), mcolors.TABLEAU_COLORS):
    ax.plot(reprs_embedded[:, kI, 0], reprs_embedded[:, kI, 1], '.-',
            label=items[kI], markersize=4, color=col, linewidth=0.5)
    
for kI, col in zip(range(n_items), mcolors.TABLEAU_COLORS):
    ax.plot(reprs_embedded[0, kI, 0], reprs_embedded[0, kI, 1], 'g.',
           markersize=14)
    ax.plot(reprs_embedded[0, kI, 0], reprs_embedded[0, kI, 1], '.',
           markersize=8, color=col)
    ax.plot(reprs_embedded[-1, kI, 0], reprs_embedded[-1, kI, 1], 'r.',
           markersize=14)
    ax.plot(reprs_embedded[-1, kI, 0], reprs_embedded[-1, kI, 1], '.',
           markersize=8, color=col)
    
ax.legend()
plt.show()

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [11]:
fig, ax = plt.subplots()
# ax.matshow(embedding.components_)
with torch.no_grad():
    ax.matshow(torch.sigmoid(net.item_to_rep.weight).numpy())
plt.show()

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …