## Role-Factor Tensor Network

* Paper: [Weber et al. (2018). Event Representations with Tensor-based Compositions](https://arxiv.org/pdf/1711.07611.pdf)
* PyTorch version: 0.4.0

### Generate random event data

In [2]:
from collections import Counter
from itertools import product
import numpy as np

SUBJ_ANIMAL = DOBJ_ANIMAL = ["cat", "horse", "dog"]
SUBJ_HUMAN = DOBJ_HUMAN = ["man", "woman", "phd"]
PRED_ANIMAL = ["eat", "bite", "poop"]
PRED_HUMAN = ["read", "love", "miss"]

class Indexer(object):
    
    def __init__(self):
        self.item_to_index = dict()
        self.index_to_item = dict()
        self.item_to_count = Counter()
        
    def __repr__(self):
        return "The size of the indexer = %d" % len(self.item_to_index)
    
    def get_item(self, index):
        if index in self.index_to_item:
            return self.index_to_item[index]
        return -1
    
    def get_index(self, item):
        if item not in self.item_to_index:
            index = len(self.item_to_index)
            self.item_to_index[item] = index
            self.index_to_item[index] = item
            self.item_to_count[item] += 1
        return self.item_to_index[item] 
    
    def get_count(self, item):
        if item in self.item_to_count:
            return self.item_to_count[item]
        return 0
    
    def get_top_item_set(self, k):
        return set(item for item, count in self.item_to_count.most_common(k))
    
    def get_top_index_set(self, k):
        return set(self.get_index(item) 
                   for item, count in self.item_to_count.most_common(k))
     

def generate_event_triples(word_indexer, event_indexer, subjects, predicates, dobjects):
    events = []
    for subject, predicate, dobject in product(subjects, predicates, dobjects):
        subject_index = word_indexer.get_index(subject)
        predicate_index = word_indexer.get_index(predicate)
        dobject_index = word_indexer.get_index(dobject)
        event = (subject_index, predicate_index, dobject_index)
        events.append(event)
        event_indexer.get_index(event)
    return np.array(events)

word_indexer = Indexer()
event_indexer = Indexer()
events_list = []
for subjects, predicates, dobjects in product([SUBJ_ANIMAL, SUBJ_HUMAN],
                                              [PRED_ANIMAL, PRED_HUMAN],
                                              [DOBJ_ANIMAL, DOBJ_HUMAN]):
    events_list.append(generate_event_triples(word_indexer, event_indexer,
                                              subjects, predicates, dobjects))
    
def sample_inner_random_events(events, sample_size):
    random_indices = np.random.choice(range(len(events)),
                                      size=sample_size)
    return np.array([events[index] for index in random_indices])

def sample_outer_random_events(event_indexer, sample_size):
    random_indices = np.random.choice(range(len(event_indexer.index_to_item)),
                                      size=sample_size)
    return np.array([event_indexer.get_item(index) for index in random_indices])

def get_batch(event_indexer, events, batch_size=10):
    batch_events = sample_inner_random_events(events, batch_size)
    batch_positive = sample_inner_random_events(events, batch_size)
    batch_negative = sample_outer_random_events(event_indexer, batch_size)
    return batch_events, batch_positive, batch_negative

def event_to_string(word_indexer, event):
    return "-".join([word_indexer.get_item(event_index) for event_index in event])\

def event_to_integer(word_indexer, event):
    return np.array([word_indexer.get_index(word) for word in event])

### Role-Factor Tensor Network

In [3]:
from __future__ import division
from __future__ import print_function
import math
import numpy as np
import os
import time
import torch
from torch.autograd import Variable as Var
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim


class Embeddings(nn.Module):
    """Embedding lookup table."""
    
    def __init__(self, vocab_size, embedding_size):
        """Initializer.
        
        Args:
            vocab_size: size of input vocabulary (max index - 1).
            embedding_size: embedding size.
        """
        super(Embeddings, self).__init__()
        self.embed = nn.Embedding(vocab_size, embedding_size)
        self.vocab_size = vocab_size
        self.embedding_size = embedding_size
    
    def info(self):
        """Print out embedding lookup table stats."""
        print("Embedding lookup table of size <%d, %d>" % (self.vocab_size,
                                                           self.embedding_size))
    
    def forward(self, batch_inputs):
        """Forward pass.
        
        Args:
            batch_inputs: Variable.Tensor of the shape 
                          <batch_size, ..., sequence length>.
        Returns:
            Variable.Tensor of the shape <batch_size, ..., sequence length, embedding_size>.
        """
        return self.embed(batch_inputs) * math.sqrt(self.embedding_size)


class RoleFactorTensorNet(nn.Module):
    """Role-Factor Tensor Net (Weber et al. 2018)."""
    
    def __init__(self, embedding_size, hidden_size, output_size):
        """Initializer.
        
        Args:
            embedding_size: word embedding size.
            hidden_size: model hidden size.
            output_size: event embedding size.
        """
        super(RoleFactorTensorNet, self).__init__()
        self.T = torch.FloatTensor(hidden_size, embedding_size, embedding_size)
        nn.init.xavier_uniform_(self.T)
        self.W1 = nn.Linear(hidden_size, output_size)
        self.W2 = nn.Linear(hidden_size, output_size)
    
    def forward(self, batch_subjects, batch_predicates, batch_dobjects):
        """Forward pass.
        
        Args:
            batch_subjects: batch of subject embeddings, 
                            shape <batch_size, embedding_size>.
            batch_predicates: same as batch_subjects, for predicates.
            batch_dobjects: same as batch_subjects, for direct objects.
        Returns:
            Batch of event embeddings, shape <batch_size, output_size>.
        """
        # Einstein sum for tensor contraction (Weber/18, Eq.6).
        batch_v_subjects = torch.einsum("ijk,bj,bk->bi", [self.T, 
                                                          batch_subjects,
                                                          batch_predicates])
        batch_v_dobjects = torch.einsum("ijk,bj,bk->bi", [self.T, 
                                                          batch_dobjects,
                                                          batch_predicates])
        # Argument composition through linear layer (Weber/18, Eq.7).
        return self.W1(batch_v_subjects) + self.W2(batch_v_dobjects)


def compose_event(batch_inputs, embedder, rft_net):
    """Input->Event composition with Role-Factor Tensor Net.
    
    Args:
        batch_inputs: numpy ndarray, shape = <batch_size, svo=3>.
        embedder: Embeddings object.
        rft_net: RoleFactorTensorNet object.
    Returns:
        Compositional event embedding, shape = <batch_size, event_size>.
    """
    batch_subjects = embedder(torch.LongTensor(batch_inputs[:, 0]))
    batch_predicates = embedder(torch.LongTensor(batch_inputs[:, 1]))
    batch_dobjects = embedder(torch.LongTensor(batch_inputs[:, 2]))
    return rft_net(batch_subjects, batch_predicates, batch_dobjects)


def compute_event_similarity(event1, event2, word_indexer, embedder, rft_net):
    """Compute similarity for a pair of events with trained RFT-Net.
    
    Args:
        event1: (subject, predicate, dobject) string tuple.
        event2: same as event1.
        word_indexer: Indexer object.
        embedder: Embeddings object.
        rft_net: RoleFactorTensorNet object.
    Returns:
        Cosine similarity between event1 and event2 (as composed with RFT-Net).
    """
    event1 = event_to_integer(word_indexer, event1)
    event2 = event_to_integer(word_indexer, event2)
    event1_embedding = compose_event(np.array([event1]), embedder, rft_net)
    event2_embedding = compose_event(np.array([event2]), embedder, rft_net)
    return F.cosine_similarity(event1_embedding, event2_embedding).item()
    

def train(word_indexer, event_indexer,
          events_list, # a list of <?, svo=3> event batches.
          embedding_size, hidden_size, output_size,
          number_epochs, batch_size, learning_rate, margin,
          print_every):
    
    rft_net = RoleFactorTensorNet(embedding_size, hidden_size, output_size)
    word_embedder = Embeddings(vocab_size=len(word_indexer.index_to_item),
                               embedding_size=embedding_size)
    
    optimizer = optim.Adam(rft_net.parameters(), lr=learning_rate)
    
    for epoch in range(number_epochs):
        print("Epoch %d:\n" % (epoch+1))
        for events in events_list:
            # Get batch inputs, shape = <batch_size, svo=3>.
            batch_events, batch_positive, batch_negative = get_batch(event_indexer,
                                                                     events, 
                                                                     batch_size)
            # Encode svo-triples as event vectors,
            #   shape = <batch_size, event_size>.
            batch_events = compose_event(batch_events, word_embedder, rft_net)
            batch_positive = compose_event(batch_positive, word_embedder, rft_net)
            batch_negative = compose_event(batch_negative, word_embedder, rft_net)
            
            # Compute batch similarity, shape = <batch_size, >.
            similarity_positive = F.cosine_similarity(batch_events, batch_positive)
            similarity_negative = F.cosine_similarity(batch_events, batch_negative)
            similarity_difference = torch.mean(similarity_negative) - torch.mean(similarity_positive)

            optimizer.zero_grad()
            # Compute Hinge Loss: mean(sum(max(0.0, margin + sim_neg - sim_pos))).
            #   Weber/18, page 3.
            loss = torch.max(torch.FloatTensor(np.array(0.0)), 
                             margin + similarity_difference)
            optimizer.step()
        print("Epoch loss = %.4f\n" % loss.item())
    
    return rft_net, word_embedder

In [44]:
net, emb = train(word_indexer, event_indexer,
                 events_list,
                 embedding_size=3, hidden_size=4, output_size=5,
                 number_epochs=100, batch_size=2, learning_rate=1e-4, margin=0.5,
                 print_every=10)

Epoch 1:

Epoch loss = 0.2047

Epoch 2:

Epoch loss = 0.0000

Epoch 3:

Epoch loss = 1.1620

Epoch 4:

Epoch loss = 1.7363

Epoch 5:

Epoch loss = 1.4145

Epoch 6:

Epoch loss = 1.2888

Epoch 7:

Epoch loss = 1.7896

Epoch 8:

Epoch loss = 0.7008

Epoch 9:

Epoch loss = 0.2000

Epoch 10:

Epoch loss = 0.3229

Epoch 11:

Epoch loss = 0.9304

Epoch 12:

Epoch loss = 1.3641

Epoch 13:

Epoch loss = 0.4745

Epoch 14:

Epoch loss = 0.4743

Epoch 15:

Epoch loss = 0.6939

Epoch 16:

Epoch loss = 0.0566

Epoch 17:

Epoch loss = 0.9931

Epoch 18:

Epoch loss = 1.1332

Epoch 19:

Epoch loss = 0.5491

Epoch 20:

Epoch loss = 0.8481

Epoch 21:

Epoch loss = 0.0000

Epoch 22:

Epoch loss = 0.0000

Epoch 23:

Epoch loss = 1.0441

Epoch 24:

Epoch loss = 1.3546

Epoch 25:

Epoch loss = 0.1876

Epoch 26:

Epoch loss = 0.1612

Epoch 27:

Epoch loss = 1.3934

Epoch 28:

Epoch loss = 0.5476

Epoch 29:

Epoch loss = 0.6355

Epoch 30:

Epoch loss = 1.0474

Epoch 31:

Epoch loss = 0.2703

Epoch 32:

Epoch 

In [45]:
e1 = ("dog", "eat", "dog")
e2 = ("cat", "eat", "horse")
e3 = ("man", "love", "phd")

print(compute_event_similarity(e1, e2, word_indexer, emb, net))
print(compute_event_similarity(e1, e3, word_indexer, emb, net))

0.611241340637207
-0.08810067176818848
