In [1]:
import spacy, random, math, time
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torchtext.datasets import TranslationDataset, Multi30k, IWSLT
from torchtext.data import Field, BucketIterator, RawField, Dataset

%load_ext autoreload
%autoreload 2

#### Multi30k
---

In [2]:
# Fix every random number generator the notebook touches so that
# Restart Kernel -> Run All reproduces the same numbers.
SEED = 11747
random.seed(SEED)                 # Python stdlib RNG
np.random.seed(SEED)              # NumPy legacy global RNG
torch.manual_seed(SEED)           # PyTorch default generator
torch.cuda.manual_seed_all(SEED)  # every GPU, not only the current one; ignored without CUDA
# Ask cuDNN for deterministic kernels and keep its autotuner off: with
# benchmark mode enabled cuDNN may select a different algorithm from run to run.
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

In [5]:
# Load the spaCy pipelines registered under the names 'de' and 'en'; their
# tokenizers are what tokenize_de / tokenize_en call.
# NOTE(review): the bare 'de' / 'en' names look like spaCy shortcut links --
# confirm the installed spaCy version still resolves them.
spacy_de = spacy.load('de')
spacy_en = spacy.load('en')

def tokenize_de(text):
    """
    Tokenizes German text from a string into a list of strings (tokens).

    Token order is kept exactly as the spaCy tokenizer produces it.

    Args:
        text (str): raw German sentence.

    Returns:
        list[str]: the `.text` of each token yielded by `spacy_de.tokenizer`.
    """
    return [tok.text for tok in spacy_de.tokenizer(text)]

def tokenize_en(text):
    """
    Split an English string into a list of token strings.

    The text is run through the tokenizer of the module-level `spacy_en`
    pipeline and the `.text` of every resulting token is collected in order.
    """
    doc = spacy_en.tokenizer(text)
    return [token.text for token in doc]

In [6]:
# Source-side field: text is tokenized with tokenize_de, lowercased, and
# wrapped in '<sos>' ... '<eos>' markers.
SRC = Field(tokenize = tokenize_de, 
            init_token = '<sos>', 
            eos_token = '<eos>', 
            lower = True)
# Target-side field: same settings, but tokenized with tokenize_en.
TGT = Field(tokenize = tokenize_en, 
            init_token = '<sos>', 
            eos_token = '<eos>', 
            lower = True)
# Third attribute is carried through a RawField with no postprocessing step.
# NOTE(review): 'grh' presumably holds per-sentence graph/adjacency data for
# the GCN experiments -- confirm against the script that wrote the .pt files.
GRH = RawField(postprocessing=None)
# (attribute name, field) pairs handed to Dataset when the splits are loaded.
data_fields = [('src', SRC), ('trg', TGT), ('grh', GRH)]

In [7]:
# Wrap the pre-saved splits in torchtext Datasets described by data_fields.
# NOTE(review): torch.load unpickles the file, which can execute arbitrary
# code -- only load .pt files that come from a trusted source.
# NOTE(review): presumably each file stores a list of torchtext examples with
# src / trg / grh attributes -- confirm against the preprocessing step.
train_data = Dataset(torch.load("data/Multi30k/train_data.pt"), data_fields)
valid_data = Dataset(torch.load("data/Multi30k/valid_data.pt"), data_fields)
test_data = Dataset(torch.load("data/Multi30k/test_data.pt"), data_fields)

In [8]:
# Batch size shared by the train / valid / test iterators.
BATCH_SIZE = 128
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# One iterator per split, all producing batches on `device`.
# NOTE(review): no sort_key is passed here; BucketIterator groups examples of
# similar length via a sort key, so confirm the datasets supply one.
train_iterator, valid_iterator, test_iterator = BucketIterator.splits(
    (train_data, valid_data, test_data), 
    batch_size = BATCH_SIZE, 
    device = device)

#### Experiment with just GCN

In [48]:
class GCNLayer(nn.Module):
    """
    Single graph-convolution layer.

    Computes H' = relu(A @ dropout(H) @ W + b) with
        H: (b, seq len, ninp)    node features
        A: (b, seq len, seq len) adjacency matrix
        W: (ninp, nout)          learned projection
        b: (nout,)               learned bias
    """

    def __init__(self, input_dim, output_dim, dropout = 0.2):
        """
        Args:
            input_dim:  size of each incoming node feature vector (ninp).
            output_dim: size of each outgoing node feature vector (nout).
            dropout:    dropout probability applied to the input features.
        """
        super().__init__()
        self.W = nn.Parameter(torch.empty(input_dim, output_dim))
        self.b = nn.Parameter(torch.zeros(output_dim))
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(dropout)
        self.reset_parameters()

    def reset_parameters(self):
        """
        Xavier-uniform initialise W and zero the bias.

        Done here because the parameter is named `W`, so initialisers that
        look for a `weight` attribute skip this layer and would leave it with
        unscaled random values.
        """
        nn.init.xavier_uniform_(self.W)
        nn.init.zeros_(self.b)

    def forward(self, x, A):
        """
        H' = relu(A @ dropout(x) @ W + b)

        x: (b, seq len, ninp)
        A: (b, seq len, seq len)
        returns: (b, seq len, nout)
        """
        x = self.dropout(x)
        # bmm needs both operands in the same dtype; adjacency matrices are
        # often built as bool/int, so align A with the features (no-op when
        # the dtypes already match).
        A = A.to(x.dtype)
        x = torch.bmm(A, x)  # x: (b, seq len, ninp)
        x = x.matmul(self.W) + self.b
        return self.relu(x)
    
def initialize_weights(m):
    """
    Xavier-uniform initialise the weight matrix of a single module, in place.

    Meant to be passed to `Module.apply`, which calls it once per module.
    Two attribute names are checked: `weight` (the name used by built-in
    layers) and `W` (the name GCNLayer uses in this notebook). A tensor found
    under either name is re-initialised only when it has more than one
    dimension; modules where the attribute is missing, is None (e.g. a norm
    layer built without affine parameters) or is 1-D are left untouched.

    Args:
        m (nn.Module): the module to initialise.
    """
    for attr in ('weight', 'W'):
        param = getattr(m, attr, None)
        if isinstance(param, torch.Tensor) and param.dim() > 1:
            nn.init.xavier_uniform_(param.data)

In [49]:
# Small instance for experimenting: 3 input features -> 2 output features,
# with dropout disabled (p = 0).
layer1 = GCNLayer(3, 2, 0)
# Module.apply runs initialize_weights on the layer and each of its
# submodules and returns the layer, so the cell displays the layer's repr.
layer1.apply(initialize_weights)

GCNLayer(
  (relu): ReLU()
  (dropout): Dropout(p=0, inplace=False)
)