In [1]:
import json
import random

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F

## Load Data and Embeddings

In [15]:
# Substrings that clean_words deletes from every token.
unwanted_chars = ["\\", ".", ",", "/", "'s"]
# Placeholder token that tokenize prepends to every sentence.
start = ["<null>"]

def load_data(path): #load SNLI words
    """Read labelled sentence pairs from a JSON-lines file.

    Each non-blank line is parsed as one JSON object. The fields
    ``gold_label``, ``sentence1`` and ``sentence2`` are copied into a new dict
    under the keys ``"label"``, ``"premise"`` and ``"hypothesis"``.

    Args:
        path: Path of the ``.jsonl`` file to read.

    Returns:
        A list of dicts, in file order. Records whose ``gold_label`` is
        ``'-'`` are left out.

    Raises:
        KeyError: If a record lacks ``gold_label``, or a kept record lacks
            ``sentence1`` / ``sentence2``.
        ValueError: If a non-blank line is not valid JSON
            (``json.JSONDecodeError`` is a ``ValueError`` subclass).
    """
    data = []
    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's default encoding.
    with open(path, 'r', encoding='utf-8') as f:
        for line in f:
            # Blank lines (e.g. a trailing newline at end of file) carry no
            # record; json.loads would raise on them.
            if not line.strip():
                continue
            obj = json.loads(line)
            if obj["gold_label"] == '-':
                continue
            data.append({
                "label": obj["gold_label"],
                "premise": obj["sentence1"],
                "hypothesis": obj["sentence2"],
            })
    return data

def load_embeddings(path,number_of_words,emb_dim): #load pre-trained GloVe embeddings
    """Load the first ``number_of_words`` vectors from a text embedding file.

    Every line is expected to hold a token followed by exactly ``emb_dim``
    whitespace-separated numbers.

    Args:
        path: Path of the embedding text file.
        number_of_words: Maximum number of lines (tokens) to read.
        emb_dim: Number of components per vector.

    Returns:
        A 4-tuple ``(loaded_embeddings, words, idx2words, ordered_words)``:
        * ``loaded_embeddings`` - float array of shape
          ``(number_of_words, emb_dim)``; row ``i`` is the vector on line ``i``.
          If the file has fewer lines, the remaining rows stay all-zero.
        * ``words`` - dict mapping token -> row index.
        * ``idx2words`` - dict mapping row index -> token.
        * ``ordered_words`` - list of tokens in file order.

    Raises:
        ValueError: If a line does not contain a token plus exactly
            ``emb_dim`` values, or a value is not a valid number.
    """
    words_to_load = number_of_words

    loaded_embeddings = np.zeros((words_to_load, emb_dim))
    words = {}
    idx2words = {}
    ordered_words = []

    # Decode explicitly as UTF-8: the vocabulary may contain non-ASCII tokens
    # and the platform default encoding is not guaranteed to handle them.
    with open(path, encoding='utf-8') as f:
        for i, line in enumerate(f):
            if i >= words_to_load:
                break
            s = line.split()
            # Check the field count up front. Without it, a line carrying a
            # single number would be silently broadcast across the whole row
            # by the assignment below.
            if len(s) != emb_dim + 1:
                raise ValueError(
                    "line %d of %s has %d fields, expected %d (token + %d values)"
                    % (i + 1, path, len(s), emb_dim + 1, emb_dim)
                )
            token = s[0]
            loaded_embeddings[i, :] = np.asarray(s[1:], dtype=float)
            words[token] = i
            idx2words[i] = token
            ordered_words.append(token)

    return loaded_embeddings, words, idx2words, ordered_words

def clean_words(text_list, chars=None): # Removes characters and makes all words lowercase
    """Lowercase every token in ``text_list`` and strip unwanted substrings, in place.

    Args:
        text_list: List of string tokens; each element is overwritten with
            its cleaned form.
        chars: Optional iterable of substrings to delete from each token.
            Defaults to the module-level ``unwanted_chars``. Tokens are
            lowercased before matching, so patterns should be lowercase.

    Returns:
        None. ``text_list`` is modified in place. A token made up only of
        unwanted substrings becomes the empty string and is kept.
    """
    if chars is None:
        chars = unwanted_chars
    for i, word in enumerate(text_list):
        # Lowercase exactly once, before stripping, so the result does not
        # depend on how many patterns there are (including none).
        cleaned = word.lower()
        for ch in chars:
            cleaned = cleaned.replace(ch, '')
        text_list[i] = cleaned

def embed_words(text_list, emb_dim, vectors=None, vocab=None): # Assigns GloVe embeddings if it has one, otherwise gets embedding of zeros
    """Replace every token in ``text_list`` with its embedding vector, in place.

    Args:
        text_list: List of tokens; each element is overwritten with a vector.
        emb_dim: Length of the zero vector used for tokens missing from
            ``vocab``.
        vectors: Optional indexable table of vectors (row index -> vector).
            Defaults to the module-level ``embeddings``, looked up at call time.
        vocab: Optional mapping from token to row index in ``vectors``.
            Defaults to the module-level ``words``, looked up at call time.

    Returns:
        None. ``text_list`` is modified in place.
    """
    # Fall back to the notebook globals only when the caller passes no tables,
    # so existing two-argument calls keep working.
    if vectors is None:
        vectors = embeddings
    if vocab is None:
        vocab = words
    for i, word in enumerate(text_list):
        try:
            row = vocab[word]
        except KeyError:
            # Out-of-vocabulary token: use an all-zero vector.
            text_list[i] = np.zeros(emb_dim)
        else:
            text_list[i] = vectors[row]
        
def tokenize(data,emb_dim):
    """Turn the sentences of every example into lists of embedding vectors.

    For each dict in ``data`` the ``'hypothesis'`` and ``'premise'`` strings
    are split on whitespace and prefixed with the module-level ``start``
    token. Each resulting list is then passed through ``clean_words`` and
    ``embed_words``.

    The dicts are rewritten in place and the same ``data`` list is returned.
    Calling this twice on the same list therefore fails, because the
    sentences are no longer strings after the first call.
    """
    fields = ('hypothesis', 'premise')
    for example in data:
        # Split both sentences before cleaning/embedding either of them.
        for field in fields:
            example[field] = start + example[field].split()
        for field in fields:
            tokens = example[field]
            clean_words(tokens)
            embed_words(tokens, emb_dim)

    return data

In [16]:
# Number of embedding rows to load, and the width of each vector.
vocab_size, emb_dim = 50000, 300
# Input files, relative to the notebook's working directory.
glove_path = "glove/glove.6B.300d.txt"
text_path = "snli_1.0/snli_1.0_train.jsonl"

In [17]:
# Load the first `vocab_size` vectors together with the word/index lookup tables.
embeddings, words, idx2words, ordered_words = load_embeddings(
    path=glove_path,
    number_of_words=vocab_size,
    emb_dim=emb_dim,
)

In [18]:
# Read the labelled sentence pairs from the training file.
data = load_data(path=text_path)

In [19]:
# tokenize rewrites the dicts inside `data` in place and returns that same list,
# so `train_data` and `data` refer to one object after this cell.
train_data = tokenize(data=data, emb_dim=emb_dim)

## Create Batches

In [7]:
def eval_iter(source):
    """Placeholder for the evaluation iterator (not implemented yet).

    ``source`` is accepted but ignored; the function always returns ``None``.
    """
    # tbd
    return
    
def data_iter(source, batch_size):
    """Yield randomly ordered batches from ``source`` indefinitely.

    The indices of ``source`` are shuffled once up front. Batches of
    ``batch_size`` consecutive shuffled indices are then yielded until no
    further full batch fits. At that point the indices are reshuffled and a
    new pass starts from the beginning, so leftover examples that do not fill
    a batch are skipped for that pass.

    If ``source`` holds fewer than ``batch_size`` items, every batch contains
    all items, reshuffled each time.
    """
    n_items = len(source)
    indices = list(range(n_items))
    random.shuffle(indices)
    # Largest offset at which a full batch still fits.
    last_offset = n_items - batch_size

    offset = 0
    while True:
        if offset > last_offset:
            # Start another epoch.
            offset = 0
            random.shuffle(indices)
        picked = indices[offset:offset + batch_size]
        yield [source[k] for k in picked]
        offset += batch_size

In [None]:
def attention_weights(e,a_input,b_input):
    ''' Part 3.1
    Takes the score matrix e together with the two sentence matrices and
    returns the soft-aligned matrices alpha and beta.

    Args:
        e: 2-D score tensor. Row i is indexed like the rows of a_input and
           column j like the rows of b_input, i.e. shape (len_a, len_b).
        a_input: 2-D tensor with one row per word of sentence a (the premise),
           shape (len_a, emb_dim).
        b_input: 2-D tensor with one row per word of sentence b (the
           hypothesis), shape (len_b, emb_dim).

    Returns:
        (alpha, beta):
        * alpha, shape (len_b, emb_dim). The jth row aligns with the jth word
          in sentence b: it is the sum of the rows of a_input weighted by the
          softmax of column j of e.
        * beta, shape (len_a, emb_dim). The ith row aligns with the ith word
          in sentence a: it is the sum of the rows of b_input weighted by the
          softmax of row i of e.
    '''
    # torch.softmax is numerically stable, whereas a raw exp(e) / sum(exp(e))
    # overflows to inf/nan for large scores.
    weights_over_a = torch.softmax(e, dim=0)  # each column sums to 1
    weights_over_b = torch.softmax(e, dim=1)  # each row sums to 1

    # (len_b, len_a) @ (len_a, emb_dim) -> (len_b, emb_dim)
    alpha = torch.matmul(torch.transpose(weights_over_a, 0, 1), a_input)
    # (len_a, len_b) @ (len_b, emb_dim) -> (len_a, emb_dim)
    beta = torch.matmul(weights_over_b, b_input)

    return alpha, beta

In [8]:
"""Note that the Decomposable attention paper selected batch_size=4"""
# NOTE(review): this rebinds the name `data_iter` from the generator function to
# the generator object it returns. After this cell the function is no longer
# reachable under that name, and running the cell a second time (without first
# re-running the cell that defines the function) raises TypeError because a
# generator object is not callable.
data_iter = data_iter(train_data,4)

In [None]:
#TBD
class DecomposableAttention(nn.Module):
    """Work-in-progress decomposable attention model (implemented up to part 3.1).

    Holds one linear layer per input sentence. ``forward`` projects both
    sentences, builds the score matrix ``e`` and calls ``attention_weights``;
    nothing is returned yet.
    """

    def __init__(self, input_size, embedding_dim, hidden_dim, num_labels):
        """Create the two ``embedding_dim -> embedding_dim`` projection layers.

        ``input_size``, ``hidden_dim`` and ``num_labels`` are accepted but not
        used by the layers defined so far.
        """
        super(DecomposableAttention, self).__init__()
        #self.dropout = nn.Dropout(p=0.5)
            
        self.a_linear = nn.Linear(embedding_dim,embedding_dim)
        self.b_linear = nn.Linear(embedding_dim,embedding_dim)
        
    def forward(self, a,b):
        """Compute the attention step for one sentence pair.

        Args:
            a: 2-D tensor with one row per word and ``embedding_dim`` columns.
            b: 2-D tensor with one row per word and ``embedding_dim`` columns.

        Returns:
            None for now: ``alpha`` and ``beta`` are computed but not yet
            consumed by any later layer.
        """
        # Pass the input through your layers in order
        a_bar = a
        b_bar = b
        a_relu = F.relu(self.a_linear(a_bar))
        b_relu = F.relu(self.b_linear(b_bar))
        # Score matrix with one row per row of a and one column per row of b.
        e = torch.matmul(a_relu,torch.transpose(b_relu,0,1))
        alpha, beta = attention_weights(e,a_bar,b_bar)
        
        '''up to 3.1'''
        
        return 

    def init_weights(self):
        """Re-initialise both linear layers.

        Weights are drawn uniformly from ``[-0.1, 0.1]`` and biases are set
        to zero. This method is not called from ``__init__``.
        """
        initrange = 0.1
        # These are the layers created in __init__.
        lin_layers = [self.a_linear, self.b_linear]
             
        for layer in lin_layers:
            layer.weight.data.uniform_(-initrange, initrange)
            layer.bias.data.fill_(0)