In [None]:
import torch.nn as nn
import torch
import copy
import torch.optim as optim
import torch.nn.functional as F
import math
import matplotlib.pyplot as plt
import numpy as np
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Global Vocabs

In [None]:
# Treebank files in CoNLL-U format. Each *_train name must point at the
# training split and each *_dev name at the development split.
english_train = 'en_ewt-ud-train-projectivized.conllu'
english_dev = 'en_ewt-ud-dev.conllu'
chinese_dev = 'zh_gsdsimp-ud-dev.conllu'
# NOTE(review): unlike the English training file this one is not marked as
# projectivized — confirm it only contains projective trees.
chinese_train = 'zh_gsdsimp-ud-train.conllu'

In [None]:
class LabelDataset():
    """Iterable over the sentences of a CoNLL-U file, with dependency labels.

    Every sentence is yielded as a list of ``(form, upos, head, deprel)``
    tuples whose first element is the pseudo-root ``ROOT``. Comment lines
    (starting with ``#``) and tokens whose id is not a plain integer
    (e.g. range tokens such as ``1-2``) are skipped.
    """

    ROOT = ('<root>', '<root>', 0, '<root>')  # Pseudo-root

    def __init__(self, filename):
        self.filename = filename

    def __iter__(self):
        with open(self.filename, 'rt', encoding='utf-8') as lines:
            tmp = [LabelDataset.ROOT]
            for line in lines:
                if line.startswith('#'):  # Skip lines with comments
                    continue
                line = line.rstrip()
                if line:
                    columns = line.split('\t')
                    if columns[0].isdigit():  # Skip range tokens
                        # Column 1 = form, 3 = UPOS tag, 6 = head index, 7 = dependency label
                        tmp.append((columns[1], columns[3], int(columns[6]), columns[7]))
                elif len(tmp) > 1:
                    # A blank line ends the sentence; ignore repeated blank
                    # lines, which would otherwise produce root-only sentences.
                    yield tmp
                    tmp = [LabelDataset.ROOT]
            # The file may not end with a blank line: flush the last sentence.
            if len(tmp) > 1:
                yield tmp

# Labelled (form, tag, head, deprel) readers over the Chinese training and
# development treebanks. Each iteration re-reads the file from disk.
label_train_data = LabelDataset(chinese_train)
label_dev_data = LabelDataset(chinese_dev)

In [None]:
PAD = '<pad>'
UNK = '<unk>'
ROOT = '<root>'
def make_vocabs(label_gold_data):
    """Build word, tag and label vocabularies together with their inverses.

    Args:
        label_gold_data: iterable of sentences; each token is indexable with
            the word form at position 0, the tag at 1 and the dependency
            label at 3.

    Returns:
        ``(word_vocab, tag_vocab, label_vocab, inverse_word_vocab,
        inverse_tag_vocab, inverse_label_vocab)``. The forward maps go from
        symbol to integer id, the inverse maps from id to symbol. Ids are
        assigned in order of first occurrence, after the reserved symbols
        (PAD/UNK for words, PAD for tags, PAD/ROOT for labels).
    """
    word_vocab = {PAD: 0, UNK: 1}
    tag_vocab = {PAD: 0}
    label_vocab = {PAD: 0, ROOT: 1}
    inverse_word_vocab = {0: PAD, 1: UNK}
    inverse_tag_vocab = {0: PAD}
    inverse_label_vocab = {0: PAD, 1: ROOT}

    # (token column, symbol -> id map, id -> symbol map)
    tables = (
        (0, word_vocab, inverse_word_vocab),
        (1, tag_vocab, inverse_tag_vocab),
        (3, label_vocab, inverse_label_vocab),
    )
    for sentence in label_gold_data:
        for token in sentence:
            for column, forward, backward in tables:
                symbol = token[column]
                if symbol in forward:
                    continue
                # Ids are dense, so the next free id is the current size.
                new_id = len(forward)
                forward[symbol] = new_id
                backward[new_id] = symbol
    return word_vocab, tag_vocab, label_vocab, inverse_word_vocab, inverse_tag_vocab, inverse_label_vocab

# Global vocabularies (and their inverses) built from the training treebank.
# Their sizes are read as default arguments by the model classes below, so this
# cell must run before those classes are defined.
vocab_words, vocab_tags, vocab_labels, inverse_vocab_words, inverse_vocab_tags, inverse_label_vocab = make_vocabs(label_train_data)

# Tagger

In [None]:
class tag_Dataset():
    """Iterable over the sentences of a CoNLL-U file for part-of-speech tagging.

    Every sentence is yielded as a list of ``(form, upos)`` pairs. Comment
    lines (starting with ``#``) and tokens whose id is not a plain integer
    (e.g. range tokens such as ``1-2``) are skipped.
    """

    def __init__(self, filename):
        self.filename = filename

    def __iter__(self):
        with open(self.filename, 'rt', encoding='utf-8') as lines:
            tmp = []
            for line in lines:
                if line.startswith('#'):  # Skip lines with comments
                    continue
                line = line.rstrip()
                if line:
                    columns = line.split('\t')
                    if columns[0].isdigit():  # Skip range tokens
                        tmp.append((columns[1], columns[3]))
                elif tmp:
                    # A blank line ends the sentence; repeated blank lines
                    # must not produce empty sentences.
                    yield tmp
                    tmp = []
            # The file may not end with a blank line: flush the last sentence.
            if tmp:
                yield tmp

# (form, tag) readers over the Chinese training and development treebanks.
tag_train_data = tag_Dataset(chinese_train)
tag_dev_data = tag_Dataset(chinese_dev)


In [None]:
def accuracy(tagger, gold_data):
    """Return the token-level tagging accuracy of `tagger` on `gold_data`.

    Args:
        tagger: object with a ``predict(tokens)`` method returning one tag per
            token.
        gold_data: iterable of sentences, each a sequence of ``(word, tag)``
            pairs.

    Returns:
        Fraction of predicted tags equal to the gold tag, or ``0.0`` when the
        data contains no tokens.
    """
    correct = 0
    total = 0
    for sentence in gold_data:
        tokens = [pair[0] for pair in sentence]
        for i, pred_tag in enumerate(tagger.predict(tokens)):
            if sentence[i][1] == pred_tag:
                correct += 1
            total += 1
    if total == 0:
        # Nothing to evaluate; avoid a ZeroDivisionError.
        return 0.0
    return correct / total

In [None]:
class tag_FixedWindowModel(nn.Module):
    """Feed-forward classifier over a fixed window of embedded features.

    ``embedding_specs`` is a sequence of ``(n, vocab_size, dim)`` triples: the
    next ``n`` input columns are looked up in one shared embedding table with
    ``vocab_size`` rows of width ``dim``. The concatenated embeddings pass
    through a hidden ReLU layer to ``output_dim`` scores.

    Note: the default sizes are taken from the global vocabularies when the
    class is defined.
    """

    def __init__(self, embedding_specs = [(3, len(vocab_words), 50), (1, len(vocab_tags), 10)],
                 hidden_dim = 100, output_dim = len(vocab_tags)):
        super().__init__()
        tables = []
        column_to_table = []  # For each input column, the index of its embedding table
        concat_width = 0
        for table_index, (repeat, vocab_size, dim) in enumerate(embedding_specs):
            table = nn.Embedding(vocab_size, dim)
            nn.init.normal_(table.weight, std=0.01)
            tables.append(table)
            column_to_table.extend([table_index] * repeat)
            concat_width += repeat * dim
        self.iters = column_to_table
        self.n_cols = len(column_to_table)
        self.embed = nn.ModuleList(tables)
        self.linear1 = nn.Linear(concat_width, hidden_dim)
        self.relu = nn.ReLU()
        self.linear2 = nn.Linear(hidden_dim, output_dim)

    def forward(self, features):
        """Return the output scores for a batch (or a single vector) of feature ids."""
        if features.dim() == 1:
            # Promote a single feature vector to a batch of one.
            features = features.unsqueeze(0)
        embedded = [self.embed[self.iters[col]](features[:, col]) for col in range(self.n_cols)]
        hidden = self.relu(self.linear1(torch.cat(embedded, dim=1)))
        return self.linear2(hidden)

In [None]:
class Tagger(object):
    """Interface of a part-of-speech tagger."""

    def predict(self, sentence):
        """Return the predicted tags for `sentence`; subclasses must override."""
        raise NotImplementedError

class FixedWindowTagger(Tagger):
    """Greedy left-to-right tagger backed by a ``tag_FixedWindowModel``.

    The feature window of a token holds the ids of the current, previous and
    next word plus the id of the tag predicted for the previous token.
    """

    def __init__(self, vocab_words, vocab_tags, inverse_vocab_words, inverse_vocab_tags, output_dim = 4, word_dim=50, tag_dim=10, hidden_dim=100):
        """Create the tagger and its (untrained) network.

        The network is sized from the vocabularies passed in, so it always
        agrees with the ids this tagger produces. ``output_dim`` is kept for
        backward compatibility only: the output layer always has one unit per
        entry of ``vocab_tags``.
        """
        self.fw_model = tag_FixedWindowModel(
            embedding_specs=[(3, len(vocab_words), word_dim), (1, len(vocab_tags), tag_dim)],
            hidden_dim=hidden_dim,
            output_dim=len(vocab_tags))
        self.vocab_words = vocab_words
        self.vocab_tags = vocab_tags
        self.inverse_vocab_words = inverse_vocab_words
        self.inverse_vocab_tags = inverse_vocab_tags
        # Decode predictions with the inverse of the very vocabulary the model
        # was sized from, so tag ids and tag names cannot drift apart.
        self._id_to_tag = {tag_id: tag for tag, tag_id in vocab_tags.items()}

    def featurize(self, words, i, pred_tags):
        """Return the feature vector ``[w_i, w_{i-1}, w_{i+1}, t_{i-1}]`` as a LongTensor.

        ``words`` and ``pred_tags`` hold ids; positions outside the sentence
        are filled with the PAD id.
        """
        tagger_conf = [0] * 4
        tagger_conf[0] = words[i]
        if i-1 < 0:
            tagger_conf[1] = self.vocab_words[PAD]
            tagger_conf[3] = self.vocab_tags[PAD]
        else:
            tagger_conf[1] = words[i-1]
            tagger_conf[3] = pred_tags[i-1]
        if i+1 > len(words)-1:
            tagger_conf[2] = self.vocab_words[PAD]
        else:
            tagger_conf[2] = words[i+1]
        return torch.LongTensor(tagger_conf)

    def predict(self, words):
        """Return the list of predicted tags for the word forms in `words`.

        Unknown words are mapped to UNK. Tags are predicted greedily from left
        to right, each prediction feeding the window of the next token.
        """
        unk_id = self.vocab_words[UNK]
        word_ids = [self.vocab_words.get(word, unk_id) for word in words]

        pred_tag_ids = []
        # Inference only: no autograd graph is needed.
        with torch.no_grad():
            for i in range(len(word_ids)):
                window = self.featurize(word_ids, i, pred_tag_ids)
                pred = self.fw_model.forward(window)
                pred_tag_ids.append(torch.argmax(pred).item())

        return [self._id_to_tag[tag_id] for tag_id in pred_tag_ids]


In [None]:
def tag_training_examples(vocab_words, vocab_tags, inverse_vocab_words, inverse_vocab_tags, gold_data, tagger, batch_size=100):
    """Yield ``(features, gold_tag_ids)`` mini-batches for training the tagger.

    Every token of `gold_data` becomes one example whose features come from
    ``tagger.featurize`` applied to the gold word and tag ids. Batches hold
    `batch_size` examples, except possibly the last one. The two
    ``inverse_*`` arguments are not used.
    """
    features = torch.zeros((batch_size, 4), dtype=int)
    targets = torch.zeros(batch_size, dtype=int)
    filled = 0  # Number of rows of the current batch already written
    for sentence in gold_data:
        encoded = [(vocab_words[pair[0]], vocab_tags[pair[1]]) for pair in sentence]
        word_ids = [word_id for word_id, _ in encoded]
        tag_ids = [tag_id for _, tag_id in encoded]
        for position, gold_tag in enumerate(tag_ids):
            if filled >= batch_size:
                # Current batch is full: hand it out and start a fresh one.
                yield features, targets
                features = torch.zeros((batch_size, 4), dtype=int)
                targets = torch.zeros(batch_size, dtype=int)
                filled = 0
            features[filled, :] = tagger.featurize(word_ids, position, tag_ids)
            targets[filled] = gold_tag
            filled += 1
    if filled != 0:
        yield features[0:filled, :], targets[0:filled]

In [None]:
def tag_train_fixed_window(train_data, n_epochs=2, batch_size=100, lr=1e-2):
    """Train a ``FixedWindowTagger`` on `train_data` and return it.

    Args:
        train_data: iterable of sentences of ``(word, tag)`` pairs.
        n_epochs: number of passes over the training data.
        batch_size: number of examples per mini-batch.
        lr: learning rate of the Adam optimizer.

    Returns:
        The trained tagger, with its network in eval mode on the CPU.

    Note: the vocabularies are built from the global ``label_train_data``,
    not from `train_data`.
    """
    # make_vocabs returns (words, tags, labels, inv_words, inv_tags, inv_labels);
    # the label vocabularies are not needed by the tagger.
    vocab_words, vocab_tags, _, inverse_vocab_words, inverse_vocab_tags, _ = make_vocabs(label_train_data)
    fw_tagger = FixedWindowTagger(vocab_words, vocab_tags, inverse_vocab_words, inverse_vocab_tags)
    model = fw_tagger.fw_model
    # The batches are moved to `device`, so the parameters must live there too.
    model.to(device)
    optimizer = optim.Adam(model.parameters(), lr=lr)

    for ep in range(n_epochs):
        model.train()
        for bx, by in tag_training_examples(vocab_words, vocab_tags, inverse_vocab_words, inverse_vocab_tags, train_data, fw_tagger, batch_size=batch_size):
            bx = bx.to(device)
            by = by.to(device)
            optimizer.zero_grad()
            output = model.forward(bx)
            loss = F.cross_entropy(output, by)
            loss.backward()
            optimizer.step()
        model.eval()
    # The tagger builds its feature windows as CPU tensors, so hand the
    # network back on the CPU.
    model.to('cpu')
    return fw_tagger

# Labeller

The labelled datasets and the vocabularies used by the labeller were already created in the sections above.

In [None]:
class label_FixedWindowModel(nn.Module):
    """Feed-forward classifier scoring dependency labels from embedded features.

    Each ``(n, vocab_size, dim)`` entry of ``embedding_specs`` declares one
    embedding table that is shared by the next ``n`` input columns. All
    embeddings are concatenated and fed through a ReLU hidden layer to
    ``output_dim`` scores.

    Note: the default sizes are taken from the global vocabularies when the
    class is defined.
    """

    def __init__(self, embedding_specs = [(2, len(vocab_words), 50), (2, len(vocab_tags), 10)],
                 hidden_dim = 180, output_dim = len(vocab_labels)):
        super().__init__()
        layers, routing, width = [], [], 0
        for spec_no, spec in enumerate(embedding_specs):
            copies, vocab_size, dim = spec
            layer = nn.Embedding(vocab_size, dim)
            nn.init.normal_(layer.weight, std=0.01)
            layers.append(layer)
            routing += [spec_no] * copies  # These columns all use table `spec_no`
            width += dim * copies
        self.iters = routing
        self.n_cols = len(routing)
        self.embed = nn.ModuleList(layers)
        self.linear1 = nn.Linear(width, hidden_dim)
        self.relu = nn.ReLU()
        self.linear2 = nn.Linear(hidden_dim, output_dim)

    def forward(self, features):
        """Return label scores for a batch (or a single vector) of feature ids."""
        if features.dim() == 1:
            # A lone feature vector is treated as a batch of size one.
            features = features.unsqueeze(0)
        pieces = []
        for col in range(self.n_cols):
            table = self.embed[self.iters[col]]
            pieces.append(table(features[:, col]))
        activations = self.linear1(torch.cat(pieces, 1))
        activations = self.relu(activations)
        return self.linear2(activations)

In [None]:
class FixedWindowLabler():
    """Dependency-label classifier for a (dependent, head) pair of tokens.

    The four features are the word ids of the source and target token followed
    by their tag ids, scored by a ``label_FixedWindowModel``.
    """

    def __init__(self, vocab_words, vocab_tags, inverse_vocab_labels, hidden_dim=180):
        """Create the labeller and its (untrained) network.

        The network is sized from the vocabularies passed in and honours
        ``hidden_dim``; the embedding widths (50 for words, 10 for tags) match
        the defaults of ``label_FixedWindowModel``.
        """
        super().__init__()
        self.fw_model = label_FixedWindowModel(
            embedding_specs=[(2, len(vocab_words), 50), (2, len(vocab_tags), 10)],
            hidden_dim=hidden_dim,
            output_dim=len(inverse_vocab_labels))
        self.vocab_words = vocab_words
        self.vocab_tags = vocab_tags
        self.inverse_vocab_labels = inverse_vocab_labels


    def featurize(self, words, tags, indexes): #config = (source_i, target_i)
        """Return ``[source word, target word, source tag, target tag]`` ids as a LongTensor.

        ``indexes`` is ``(source_i, target_i)``. A falsy index (``0``, i.e. the
        pseudo-root position, or ``None``) is encoded with the PAD ids.
        """
        features = [0]*4
        source_i, target_i = indexes

        if source_i:
            features[0] = words[source_i]
            features[2] = tags[source_i]
        else:
            # Word slots are padded from the word vocabulary, tag slots from
            # the tag vocabulary.
            features[0] = self.vocab_words[PAD]
            features[2] = self.vocab_tags[PAD]
        if target_i:
            features[1] = words[target_i]
            features[3] = tags[target_i]
        else:
            features[1] = self.vocab_words[PAD]
            features[3] = self.vocab_tags[PAD]
        return torch.LongTensor(features)


    def predict(self, source_word_id, target_word_id, source_tag_id, target_tag_id, want_print = False):
        """Return the id of the highest-scoring label for the given ids.

        ``want_print`` is accepted for interface compatibility and unused.
        """
        window = torch.LongTensor([source_word_id, target_word_id, source_tag_id, target_tag_id])
        # Inference only: no autograd graph is needed.
        with torch.no_grad():
            pred = self.fw_model.forward(window)
        pred_label_id = torch.argmax(pred).item()
        return pred_label_id


In [None]:
def label_training_examples(vocab_words, vocab_tags, vocab_labels, gold_data, labler, batch_size=100):
    """Yield ``(features, gold_label_ids)`` mini-batches for training the labeller.

    Every token of `gold_data` (the pseudo-root included) becomes one example:
    its features are ``labler.featurize(word_ids, tag_ids, (i, head_of_i))``
    and its target is the id of its dependency label. Batches hold
    `batch_size` examples, except possibly the last one.
    """
    K = 4  # Number of feature columns

    def empty_batch():
        return torch.zeros((batch_size, K), dtype=int), torch.zeros(batch_size, dtype=int)

    batch_x, batch_y = empty_batch()
    used = 0
    for sentence in gold_data:
        rows = [(vocab_words[quad[0]], vocab_tags[quad[1]], quad[2], vocab_labels[quad[3]])
                for quad in sentence]
        word_ids = [row[0] for row in rows]
        tag_ids = [row[1] for row in rows]
        heads = [row[2] for row in rows]
        label_ids = [row[3] for row in rows]

        for i, (head, label_id) in enumerate(zip(heads, label_ids)):
            if not used < batch_size:
                # Batch complete: emit it before writing the next example.
                yield batch_x, batch_y
                batch_x, batch_y = empty_batch()
                used = 0
            batch_x[used, :] = labler.featurize(word_ids, tag_ids, (i, head))
            batch_y[used] = label_id
            used += 1
    if used:
        yield batch_x[0:used, :], batch_y[0:used]

In [None]:
def label_train_fixed_window(train_data, n_epochs=2, batch_size=100, lr=1e-2):
    """Train a ``FixedWindowLabler`` on `train_data` and return it.

    Args:
        train_data: iterable of sentences of ``(word, tag, head, label)`` tuples.
        n_epochs: number of passes over the training data.
        batch_size: number of examples per mini-batch.
        lr: learning rate of the Adam optimizer.

    Returns:
        The trained labeller, with its network in eval mode on the CPU.

    Note: the vocabularies are built from the global ``label_train_data``,
    not from `train_data`.
    """
    vocab_words, vocab_tags, vocab_labels, inverse_vocab_words, inverse_vocab_tags, inverse_vocab_labels = make_vocabs(label_train_data)
    labler = FixedWindowLabler(vocab_words, vocab_tags, inverse_vocab_labels)
    model = labler.fw_model
    # The batches are moved to `device`, so the parameters must live there too.
    model.to(device)
    optimizer = optim.Adam(model.parameters(), lr=lr)

    for ep in range(n_epochs):
        model.train()
        for bx, by in label_training_examples(vocab_words, vocab_tags, vocab_labels, train_data, labler, batch_size=batch_size):
            bx = bx.to(device)
            by = by.to(device)
            optimizer.zero_grad()
            output = model.forward(bx)
            # Let a failing loss computation propagate: swallowing it would
            # only fail later on `backward()` with a misleading error.
            loss = F.cross_entropy(output, by)
            loss.backward()
            optimizer.step()
        model.eval()

    # The labeller builds its feature windows as CPU tensors, so hand the
    # network back on the CPU.
    model.to('cpu')
    return labler

In [None]:
# Train the global labeller. FixedWindowParser (defined below) binds this
# object as the default value of its `labler` argument, so this cell must run
# before that class is defined.
labler = label_train_fixed_window(label_train_data, n_epochs=2)

# Parser

In [None]:
class Dataset():
    """Iterable over the sentences of a CoNLL-U file for dependency parsing.

    Every sentence is yielded as a list of ``(form, upos, head)`` triples
    whose first element is the pseudo-root ``ROOT``. Comment lines (starting
    with ``#``) and tokens whose id is not a plain integer (e.g. range tokens
    such as ``1-2``) are skipped.
    """

    ROOT = ('<root>', '<root>', 0)  # Pseudo-root

    def __init__(self, filename):
        self.filename = filename

    def __iter__(self):
        with open(self.filename, 'rt', encoding='utf-8') as lines:
            tmp = [Dataset.ROOT]
            for line in lines:
                if line.startswith('#'):  # Skip lines with comments
                    continue
                line = line.rstrip()
                if line:
                    columns = line.split('\t')
                    if columns[0].isdigit():  # Skip range tokens
                        tmp.append((columns[1], columns[3], int(columns[6])))
                elif len(tmp) > 1:
                    # A blank line ends the sentence; ignore repeated blank
                    # lines, which would otherwise produce root-only sentences.
                    yield tmp
                    tmp = [Dataset.ROOT]
            # The file may not end with a blank line: flush the last sentence.
            if len(tmp) > 1:
                yield tmp

# (form, tag, head) readers over the Chinese training and development treebanks.
parse_train_data = Dataset(chinese_train)
parse_dev_data = Dataset(chinese_dev)

In [None]:
def uas(parser, gold_data):
    """Return the unlabelled attachment score of `parser` on `gold_data`.

    Args:
        parser: object with a ``predict(words, tags)`` method returning one
            head index per token.
        gold_data: iterable of sentences, each a sequence of
            ``(word, tag, head)`` triples starting with the pseudo-root.

    Returns:
        Fraction of tokens (the pseudo-root at position 0 excluded) whose
        predicted head equals the gold head, or ``0.0`` when there is nothing
        to score.
    """
    correct = 0
    total = 0
    for sentence in gold_data:
        words, tags, heads = zip(*sentence)
        pred = parser.predict(words, tags)
        # Position 0 is the pseudo-root, which has no head to evaluate.
        for i in range(1, len(pred)):
            if heads[i] == pred[i]:
                correct += 1
            total += 1
    if total == 0:
        # Nothing to evaluate; avoid a ZeroDivisionError.
        return 0.0
    return correct / total

In [None]:
def oracle_moves(gold_heads):
    """Yield the ``(config, move)`` pairs of the static arc-standard oracle.

    Starting from the initial configuration, each step yields the current
    configuration together with the gold move to take in it, then advances:

    * LA when the second-topmost stack word has the topmost word as gold head
      and has already collected all of its gold dependents;
    * RA when the topmost stack word has the second-topmost word as gold head
      and has already collected all of its gold dependents;
    * SH otherwise.

    Args:
        gold_heads: list where ``gold_heads[i]`` is the gold head of token
            ``i`` (position 0 is the pseudo-root).

    Raises:
        ValueError: if no transition reproduces the tree, i.e. a shift is
            required although the buffer is empty (non-projective tree).
    """
    SH, LA, RA = ArcStandardParser.MOVES

    if len(gold_heads) == 0:
        # No tokens, no transitions.
        return

    parser = ArcStandardParser()
    config = parser.initial_config(len(gold_heads))

    def subtree_complete(node, pred_heads):
        # True when every gold dependent of `node` is already attached to it.
        return all(pred_heads[dep] == node
                   for dep, gold_head in enumerate(gold_heads) if gold_head == node)

    while not parser.is_final_config(config):
        valid_moves = parser.valid_moves(config)

        move = SH
        if len(valid_moves) >= 2:
            _, stack, pred_heads = config
            top, second = stack[-1], stack[-2]
            if LA in valid_moves and gold_heads[second] == top and subtree_complete(second, pred_heads):
                move = LA
            elif RA in valid_moves and gold_heads[top] == second and subtree_complete(top, pred_heads):
                move = RA

        if move == SH and SH not in valid_moves:
            raise ValueError("No oracle transition for config " + str(config)
                             + "; the gold tree is probably not projective")

        # Pair the move with the configuration it is taken IN, not the one it
        # leads to.
        yield (config, move)
        config = parser.next_config(config, move)

In [None]:
class Parser(object):
    """Interface of a dependency parser."""

    def predict(self, words, tags):
        """Return the predicted head of every token; subclasses must override."""
        raise NotImplementedError

class ArcStandardParser(Parser):
    """Arc-standard transition system.

    A configuration is a triple ``(i, stack, heads)``: ``i`` is the index of
    the next word in the buffer, ``stack`` the list of word indices on the
    stack (top last) and ``heads[k]`` the head currently assigned to word
    ``k`` (0 until an arc is created).
    """

    MOVES = tuple(range(3))

    SH, LA, RA = MOVES  # Parser moves are specified as integers.

    @staticmethod
    def initial_config(num_words):
        """Return the initial configuration for a sentence of `num_words` tokens."""
        return (0, [], [0]*num_words)

    @staticmethod
    def valid_moves(config):
        """Return the list of moves allowed in `config`.

        SH needs a non-empty buffer; LA and RA need at least two stack items.
        """
        i, stack, head = config
        moves = []
        if i < len(head):
            moves.append(ArcStandardParser.SH)
        if len(stack) >= 2:
            moves.append(ArcStandardParser.LA)
            moves.append(ArcStandardParser.RA)
        return moves

    @staticmethod
    def next_config(config, move):
        """Return the configuration reached by applying `move` to `config`.

        `config` itself is left untouched.

        Raises:
            ValueError: if `move` is not one of SH, LA, RA.
        """
        i, stack, head = config
        # Both lists hold plain ints, so a shallow copy fully isolates them.
        stack, head = list(stack), list(head)
        if move == ArcStandardParser.SH:
            stack.append(i)
            i += 1
        elif move == ArcStandardParser.LA:
            # Second-topmost becomes a dependent of the topmost and is removed.
            head[stack[-2]] = stack[-1]
            del stack[-2]
        elif move == ArcStandardParser.RA:
            # Topmost becomes a dependent of the second-topmost and is removed.
            head[stack[-1]] = stack[-2]
            del stack[-1]
        else:
            raise ValueError("Error,  config: "+ str(config) + "move: " + str(move))
        return (i, stack, head)

    @staticmethod
    def is_final_config(config):
        """Return True when the buffer is empty and one item is left on the stack."""
        i, stack, head = config
        return i == len(head) and len(stack) == 1

In [None]:
class parse_FixedWindowModel(nn.Module):
    """Feed-forward move classifier for the fixed-window parser.

    Args:
        embedding_specs: sequence of ``(n, vocab_size, dim)`` triples; the
            next ``n`` input columns share one embedding table of
            ``vocab_size`` rows and width ``dim``. The first entry describes
            the word columns, the second the tag columns.
        hidden_dim: width of the hidden layer.
        output_dim: number of output scores (one per parser move).
        use_distance: append two scalars, the squared Euclidean distances
            between the embeddings in word columns 1 and 2 and between those
            in tag columns 1 and 2. Requires at least three word and three
            tag columns.
        use_valency: append the last four input columns, as raw numbers, to
            the input of the first linear layer.
    """

    def __init__(self, embedding_specs,
                 hidden_dim = 180, output_dim = 3, 
                 use_distance = False, use_valency = False): 
        super().__init__()
        self.n_word_features = 0
        self.use_distance = use_distance
        self.use_valency = use_valency
        embed_out_dim = 0
        embed_list = []
        n_cols = 0
        iters = []  # For each embedded input column, the index of its table
        for embed_i, (n, num_words, word_dim) in enumerate(embedding_specs):
            tmp_embedding_layer = nn.Embedding(num_words, word_dim)
            nn.init.normal_(tmp_embedding_layer.weight, std=0.01)
            embed_list.append(tmp_embedding_layer)
            embed_out_dim += word_dim * n
            iters += [embed_i] * n 
            if embed_i == 0:
                self.n_word_features = n
                self.word_dim = word_dim
            if embed_i == 1:
                self.tag_dim = word_dim
            n_cols += n
        self.iters = iters
        self.n_cols = n_cols
        self.embed = nn.ModuleList(embed_list)
        if self.use_distance:
            embed_out_dim += 2  # One scalar for the word pair, one for the tag pair
        if self.use_valency:
            embed_out_dim += 4 # Valencies are concatenated to the input of the linear layer
        self.linear1 = nn.Linear(embed_out_dim, hidden_dim)
        self.relu = nn.ReLU()
        self.linear2 = nn.Linear(hidden_dim, output_dim)

    def forward(self, features, want_print = False):
        """Return move scores for a batch (or a single vector) of features."""
        if want_print:
            print("In forward: "+ str(features.shape))
        if len(features.shape) == 1:
            features = torch.unsqueeze(features, dim=0)
        # The valency counts are the LAST four input columns.
        valencies = features[:, -4:]
        embedded = torch.cat(tuple(self.embed[self.iters[i]](features[:,i]) for i in range(self.n_cols)), 1)

        extras = []
        if self.use_distance:
            w = self.word_dim
            word_diff = embedded[:, w:2*w] - embedded[:, 2*w:3*w]
            extras.append(torch.sum(torch.pow(word_diff, 2), dim=1, keepdim=True))
            t = self.tag_dim
            tag_start = self.n_word_features * w  # Offset of the first tag embedding
            tag_diff = (embedded[:, tag_start + t:tag_start + 2*t]
                        - embedded[:, tag_start + 2*t:tag_start + 3*t])
            extras.append(torch.sum(torch.pow(tag_diff, 2), dim=1, keepdim=True))
        if self.use_valency:
            # Integer counts must match the floating dtype of the embeddings.
            extras.append(valencies.to(embedded.dtype))

        features = torch.cat([embedded] + extras, 1) if extras else embedded
        features = self.linear1(features)
        features = self.relu(features)
        features = self.linear2(features)
        return features

In [None]:
class FixedWindowParser(ArcStandardParser):
    """Greedy arc-standard parser driven by a ``parse_FixedWindowModel``.

    Optional feature groups (each adds columns to the feature vector):

    * ``use_unigram``: word, tag and predicted label of the leftmost and
      rightmost modifier of the two topmost stack words;
    * ``use_third_order``: the same for their second-leftmost and
      second-rightmost modifiers;
    * ``use_valency``: number of left and right modifiers of the two topmost
      stack words.

    Note: the defaults of ``vocab_labels`` and ``labler`` are the global
    objects that exist when the class is defined.
    """

    def __init__(self, vocab_words, vocab_tags, embedding_specs, 
                 vocab_labels=vocab_labels, labler=labler, hidden_dim=180, 
                 use_distance = False, use_unigram = False, 
                 use_third_order = False, use_valency = False):
        super().__init__()
        self.fw_model = parse_FixedWindowModel(embedding_specs,
                                               hidden_dim=hidden_dim,
                                               use_distance=use_distance,
                                               use_valency=use_valency)
        self.vocab_words = vocab_words
        self.vocab_tags = vocab_tags
        self.vocab_labels = vocab_labels
        self.labler = labler
        self.use_unigram = use_unigram
        self.use_third_order = use_third_order
        self.use_valency = use_valency

    @staticmethod
    def _attached_modifiers(config, governor):
        """Return, left to right, the tokens already attached to `governor`.

        Heads are initialised to 0, so ``head[m] == governor`` alone cannot
        tell an unattached token from a dependent of the root. A token has
        received its head exactly when it has left the buffer (``m < i``) and
        is no longer on the stack.
        """
        i, stack, head = config
        on_stack = set(stack)
        return [m for m in range(min(i, len(head)))
                if head[m] == governor and m not in on_stack]

    def _modifier_features(self, words, tags, governor, modifier):
        """Return ``(word id, tag id, label id)`` of `modifier`, or PAD ids if it is None.

        The label is predicted by the labeller for the arc from `governor` to
        `modifier`.
        """
        if modifier is None:
            return (self.vocab_words[PAD], self.vocab_tags[PAD], self.vocab_labels[PAD])
        label = self.labler.predict(words[modifier], words[governor],
                                    tags[modifier], tags[governor])
        return (words[modifier], tags[modifier], label)

    def featurize(self, words, tags, config, want_print = False):
        """Return the feature vector of `config` as a LongTensor.

        Layout: all word ids, then all tag ids, then all label ids, then the
        valency counts. Within the word (and tag) group the order is

        0. next word in the buffer
        1. topmost word on the stack
        2. second-topmost word on the stack
        3-4. leftmost / rightmost modifier of the topmost stack word
        5-6. leftmost / rightmost modifier of the second-topmost stack word
        7-8. second-leftmost / second-rightmost modifier of the topmost word
        9-10. second-leftmost / second-rightmost modifier of the second-topmost word

        where 3-6 are present only with ``use_unigram`` and 7-10 only with
        ``use_third_order``. The label group holds the labels of the same
        modifiers (entries 3 onwards). Missing items are encoded with PAD.
        `words` and `tags` hold ids; `config` is not modified. ``want_print``
        is accepted for interface compatibility and unused.
        """
        i, stack, head = config
        pad_word = self.vocab_words[PAD]
        pad_tag = self.vocab_tags[PAD]

        top = stack[-1] if len(stack) >= 1 else None
        second = stack[-2] if len(stack) >= 2 else None
        governors = (top, second)

        word_features = []
        tag_features = []
        label_features = []

        # Baseline features: next buffer word, topmost and second-topmost stack word.
        if i < len(words):
            word_features.append(words[i])
            tag_features.append(tags[i])
        else:
            word_features.append(pad_word)
            tag_features.append(pad_tag)
        for governor in governors:
            if governor is None:
                word_features.append(pad_word)
                tag_features.append(pad_tag)
            else:
                word_features.append(words[governor])
                tag_features.append(tags[governor])

        # Modifiers of the two governors, only computed when some group needs them.
        needs_modifiers = self.use_unigram or self.use_third_order or self.use_valency
        modifiers = [
            self._attached_modifiers(config, governor)
            if (needs_modifiers and governor is not None) else []
            for governor in governors
        ]

        def add_modifiers(governor, pair):
            for modifier in pair:
                word, tag, label = self._modifier_features(words, tags, governor, modifier)
                word_features.append(word)
                tag_features.append(tag)
                label_features.append(label)

        if self.use_unigram:
            # Leftmost and rightmost modifier of each governor.
            for governor, mods in zip(governors, modifiers):
                add_modifiers(governor, (mods[0], mods[-1]) if mods else (None, None))

        if self.use_third_order:
            # Second-leftmost and second-rightmost modifier of each governor.
            for governor, mods in zip(governors, modifiers):
                add_modifiers(governor, (mods[1], mods[-2]) if len(mods) > 1 else (None, None))

        valency_features = []
        if self.use_valency:
            # Number of modifiers to the left and to the right of each governor.
            for governor, mods in zip(governors, modifiers):
                if governor is None:
                    valency_features.extend([0, 0])
                else:
                    valency_features.append(sum(1 for m in mods if m < governor))
                    valency_features.append(sum(1 for m in mods if m > governor))

        featurized_features = word_features + tag_features + label_features + valency_features

        return torch.LongTensor(featurized_features)

    def predict(self, words, tags, want_print = False):
        """Return the predicted head index of every token of the sentence.

        Unknown words are mapped to UNK. In each configuration the valid move
        with the highest model score is applied until the final configuration
        is reached.

        Raises:
            RuntimeError: if a non-final configuration has no valid move
                (e.g. an empty sentence).
        """
        unk_id = self.vocab_words[UNK]
        word_ids = [self.vocab_words.get(word, unk_id) for word in words]
        tag_ids = [self.vocab_tags[tag] for tag in tags]
        config = self.initial_config(len(words))
        # Inference only: no autograd graph is needed.
        with torch.no_grad():
            while not self.is_final_config(config):
                model_features = self.featurize(word_ids, tag_ids, config, want_print = want_print)
                if (want_print):
                    print("In predict: " + str(model_features.shape))
                pred = self.fw_model.forward(model_features, want_print = want_print)
                valid_moves = self.valid_moves(config)
                if not valid_moves:
                    raise RuntimeError("No valid move in config " + str(config))

                # Highest-scoring valid move; on ties the first one wins.
                best_move = max(valid_moves, key=lambda move: pred[0, move].item())
                if want_print:
                    print(config)
                config = self.next_config(config, best_move)
        return config[2]

In [None]:
def parse_training_examples(vocab_words, vocab_tags, inverse_vocab_words,
                            inverse_vocab_tags, gold_data, parser,
                            feature_k, batch_size=100):
    """Yield ``(features, moves)`` training batches for the parser classifier.

    Every sentence of ``gold_data`` is encoded with ``vocab_words`` and
    ``vocab_tags``; its gold heads are passed to ``oracle_moves`` and every
    resulting ``(config, move)`` pair except the first one is featurized with
    ``parser.featurize``.

    Args:
        vocab_words: mapping from word form to id.
        vocab_tags: mapping from tag to id.
        inverse_vocab_words: unused by this function.
        inverse_vocab_tags: unused by this function.
        gold_data: iterable of sentences; each token is indexable with the
            word at 0, the tag at 1 and the gold head at 2.
        parser: object providing ``featurize(word_ids, tag_ids, config)``.
        feature_k: number of features per example (row width of a batch).
        batch_size: number of examples per full batch.

    Yields:
        Pairs of integer tensors: features of shape ``(n, feature_k)`` and
        moves of shape ``(n,)``, where ``n`` is ``batch_size`` except possibly
        for the last batch, which holds the remaining examples.
    """
    def _fresh_batch():
        # A new pair of zero tensors per batch, so yielded batches are never reused.
        return (torch.zeros((batch_size, feature_k), dtype=torch.long),
                torch.zeros(batch_size, dtype=torch.long))

    features, targets = _fresh_batch()
    filled = 0
    for sentence in gold_data:
        encoded = [(vocab_words[token[0]], vocab_tags[token[1]], token[2])
                   for token in sentence]
        word_ids = [entry[0] for entry in encoded]
        tag_ids = [entry[1] for entry in encoded]
        heads = [entry[2] for entry in encoded]

        # The first oracle step is dropped, as in the original implementation.
        transitions = tuple(oracle_moves(heads))[1:]
        for config, move in transitions:
            if filled >= batch_size:
                yield features, targets
                features, targets = _fresh_batch()
                filled = 0
            features[filled, :] = parser.featurize(word_ids, tag_ids, config)
            targets[filled] = move
            filled += 1

    if filled != 0:
        # Trailing batch: only the rows that were actually written.
        yield features[0:filled, :], targets[0:filled]

In [None]:
def parse_train_fixed_window(train_data, n_epochs=2, batch_size=100, lr=1e-2,
        embedding_specs=None,
        feature_k = 6, use_distance = False, use_third_order = False,
        use_unigram = False, use_valency = False):
    """Train a ``FixedWindowParser`` and return it in evaluation mode.

    Args:
        train_data: gold sentences handed to ``parse_training_examples``.
        n_epochs: number of passes over the training examples.
        batch_size: number of examples per optimizer step.
        lr: learning rate for Adam.
        embedding_specs: specs forwarded to ``FixedWindowParser``. When
            ``None`` (the default) the baseline specs
            ``[(3, len(vocab_words), 50), (3, len(vocab_tags), 10)]`` are
            built from the vocabularies created inside this function. They
            were previously built at definition time from module globals,
            which required those globals to exist before this ``def`` ran
            and could drift from the vocabularies used here.
        feature_k: number of features per training example.
        use_distance, use_third_order, use_unigram, use_valency: feature
            switches forwarded to ``FixedWindowParser``.

    Returns:
        The trained ``FixedWindowParser``.

    NOTE(review): the vocabularies are built from the global
    ``label_train_data`` and not from ``train_data`` -- confirm that both
    always describe the same corpus.
    """
    vocab_words, vocab_tags, _, inverse_vocab_words, inverse_vocab_tags, _ = make_vocabs(label_train_data)
    if embedding_specs is None:
        embedding_specs = [(3, len(vocab_words), 50), (3, len(vocab_tags), 10)]
    parser = FixedWindowParser(vocab_words, vocab_tags, embedding_specs=embedding_specs,
                               use_distance=use_distance, use_unigram=use_unigram,
                               use_third_order=use_third_order, use_valency=use_valency)
    optimizer = optim.Adam(parser.fw_model.parameters(), lr=lr)

    for ep in range(n_epochs):
        parser.fw_model.train()
        for bx, by in parse_training_examples(vocab_words, vocab_tags, inverse_vocab_words, inverse_vocab_tags, train_data, parser, feature_k, batch_size=batch_size):
            bx = bx.to(device)
            by = by.to(device)
            optimizer.zero_grad()
            output = parser.fw_model.forward(bx)
            loss = F.cross_entropy(output, by)
            loss.backward()
            optimizer.step()

    # Always hand back the model in evaluation mode, even when n_epochs == 0.
    parser.fw_model.eval()
    return parser

In [None]:
def mean_list(test_list):
    """Return the arithmetic mean of ``test_list`` converted with ``str``.

    Raises ``ZeroDivisionError`` for an empty list.
    """
    total = sum(test_list)
    count = len(test_list)
    return str(total / count)

def var_list(test_list):
    """Return the population variance of ``test_list`` converted with ``str``.

    The squared deviations from the mean are averaged over ``len(test_list)``
    (not ``len - 1``). Raises ``ZeroDivisionError`` for an empty list.
    """
    count = len(test_list)
    center = sum(test_list) / count
    squared_deviations = [(value - center) ** 2 for value in test_list]
    return str(sum(squared_deviations) / count)

# Pipeline

In [None]:
# Train the tagger once; the pipeline experiments in the following cells call
# `tagger.predict` to tag the dev sentences before parsing.
tagger = tag_train_fixed_window(tag_train_data, n_epochs=2)
print("Pure Tagger Baseline: " + format(accuracy(tagger, tag_dev_data), '.4f'))


Pure Tagger Baseline: 0.8766


# Results

## Baseline only


In [None]:
# Vocabularies depend only on the training file, so build them once instead of
# once per run. Later cells read these globals when sizing their embeddings.
vocab_words, vocab_tags, vocab_labels, inverse_vocab_words, inverse_vocab_tags, inverse_vocab_labels = make_vocabs(label_train_data)

# Dev sentences with the tagger's predicted tags in place of the gold tags
# (gold heads kept). The tagger is not retrained between runs, so this is
# computed once -- assumes `tagger.predict` is deterministic.
pipeline_valid_data = []
for sentence in parse_dev_data:
    words = [trip[0] for trip in sentence]
    heads = [trip[2] for trip in sentence]
    pred_tags = tagger.predict(words)
    pipeline_valid_data.append([(words[i], pred_tags[i], heads[i]) for i in range(len(words))])

# Train the baseline parser 10 times and collect the UAS of every run.
uas_list_baseline = []
for _ in range(10):
    parser = parse_train_fixed_window(parse_train_data, n_epochs=2)
    uas_list_baseline.append(uas(parser, pipeline_valid_data))

print("Pipeline Baseline Mean Accuracy: " + mean_list(uas_list_baseline)+ ", and Variance: " + var_list(uas_list_baseline))
print("All values: " + str(uas_list_baseline))


Pipeline Baseline Mean Accuracy: 0.5578141040827609, and Variance: 8.139265540956428e-05
All values: [0.5674010897891495, 0.5598989181078733, 0.5660585959093422, 0.5552396746426597, 0.5556345257837795, 0.5503435204927742, 0.5670062386480297, 0.5618731738134723, 0.5587933349127379, 0.5358919687277897]


## Baseline + Distance

In [None]:
# Dev sentences with the tagger's predicted tags in place of the gold tags
# (gold heads kept). The tagger is not retrained between runs, so this is
# computed once -- assumes `tagger.predict` is deterministic.
pipeline_valid_data = []
for sentence in parse_dev_data:
    words = [trip[0] for trip in sentence]
    heads = [trip[2] for trip in sentence]
    pred_tags = tagger.predict(words)
    pipeline_valid_data.append([(words[i], pred_tags[i], heads[i]) for i in range(len(words))])

# Train the baseline + distance parser 10 times and collect the UAS of every run.
# NOTE(review): this experiment trains for 3 epochs while the other experiments
# use 2 -- confirm this is intended before comparing the numbers.
uas_list_distance = []
for _ in range(10):
    parser = parse_train_fixed_window(parse_train_data, n_epochs=3, use_distance = True)
    uas_list_distance.append(uas(parser, pipeline_valid_data))

# The label names the experiment; it previously read "Pipeline Baseline".
print("Pipeline Baseline + Distance Mean Accuracy: " + mean_list(uas_list_distance)+ ", and Variance: " + var_list(uas_list_distance))
print("All values: " + str(uas_list_distance))

Pipeline Baseline Mean Accuracy: 0.5667219458264234, and Variance: 6.317892654982375e-05
All values: [0.5628208165521599, 0.5654268340835505, 0.5691384348100766, 0.5809049988154465, 0.5756929637526652, 0.5715075416567954, 0.5640843402037432, 0.5579246624022743, 0.5677959409302693, 0.5519229250572534]


Pipeline Baseline Mean Accuracy: 0.6297558453952601, and Variance: 0.00011954375634655416
All values: [0.6171862573564498, 0.6375854938762526, 0.6330920947987911, 0.6221965961507874, 0.6267297598218546, 0.6223556545252108, 0.6351200890726897, 0.6560362653093685, 0.6285986957213298, 0.6186575473198664]

## Baseline + Unigram

In [None]:
# Model configuration is identical for every run, so define it once.
# The first entries of the specs (7 + 7 + 4) add up to feature_k.
# Relies on the global vocabularies created by the baseline cell.
embedding_specs = [(7, len(vocab_words), 50), (7, len(vocab_tags), 10), (4, len(vocab_labels), 10)]
feature_k = 18

# Dev sentences with the tagger's predicted tags in place of the gold tags
# (gold heads kept). The tagger is not retrained between runs, so this is
# computed once -- assumes `tagger.predict` is deterministic.
pipeline_valid_data = []
for sentence in parse_dev_data:
    words = [trip[0] for trip in sentence]
    heads = [trip[2] for trip in sentence]
    pred_tags = tagger.predict(words)
    pipeline_valid_data.append([(words[i], pred_tags[i], heads[i]) for i in range(len(words))])

# Train the baseline + unigram parser 10 times and collect the UAS of every run.
uas_list_unigram = []
for _ in range(10):
    parser = parse_train_fixed_window(parse_train_data, n_epochs=2, embedding_specs=embedding_specs, feature_k=feature_k, use_unigram = True)
    uas_list_unigram.append(uas(parser, pipeline_valid_data))

# The label names the experiment; it previously read "Pipeline Baseline".
print("Pipeline Baseline + Unigram Mean Accuracy: " + mean_list(uas_list_unigram)+ ", and Variance: " + var_list(uas_list_unigram))
print("All values: " + str(uas_list_unigram))

Pipeline Baseline Mean Accuracy: 0.5775566611387506, and Variance: 2.2258840510628266e-05
All values: [0.5794835347074153, 0.5818526415541341, 0.5752981126115454, 0.5766406064913527, 0.5843007186290768, 0.5786148621969518, 0.576087814893785, 0.5657427149964463, 0.5775092790018164, 0.580036326304983]


## Baseline + Third Order


In [None]:
# Model configuration is identical for every run, so define it once.
# The first entries of the specs (7 + 7 + 4) add up to feature_k.
# Relies on the global vocabularies created by the baseline cell.
embedding_specs = [(7, len(vocab_words), 50), (7, len(vocab_tags), 10), (4, len(vocab_labels), 12)]
feature_k = 18

# Dev sentences with the tagger's predicted tags in place of the gold tags
# (gold heads kept). The tagger is not retrained between runs, so this is
# computed once -- assumes `tagger.predict` is deterministic.
pipeline_valid_data = []
for sentence in parse_dev_data:
    words = [trip[0] for trip in sentence]
    heads = [trip[2] for trip in sentence]
    pred_tags = tagger.predict(words)
    pipeline_valid_data.append([(words[i], pred_tags[i], heads[i]) for i in range(len(words))])

# Train the baseline + third-order parser 10 times and collect the UAS of every run.
uas_list_third_order = []
for _ in range(10):
    parser = parse_train_fixed_window(parse_train_data, n_epochs=2, embedding_specs=embedding_specs, feature_k=feature_k, use_third_order = True)
    uas_list_third_order.append(uas(parser, pipeline_valid_data))

# The label names the experiment; it previously read "Pipeline Baseline".
print("Pipeline Baseline + Third Order Mean Accuracy: " + mean_list(uas_list_third_order)+ ", and Variance: " + var_list(uas_list_third_order))
print("All values: " + str(uas_list_third_order))

Pipeline Baseline Mean Accuracy: 0.5725736397378188, and Variance: 4.9607559499464636e-05
All values: [0.5671641791044776, 0.5640053699755192, 0.5782989812840559, 0.5817736713259102, 0.5764036958066808, 0.5747453210139777, 0.5588723051409619, 0.5690594645818526, 0.5763247255784569, 0.5790886835662955]


## Baseline + Valency

In [None]:
# Identical for every run, so define it once.
feature_k = 10

# Dev sentences with the tagger's predicted tags in place of the gold tags
# (gold heads kept). The tagger is not retrained between runs, so this is
# computed once -- assumes `tagger.predict` is deterministic.
pipeline_valid_data = []
for sentence in parse_dev_data:
    words = [trip[0] for trip in sentence]
    heads = [trip[2] for trip in sentence]
    pred_tags = tagger.predict(words)
    pipeline_valid_data.append([(words[i], pred_tags[i], heads[i]) for i in range(len(words))])

# Train the baseline + valency parser 10 times and collect the UAS of every run.
uas_list_valency = []
for _ in range(10):
    parser = parse_train_fixed_window(parse_train_data, n_epochs=2, feature_k=feature_k, use_valency = True)
    uas_list_valency.append(uas(parser, pipeline_valid_data))

# The label names the experiment; it previously read "Pipeline Baseline".
print("Pipeline Baseline + Valency Mean Accuracy: " + mean_list(uas_list_valency)+ ", and Variance: " + var_list(uas_list_valency))
print("All values: " + str(uas_list_valency))

Pipeline Baseline Mean Accuracy: 0.578859669904446, and Variance: 5.149366514773534e-05
All values: [0.5810629392718945, 0.5720603332543631, 0.5891968727789624, 0.5807470583589986, 0.5921977414514727, 0.5707968096027798, 0.5801152965332069, 0.5749032614704257, 0.5692174050383005, 0.5782989812840559]


## Baseline + all features except distance


In [None]:
# Model configuration is identical for every run, so define it once.
# Relies on the global vocabularies created by the baseline cell.
embedding_specs = [(11, len(vocab_words), 50), (11, len(vocab_tags), 10), (8, len(vocab_labels), 12)]
feature_k = 34

# Dev sentences with the tagger's predicted tags in place of the gold tags
# (gold heads kept). The tagger is not retrained between runs, so this is
# computed once -- assumes `tagger.predict` is deterministic.
pipeline_valid_data = []
for sentence in parse_dev_data:
    words = [trip[0] for trip in sentence]
    heads = [trip[2] for trip in sentence]
    pred_tags = tagger.predict(words)
    pipeline_valid_data.append([(words[i], pred_tags[i], heads[i]) for i in range(len(words))])

# Results get their own name: the "all features" cell that follows rebinds
# `uas_list_all`, which used to discard the numbers collected here.
uas_list_all_no_distance = []
for run in range(10):
    print("Iteration", run+1)
    parser = parse_train_fixed_window(parse_train_data, n_epochs=2,
                                      embedding_specs=embedding_specs,
                                      feature_k=feature_k,
                                      use_distance = False,
                                      use_unigram = True,
                                      use_third_order = True,
                                      use_valency = True)
    uas_list_all_no_distance.append(uas(parser, pipeline_valid_data))

# Keep the historical name bound for any cell that still reads it.
uas_list_all = uas_list_all_no_distance

# The label names the experiment; it previously matched the "all features" cell.
print("Pipeline Baseline + All features except Distance Mean Accuracy: " + mean_list(uas_list_all_no_distance)+ ", and Variance: " + var_list(uas_list_all_no_distance))
print("All values: " + str(uas_list_all_no_distance))

Iteration 1
Iteration 2
Iteration 3
Iteration 4
Iteration 5
Iteration 6
Iteration 7
Iteration 8
Iteration 9
Iteration 10
Pipeline Baseline + All features Mean Accuracy: 0.5712548369264787, and Variance: 2.7600353570597147e-05
All values: [0.5731659164494985, 0.5647161020295348, 0.5730869462212745, 0.5612414119876806, 0.5736397378188423, 0.5715865118850193, 0.5732448866777226, 0.5795625049356392, 0.5760088446655611, 0.5662955065940141]


## Baseline + all features

In [None]:
# Model configuration is identical for every run, so define it once.
# Relies on the global vocabularies created by the baseline cell.
embedding_specs = [(11, len(vocab_words), 50), (11, len(vocab_tags), 10), (8, len(vocab_labels), 12)]
feature_k = 34

# Dev sentences with the tagger's predicted tags in place of the gold tags
# (gold heads kept). The tagger is not retrained between runs, so this is
# computed once -- assumes `tagger.predict` is deterministic.
pipeline_valid_data = []
for sentence in parse_dev_data:
    words = [trip[0] for trip in sentence]
    heads = [trip[2] for trip in sentence]
    pred_tags = tagger.predict(words)
    pipeline_valid_data.append([(words[i], pred_tags[i], heads[i]) for i in range(len(words))])

# Train the parser with every feature group enabled 10 times and collect the
# UAS of every run. A named counter is used because `_` is IPython's
# last-output variable.
uas_list_all = []
for run in range(10):
    print("Iteration", run+1)
    parser = parse_train_fixed_window(parse_train_data, n_epochs=2,
                                      embedding_specs=embedding_specs,
                                      feature_k=feature_k,
                                      use_distance = True,
                                      use_unigram = True,
                                      use_third_order = True,
                                      use_valency = True)
    uas_list_all.append(uas(parser, pipeline_valid_data))

print("Pipeline Baseline + All features Mean Accuracy: " + mean_list(uas_list_all)+ ", and Variance: " + var_list(uas_list_all))
print("All values: " + str(uas_list_all))

Iteration 1
Iteration 2
Iteration 3
Iteration 4
Iteration 5
Iteration 6
Iteration 7
Iteration 8
Iteration 9
Iteration 10
Pipeline Baseline + All features Mean Accuracy: 0.5787333175392877, and Variance: 9.86862810179304e-06
All values: [0.5793255942509674, 0.5771933980889205, 0.5727710653083787, 0.5832741056621653, 0.5771144278606966, 0.5747453210139777, 0.5823264629234779, 0.5792466240227434, 0.5812208797283425, 0.5801152965332069]
