# Dependency parsing baseline


Dependency parsing is the task of mapping a sentence to a formal representation of its syntactic structure in the form of a dependency tree, which consists of directed arcs between individual words (tokens). Here we will implement a dependency parser baseline based on the arc-standard algorithm and the fixed-window model that we implemented in Lab L3.

## Imports

In [1]:
from batchify import *
from create_vocab import *
from data_handling import *
import syntax_parser as parser 
from projectivize import *
from uas import *
from window_models import *
from taggers import *
import importlib


In [2]:
# Select the compute device: the GPU when CUDA is usable, the CPU otherwise.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

In [3]:
device

device(type='cuda')

## The data set

In [4]:
# Load the three projectivized splits (train / dev / test) of the GUM
# treebank files stored under ./data.
train_data, dev_data, test_data = (
    Dataset('./data/en_gum-ud-train-projectivized.conllu'),
    Dataset('./data/en_gum-ud-dev-projectivized.conllu'),
    Dataset('./data/en_gum-ud-test-projectivized.conllu'),
)

## Train tagger

In [5]:
import torch.optim as optim
import torch
import torch.nn.functional as F

def train_fixed_window(train_data, n_epochs=1, batch_size=100, lr=1e-2):
    """Train a ``FixedWindowTagger`` on ``train_data`` and return it.

    Vocabularies are built from ``train_data`` with ``make_vocabs``. The
    tagger's model is then optimised with Adam on a cross-entropy loss over
    the batches yielded by ``training_examples_tagger``. The mean loss of the
    current epoch so far is printed at batches 1, 101, 201, ...

    Args:
        train_data: Training sentences; passed to ``make_vocabs`` and
            ``training_examples_tagger``.
        n_epochs: Number of passes over the training examples.
        batch_size: Accepted for interface compatibility but not used here;
            the batch generator is called without it.
            NOTE(review): forward it to ``training_examples_tagger`` if that
            generator accepts a batch size -- confirm its signature.
        lr: Learning rate for the Adam optimizer.

    Returns:
        The trained ``FixedWindowTagger``.
    """
    vocab_words, vocab_tags = make_vocabs(train_data)
    tagger = FixedWindowTagger(vocab_words, vocab_tags, len(vocab_tags))

    optimizer = optim.Adam(tagger.model.parameters(), lr=lr)
    for _epoch in range(n_epochs):
        total_loss = 0
        batches = training_examples_tagger(vocab_words, vocab_tags, train_data, tagger)
        for batch_nr, (x, y) in enumerate(batches, start=1):
            optimizer.zero_grad()
            # Call the module itself rather than .forward() so that any
            # registered hooks run as PyTorch intends.
            y_pred = tagger.model(x)

            loss = F.cross_entropy(y_pred, y)
            loss.backward()
            total_loss += loss.item()
            optimizer.step()

            # Progress report on the first batch of every hundred.
            if batch_nr % 100 == 1:
                print(total_loss/batch_nr)
    return tagger


In [None]:
# Train the fixed-window tagger on the training split with the default
# hyperparameters of train_fixed_window.
tagger = train_fixed_window(train_data)

In [None]:
# Evaluate the trained tagger on the development split with accuracy().
accuracy(tagger, dev_data)

## Train parser

In [12]:
import torch.optim as optim
import torch
import torch.nn.functional as F
import tqdm as tqdm
import time

def train_fixed_parser(train_data, n_epochs=10, batch_size=100, lr=1e-3):
    """Train a ``parser.FixedWindowParser`` on ``train_data`` and return it.

    Vocabularies are built from ``train_data`` with ``make_vocabs``. The
    parser's model is optimised with Adam on a cross-entropy loss over the
    batches yielded by ``training_examples_parser``. After every epoch the
    mean training loss, the time spent on that epoch's training batches and
    the ``uas`` score on the module-level ``dev_data`` are printed.

    This function reads the module-level names ``device`` (target device for
    the batch tensors) and ``dev_data`` (evaluation set).

    Args:
        train_data: Training sentences; passed to ``make_vocabs`` and
            ``training_examples_parser``.
        n_epochs: Number of passes over the training examples.
        batch_size: Accepted for interface compatibility but not used here;
            the batch generator is called without it.
            NOTE(review): forward it to ``training_examples_parser`` if that
            generator accepts a batch size -- confirm its signature.
        lr: Learning rate for the Adam optimizer.

    Returns:
        The trained ``parser.FixedWindowParser``.
    """
    vocab_words, vocab_tags = make_vocabs(train_data)
    myparser = parser.FixedWindowParser(vocab_words, vocab_tags)
    myparser.model.train()
    optimizer = optim.Adam(myparser.model.parameters(), lr=lr)

    for _epoch in tqdm.tqdm(range(n_epochs)):
        # Restart the clock at the top of every epoch so the reported time
        # covers only this epoch's training batches and not the dev-set
        # evaluation that ended the previous epoch.
        start_time = time.time()
        total_loss = 0
        batch_nr = 0

        # The index tensor is named ``idx`` so that it cannot be confused
        # with (or overwrite) the epoch counter of the outer loop.
        for words, tags, idx, x, y in training_examples_parser(vocab_words, vocab_tags, train_data, myparser):
            words = words.to(device)
            tags = tags.to(device)
            x = x.to(device)
            y = y.to(device)
            idx = idx.to(device)

            batch_nr += 1

            optimizer.zero_grad()
            # ``idx`` selects the rows of ``words`` / ``tags`` that belong to
            # the examples in ``x``. Call the module itself rather than
            # .forward() so that any registered hooks run.
            y_pred = myparser.model(words[idx], tags[idx], x)

            loss = F.cross_entropy(y_pred, y)
            loss.backward()
            total_loss += loss.item()
            optimizer.step()

        print("loss: ", total_loss/batch_nr, "time was: ", time.time() - start_time)
        print("", uas(myparser, dev_data))
        # NOTE(review): presumably uas() switches the model to eval mode;
        # put it back into training mode before the next epoch.
        myparser.model.train()

    return myparser

In [13]:
# Reload the syntax_parser module (imported as `parser`) so that edits to its
# source are picked up without restarting the kernel, then train a new parser
# with the default hyperparameters of train_fixed_parser.
importlib.reload(parser)
myparser = train_fixed_parser(train_data)

In [None]:
# Score the trained parser on the development split with uas().
uas(myparser, dev_data)

In [None]:
# Score the trained parser on the test split with uas().
uas(myparser, test_data)

## Testing parser with predicted tags 

In [None]:
def calc_uas_with_tagger_preds(tagger, _parser, data):
    """Score ``_parser`` with ``uas`` on ``data`` using predicted tags.

    For every sentence in ``data`` the tags are predicted with
    ``tagger.predict`` and substituted for the tags stored in the sentence.
    The retagged sentences are then passed to ``uas``. The sentences in
    ``data`` themselves are left untouched, so the gold tags remain available
    to later evaluations.

    Args:
        tagger: Object whose ``predict(sent)`` returns a sequence of pairs,
            the second element of each pair being the predicted tag.
        _parser: Parser handed through to ``uas``.
        data: Iterable of sentences; each sentence is a sequence of tuples
            of which elements 0 and 2 are kept and element 1 (the tag) is
            replaced.

    Returns:
        Whatever ``uas(_parser, retagged_sentences)`` returns.
    """
    new_data = []
    for sent in data:
        pred_tags = tagger.predict(sent)

        # Replace gold tags with predicted ones on a shallow copy, so the
        # caller's data set is not modified as a side effect.
        retagged = list(sent)
        for i, (_, tag) in enumerate(pred_tags):
            retagged[i] = (sent[i][0], tag, sent[i][2])
        new_data.append(retagged)

    return uas(_parser, new_data)

In [None]:
# Score the parser on the development split with the tagger's predicted tags
# substituted for the tags stored in the data.
calc_uas_with_tagger_preds(tagger, myparser, dev_data)

In [None]:
import os

# Neither open() nor torch.save expands "~", so resolve it to the user's home
# directory before saving.
path = os.path.expanduser("~/paper-180-lstm-10-epochs-845acc")
# NOTE(review): assumes myparser exposes state_dict(); if only myparser.model
# is a torch module, save myparser.model.state_dict() instead -- confirm.
torch.save(myparser.state_dict(), path)