# Dependency parsing baseline


Dependency parsing is the task of mapping a sentence to a formal representation of its syntactic structure in the form of a dependency tree, which consists of directed arcs between individual words (tokens). Here we will implement a dependency parser baseline based on the arc-standard algorithm and the fixed-window model that we implemented in Lab L3.

## Imports

In [24]:
from batchify import *
from create_vocab import *
from data_handling import *
from parser import *
from projectivize import *
from uas import *
from window_models import *
from taggers import *

In [25]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [26]:
device

device(type='cpu')

## The data set

In [27]:
train_data = Dataset('../data/en_gum-ud-train-projectivized.conllu')
dev_data = Dataset('../data/en_gum-ud-dev-projectivized.conllu')
test_data = Dataset('../data/en_gum-ud-test-projectivized.conllu')

## Train tagger

In [28]:
import torch.optim as optim
import torch
import torch.nn.functional as F

def train_fixed_window(train_data, n_epochs=2, batch_size=100, lr=1e-2):
    vocab_words, vocab_tags = make_vocabs(train_data)
    tagger = FixedWindowTagger(vocab_words, vocab_tags, len(vocab_tags))
    
    optimizer = optim.Adam(tagger.model.parameters(), lr=lr)
    for i in range(n_epochs):
        total_loss = 0
        batch_nr = 0
        for x, y in training_examples_tagger(vocab_words, vocab_tags, train_data, tagger):
            batch_nr += 1
            
            optimizer.zero_grad()
            y_pred = tagger.model.forward(x)
            
            loss = F.cross_entropy(y_pred, y)
            loss.backward()
            total_loss += loss.item()
            optimizer.step()
            if batch_nr % 100 == 1:
                print(total_loss/batch_nr)
                #pass
    return tagger


In [29]:
tagger = train_fixed_window(train_data)

2.9544906616210938
1.0781216923848238
0.7784059331636524
0.64725699104938
0.571165594915648
0.5266368385114356
0.4936308122415114
0.4747134044277515
0.45567129494694436
0.3410508334636688
0.18849418046745922
0.15005630080064583
0.13257628279827857
0.13192731325057083
0.13199662393750902
0.12808836604384846
0.1264808471875927
0.12622545749505984


In [30]:
accuracy(tagger, dev_data)

0.8830423635697717

## Train parser

In [44]:
import torch.optim as optim
import torch
import torch.nn.functional as F
def train_fixed_parser(train_data, n_epochs=2, batch_size=100, lr=1e-2):
    vocab_words, vocab_tags = make_vocabs(train_data)
    parser = FixedWindowParser(vocab_words, vocab_tags)
    
    optimizer = optim.Adam(parser.model.parameters(), lr=lr)
    for i in range(n_epochs):
        total_loss = 0
        batch_nr = 0
        for x, y in training_examples_parser(vocab_words, vocab_tags, train_data, parser):
            batch_nr += 1
            
            optimizer.zero_grad()
            y_pred = parser.model.forward(x)
            
            loss = F.cross_entropy(y_pred, y)
            loss.backward()
            total_loss += loss.item()
            optimizer.step()
            if batch_nr % 100 == 1:
                print(total_loss/batch_nr)
                if total_loss/batch_nr <= 0.5:
                    break
                
    return parser

In [45]:
parser = train_fixed_parser(train_data)

1.1093326807022095
0.5482913636335052
0.4688233163819384
0.4829970896244049


In [46]:
uas(parser, dev_data)

0.6089883318374151

## Testing parser with predicted tags 

In [34]:
def calc_uas_with_tagger_preds(tagger, parser, data):
    correct = 0
    total = 0
    
    new_data = []
    for sent in data:
        pred_tags = tagger.predict(sent)
    
        # Replace gold tags with predicted
        for i , (_, tag) in enumerate(pred_tags):
            sent[i] = (sent[i][0], tag, sent[i][2])
        new_data.append(sent)
        
    return uas(parser, new_data)

In [35]:
calc_uas_with_tagger_preds(tagger, parser, dev_data)

0.6261700217976663