# Dependency parsing baseline


Dependency parsing is the task of mapping a sentence to a formal representation of its syntactic structure in the form of a dependency tree, which consists of directed arcs between individual words (tokens). Here we will implement a dependency parser baseline based on the arc-standard algorithm and the fixed-window model that we implemented in Lab L3.

## Imports

In [14]:
from batchify import *
from create_vocab import *
from data_handling import *
from parser import *
from projectivize import *
from uas import *
from window_models import *
from taggers import *

In [15]:
# Select the CUDA device when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [16]:
device

device(type='cpu')

## The data set

In [17]:
# Load the training, development and test splits from the projectivized
# CoNLL-U files under ./data.
train_data = Dataset('./data/en_gum-ud-train-projectivized.conllu')
dev_data = Dataset('./data/en_gum-ud-dev-projectivized.conllu')
test_data = Dataset('./data/en_gum-ud-test-projectivized.conllu')

## Train tagger

In [18]:
import torch.optim as optim
import torch
import torch.nn.functional as F

def train_fixed_window(train_data, n_epochs=2, batch_size=100, lr=1e-3):
    """Train a fixed-window tagger on ``train_data`` and return it.

    The vocabularies are built from ``train_data`` with ``make_vocabs``.
    The tagger's model is optimised with Adam on a cross-entropy loss.
    During training, the mean batch loss of the current epoch so far is
    printed at batches 1, 101, 201, ...

    Args:
        train_data: Training sentences; passed to ``make_vocabs`` and
            ``training_examples_tagger``.
        n_epochs: Number of passes over the training examples.
        batch_size: Not used by this function; it is not forwarded to
            ``training_examples_tagger``.
            NOTE(review): confirm whether that helper accepts a batch-size
            argument and, if so, forward this value.
        lr: Learning rate handed to the Adam optimizer.

    Returns:
        The trained ``FixedWindowTagger``.
    """
    vocab_words, vocab_tags = make_vocabs(train_data)
    tagger = FixedWindowTagger(vocab_words, vocab_tags, len(vocab_tags))

    optimizer = optim.Adam(tagger.model.parameters(), lr=lr)
    for _ in range(n_epochs):
        # The running loss is reset at the start of every epoch.
        total_loss = 0
        batches = training_examples_tagger(
            vocab_words, vocab_tags, train_data, tagger
        )
        for batch_nr, (x, y) in enumerate(batches, start=1):
            optimizer.zero_grad()
            # Call the module itself rather than ``.forward`` so that any
            # registered hooks run (presumably ``tagger.model`` is a
            # ``torch.nn.Module`` -- it exposes ``parameters()`` and
            # ``forward()``).
            y_pred = tagger.model(x)

            loss = F.cross_entropy(y_pred, y)
            loss.backward()
            total_loss += loss.item()
            optimizer.step()
            if batch_nr % 100 == 1:
                print(total_loss / batch_nr)
    return tagger


In [19]:
# Train the tagger with the default hyperparameters of train_fixed_window.
tagger = train_fixed_window(train_data)

2.9685540199279785
2.3825770805377773
1.815235838664705
1.4485517878865086
1.2125645115265524
1.0670260203158308
0.9569959682900576
0.8841368622131933
0.8198371470048931
0.5263502597808838
0.2880955667483925
0.25445233796959493
0.22974832894348624
0.21985018116763405
0.21230887778534624
0.2024515613095336
0.19645805638310063
0.19186879511220328


In [20]:
# Evaluate the trained tagger on the development data.
accuracy(tagger, dev_data)

0.8846905139787572

## Train parser

In [21]:
import torch.optim as optim
import torch
import torch.nn.functional as F
def train_fixed_parser(train_data, n_epochs=3, batch_size=100, lr=1e-3):
    """Train a fixed-window parser on ``train_data`` and return it.

    The vocabularies are built from ``train_data`` with ``make_vocabs``.
    The parser's model is optimised with Adam on a cross-entropy loss.
    During training, the mean batch loss of the current epoch so far is
    printed at batches 1, 101, 201, ...

    Args:
        train_data: Training sentences; passed to ``make_vocabs`` and
            ``training_examples_parser``.
        n_epochs: Number of passes over the training examples.
        batch_size: Not used by this function; it is not forwarded to
            ``training_examples_parser``.
            NOTE(review): confirm whether that helper accepts a batch-size
            argument and, if so, forward this value.
        lr: Learning rate handed to the Adam optimizer.

    Returns:
        The trained ``FixedWindowParser``.
    """
    vocab_words, vocab_tags = make_vocabs(train_data)
    parser = FixedWindowParser(vocab_words, vocab_tags)

    optimizer = optim.Adam(parser.model.parameters(), lr=lr)
    for _ in range(n_epochs):
        # The running loss is reset at the start of every epoch.
        total_loss = 0
        batches = training_examples_parser(
            vocab_words, vocab_tags, train_data, parser
        )
        for batch_nr, (x, y) in enumerate(batches, start=1):
            optimizer.zero_grad()
            # Call the module itself rather than ``.forward`` so that any
            # registered hooks run (presumably ``parser.model`` is a
            # ``torch.nn.Module`` -- it exposes ``parameters()`` and
            # ``forward()``).
            y_pred = parser.model(x)

            loss = F.cross_entropy(y_pred, y)
            loss.backward()
            total_loss += loss.item()
            optimizer.step()
            if batch_nr % 100 == 1:
                print(total_loss / batch_nr)

    return parser

In [22]:
# Train the parser with the default hyperparameters of train_fixed_parser.
parser = train_fixed_parser(train_data)

1.3895262479782104
0.975690499390706
0.826820241426354
0.7343247677400658
0.6786869084299948
0.6332944770773014
0.6022871892880679
0.5780175194122992
0.5587399612912227
0.5438054190921731
0.5308842497629362
0.5193402110772388
0.5087740350697658
0.5009557470095148
0.4935720121643528
0.48423793569435525
0.47475677231637337
0.4661284290179654
0.459025665695703
0.4538143041971356
0.44697732341730373
0.43955135329855677
0.43345202805507405
0.4310107293114979
0.4292923091526381
0.42606381785983993
0.4229723537560611
0.4199600121332557
0.4173018935667494
0.41431059093516026
0.4129874807587269
0.41238731051915617
0.4108746001535898
0.4098496104764671
0.4089982012608268
0.40748008760689874
0.40705859166396985
0.40697694239673404
0.40496820795343164
0.4043519115794075
0.40325102238908644
0.40163830784915916
0.4000097432503443
0.3985437331671383
0.39758884557686835
0.3954571121912536
0.3939200635142001
0.41952258348464966
0.35665053778355665
0.35473744986365685
0.35410707545438874
0.3498835005888

In [23]:
# Score the trained parser on the development data, using the tags stored in
# dev_data.
uas(parser, dev_data)

0.7809334530067957

In [24]:
# Inspect the development sentence at index 3: run uas() on it alone and
# print the sentence.
# NOTE(review): the value returned by uas() is discarded here, so only the
# sentence itself is shown -- confirm whether the score was meant to be
# printed as well.
for i, data in enumerate(dev_data):
    if i != 3:
        continue
    uas(parser, [data])
    print(data)
    break

[('<root>', '<root>', 0), ('However', 'ADV', 6), (',', 'PUNCT', 1), ('it', 'PRON', 6), ('is', 'AUX', 6), ('not', 'PART', 6), ('enough', 'ADJ', 0), ('to', 'PART', 9), ('have', 'AUX', 9), ('attained', 'VERB', 6), ('such', 'ADJ', 12), ('native-like', 'ADJ', 12), ('levels', 'NOUN', 9), ('.', 'PUNCT', 6)]


## Testing the parser with predicted tags

In [25]:
def calc_uas_with_tagger_preds(tagger, parser, data):
    """Score ``parser`` on ``data`` after swapping in tags predicted by ``tagger``.

    For every sentence, the element at index 1 of each token tuple is
    replaced by the tag that ``tagger.predict`` returns for that position;
    the elements at index 0 and index 2 are kept.  The sentences in ``data``
    are left untouched: the replacement is done on copies, so the caller's
    tags are not overwritten.

    Args:
        tagger: Object whose ``predict(sentence)`` returns a sequence of
            pairs; the second element of each pair is used as the tag.
        parser: Parser handed to ``uas`` unchanged.
        data: Iterable of sentences, each an indexable sequence of token
            tuples.

    Returns:
        The value of ``uas(parser, retagged_sentences)``.
    """
    retagged = []
    for sent in data:
        # Copy first so the caller's sentence keeps its original tags.
        new_sent = list(sent)

        # Positions for which the tagger returns no prediction keep the
        # tag they already had.
        for i, (_, tag) in enumerate(tagger.predict(sent)):
            new_sent[i] = (sent[i][0], tag, sent[i][2])
        retagged.append(new_sent)

    return uas(parser, retagged)

In [26]:
# Score the parser on the development data with tagger-predicted tags.
calc_uas_with_tagger_preds(tagger, parser, dev_data)

0.7137453519682011