# Dependency parsing baseline


Dependency parsing is the task of mapping a sentence to a formal representation of its syntactic structure in the form of a dependency tree, which consists of directed arcs between individual words (tokens). Here we will implement a dependency parser baseline based on the arc-standard algorithm and the fixed-window model that we implemented in Lab L3.

## Imports

In [1]:
from batchify import *
from create_vocab import *
from data_handling import *
import parser 
from projectivize import *
from uas import *
from window_models import *
from taggers import *
import importlib


In [2]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

In [3]:
# Display the selected device as the cell output.
device

device(type='cuda')

## The data set

In [4]:
# Load the projectivized train / dev / test splits in a single assignment.
train_data, dev_data, test_data = (
    Dataset('./data/en_gum-ud-train-projectivized.conllu'),
    Dataset('./data/en_gum-ud-dev-projectivized.conllu'),
    Dataset('./data/en_gum-ud-test-projectivized.conllu'),
)

## Train tagger

In [5]:
import torch.optim as optim
import torch
import torch.nn.functional as F

def train_fixed_window(train_data, n_epochs=1, batch_size=100, lr=1e-2):
    """Train a fixed-window tagger on ``train_data`` and return it.

    Vocabularies are built with ``make_vocabs``, a ``FixedWindowTagger`` is
    created from them, and its model is optimized with Adam on the
    cross-entropy loss of the batches yielded by ``training_examples_tagger``.

    Args:
        train_data: Training data; passed unchanged to ``make_vocabs`` and
            ``training_examples_tagger``.
        n_epochs: Number of passes over the training batches.
        batch_size: Kept for interface compatibility. It is not forwarded to
            ``training_examples_tagger``, so that generator's own batching
            applies. NOTE(review): confirm whether it should be forwarded.
        lr: Learning rate for the Adam optimizer.

    Returns:
        The trained ``FixedWindowTagger`` instance.
    """
    vocab_words, vocab_tags = make_vocabs(train_data)
    tagger = FixedWindowTagger(vocab_words, vocab_tags, len(vocab_tags))

    optimizer = optim.Adam(tagger.model.parameters(), lr=lr)
    for _ in range(n_epochs):
        # Both counters restart every epoch, so the printed value is the
        # running mean loss of the current epoch only.
        total_loss = 0
        batch_nr = 0
        for x, y in training_examples_tagger(vocab_words, vocab_tags, train_data, tagger):
            batch_nr += 1

            optimizer.zero_grad()
            # Call the module itself rather than ``.forward`` so that any
            # registered hooks are executed.
            y_pred = tagger.model(x)

            loss = F.cross_entropy(y_pred, y)
            loss.backward()
            total_loss += loss.item()
            optimizer.step()
            # Report progress on batches 1, 101, 201, ...
            if batch_nr % 100 == 1:
                print(total_loss/batch_nr)
    return tagger


In [6]:
# Train the tagger with the default hyperparameters of train_fixed_window.
tagger = train_fixed_window(train_data)

2.937457323074341
1.071372836385623
0.7779828862319538
0.6490695105735645
0.5716420860584834
0.5260861294414707
0.49424736804256025
0.4759819487617291
0.45490004186438265


In [7]:
# Evaluate the trained tagger on the development data.
accuracy(tagger, dev_data)

0.8818215114149677

## Train parser

In [13]:
import torch.optim as optim
import torch
import torch.nn.functional as F
import tqdm as tqdm

def train_fixed_parser(train_data, n_epochs=3, batch_size=100, lr=1e-3):
    """Train a fixed-window parser on ``train_data`` and return it.

    Vocabularies are built with ``make_vocabs``, a
    ``parser.FixedWindowParser`` is created from them, and its model is moved
    to the module-level ``device`` and optimized with Adam on the
    cross-entropy loss of the batches yielded by ``training_examples_parser``.

    Args:
        train_data: Training data; passed unchanged to ``make_vocabs`` and
            ``training_examples_parser``.
        n_epochs: Number of passes over the training batches.
        batch_size: Kept for interface compatibility. It is not forwarded to
            ``training_examples_parser``, so that generator's own batching
            applies. NOTE(review): confirm whether it should be forwarded.
        lr: Learning rate for the Adam optimizer.

    Returns:
        The trained ``FixedWindowParser`` instance, with its model switched
        to evaluation mode.
    """
    vocab_words, vocab_tags = make_vocabs(train_data)
    myparser = parser.FixedWindowParser(vocab_words, vocab_tags)
    myparser.model.to(device)
    myparser.model.train()
    optimizer = optim.Adam(myparser.model.parameters(), lr=lr)

    for _ in tqdm.tqdm(range(n_epochs)):
        # Both counters restart every epoch, so the printed value is the
        # running mean loss of the current epoch only.
        total_loss = 0
        batch_nr = 0
        batches = training_examples_parser(vocab_words, vocab_tags, train_data, myparser)
        # ``idx`` used to be called ``i`` and shadowed the epoch loop variable.
        for words, tags, idx, x, y in batches:
            words = words.to(device)
            tags = tags.to(device)
            x = x.to(device)
            y = y.to(device)
            idx = idx.to(device)

            batch_nr += 1

            optimizer.zero_grad()
            # ``idx`` selects the entries of ``words`` / ``tags`` that go with
            # each feature row in ``x``.
            # NOTE(review): presumably one sentence index per row - confirm.
            # Call the module itself rather than ``.forward`` so that any
            # registered hooks are executed.
            y_pred = myparser.model(words[idx], tags[idx], x)

            loss = F.cross_entropy(y_pred, y)
            loss.backward()
            total_loss += loss.item()
            optimizer.step()
            # Report progress on batches 1, 101, 201, ...
            if batch_nr % 100 == 1:
                print(total_loss/batch_nr)

    # Leave training mode so that train-only layers (e.g. dropout, if the
    # model has any) are inactive when the returned parser is evaluated.
    myparser.model.eval()
    return myparser

In [None]:
# Reload the local ``parser`` module so that training uses its current source,
# then train the parser with the default hyperparameters.
importlib.reload(parser)
myparser = train_fixed_parser(train_data)

  0%|          | 0/3 [00:00<?, ?it/s]

1.379256010055542
0.9629937197902415
0.8871581204495027
0.7967927023223864
0.732381122367935
0.6831960812538208
0.6491877659783387
0.6207710619532263
0.5989636413650715
0.5820867101869626
0.5664940446198403
0.5514037147854589
0.537201793360323
0.5273784646888956
0.5174646438455088
0.5060015501904932
0.49348421916970603
0.4828756241996207
0.47365867780280535
0.4661196679648978
0.4571078937111617
0.4473539880968485
0.4389947838150388
0.43475657250442384
0.43203998425015805
0.42760012039943773
0.4227902938900763
0.4182071388464301
0.41403215433916785
0.409596511928486
0.4068671518962201
0.40539977186735177
0.4032764710975891
0.4011902673261946
0.3991172637010551
0.39599181750352486
0.3946178392795507
0.3935347965993219


In [None]:
# Reload the ``parser`` module again.
# NOTE(review): ``myparser`` was created before this reload, so it presumably
# keeps using the previously loaded class definition - confirm this is intended.
importlib.reload(parser)

In [None]:
# Run the ``uas`` evaluation of the trained parser on the development data.
uas(myparser, dev_data)

In [12]:
# Run the ``uas`` evaluation of the trained parser on the test data.
uas(myparser, test_data)

KeyboardInterrupt: 

In [None]:
# Debug aid: evaluate only the development sentence at index 3 and print it.
for i, data in enumerate(dev_data):
    if i == 3:
        # Pass the trained parser object; the bare name ``parser`` refers to
        # the imported module, not to a parser instance.
        uas(myparser, [data])
        print(data)
        break

## Testing parser with predicted tags 

In [None]:
def calc_uas_with_tagger_preds(tagger, parser, data):
    """Run ``uas`` for ``parser`` on ``data`` with predicted instead of gold tags.

    For every sentence the tagger's predictions replace element 1 of each
    token, while elements 0 and 2 are kept. The replacement is done on a copy
    of each sentence, so the sentences in ``data`` are left unmodified.

    Args:
        tagger: Object whose ``predict(sent)`` returns a sequence of pairs;
            the second element of each pair is used as the predicted tag.
        parser: Parser object handed to ``uas``. This parameter shadows the
            imported ``parser`` module inside the function.
        data: Iterable of sentences; each sentence is an indexable sequence
            of tokens with at least three elements.

    Returns:
        Whatever ``uas(parser, retagged_sentences)`` returns.
    """
    new_data = []
    for sent in data:
        pred_tags = tagger.predict(sent)

        # Replace gold tags with predicted ones on a copy, so the caller's
        # data keeps its gold tags for later evaluations.
        retagged = list(sent)
        for i, (_, tag) in enumerate(pred_tags):
            retagged[i] = (sent[i][0], tag, sent[i][2])
        new_data.append(retagged)

    return uas(parser, new_data)

In [None]:
# Pass the trained parser object; the bare name ``parser`` is the imported module.
calc_uas_with_tagger_preds(tagger, myparser, dev_data)