# Dependency parsing baseline


Dependency parsing is the task of mapping a sentence to a formal representation of its syntactic structure in the form of a dependency tree, which consists of directed arcs between individual words (tokens). Here we will implement a dependency parser baseline based on the arc-standard algorithm and the fixed-window model that we implemented in Lab L3.

## Imports

In [1]:
from batchify import *
from create_vocab import *
from data_handling import *
from parser import *
from projectivize import *
from uas import *
from window_models import *
from taggers import *

In [2]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

  return torch._C._cuda_getDeviceCount() > 0


In [3]:
device

device(type='cpu')

## The data set

In [4]:
train_data = Dataset('data/en_gum-ud-train.conllu')
dev_data = Dataset('data/en_gum-ud-dev.conllu')
test_data = Dataset('data/en_gum-ud-test.conllu')

### Test code
To test your implementation, you can run the code below. The code in this cell creates the initial configuration for the example sentence, simulates a sequence of moves, and then tests that the resulting configuration is the expected final configuration.

In [5]:
moves = [0, 0, 0, 1, 0, 0, 1, 2, 0, 2, 2]    # 0 = SH, 1 = LA, 2 = RA

parser = ArcStandardParser()
config = parser.initial_config(len(example_sentence))
for move in moves:
    assert move in parser.valid_moves(config)
    config = parser.next_config(config, move)
assert parser.is_final_config(config)
assert config == (6, [0], [0, 2, 0, 4, 2, 2])

print('Looks good!')

NameError: name 'example_sentence' is not defined

## Oracle

In [None]:
gold_heads = [h for w, t, h in example_sentence]
gold_moves = [0, 0, 0, 1, 0, 0, 1, 2, 0, 2, 2]

assert list(m for _, m in oracle_moves(gold_heads)) == gold_moves
print("OK")

## Train tagger

In [5]:
import torch.optim as optim
import torch
import torch.nn.functional as F

def train_fixed_window(train_data, n_epochs=2, batch_size=100, lr=1e-2):
    vocab_words, vocab_tags = make_vocabs(train_data)
    tagger = FixedWindowTagger(vocab_words, vocab_tags, len(vocab_tags))
    
    optimizer = optim.Adam(tagger.model.parameters(), lr=lr)
    for i in range(n_epochs):
        total_loss = 0
        batch_nr = 0
        for x, y in training_examples_tagger(vocab_words, vocab_tags, train_data, tagger):
            batch_nr += 1
            
            optimizer.zero_grad()
            y_pred = tagger.model.forward(x)
            
            loss = F.cross_entropy(y_pred, y)
            loss.backward()
            total_loss += loss.item()
            optimizer.step()
            if batch_nr % 100 == 1:
                print(total_loss/batch_nr)
                #pass
    return tagger


In [6]:
tagger = train_fixed_window(train_data)

2.9525060653686523
1.0906923168366498
0.7805956846742488
0.6482049253246317
0.5709965155829217
0.5254366809498526
0.49203830626413547
0.47255402207162
0.4507444996419322
0.4384034276008606
0.18906206903186176
0.14861268526990318
0.12981052105423324
0.1256765405347854
0.12747879608156915
0.12542040444470434
0.12424754625460008
0.12416812111524755


In [7]:
accuracy(tagger, test_data)

ModuleAttributeError: 'FixedWindowModel' object has no attribute 'softmax'

## Train parser

In [8]:
import torch.optim as optim
import torch
import torch.nn.functional as F
def train_fixed_parser(train_data, n_epochs=2, batch_size=100, lr=1e-2):
    vocab_words, vocab_tags = make_vocabs(train_data)
    parser = FixedWindowParser(vocab_words, vocab_tags)
    
    optimizer = optim.Adam(parser.model.parameters(), lr=lr)
    for i in range(n_epochs):
        total_loss = 0
        batch_nr = 0
        for x, y in training_examples_parser(vocab_words, vocab_tags, train_data, parser):
            batch_nr += 1
            
            optimizer.zero_grad()
            y_pred = parser.model.forward(x)
            
            loss = F.cross_entropy(y_pred, y)
            loss.backward()
            total_loss += loss.item()
            optimizer.step()
            print(batch_nr)
            if batch_nr % 100 == 1:
                print(total_loss/batch_nr)
                
    return parser

In [9]:
windw_model = train_fixed_parser(train_data)

1
1.100593090057373


IndexError: list index out of range

In [None]:
accuracy(windw_model, test_data)