# Dependency parsing baseline


Dependency parsing is the task of mapping a sentence to a formal representation of its syntactic structure in the form of a dependency tree, which consists of directed arcs between individual words (tokens). Here we build a baseline dependency parser that combines the arc-standard algorithm with the fixed-window model we implemented in Lab L3.

## Imports

In [5]:
from batchify import *
from create_vocab import *
from data_handling import *
from parser import *
from projectivize import *
from uas import *
from window_models import *

## The data set

In [6]:
# Build the train / dev / test datasets from their .conllu files.
# The paths are consumed in order, so the datasets are constructed in the
# same sequence as three separate assignments would construct them.
train_data, dev_data, test_data = map(
    Dataset,
    (
        'data/en_gum-ud-train.conllu',
        'data/en_gum-ud-dev.conllu',
        'data/en_gum-ud-test.conllu',
    ),
)

## Evaluation function

## Create the vocabularies

### Test code
To test your implementation, you can run the code below. The code in this cell creates the initial configuration for the example sentence, simulates a sequence of moves, and then tests that the resulting configuration is the expected final configuration.

In [None]:
# Sanity check for ArcStandardParser: replay a fixed sequence of moves on the
# example sentence and verify the configuration that results.
# NOTE(review): `example_sentence` is not defined in this cell; it must
# already exist in the notebook namespace before this cell is run.
moves = [0, 0, 0, 1, 0, 0, 1, 2, 0, 2, 2]    # 0 = SH, 1 = LA, 2 = RA

parser = ArcStandardParser()
# Start from the initial configuration for a sentence of this length.
config = parser.initial_config(len(example_sentence))
for move in moves:
    # Each scripted move must be allowed in the current configuration
    # before it is applied.
    assert move in parser.valid_moves(config)
    config = parser.next_config(config, move)
# After the last move the parser must report a final configuration ...
assert parser.is_final_config(config)
# ... and it must equal the expected final configuration for the example.
# NOTE(review): the triple presumably reads (buffer position, stack, heads)
# -- confirm against ArcStandardParser.
assert config == (6, [0], [0, 2, 0, 4, 2, 2])

print('Looks good!')

## Oracle

In [1]:
# Each entry of example_sentence is unpacked as a 3-tuple; only its last
# element (taken here as the gold head) is kept.
gold_heads = [head for _w, _t, head in example_sentence]
gold_moves = [0, 0, 0, 1, 0, 0, 1, 2, 0, 2, 2]

# oracle_moves yields pairs; only the second element (the move) is compared
# against the expected move sequence.
assert [move for _, move in oracle_moves(gold_heads)] == gold_moves
print("OK")

NameError: name 'example_sentence' is not defined

## Train tagger

## Train parser

In [7]:
def train_fixed_window(train_data, n_epochs=1, batch_size=100, lr=1e-2):
    """Train a fixed-window parser on ``train_data`` and return it.

    Builds the vocabularies with ``make_vocabs``, creates a
    ``FixedWindowParser`` and optimises ``parser.model`` with Adam on the
    cross-entropy loss of the minibatches produced by ``training_examples``.
    After every epoch it prints the average minibatch loss of that epoch and
    the score returned by ``uas`` on the module-level ``dev_data``.

    Args:
        train_data: Training data, passed to ``make_vocabs`` and
            ``training_examples``.
        n_epochs: Number of passes over the training data.
        batch_size: Minibatch size forwarded to ``training_examples``.
        lr: Learning rate of the Adam optimizer.

    Returns:
        The trained ``FixedWindowParser``.

    Note:
        ``dev_data`` is read from the enclosing (notebook) namespace; it must
        be defined before this function is called.
    """
    vocab_words, vocab_labels = make_vocabs(train_data)

    parser = FixedWindowParser(vocab_words, vocab_labels)
    model = parser.model

    optimizer = torch.optim.Adam(model.parameters(), lr=lr)

    for epoch in range(1, n_epochs + 1):
        # Both accumulators are reset at the start of every epoch so that the
        # reported average covers the batches of this epoch only.
        tot_loss = 0.0
        n_batches = 0

        batches = training_examples(
            vocab_words, vocab_labels, train_data, parser, batch_size
        )
        for bx, by in batches:
            n_batches += 1

            # Reset the accumulated gradients
            optimizer.zero_grad()

            # Forward pass (calling the module rather than .forward() so
            # that any registered hooks are run)
            output = model(bx)

            # Compute the loss
            loss = torch.nn.functional.cross_entropy(output, by)
            tot_loss += loss.item()

            # Backward pass; propagates the loss and computes the gradients
            loss.backward()

            # Update the parameters of the model
            optimizer.step()

        # An epoch that produced no batches has no meaningful average;
        # report NaN instead of raising ZeroDivisionError.
        avg_loss = tot_loss / n_batches if n_batches else float('nan')
        print('Avg loss during {} epoch: {:.4f}'.format(epoch, avg_loss))
        print('Accuracy after {} epochs: {:.4f}'.format(epoch, uas(parser, dev_data)))

    return parser