# BERT part of speech tagger

In [1]:
import torch
# If GPU available
if torch.cuda.is_available():    
    device = torch.device("cuda")
    print('There are %d GPU(s) available.' % torch.cuda.device_count())
    print('We will use the GPU:', torch.cuda.get_device_name(0))
else:
    print('No GPU available, using the CPU instead.')
    device = torch.device("cpu")

There are 1 GPU(s) available.
We will use the GPU: GeForce GTX 1060 3GB


In [2]:
from batchify import *
from create_vocab import *
from data_handling import *
from parser import *
from projectivize import *
from uas import *
from window_models import *
from taggers import *

## Import data

In [3]:
train_data = Dataset('data/en_gum-ud-train-projectivized.conllu')
dev_data = Dataset('data/en_gum-ud-dev-projectivized.conllu')
test_data = Dataset('data/en_gum-ud-test-projectivized.conllu')

In [4]:
len(list([word for elem in train_data for word in elem]))

86148

In [5]:
word_vocab, label_vocab = make_vocabs(train_data)

In [6]:
words = [[word[0] for word in sent] for sent in train_data]
tags = [word[1] for sent in train_data for word in sent]

In [142]:
#words[:10]

## Load embeddings

In [113]:
import torch

from gensim.models import Word2Vec

word_model = Word2Vec(words, size=200, min_count=1, workers=4)

weights = torch.FloatTensor(word_model.wv.vectors)
embedding = nn.Embedding.from_pretrained(weights)

In [51]:
embedding

Embedding(170, 200)

In [7]:
from gensim.models import KeyedVectors
filename='GoogleNews-vectors-negative300.bin'
word_model2 = KeyedVectors.load_word2vec_format(filename, binary=True)

In [8]:
weights = torch.FloatTensor(word_model2.wv.vectors)
embedding = nn.Embedding.from_pretrained(weights)

  weights = torch.FloatTensor(word_model2.wv.vectors)


In [9]:
word_model2.wv["I", "hey"].shape

  word_model2.wv["I", "hey"].shape


(2, 300)

In [12]:
len(word_model2.wv.vectors)

  len(word_model2.wv.vectors)


3000000

In [138]:
embedding

Embedding(3000000, 300)

## Train tagger

In [1]:
import torch.optim as optim
import torch
import torch.nn.functional as F

from batchify import *
from create_vocab import *
from data_handling import *
from parser import *
from projectivize import *
from uas import *
from window_models import *
from taggers import *



# If GPU available
if torch.cuda.is_available():    
    device = torch.device("cuda")
    print('There are %d GPU(s) available.' % torch.cuda.device_count())
    print('We will use the GPU:', torch.cuda.get_device_name(0))
else:
    print('No GPU available, using the CPU instead.')
    device = torch.device("cpu")

    
    
train_data = Dataset('data/en_gum-ud-train-projectivized.conllu')
dev_data = Dataset('data/en_gum-ud-dev-projectivized.conllu')
test_data = Dataset('data/en_gum-ud-test-projectivized.conllu')

    
def train_fixed_window(train_data, n_epochs=2, batch_size=1, lr=1e-2):
    vocab_words, vocab_tags = make_vocabs(train_data)
    tagger = FixedWindowTagger(vocab_words, vocab_tags, len(vocab_tags))
    tagger.model.to(device)
    
    optimizer = optim.Adam(tagger.model.parameters(), lr=lr)
    for i in range(n_epochs):
        total_loss = 0
        batch_nr = 0
        for x, y in training_examples_tagger2(vocab_words, vocab_tags, train_data, tagger):
            x = x.to(device)
            y = y.to(device)
            batch_nr += 1
            
            optimizer.zero_grad()
            # shape + (batch, seq_len, classes)
            y_pred = tagger.model.forward(x)
            # Cross entropy wants (batch, classes, seq_len)
            y_pred = y_pred.reshape((y_pred.shape[0], y_pred.shape[2], y_pred.shape[1]))
            if(i==1237):
                print(".....")
                print(torch.argmax(y_pred, dim=1))
                print(y)
            loss = F.cross_entropy(y_pred, y, ignore_index=0)
            loss.backward()
            total_loss += loss.item()
            optimizer.step()
            #if batch_nr % 500 == 1:
                #print(total_loss/batch_nr)
                #pass
        print(f"Epoch {i}, loss: {total_loss/batch_nr:.4f}, val_acc: {accuracy_sentences(tagger, dev_data):.4f}")
    return tagger


There are 1 GPU(s) available.
We will use the GPU: GeForce GTX 1060 3GB


In [2]:
tagger = train_fixed_window(train_data, n_epochs=10)

torch.Size([1, 2])
torch.Size([2])
torch.Size([1, 33])
torch.Size([33])
torch.Size([1, 36])
torch.Size([36])
torch.Size([1, 14])
torch.Size([14])
torch.Size([1, 24])
torch.Size([24])
torch.Size([1, 33])
torch.Size([33])
torch.Size([1, 50])
torch.Size([50])
torch.Size([1, 78])
torch.Size([78])
torch.Size([1, 29])
torch.Size([29])
torch.Size([1, 3])
torch.Size([3])
torch.Size([1, 54])
torch.Size([54])
torch.Size([1, 34])
torch.Size([34])
torch.Size([1, 76])
torch.Size([76])
torch.Size([1, 77])
torch.Size([77])
torch.Size([1, 47])
torch.Size([47])
torch.Size([1, 46])
torch.Size([46])
torch.Size([1, 17])
torch.Size([17])
torch.Size([1, 34])
torch.Size([34])
torch.Size([1, 67])
torch.Size([67])
torch.Size([1, 36])
torch.Size([36])
torch.Size([1, 40])
torch.Size([40])
torch.Size([1, 30])
torch.Size([30])
torch.Size([1, 63])
torch.Size([63])
torch.Size([1, 50])
torch.Size([50])
torch.Size([1, 5])
torch.Size([5])
torch.Size([1, 6])
torch.Size([6])
torch.Size([1, 10])
torch.Size([10])
torch.Siz

KeyboardInterrupt: 

In [22]:
accuracy_sentences(tagger, train_data)

0.1137925430654223

In [9]:
for sentence in train_data:
    pred = tagger.predict_sentence(sentence)
    for i in range(len(sentence)):
        print(sentence[i][1], pred[i])

<root> DET
ADJ <root>
NOUN X
CCONJ PROPN
ADJ AUX
NOUN PROPN
PUNCT PUNCT
<root> PRON
NOUN <root>
ADP NUM
NOUN PRON
<root> NUM
PROPN <root>
PROPN SYM
PROPN <root>
PROPN VERB
ADP VERB
PROPN <pad>
PUNCT VERB
PROPN AUX
PROPN X
<root> NUM
PROPN <root>
PROPN DET
PROPN NUM
PROPN <pad>
PROPN VERB
PUNCT VERB
PROPN AUX
PROPN X
<root> PART
PROPN <root>
PROPN PART
PROPN SCONJ
PROPN CCONJ
PROPN VERB
PUNCT DET
PROPN ADV
PROPN X
<root> INTJ
PROPN <root>
PROPN PRON
PROPN VERB
PROPN CCONJ
PROPN VERB
PUNCT SCONJ
PROPN <pad>
PROPN X
<root> ADJ
ADV <root>
AUX NUM
NOUN ADP
VERB NUM
ADP PRON
CCONJ PUNCT
VERB SCONJ
NOUN X
PUNCT X
<root> X
DET <root>
NOUN ADV
ADP ADV
ADJ AUX
NOUN ADV
AUX PROPN
PRON CCONJ
VERB ADP
ADP ADP
PUNCT PROPN
<root> NUM
AUX <root>
NOUN CCONJ
NOUN SYM
VERB <root>
DET SYM
NOUN <pad>
ADP <root>
SCONJ ADV
NOUN VERB
VERB PROPN
ADP PUNCT
NOUN SCONJ
PUNCT X
<root> INTJ
DET <root>
NOUN VERB
NOUN PUNCT
ADP ADJ
NOUN X
AUX VERB
DET <root>
ADJ X
NUM X
PUNCT PROPN
VERB ADJ
NOUN SYM
ADP SCONJ
NOUN AD

KeyboardInterrupt: 

In [None]:
accuracy(tagger, dev_data)

In [7]:
accuracy(tagger, test_data)

NameError: name 'tagger' is not defined

In [13]:
accuracy(tagger, train_data)

0.9864651529925245