In [1]:
from util import *
import torch
from allennlp.commands.elmo import ElmoEmbedder
from model import *

Better speed can be achieved with apex installed from https://www.github.com/nvidia/apex.


In [2]:
# Load the word-sense annotations along with the train / test / dev treebank data.
wsd_data, train_data, test_data, dev_data = parse_data()

# Pull out, for each split, the raw sentences plus the target word's sense and
# its index within the sentence. The trailing 20 keeps this a quick smoke test:
# only 20 sentences are processed.
(
    train_sentences, train_word_sense, train_word_index,
    test_sentences, test_word_sense, test_word_index,
    dev_sentences, dev_word_sense, dev_word_index,
) = get_raw_sentences(wsd_data, train_data, test_data, dev_data, 20)


Parsed 439312 word sense data from White et. al., 2016.
Parsed 12543 training data from UD_English-EWT/en_ewt-ud-train.conllu.
Parsed 2077 testing data from UD_English-EWT/en_ewt-ud-test.conllu.
Parsed 2002 dev data from UD_English-EWT/en_ewt-ud-dev.conllu.
Processed 20 sentences for test purpose.

******************* Data Example ***********************
Sentence: ['on', 'August', '9', ',', '2004', ',', 'it', 'be', 'announce', 'that', 'in', 'the', 'spring', 'of', '2001', ',', 'a', 'man', 'name', 'El', '-', 'Shukrijumah', ',', 'also', 'know', 'as', 'Jafar', 'the', 'Pilot', ',', 'who', 'be', 'part', 'of', 'a', '"', 'second', 'wave', ',', '"', 'have', 'be', 'case', 'New', 'York', 'City', 'helicopter', '.']
Target Word Index: 12
Target Word Sense (index in WordNet 3.1): spring.n.01
********************************************************


In [3]:
# ELMo setup
# The ELMo representation is projected down to 256 dimensions by an MLP inside
# Model. The parameter listing printed by print_whole_model shows
# dimension_reduction_MLP.weight as [256, 3072], i.e. the projection consumes a
# 3072-d input (presumably the three 1024-d ELMo layers concatenated -- confirm
# in model.py).
elmo = ElmoEmbedder()
model = Model(elmo_class=elmo)

# Print every named parameter of the model together with its shape.
print_whole_model(model)

# MLP illustration: show the fine-tuning MLP registered under the 'WSD' key.
print_fine_tuning_MLP(model, 'WSD')

# Forward propagation
# ELMo (3072 into the reduction layer) -> dimension reduction (256)
#   -> bi-LSTM (2 x 256 = 512) -> fine-tuning MLP (512 -> 300 -> 10)
#
# Call the module itself instead of model.forward(...): nn.Module.__call__
# dispatches to forward() and additionally runs any registered hooks, which a
# direct forward() call silently skips. This assumes Model subclasses
# torch.nn.Module (its parameter names follow nn.Module conventions).
#
# NOTE(review): the recorded result of this cell shows the same 10-way
# distribution for every sentence displayed. Verify in model.py that the output
# really depends on each sentence / target-word index.
model(train_sentences, train_word_index)


All parameters in the model:
layers.WSD.0.weight torch.Size([300, 512])
layers.WSD.0.bias torch.Size([300])
layers.WSD.2.weight torch.Size([10, 300])
layers.WSD.2.bias torch.Size([10])
dimension_reduction_MLP.weight torch.Size([256, 3072])
dimension_reduction_MLP.bias torch.Size([256])
wsd_lstm.weight_ih_l0 torch.Size([1024, 256])
wsd_lstm.weight_hh_l0 torch.Size([1024, 256])
wsd_lstm.bias_ih_l0 torch.Size([1024])
wsd_lstm.bias_hh_l0 torch.Size([1024])
wsd_lstm.weight_ih_l0_reverse torch.Size([1024, 256])
wsd_lstm.weight_hh_l0_reverse torch.Size([1024, 256])
wsd_lstm.bias_ih_l0_reverse torch.Size([1024])
wsd_lstm.bias_hh_l0_reverse torch.Size([1024])
wsd_lstm.weight_ih_l1 torch.Size([1024, 512])
wsd_lstm.weight_hh_l1 torch.Size([1024, 256])
wsd_lstm.bias_ih_l1 torch.Size([1024])
wsd_lstm.bias_hh_l1 torch.Size([1024])
wsd_lstm.weight_ih_l1_reverse torch.Size([1024, 512])
wsd_lstm.weight_hh_l1_reverse torch.Size([1024, 256])
wsd_lstm.bias_ih_l1_reverse torch.Size([1024])
wsd_lstm.bias_h

[tensor([0.1036, 0.1022, 0.1002, 0.0972, 0.0987, 0.0980, 0.1058, 0.1029, 0.0956,
         0.0958], grad_fn=<SoftmaxBackward>),
 tensor([0.1036, 0.1022, 0.1002, 0.0972, 0.0987, 0.0980, 0.1058, 0.1029, 0.0956,
         0.0958], grad_fn=<SoftmaxBackward>),
 tensor([0.1036, 0.1022, 0.1002, 0.0972, 0.0987, 0.0980, 0.1058, 0.1029, 0.0956,
         0.0958], grad_fn=<SoftmaxBackward>),
 tensor([0.1036, 0.1022, 0.1002, 0.0972, 0.0987, 0.0980, 0.1058, 0.1029, 0.0956,
         0.0958], grad_fn=<SoftmaxBackward>),
 tensor([0.1036, 0.1022, 0.1002, 0.0972, 0.0987, 0.0980, 0.1058, 0.1029, 0.0956,
         0.0958], grad_fn=<SoftmaxBackward>),
 tensor([0.1036, 0.1022, 0.1002, 0.0972, 0.0987, 0.0980, 0.1058, 0.1029, 0.0956,
         0.0958], grad_fn=<SoftmaxBackward>),
 tensor([0.1036, 0.1022, 0.1002, 0.0972, 0.0987, 0.0980, 0.1058, 0.1029, 0.0956,
         0.0958], grad_fn=<SoftmaxBackward>),
 tensor([0.1036, 0.1022, 0.1002, 0.0972, 0.0987, 0.0980, 0.1058, 0.1029, 0.0956,
         0.0958], grad_fn=<Sof