## Code for replicating gao et al research on VUA Sequence Model

In [1]:
# mount drive
from google.colab import drive
ROOT = '/content/drive'
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
# add repo directory to path
import os
import sys
from os.path import join 
repo_dir = '/content/drive/MyDrive/Repos/metaphor-detection'
if repo_dir not in sys.path:
    sys.path.append(repo_dir)
print(sys.path)

['/content', '/env/python', '/usr/lib/python37.zip', '/usr/lib/python3.7', '/usr/lib/python3.7/lib-dynload', '', '/usr/local/lib/python3.7/dist-packages', '/usr/lib/python3/dist-packages', '/usr/local/lib/python3.7/dist-packages/IPython/extensions', '/root/.ipython', '/content/drive/MyDrive/Repos/metaphor-detection']


In [3]:
# directories
# to download glove and elmo vectors see: notebooks/Download_large_data.ipynb
data_dir = repo_dir + '/resources/metaphor-in-context/data/'
glove_dir = repo_dir + '/resources/glove/'
elmo_dir = repo_dir + '/resources/elmo/'

In [4]:

%pwd

'/content'

Gao code

In [5]:
# pip install requirements (takes a while)
!cd drive/MyDrive/Repos/metaphor-detection/; pip install -r gao-g-requirements.txt
!pip install --upgrade google-cloud-storage



In [6]:
!pip install Ipython --upgrade



In [7]:

#!pip install Ipython --upgrade

%load_ext autoreload
%autoreload 2

In [8]:
from core.gao_files.sequence.util import get_num_lines, get_pos2idx_idx2pos, index_sequence, get_vocab, embed_indexed_sequence, \
    get_word2idx_idx2word, get_embedding_matrix, write_predictions, get_performance_VUAverb_val, \
    get_performance_VUAverb_test, get_performance_VUA_test
from core.gao_files.sequence.util import TextDatasetWithGloveElmoSuffix as TextDataset
from core.gao_files.sequence.util import evaluate
from core.gao_files.sequence.model import RNNSequenceModel

import torch
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable
from torch.utils.data import DataLoader

import csv
import h5py
import numpy as np
import matplotlib.pyplot as plt
import math
import random
import ast

In [9]:
print("PyTorch version:")
print(torch.__version__)
print("GPU Detected:")
print(torch.cuda.is_available())
using_GPU = torch.cuda.is_available()

PyTorch version:
1.10.0+cu111
GPU Detected:
True


In [10]:
"""
1. Data pre-processing
"""
'''
1.1 VUA
get raw dataset as a list:
  Each element is a triple:
    a sentence: string
    a list of labels: 
    a list of pos: 
'''
pos_set = set()
raw_train_vua = []
with open(data_dir + 'VUAsequence/VUA_seq_formatted_train.csv', encoding='latin-1') as f:
    lines = csv.reader(f)
    next(lines)
    for line in lines:
        pos_seq = ast.literal_eval(line[4])
        label_seq = ast.literal_eval(line[3])
        assert (len(pos_seq) == len(label_seq))
        assert (len(line[2].split()) == len(pos_seq))
        raw_train_vua.append([line[2], label_seq, pos_seq])
        pos_set.update(pos_seq)

raw_val_vua = []
with open(data_dir + 'VUAsequence/VUA_seq_formatted_val.csv', encoding='latin-1') as f:
    lines = csv.reader(f)
    next(lines)
    for line in lines:
        pos_seq = ast.literal_eval(line[4])
        label_seq = ast.literal_eval(line[3])
        assert (len(pos_seq) == len(label_seq))
        assert (len(line[2].split()) == len(pos_seq))
        raw_val_vua.append([line[2], label_seq, pos_seq])
        pos_set.update(pos_seq)

# embed the pos tags
pos2idx, idx2pos = get_pos2idx_idx2pos(pos_set)

for i in range(len(raw_train_vua)):
    raw_train_vua[i][2] = index_sequence(pos2idx, raw_train_vua[i][2])
for i in range(len(raw_val_vua)):
    raw_val_vua[i][2] = index_sequence(pos2idx, raw_val_vua[i][2])
print('size of training set, validation set: ', len(raw_train_vua), len(raw_val_vua))

size of training set, validation set:  6323 1550


In [11]:
raw_train_vua[:4]

[["Ca n't fail to be entertaining .",
  [0, 0, 0, 0, 0, 0, 0],
  [11, 8, 11, 7, 11, 2, 9]],
 ['How much was he going to tell her ?',
  [0, 0, 0, 0, 0, 0, 0, 0, 0],
  [8, 2, 11, 3, 11, 7, 11, 3, 9]],
 ['Up until that news hit the Committee , Don had won the day with his UK Vehicle Division proposals .',
  [0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0],
  [14, 14, 6, 0, 11, 6, 13, 9, 13, 11, 11, 6, 0, 14, 2, 13, 13, 13, 0, 9]],
 ["Could go on to the rugby and go with them could n't he ?",
  [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
  [11, 11, 7, 14, 6, 0, 1, 11, 14, 3, 11, 8, 3, 9]]]

In [12]:
"""
2. Data preparation
"""
'''
2. 1
get vocabulary and glove embeddings in raw dataset 
'''
# vocab is a set of words
vocab = get_vocab(raw_train_vua)
# two dictionaries. <PAD>: 0, <UNK>: 1
word2idx, idx2word = get_word2idx_idx2word(vocab)
# glove_embeddings a nn.Embeddings
glove_embeddings = get_embedding_matrix(glove_dir + 'glove.840B.300d.txt',word2idx, idx2word, normalization=False)
# elmo_embeddings
elmos_train_vua = h5py.File(elmo_dir + 'VUA_train.hdf5', 'r')
elmos_val_vua = h5py.File(elmo_dir + 'VUA_val.hdf5', 'r')
# no suffix embeddings for sequence labeling
suffix_embeddings = None

vocab size:  13843


100%|██████████| 2196017/2196017 [01:00<00:00, 36489.11it/s]


Number of pre-trained word vectors loaded:  13404
Embeddings mean:  0.0005707233212888241
Embeddings stdev:  0.3729434907436371


In [13]:
print(len(vocab))
glove_embeddings.weight.shape
# 300d embeddings for the 13843 words in the vocab

13843


torch.Size([13845, 300])

In [14]:
'''
2. 2
embed the datasets
'''
# raw_train_vua: sentence, label_seq, pos_seq
# embedded_train_vua: embedded_sentence, pos, labels
embedded_train_vua = [[embed_indexed_sequence(example[0], example[2], word2idx,
                                      glove_embeddings, elmos_train_vua, suffix_embeddings),
                       example[2], example[1]]
                      for example in raw_train_vua]
embedded_val_vua = [[embed_indexed_sequence(example[0], example[2], word2idx,
                                    glove_embeddings, elmos_val_vua, suffix_embeddings),
                     example[2], example[1]]
                    for example in raw_val_vua]

In [15]:
# embedded_train_vua is a list of lists -- one list per sentence
# each sentence list contains 
#     an array of embeddings (seq_length x embedding_dim)
#     list of pos tag ids
#     list of labels
print(len(embedded_train_vua))
print(len(embedded_train_vua[0]))
print(embedded_train_vua[0][0].shape)
print(embedded_train_vua[0][1])
print(embedded_train_vua[0][2])

6323
3
(7, 1324)
[11, 8, 11, 7, 11, 2, 9]
[0, 0, 0, 0, 0, 0, 0]


In [16]:
'''
2. 3
set up Dataloader for batching
'''
# Separate the input (embedded_sequence) and labels in the indexed train sets.
# embedded_train_vua: embedded_sentence, pos, labels
train_dataset_vua = TextDataset([example[0] for example in embedded_train_vua],
                                [example[1] for example in embedded_train_vua],
                                [example[2] for example in embedded_train_vua])
val_dataset_vua = TextDataset([example[0] for example in embedded_val_vua],
                              [example[1] for example in embedded_val_vua],
                              [example[2] for example in embedded_val_vua])

# Data-related hyperparameters
batch_size = 64
# Set up a DataLoader for the training, validation, and test dataset
train_dataloader_vua = DataLoader(dataset=train_dataset_vua, batch_size=batch_size, shuffle=True,
                              collate_fn=TextDataset.collate_fn)
val_dataloader_vua = DataLoader(dataset=val_dataset_vua, batch_size=batch_size,
                            collate_fn=TextDataset.collate_fn)

In [17]:
"""
3. Model training
"""
'''
3. 1 
set up model, loss criterion, optimizer
'''
# Instantiate the model
# embedding_dim = glove + elmo + suffix indicator
# dropout1: dropout on input to RNN
# dropout2: dropout in RNN; would be used if num_layers!=1
# dropout3: dropout on hidden state of RNN to linear layer
RNNseq_model = RNNSequenceModel(num_classes=2, embedding_dim=300 + 1024, hidden_size=300, num_layers=1, bidir=True,
                                dropout1=0.5, dropout2=0, dropout3=0.1)
# Move the model to the GPU if available
if using_GPU:
    RNNseq_model = RNNseq_model.cuda()
# Set up criterion for calculating loss
loss_criterion = nn.NLLLoss()
# Set up an optimizer for updating the parameters of the rnn_clf
rnn_optimizer = optim.Adam(RNNseq_model.parameters(), lr=0.005)
# Number of epochs (passes through the dataset) to train the model for.
num_epochs = 10

In [18]:
'''
3. 2
train model
'''
train_loss = []
val_loss = []
performance_matrix = None
val_f1s = []
train_f1s = []
# A counter for the number of gradient updates
num_iter = 0
comparable = []
for epoch in range(num_epochs):
    print("Starting epoch {}".format(epoch + 1))
    for (__, example_text, example_lengths, labels) in train_dataloader_vua:
        example_text = Variable(example_text)
        example_lengths = Variable(example_lengths)
        labels = Variable(labels)
        if using_GPU:
            example_text = example_text.cuda()
            example_lengths = example_lengths.cuda()
            labels = labels.cuda()
        # predicted shape: (batch_size, seq_len, 2)
        predicted = RNNseq_model(example_text, example_lengths)
        batch_loss = loss_criterion(predicted.view(-1, 2), labels.view(-1))
        rnn_optimizer.zero_grad()
        batch_loss.backward()
        rnn_optimizer.step()
        num_iter += 1
        # Calculate validation and training set loss and accuracy every 200 gradient updates
        if num_iter % 200 == 0:
            avg_eval_loss, performance_matrix = evaluate(idx2pos, val_dataloader_vua, RNNseq_model,
                                                         loss_criterion, using_GPU)
            val_loss.append(avg_eval_loss)
            val_f1s.append(performance_matrix[:, 2])
            print("Iteration {}. Validation Loss {}.".format(num_iter, avg_eval_loss))
#             avg_eval_loss, performance_matrix = evaluate(idx2pos, train_dataloader_vua, RNNseq_model,
#                                                          loss_criterion, using_GPU)
#             train_loss.append(avg_eval_loss)
#             train_f1s.append(performance_matrix[:, 2])
#             print("Iteration {}. Training Loss {}.".format(num_iter, avg_eval_loss))

Starting epoch 1
Starting epoch 2
Starting epoch 3


  eval_text = Variable(eval_text, volatile=True)
  eval_lengths = Variable(eval_lengths, volatile=True)
  eval_labels = Variable(eval_labels, volatile=True)


------------------------------
total_eval_loss.shape torch.Size([])
PRFA performance for  NOUN 78.125 40.49676025917927 53.342816500711244 90.43592360402391
PRFA performance for  CCONJ nan nan nan 100.0
PRFA performance for  ADJ 70.39473684210526 31.1046511627907 43.14516129032258 91.22315592903828
PRFA performance for  PRON nan 0.0 nan 99.68609865470852
PRFA performance for  X nan nan nan 100.0
PRFA performance for  INTJ nan 0.0 nan 98.74213836477988
PRFA performance for  DET 84.34163701067615 94.8 89.26553672316382 98.39028523016097
PRFA performance for  PART 67.6470588235294 47.422680412371136 55.75757575757576 93.57959542656113
PRFA performance for  ADV 69.56521739130434 31.16883116883117 43.04932735426009 94.302377747869
PRFA performance for  PUNCT 100.0 60.0 75.0 99.94811932555123
PRFA performance for  SYM nan nan nan 100.0
PRFA performance for  VERB 74.97435897435898 54.55223880597015 63.15334773218142 88.07326621923937
PRFA performance for  NUM nan 0.0 nan 99.77272727272727
PRF

  precision = 100 * grid[1, 1] / np.sum(grid[1])
  recall = 100 * grid[1, 1] / np.sum(grid[:, 1])


Starting epoch 4
Starting epoch 5
------------------------------
total_eval_loss.shape torch.Size([])
PRFA performance for  NOUN 70.44198895027624 55.0755939524838 61.81818181818181 90.81498760752297
PRFA performance for  CCONJ nan nan nan 100.0
PRFA performance for  ADJ 52.38095238095238 60.75581395348837 56.258411843876175 89.8848428260193
PRFA performance for  PRON 0.0 0.0 nan 99.64125560538116
PRFA performance for  X nan nan nan 100.0
PRFA performance for  INTJ 100.0 50.0 66.66666666666667 99.37106918238993
PRFA performance for  DET 89.1891891891892 92.4 90.76620825147349 98.67269133013274
PRFA performance for  PART 61.94690265486726 72.16494845360825 66.66666666666669 93.84344766930519
PRFA performance for  ADV 64.46280991735537 50.64935064935065 56.72727272727273 94.66128308658591
PRFA performance for  PUNCT 100.0 80.0 88.88888888888889 99.97405966277562
PRFA performance for  SYM nan nan nan 100.0
PRFA performance for  VERB 66.5017667844523 70.22388059701493 68.31215970961888 87.

  f1 = 2 * precision * recall / (precision + recall)


Starting epoch 6
Starting epoch 7
------------------------------
total_eval_loss.shape torch.Size([])
PRFA performance for  NOUN 72.91941875825627 59.611231101511876 65.59714795008912 91.55853622977111
PRFA performance for  CCONJ nan nan nan 100.0
PRFA performance for  ADJ 57.098765432098766 53.77906976744186 55.38922155688623 90.72517896047307
PRFA performance for  PRON 0.0 0.0 nan 99.64125560538116
PRFA performance for  X nan nan nan 100.0
PRFA performance for  INTJ 100.0 50.0 66.66666666666667 99.37106918238993
PRFA performance for  DET 87.25099601593625 87.6 87.42514970059881 98.22084157017791
PRFA performance for  PART 67.0 69.0721649484536 68.02030456852792 94.45910290237467
PRFA performance for  ADV 62.280701754385966 46.103896103896105 52.985074626865675 94.34724091520862
PRFA performance for  PUNCT 100.0 80.0 88.88888888888889 99.97405966277562
PRFA performance for  SYM nan nan nan 100.0
PRFA performance for  VERB 67.38078783690393 72.76119402985074 69.96770721205597 88.296979

In [19]:
print(val_loss)
RNNseq_model

[tensor(0.1753, device='cuda:0'), tensor(0.1099, device='cuda:0'), tensor(0.0887, device='cuda:0'), tensor(0.0846, device='cuda:0')]


RNNSequenceModel(
  (rnn): LSTM(1324, 300, batch_first=True, bidirectional=True)
  (output_projection): Linear(in_features=600, out_features=2, bias=True)
  (dropout_on_input_to_LSTM): Dropout(p=0.5, inplace=False)
  (dropout_on_input_to_linear_layer): Dropout(p=0.1, inplace=False)
)

In [20]:
"""
for additional training
"""
rnn_optimizer = optim.Adam(RNNseq_model.parameters(), lr=0.0001)
for epoch in range(10):
    print("Starting epoch {}".format(epoch + 1))
    for (__, example_text, example_lengths, labels) in train_dataloader_vua:
        example_text = Variable(example_text)
        example_lengths = Variable(example_lengths)
        labels = Variable(labels)
        if using_GPU:
            example_text = example_text.cuda()
            example_lengths = example_lengths.cuda()
            labels = labels.cuda()
        # predicted shape: (batch_size, seq_len, 2)
        predicted = RNNseq_model(example_text, example_lengths)
        batch_loss = loss_criterion(predicted.view(-1, 2), labels.view(-1))
        rnn_optimizer.zero_grad()
        batch_loss.backward()
        rnn_optimizer.step()
        num_iter += 1
        # Calculate validation and training set loss and accuracy every 200 gradient updates
        if num_iter % 200 == 0:
            avg_eval_loss, performance_matrix = evaluate(idx2pos, val_dataloader_vua, RNNseq_model,
                                                         loss_criterion, using_GPU)
            val_loss.append(avg_eval_loss)
            val_f1s.append(performance_matrix[:, 2])
            print("Iteration {}. Validation Loss {}.".format(num_iter, avg_eval_loss))

#             avg_eval_loss, performance_matrix = evaluate(idx2pos, train_dataloader_vua, RNNseq_model,
#                                                          loss_criterion, using_GPU)
#             train_loss.append(avg_eval_loss)
#             train_f1s.append(performance_matrix[:, 2])
#             print("Iteration {}. Training Loss {}.".format(num_iter, avg_eval_loss))
#             comparable.append(get_performance())

print("Training done!")

Starting epoch 1
------------------------------
total_eval_loss.shape torch.Size([])
PRFA performance for  NOUN 75.13089005235602 61.98704103671706 67.92899408284025 92.09797346551976
PRFA performance for  CCONJ nan nan nan 100.0
PRFA performance for  ADJ 58.88157894736842 52.03488372093023 55.24691358024691 90.97416744475568
PRFA performance for  PRON 0.0 0.0 nan 99.64125560538116
PRFA performance for  X nan nan nan 100.0
PRFA performance for  INTJ 100.0 50.0 66.66666666666667 99.37106918238993
PRFA performance for  DET 87.74703557312253 88.8 88.27037773359842 98.33380401016662
PRFA performance for  PART 62.745098039215684 65.97938144329896 64.32160804020099 93.75549692172383
PRFA performance for  ADV 66.39344262295081 52.5974025974026 58.69565217391305 94.88559892328398
PRFA performance for  PUNCT 100.0 80.0 88.88888888888889 99.97405966277562
PRFA performance for  SYM nan nan nan 100.0
PRFA performance for  VERB 66.76156583629893 70.0 68.3424408014572 87.84955257270694
PRFA performa

In [21]:
"""
test on genres by POS tags
"""
print("**********************************************************")
print("Evalutation on test set: ")

raw_test_vua = []
with open(data_dir + 'VUAsequence/VUA_seq_formatted_test.csv', encoding='latin-1') as f:
    lines = csv.reader(f)
    next(lines)
    for line in lines:
        # txt_id	sen_ix	sentence	label_seq	pos_seq	labeled_sentence	genre
        pos_seq = ast.literal_eval(line[4])
        label_seq = ast.literal_eval(line[3])
        assert(len(pos_seq) == len(label_seq))
        assert(len(line[2].split()) == len(pos_seq))
        raw_test_vua.append([line[2], label_seq, pos_seq])
print('number of examples(sentences) for test_set ', len(raw_test_vua))

for i in range(len(raw_test_vua)):
    raw_test_vua[i][2] = index_sequence(pos2idx, raw_test_vua[i][2])

elmos_test_vua = h5py.File(elmo_dir + 'VUA_test.hdf5', 'r')
# raw_train_vua: sentence, label_seq, pos_seq
# embedded_train_vua: embedded_sentence, pos, labels
embedded_test_vua = [[embed_indexed_sequence(example[0], example[2], word2idx,
                                      glove_embeddings, elmos_test_vua, suffix_embeddings),
                       example[2], example[1]]
                      for example in raw_test_vua]

# Separate the input (embedded_sequence) and labels in the indexed train sets.
# embedded_train_vua: embedded_sentence, pos, labels
test_dataset_vua = TextDataset([example[0] for example in embedded_test_vua],
                              [example[1] for example in embedded_test_vua],
                              [example[2] for example in embedded_test_vua])

# Set up a DataLoader for the test dataset
test_dataloader_vua = DataLoader(dataset=test_dataset_vua, batch_size=batch_size,
                              collate_fn=TextDataset.collate_fn)

print("Tagging model performance on VUA test set by POS tags: regardless of genres")
avg_eval_loss, performance_matrix = evaluate(idx2pos, test_dataloader_vua, RNNseq_model, loss_criterion, using_GPU)

**********************************************************
Evalutation on test set: 
number of examples(sentences) for test_set  2694
Tagging model performance on VUA test set by POS tags: regardless of genres
------------------------------
total_eval_loss.shape torch.Size([])
PRFA performance for  NOUN 68.15589353612167 55.53834237025561 61.20358514724711 89.41546343735445
PRFA performance for  CCONJ nan nan nan 100.0
PRFA performance for  ADJ 65.82278481012658 57.67097966728281 61.477832512315274 90.13871374527112
PRFA performance for  PRON nan 0.0 nan 99.84829329962074
PRFA performance for  X nan nan nan 100.0
PRFA performance for  INTJ nan 0.0 nan 99.49748743718592
PRFA performance for  DET 90.04629629629629 92.8400954653938 91.42185663924795 98.22729480330257
PRFA performance for  PART 57.57575757575758 63.758389261744966 60.509554140127385 91.52426520847574
PRFA performance for  ADV 76.16580310880829 60.24590163934426 67.27688787185355 95.78544061302682
PRFA performance for  PUNC

In [22]:
avg_eval_loss, performance_matrix = evaluate(idx2pos, test_dataloader_vua, RNNseq_model, loss_criterion, using_GPU)

------------------------------
total_eval_loss.shape torch.Size([])
PRFA performance for  NOUN 68.15589353612167 55.53834237025561 61.20358514724711 89.41546343735445
PRFA performance for  CCONJ nan nan nan 100.0
PRFA performance for  ADJ 65.82278481012658 57.67097966728281 61.477832512315274 90.13871374527112
PRFA performance for  PRON nan 0.0 nan 99.84829329962074
PRFA performance for  X nan nan nan 100.0
PRFA performance for  INTJ nan 0.0 nan 99.49748743718592
PRFA performance for  DET 90.04629629629629 92.8400954653938 91.42185663924795 98.22729480330257
PRFA performance for  PART 57.57575757575758 63.758389261744966 60.509554140127385 91.52426520847574
PRFA performance for  ADV 76.16580310880829 60.24590163934426 67.27688787185355 95.78544061302682
PRFA performance for  PUNCT nan 0.0 nan 99.94099134539732
PRFA performance for  SYM nan nan nan 100.0
PRFA performance for  VERB 68.0083638264506 68.94541600423953 68.47368421052632 87.8646677471637
PRFA performance for  NUM nan nan nan

In [23]:
def get_batch_predictions(predictions, pos_seqs):
    """

    :param predictions: a numpy array of shape (batch_size, max_seq_len)
    :param pos_seqs: a list of variable-length indexed pos sequence
    :return: a list of variable-length predictions. each inner list is prediction for a sentence
    """
    pred_lst = []
    for i in range(len(pos_seqs)):  # each example i.e. each row
        indexed_pos_sequence = pos_seqs[i]
        prediction_padded = predictions[i]
        cur_pred_lst = []
        for j in range(len(indexed_pos_sequence)):  # inside each example: up to sentence length
            cur_pred_lst.append(prediction_padded[j])
        pred_lst.append(cur_pred_lst)
    return pred_lst



  def get_predictions(raw_dataset, evaluation_dataloader, model, using_GPU, rawdata_filename):
    """
    Evaluate the model on the given evaluation_dataloader

    :param raw_dataset
    :param evaluation_dataloader:
    :param model:
    :param using_GPU: a boolean
    :return: a list of
    """
    # Set model to eval mode, which turns off dropout.
    model.eval()

    predictions = []
    for (eval_pos_seqs, eval_text, eval_lengths, eval_labels) in evaluation_dataloader:
        eval_text = Variable(eval_text, volatile=True)
        eval_lengths = Variable(eval_lengths, volatile=True)
        eval_labels = Variable(eval_labels, volatile=True)
        if using_GPU:
            eval_text = eval_text.cuda()
            eval_lengths = eval_lengths.cuda()
            eval_labels = eval_labels.cuda()

        # predicted shape: (batch_size, seq_len, 2)
        predicted = model(eval_text, eval_lengths)
        # get 0 or 1 predictions
        # predicted_labels: (batch_size, seq_len)
        _, predicted_labels = torch.max(predicted.data, 2)
        predictions.extend(get_batch_predictions(predicted_labels, eval_pos_seqs))

    # Set the model back to train mode, which activates dropout again.
    model.train()
    assert (len(predictions) == len(raw_dataset))

    # read original data
    data = []
    with open(rawdata_filename, encoding='latin-1') as f:
        lines = csv.reader(f)
        for line in lines:
            data.append(line)

    # append predictions to the original data
    data[0].append('prediction')
    for i in range(len(predictions)):
        data[i + 1].append(predictions[i])
    return data

/content/drive/MyDrive/Repos/metaphor-detection


  eval_text = Variable(eval_text, volatile=True)
  eval_lengths = Variable(eval_lengths, volatile=True)
  eval_labels = Variable(eval_labels, volatile=True)


ValueError: ignored

In [None]:
# """
# write the test prediction on the VUA-verb to a file: sequence prediction
# read and extract to get a comparabel performance on VUA-verb test set.
# """
# def get_comparable_performance_test():
#     result = write_predictions(raw_test_vua, test_dataloader_vua, RNNseq_model, using_GPU, '../data/VUAsequence/VUA_seq_formatted_test.csv')
#     f = open('../predictions/vua_seq_test_predictions_LSTMsequence_vua.csv', 'w')
#     writer = csv.writer(f)
#     writer.writerows(result)
#     f.close()

#     get_performance_VUAverb_test()
#     get_performance_VUA_test()

# get_comparable_performance_test()

In [32]:
seq_test_pred = write_predictions(raw_test_vua, test_dataloader_vua, RNNseq_model, using_GPU, data_dir + 'VUAsequence/VUA_seq_formatted_test.csv')

  eval_text = Variable(eval_text, volatile=True)
  eval_lengths = Variable(eval_lengths, volatile=True)
  eval_labels = Variable(eval_labels, volatile=True)


In [33]:
seq_test_pred[0:2]

[['txt_id',
  'sen_ix',
  'sentence',
  'label_seq',
  'pos_seq',
  'labeled_sentence',
  'genre',
  'prediction'],
 ['a3m-fragment02',
  '45',
  'Design : Crossed lines over the toytown tram : City transport could soon be back on the right track , says Jonathan Glancey',
  '[0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0]',
  "['NOUN', 'PUNCT', 'ADJ', 'NOUN', 'ADP', 'DET', 'ADJ', 'NOUN', 'PUNCT', 'NOUN', 'NOUN', 'VERB', 'ADV', 'VERB', 'ADV', 'ADP', 'DET', 'ADJ', 'NOUN', 'PUNCT', 'VERB', 'PROPN', 'PROPN']",
  'Design : M_Crossed M_lines M_over the toytown tram : City transport could soon be M_back M_on the right M_track , says Jonathan Glancey',
  'news',
  [tensor(1, device='cuda:0'),
   tensor(0, device='cuda:0'),
   tensor(1, device='cuda:0'),
   tensor(1, device='cuda:0'),
   tensor(0, device='cuda:0'),
   tensor(0, device='cuda:0'),
   tensor(0, device='cuda:0'),
   tensor(0, device='cuda:0'),
   tensor(0, device='cuda:0'),
   tensor(0, device='cuda:0'),
   te

In [44]:
get_performance_VUAverb_test(data_dir,seq_test_pred)

Tagging model performance on test-verb: genre
news Precision, Recall, F1, Accuracy:  72.23230490018149 71.19856887298748 71.7117117117117 74.42996742671009
fiction Precision, Recall, F1, Accuracy:  56.46687697160883 65.56776556776556 60.677966101694906 83.24909747292419
academic Precision, Recall, F1, Accuracy:  75.0 75.23510971786834 75.11737089201878 74.74185861795075
conversation Precision, Recall, F1, Accuracy:  56.15384615384615 50.171821305841924 52.994555353901994 87.05647176411794
Tagging model performance on test-verb: regardless of genre
Precision, Recall, F1, Accuracy:  68.04298642533936 68.31345826235093 68.17795409464436 80.878596969181


array([64.96325701, 65.54331637, 65.12540101, 79.86934882])

In [42]:
seq_test_pred[2][7]

[tensor(0, device='cuda:0'),
 tensor(0, device='cuda:0'),
 tensor(0, device='cuda:0'),
 tensor(0, device='cuda:0'),
 tensor(0, device='cuda:0'),
 tensor(0, device='cuda:0'),
 tensor(0, device='cuda:0'),
 tensor(0, device='cuda:0'),
 tensor(0, device='cuda:0'),
 tensor(0, device='cuda:0'),
 tensor(1, device='cuda:0'),
 tensor(0, device='cuda:0'),
 tensor(1, device='cuda:0'),
 tensor(0, device='cuda:0'),
 tensor(0, device='cuda:0'),
 tensor(0, device='cuda:0'),
 tensor(0, device='cuda:0'),
 tensor(1, device='cuda:0'),
 tensor(0, device='cuda:0')]

In [None]:
get_performance_VUA_test(data_dir, seq_test_pred)


sys.settrace() should not be used when the debugger is being used.
This may cause the debugger to stop working correctly.
If this is needed, please check: 
http://pydev.blogspot.com/2007/06/why-cant-pydev-debugger-work-with.html
to see how to restore the debug tracing back correctly.
Call Location:
  File "/usr/lib/python3.7/bdb.py", line 332, in set_trace
    sys.settrace(self.trace_dispatch)



> /content/drive/MyDrive/Repos/metaphor-detection/core/gao_files/sequence/util.py(508)get_performance_VUA_test()
-> pred = pred_sequence[i]
array([[[0., 0.],
        [0., 0.]],

       [[0., 0.],
        [0., 0.]],

       [[0., 0.],
        [0., 0.]],

       [[0., 0.],
        [0., 0.]]])
(4, 2, 2)
*** NameError: name 'genre_idx' is not defined
0
'news'
['news', 'fiction', 'academic', 'conversation']
array([[0., 0.],
       [0., 0.]])
[tensor(1, device='cuda:0'), tensor(0, device='cuda:0'), tensor(1, device='cuda:0'), tensor(1, device='cuda:0'), tensor(0, device='cuda:0'), tensor(0, device='cuda:0'), tensor(0, device='cuda:0'), tensor(0, device='cuda:0'), tensor(0, device='cuda:0'), tensor(0, device='cuda:0'), tensor(0, device='cuda:0'), tensor(0, device='cuda:0'), tensor(0, device='cuda:0'), tensor(0, device='cuda:0'), tensor(1, device='cuda:0'), tensor(1, device='cuda:0'), tensor(0, device='cuda:0'), tensor(0, device='cuda:0'), tensor(1, device='cuda:0'), tensor(0, device='cuda:0')


sys.settrace() should not be used when the debugger is being used.
This may cause the debugger to stop working correctly.
If this is needed, please check: 
http://pydev.blogspot.com/2007/06/why-cant-pydev-debugger-work-with.html
to see how to restore the debug tracing back correctly.
Call Location:
  File "/usr/lib/python3.7/bdb.py", line 357, in set_quit
    sys.settrace(None)



BdbQuit: ignored