## Code for replicating Gao et al. research on the VUA classification model

In [None]:
# Mount Google Drive so the repository and data stored on it are reachable.
from google.colab import drive

ROOT = '/content/drive'  # mount point of the drive inside the Colab VM
drive.mount(ROOT)

Mounted at /content/drive


In [None]:
# Put the repository root on sys.path so that the `core` package can be imported.
import os
import sys
from os.path import join

repo_dir = '/content/drive/MyDrive/Repos/metaphor-detection'
# Only append when missing, so re-running the cell does not add duplicates.
if not any(entry == repo_dir for entry in sys.path):
    sys.path.append(repo_dir)
print(sys.path)

['', '/content', '/env/python', '/usr/lib/python37.zip', '/usr/lib/python3.7', '/usr/lib/python3.7/lib-dynload', '/usr/local/lib/python3.7/dist-packages', '/usr/lib/python3/dist-packages', '/usr/local/lib/python3.7/dist-packages/IPython/extensions', '/root/.ipython', '/content/drive/MyDrive/Repos/metaphor-detection']


In [None]:
# pip install requirements (takes a while)
# First command: change into the repository (path is relative to the current
# working directory) and install everything listed in gao-g-requirements.txt.
!cd drive/MyDrive/Repos/metaphor-detection/; pip install -r gao-g-requirements.txt
# Second command: upgrade google-cloud-storage to its newest release.
!pip install --upgrade google-cloud-storage

In [None]:
from core.gao_files.classification.util import get_num_lines, get_vocab, embed_sequence, get_word2idx_idx2word, get_embedding_matrix
from core.gao_files.classification.util  import TextDatasetWithGloveElmoSuffix as TextDataset
from core.gao_files.classification.util  import evaluate
from core.gao_files.classification.model import RNNSequenceClassifier
from core.gao_files.classification.vua_util  import write_predictions_vua_cls, get_performance_VUAverb_test

import torch
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable
from torch.utils.data import DataLoader

import csv
import h5py
# import matplotlib
# matplotlib.use('Agg')  # to avoid the error: _tkinter.TclError: no display name and no $DISPLAY environment variable
# matplotlib.use('tkagg') # to display the graph on remote server
import matplotlib.pyplot as plt
import pandas as pd

In [None]:
# Report the installed PyTorch build and whether CUDA is usable.
# `using_GPU` is read by later cells to decide whether tensors/models go to the GPU.
using_GPU = torch.cuda.is_available()
print("PyTorch version:", torch.__version__, sep="\n")
print("GPU Detected:", using_GPU, sep="\n")

PyTorch version:
1.10.0+cu111
GPU Detected:
True


In [None]:
# Resource locations, all below the repository root.
# To download the GloVe and ELMo vectors see: notebooks/Download_large_data.ipynb
# Every path ends with '/' because later cells append file names with `+`.
resources_root = f'{repo_dir}/resources'
data_dir = f'{resources_root}/metaphor-in-context/data/'
glove_dir = f'{resources_root}/glove/'
elmo_dir = f'{resources_root}/elmo/'

### Gao code

In [None]:
"""
1. Data pre-processing
"""
'''
1.1 VUA
get raw dataset as a list:
  Each element is a triple:
    a sentence: string
    a index: int: idx of the focus verb
    a label: int 1 or 0
'''
# header for vua files:
# text_idx, sentence_idx, verb, sentence, verb_idx, label
SENT_IDX = 3
VERB_IDX = 4
LABEL_IDX = 5


def load_vua_split(csv_path):
    """Read one formatted VUA csv file into a list of examples.

    Args:
        csv_path: path of a csv file whose first row is a header and whose
            columns follow the layout described above.

    Returns:
        A list with one [sentence, verb_idx, label] entry per data row, where
        the sentence is kept as a string and the other two fields are
        converted to int.

    Raises:
        ValueError: if a verb index or label cell cannot be parsed as an int.
        StopIteration: if the file is empty (no header row to skip).
    """
    # newline='' lets the csv module do its own line-ending handling, as its
    # documentation requires for file objects passed to csv.reader.
    with open(csv_path, encoding='latin-1', newline='') as csv_file:
        rows = csv.reader(csv_file)
        next(rows)  # skip the header row
        return [[row[SENT_IDX], int(row[VERB_IDX]), int(row[LABEL_IDX])]
                for row in rows]


raw_train_vua = load_vua_split(data_dir + 'VUA/VUA_formatted_train.csv')
raw_val_vua = load_vua_split(data_dir + 'VUA/VUA_formatted_val.csv')
raw_test_vua = load_vua_split(data_dir + 'VUA/VUA_formatted_test.csv')
print('VUA dataset division: ', '\ntrain:', len(raw_train_vua), '\nval:',len(raw_val_vua), '\ntest:',len(raw_test_vua))

VUA dataset division:  
train: 15516 
val: 1724 
test: 5873


In [None]:
"""
2. Data preparation
"""
'''
2.1
Build the vocabulary and load the GloVe / ELMo / suffix embeddings
needed to embed the raw datasets.
'''
# vocab is a set of words collected over all three splits
all_raw_examples = raw_train_vua + raw_val_vua + raw_test_vua
vocab = get_vocab(all_raw_examples)

# two dictionaries. <PAD>: 0, <UNK>: 1
word2idx, idx2word = get_word2idx_idx2word(vocab)

# glove_embeddings is an nn.Embedding built from the pre-trained GloVe vectors
glove_path = glove_dir + 'glove.840B.300d.txt'
glove_embeddings = get_embedding_matrix(glove_path, word2idx, idx2word, normalization=False)

# ELMo vectors live in hdf5 files; the handles are left open because the
# embedding cells that follow read from them.
elmos_train_vua = h5py.File(elmo_dir + 'VUA_train.hdf5', 'r')
elmos_val_vua = h5py.File(elmo_dir + 'VUA_val.hdf5', 'r')

# suffix_embeddings: 2 suffix tags, each mapped to a 50-dimensional vector
num_suffix_tags, suffix_dim = 2, 50
suffix_embeddings = nn.Embedding(num_suffix_tags, suffix_dim)

vocab size:  18695


100%|██████████| 2196017/2196017 [00:50<00:00, 43816.39it/s]


Number of pre-trained word vectors loaded:  17941
Embeddings mean:  -0.0001772342948243022
Embeddings stdev:  0.37537267804145813


In [None]:
'''
2.2
Embed the train and validation datasets.
'''
# positions of the fields inside each raw_[train/val/test]_vua entry
raw_sent_idx = 0
raw_verb_idx = 1
raw_label_idx = 2


def _embed_examples(raw_examples, elmo_vectors):
    """Return one [embedded_sentence, label] pair per raw example."""
    pairs = []
    for row in raw_examples:
        embedded_sentence = embed_sequence(row[raw_sent_idx],
                                           row[raw_verb_idx],
                                           word2idx, glove_embeddings, elmo_vectors, suffix_embeddings)
        pairs.append([embedded_sentence, row[raw_label_idx]])
    return pairs


# each row in embedded_[train/val]_vua contains an embedding and a label
embedded_train_vua = _embed_examples(raw_train_vua, elmos_train_vua)
embedded_val_vua = _embed_examples(raw_val_vua, elmos_val_vua)

In [None]:
'''
2.3
Wrap the embedded examples in Datasets and DataLoaders for batching.
'''
# positions inside each [embedding, label] pair
embedding_idx = 0
label_idx = 1

# Data-related hyperparameters
batch_size = 64

# Split the pairs into parallel lists of inputs and labels.
train_inputs = [pair[embedding_idx] for pair in embedded_train_vua]
train_labels = [pair[label_idx] for pair in embedded_train_vua]
val_inputs = [pair[embedding_idx] for pair in embedded_val_vua]
val_labels = [pair[label_idx] for pair in embedded_val_vua]

train_dataset_vua = TextDataset(train_inputs, train_labels)
val_dataset_vua = TextDataset(val_inputs, val_labels)

# Only the training loader shuffles; the validation loader is built without it.
train_dataloader_vua = DataLoader(dataset=train_dataset_vua,
                                  batch_size=batch_size,
                                  shuffle=True,
                                  collate_fn=TextDataset.collate_fn)
val_dataloader_vua = DataLoader(dataset=val_dataset_vua,
                                batch_size=batch_size,
                                collate_fn=TextDataset.collate_fn)

In [None]:
"""
3. Model training
"""
'''
3.1
Set up the model, the loss criterion and the optimizer.
'''
# Instantiate the model
rnn_clf = RNNSequenceClassifier(
    num_classes=2,
    embedding_dim=300 + 1024 + 50,  # glove + elmo + suffix indicator
    hidden_size=300,
    num_layers=1,
    bidir=True,
    dropout1=0.3,  # dropout on input to RNN
    # dropout in RNN. NOTE(review): the saved output of this cell shows part of
    # a warning mentioning dropout and num_layers, which suggests this value has
    # no effect while num_layers=1 -- confirm against the model code.
    dropout2=0.2,
    dropout3=0.2,  # dropout on hidden state of RNN to linear layer
)
# Move the model to the GPU if available
rnn_clf = rnn_clf.cuda() if using_GPU else rnn_clf

# Negative log-likelihood loss used for training and evaluation
nll_criterion = nn.NLLLoss()

# SGD with momentum updates the parameters of rnn_clf
rnn_clf_optimizer = optim.SGD(rnn_clf.parameters(), lr=0.01, momentum=0.9)

# Number of epochs (passes through the dataset) to train the model for.
num_epochs = 20

  "num_layers={}".format(dropout, num_layers))


In [None]:
'''
3.2
Train the model; every `eval_every` gradient updates the validation metrics
are computed, stored in the val_* lists and printed.
'''
# Kept for compatibility with later analysis; nothing in this cell fills them
# because the training set is not re-evaluated during training.
training_loss = []
training_f1 = []

# One entry is appended to each of these lists at every validation checkpoint.
val_loss = []
val_f1 = []
val_accuracy = []
val_precision = []
val_recall = []
val_fus_f1 = []

# Validate after this many gradient updates
eval_every = 200

# A counter for the number of gradient updates
num_iter = 0
for epoch in range(num_epochs):
    print("Starting epoch {}".format(epoch + 1))
    for (example_text, example_lengths, labels) in train_dataloader_vua:
        # The batch tensors are used directly: wrapping them in
        # torch.autograd.Variable has been unnecessary since PyTorch 0.4.
        if using_GPU:
            example_text = example_text.cuda()
            example_lengths = example_lengths.cuda()
            labels = labels.cuda()
        # predicted shape: (batch_size, 2)
        predicted = rnn_clf(example_text, example_lengths)
        batch_loss = nll_criterion(predicted, labels)
        # Clear old gradients, back-propagate, then update the parameters.
        rnn_clf_optimizer.zero_grad()
        batch_loss.backward()
        rnn_clf_optimizer.step()
        num_iter += 1
        # Calculate validation set loss and metrics every `eval_every` gradient updates
        if num_iter % eval_every == 0:
            avg_eval_loss, eval_accuracy, precision, recall, f1, fus_f1 = evaluate(val_dataloader_vua, rnn_clf,
                                                                                   nll_criterion, using_GPU, print_verbose=False)
            val_loss.append(avg_eval_loss)
            val_f1.append(f1)
            val_accuracy.append(eval_accuracy)
            val_precision.append(precision)
            val_recall.append(recall)
            val_fus_f1.append(fus_f1)

            print(
                  "Iteration {}. Validation Loss {}. Validation Accuracy {}. Validation Precision {}. Validation Recall {}. Validation F1 {}. Validation class-wise F1 {}.".format(
                      num_iter, avg_eval_loss, eval_accuracy, precision, recall, f1, fus_f1))
print("Training done!")

In [None]:
import numpy as np

# Mean of each validation metric over all checkpoints recorded during training.
print("#"*10, "average performance on val data","#"*10)
for metric_name, history in (('Precision', val_precision),
                             ('Recall', val_recall),
                             ('F1', val_f1)):
    print('{} on vua = '.format(metric_name), np.mean(np.array(history)))

########## average performance on val data ##########
Precision on vua =  66.77455142229971
Recall on vua =  50.47593390804598
F1 on vua =  55.050035745309685


In [None]:
"""
4. test the model
the following code is for test data of VUA
"""
'''
VUA test split: embed, batch and evaluate with the trained model.
'''
elmos_test_vua = h5py.File(elmo_dir + 'VUA_test.hdf5', 'r')

# Build one [embedded_sentence, label] pair per raw test example.
embedded_test_vua = []
for row in raw_test_vua:
    embedded_sentence = embed_sequence(row[raw_sent_idx],
                                       row[raw_verb_idx],
                                       word2idx, glove_embeddings, elmos_test_vua, suffix_embeddings)
    embedded_test_vua.append([embedded_sentence, row[raw_label_idx]])

test_inputs = [pair[embedding_idx] for pair in embedded_test_vua]  # embeddings
test_labels = [pair[label_idx] for pair in embedded_test_vua]  # labels
test_dataset_vua = TextDataset(test_inputs, test_labels)

test_dataloader_vua = DataLoader(dataset=test_dataset_vua,
                                 batch_size=batch_size,
                                 collate_fn=TextDataset.collate_fn)

# Same metrics as during validation, now computed on the test loader.
avg_eval_loss, eval_accuracy, precision, recall, f1, fus_f1 = evaluate(
    test_dataloader_vua, rnn_clf, nll_criterion, using_GPU)

  eval_text = Variable(eval_text, volatile=True)
  eval_lengths = Variable(eval_lengths, volatile=True)
  eval_labels = Variable(eval_labels, volatile=True)


[[3536.  497.]
 [ 576. 1264.]]


In [None]:
# Reference row labelled 'Gao et al', in column order P, R, F1, Acc, MaF1.
score_columns = ['P', 'R', 'F1', 'Acc', 'MaF1']
gao_scores = [53.4, 65.6, 58.9, 69.1, 53.4]

# Our row from the evaluate() call on the test set; MaF1 is left as None here.
unrounded_scores = [precision, recall, f1, eval_accuracy.item(), None]
# Round to one decimal; falsy entries (the None placeholder) pass through untouched.
our_scores = [round(score, 1) if score else score for score in unrounded_scores]

all_scores = [gao_scores, our_scores]
all_scores_df = pd.DataFrame(all_scores,
                             columns=score_columns,
                             index=['Gao et al', 'US'])
print("vua classification model: classification task\n")
all_scores_df

vua classification model: classification task



Unnamed: 0,P,R,F1,Acc,MaF1
Gao et al,53.4,65.6,58.9,69.1,53.4
US,68.7,71.8,70.2,81.7,


## Compute performance on test data using code adapted from Sequence Util

In [None]:
# Run the trained classifier over the test loader via the project helper.
# NOTE(review): presumably the helper also uses the formatted test csv to line
# the predictions up with the original rows -- confirm in vua_util.
test_csv_path = data_dir + 'VUA/VUA_formatted_test.csv'
cls_test_pred = write_predictions_vua_cls(raw_test_vua, test_dataloader_vua, rnn_clf,
                                          using_GPU, test_csv_path)

  eval_text = Variable(example_text, volatile=True)
  eval_lengths = Variable(example_lengths, volatile=True)
  eval_labels = Variable(labels, volatile=True)


[[3536.  497.]
 [ 576. 1264.]]


In [None]:
# Score the test predictions with the project helper; it returns two results,
# bound here as the macro-averaged and the overall verb performance.
# NOTE(review): the saved output lists per-genre and genre-independent
# Precision, Recall, F1, Accuracy -- presumably the returned values follow
# that order; confirm in vua_util.
macro_avg_performance, overall_verb_performance = get_performance_VUAverb_test(data_dir, cls_test_pred)

confusion matrix by genre

[[[ 523.  126.]
  [ 146.  433.]]

 [[ 996.   97.]
  [ 116.  176.]]

 [[ 428.  131.]
  [ 193.  507.]]

 [[1589.  143.]
  [ 121.  148.]]]

confusion matrix all

[[3536.  497.]
 [ 576. 1264.]]
Tagging model performance on test-verb: genre
news Precision, Recall, F1, Accuracy:  74.78411053540587 77.4597495527728 76.09841827768014 77.85016286644951
fiction Precision, Recall, F1, Accuracy:  60.273972602739725 64.46886446886447 62.30088495575222 84.62093862815884
academic Precision, Recall, F1, Accuracy:  72.42857142857143 79.46708463949844 75.7847533632287 74.26528991262907
conversation Precision, Recall, F1, Accuracy:  55.01858736059479 50.85910652920962 52.85714285714285 86.80659670164917
Tagging model performance on test-verb: regardless of genre
Precision, Recall, F1, Accuracy:  68.69565217391305 71.77739920499717 70.20272146625938 81.72995062148817


In [None]:
# Take entry 2 of the macro averages (presumably F1, assuming a
# Precision, Recall, F1, Accuracy order -- TODO confirm) and append it to the
# overall scores so the result has one value per column of the score table.
macro_F1 = macro_avg_performance[2]
classification_performance = np.append(overall_verb_performance, macro_F1)

In [None]:
# Final comparison table: our scores (now including MaF1) next to the reference row.
our_scores = [round(score, 1) for score in classification_performance]
all_scores = [gao_scores, our_scores]
all_scores_df = pd.DataFrame(
    all_scores,
    columns=['P', 'R', 'F1', 'Acc', 'MaF1'],
    index=['Gao et al', 'US'],
)
print("vua classification model: classification task\n")
all_scores_df

vua classification model: classification task



Unnamed: 0,P,R,F1,Acc,MaF1
Gao et al,53.4,65.6,58.9,69.1,53.4
US,68.7,71.8,70.2,81.7,66.8
