In [1]:
from google.colab import drive
ROOT = '/content/drive'
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
import os
import sys
from os.path import join 
repo_dir = '/content/drive/MyDrive/metaphor-detection'

In [3]:
## directories for resources
data_dir = repo_dir + '/resources/metaphor-in-context/data/'
glove_dir = repo_dir + '/resources/glove/'
elmo_dir = repo_dir + '/resources/elmo/'

In [4]:
## installing the requirements
%cd 'drive/MyDrive/metaphor-detection/' 
#!pip install allennlp
#!pip install -r gao-g-requirements.txt
#!pip install --upgrade google-cloud-storage

/content/drive/MyDrive/metaphor-detection


In [5]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable
from torch.utils.data import DataLoader

from core.gao_files.sequence.model import RNNSequenceModel
import time
import matplotlib
from core.gao_files.sequence.util import *
from core.data.gao_data import *
import h5py
import math
import numpy as np
import random

## Data Preperation

In [6]:
### Read MOH-x Data 
data_dir = os.path.join("resources", "metaphor-in-context", "data")
data_container = ExperimentData(data_dir)
data_container.read_moh_x_data(to_pandas = False)
moh_x_data = data_container.moh_x_formatted_svo_cleaned

MOH-X formatted svo nrow: 647
MOH-X formatted svo cleaned nrow: 647


In [7]:
moh_x_data_formatted = []

In [8]:
raw_mohx = []

with open(data_dir + '/MOH-X/MOH-X_formatted_svo_cleaned.csv') as f:
    # arg1  	arg2	verb	sentence	verb_idx	label
    lines = csv.reader(f)
    next(lines)
    for line in lines:
        sentence = line[3]
        label_seq = [0] * len(sentence.split())
        pos_seq = [0] * len(label_seq)
        verb_idx = int(line[4])
        verb_label = int(line[5])
        label_seq[verb_idx] = verb_label
        pos_seq[verb_idx] = 1   # idx2pos = {0: 'words that are not focus verbs', 1: 'focus verb'}
        raw_mohx.append([sentence.strip(), label_seq, pos_seq])



In [9]:
## Sample data format
raw_mohx[0]

['He absorbed the knowledge or beliefs of his tribe .',
 [0, 1, 0, 0, 0, 0, 0, 0, 0, 0],
 [0, 1, 0, 0, 0, 0, 0, 0, 0, 0]]

In [10]:
vocab = get_vocab(raw_mohx)
word2idx, idx2word = get_word2idx_idx2word(vocab)
glove_embeddings = get_embedding_matrix(glove_dir + 'glove840B300d.txt', 
                                        word2idx, 
                                        idx2word, 
                                        normalization=False)

vocab size:  1732


100%|██████████| 2196017/2196017 [00:47<00:00, 45921.26it/s]


Number of pre-trained word vectors loaded:  1730
Embeddings mean:  -0.0001946780103025958
Embeddings stdev:  0.3732253909111023


In [11]:
elmo_embeddings = h5py.File(elmo_dir + 'MOH-X_cleaned.hdf5', 'r')

In [12]:
## Embedding the data with shuffled randomly
random.seed(0)
random.shuffle(raw_mohx)

# second argument is the post sequence, which we don't need
embedded_mohx = [[embed_indexed_sequence(data[0], data[2], word2idx,
                                      glove_embeddings, elmo_embeddings, None),
                       data[2], data[1]]
                      for data in raw_mohx]

In [13]:
sentences = [data[0] for data in embedded_mohx]
poss = [data[1] for data in embedded_mohx]
labels = [data[2] for data in embedded_mohx]

## K-Fold Training

In [16]:
print(f"Data Length is {len(raw_mohx)}")
NUMBER_FOLD = 10
fold_size = round(len(raw_mohx) / NUMBER_FOLD)
print(f"Each fold size is {fold_size}")

Data Length is 647
Each fold size is 65


In [17]:
folds = []
for i in range(NUMBER_FOLD):
    folds.append((sentences[i * fold_size:(i + 1) * fold_size],
                  poss[i * fold_size:(i + 1) * fold_size],
                  labels[i * fold_size: (i + 1) * fold_size]))
idx2pos = {0: 'words that are not focus verbs', 1: 'focus verb'}


In [18]:
optimal_f1s = []
accuracies = []
precisions = []
recalls = []
BATCH_SIZE = 10
using_GPU = True
NUM_EPOCHS = 10
for i in range(NUMBER_FOLD):
    ### DATA BATCHING
    training_sentences = []
    training_labels = []
    training_poss = []
    for j in range(NUMBER_FOLD):
        if j != i:
            training_sentences.extend(folds[j][0])
            training_poss.extend(folds[j][1])
            training_labels.extend(folds[j][1])
    training_dataset_mohX = TextDatasetWithGloveElmoSuffix(training_sentences, 
                                                            training_poss, 
                                                            training_labels)
    val_dataset_mohX = TextDatasetWithGloveElmoSuffix(folds[i][0], 
                                                       folds[i][1], 
                                                       folds[i][2])

    # Data-related hyperparameters
    # Set up a DataLoader for the training, validation, and test dataset
    train_dataloader_mohX = DataLoader(dataset=training_dataset_mohX, 
                                       batch_size=BATCH_SIZE, 
                                       shuffle=True,
                                      collate_fn=TextDatasetWithGloveElmoSuffix
                                                .collate_fn)
    val_dataloader_mohX = DataLoader(dataset=val_dataset_mohX, 
                                     batch_size=BATCH_SIZE, 
                                     shuffle=False,
                                      collate_fn=TextDatasetWithGloveElmoSuffix
                                                .collate_fn)
    rnn_seq = RNNSequenceModel(num_classes=2, 
                                    embedding_dim=300+1024, 
                                    hidden_size=300, num_layers=1, 
                                    bidir=True,
                                    dropout1=0.5, dropout2=0, dropout3=0)
    nll_criterion = nn.NLLLoss()
    if using_GPU:
        rnn_seq = rnn_seq.cuda()
        nll_criterion = nll_criterion.cuda()

    rnn_seq_optimizer = optim.Adam(rnn_seq.parameters(), lr=0.001)
    #### TRAIN ####
    performance_matrix = None
    training_loss = []
    val_loss = []
    val_p = []
    val_r = []
    val_acc = []
    training_f1 = []
    val_f1 = []
    train_dataloader = train_dataloader_mohX
    val_dataloader = val_dataloader_mohX
    for epoch in range(NUM_EPOCHS):
        num_iter = 0
        model_index = 0
        comparable = []
        print("-----Starting epoch {}------".format(epoch + 1))
        now = time.time()
        for (__, example_text, example_lengths, labels) in train_dataloader:
            example_text = Variable(example_text)
            example_lengths = Variable(example_lengths)
            labels = Variable(labels)
            if using_GPU:
                example_text = example_text.cuda()
                example_lengths = example_lengths.cuda()
                labels = labels.cuda()

            # predicted shape: (batch_size, 2)
            predicted = rnn_seq(example_text, example_lengths)
            batch_loss = nll_criterion(predicted.view(-1, 2), labels.view(-1))
            rnn_seq_optimizer.zero_grad()
            batch_loss.backward()
            rnn_seq_optimizer.step()
            num_iter += 1
            # Calculate validation and training set loss and accuracy every 200 gradient updates
            if num_iter % 50 == 0:
                avg_eval_loss, performance_matrix = evaluate(idx2pos, 
                                                             val_dataloader_mohX, 
                                                             rnn_seq,
                                                             nll_criterion, 
                                                             using_GPU)
                val_loss.append(avg_eval_loss)
                val_p.append(performance_matrix[1][0])
                val_r.append(performance_matrix[1][1])
                val_f1.append(performance_matrix[1][2])
                val_acc.append(performance_matrix[1][3])

                #print("####Iteration {}. Validation Loss {}.#####".format(num_iter, avg_eval_loss))
                # filename = '../models/LSTMSuffixElmoAtt_???_all_iter_' + str(num_iter) + '.pt'
                # torch.save(rnn_clf, filename)
                 
    print('val_f1: ', val_f1)
    idx = 0
    if math.isnan(max(val_f1)):
        optimal_f1s.append(max(val_f1[6:]))
        idx = val_f1.index(optimal_f1s[-1])
        precisions.append(val_p[idx])
        recalls.append(val_r[idx])
        accuracies.append(val_acc[idx])
    else:
        optimal_f1s.append(max(val_f1))
        idx = val_f1.index(optimal_f1s[-1])
        precisions.append(val_p[idx])
        recalls.append(val_r[idx])
        accuracies.append(val_acc[idx])


print('F1 on MOH-X by 10-fold = ', optimal_f1s)
print('Precision on MOH-X = ', np.mean(np.array(precisions)))
print('Recall on MOH-X = ', np.mean(np.array(recalls)))
print('F1 on MOH-X = ', np.mean(np.array(optimal_f1s)))
print('Accuracy on MOH-X = ', np.mean(np.array(accuracies)))

-----Starting epoch 1------


  eval_text = Variable(eval_text, volatile=True)
  eval_lengths = Variable(eval_lengths, volatile=True)
  eval_labels = Variable(eval_labels, volatile=True)
  recall = 100 * grid[1, 1] / np.sum(grid[:, 1])


------------------------------
total_eval_loss.shape torch.Size([])
PRFA performance for  words that are not focus verbs 0.0 nan nan 99.55654101995566
PRFA performance for  focus verb 40.0 100.0 57.142857142857146 40.0
-----Starting epoch 2------
------------------------------
total_eval_loss.shape torch.Size([])
PRFA performance for  words that are not focus verbs 0.0 nan nan 99.77827050997783
PRFA performance for  focus verb 40.0 100.0 57.142857142857146 40.0
-----Starting epoch 3------
------------------------------
total_eval_loss.shape torch.Size([])
PRFA performance for  words that are not focus verbs 0.0 nan nan 99.77827050997783
PRFA performance for  focus verb 40.0 100.0 57.142857142857146 40.0
-----Starting epoch 4------
------------------------------
total_eval_loss.shape torch.Size([])
PRFA performance for  words that are not focus verbs nan nan nan 100.0
PRFA performance for  focus verb 40.0 100.0 57.142857142857146 40.0
-----Starting epoch 5------


  precision = 100 * grid[1, 1] / np.sum(grid[1])


------------------------------
total_eval_loss.shape torch.Size([])
PRFA performance for  words that are not focus verbs nan nan nan 100.0
PRFA performance for  focus verb 40.0 100.0 57.142857142857146 40.0
-----Starting epoch 6------
------------------------------
total_eval_loss.shape torch.Size([])
PRFA performance for  words that are not focus verbs 0.0 nan nan 99.77827050997783
PRFA performance for  focus verb 40.0 100.0 57.142857142857146 40.0
-----Starting epoch 7------
------------------------------
total_eval_loss.shape torch.Size([])
PRFA performance for  words that are not focus verbs nan nan nan 100.0
PRFA performance for  focus verb 40.0 100.0 57.142857142857146 40.0
-----Starting epoch 8------
------------------------------
total_eval_loss.shape torch.Size([])
PRFA performance for  words that are not focus verbs nan nan nan 100.0
PRFA performance for  focus verb 40.0 100.0 57.142857142857146 40.0
-----Starting epoch 9------
------------------------------
total_eval_loss.s

In [20]:
import pandas as pd
gao_scores = [79.1, 73.5, 75.6, 77.2]
our_scores = [np.mean(np.array(precisions)),
  np.mean(np.array(recalls)),
  np.mean(np.array(optimal_f1s)),
  np.mean(np.array(accuracies))]
our_scores = [round(score,1) for score in our_scores]
all_scores = [gao_scores, our_scores]
all_scores_df = pd.DataFrame(all_scores, columns= ['P', 'R', 'F1', 'Acc'], index=['Gao et al', 'US'])
print("Moh-X seq model: classification task\n")
all_scores_df

Moh-X seq model: classification task



Unnamed: 0,P,R,F1,Acc
Gao et al,79.1,73.5,75.6,77.2
US,49.9,99.2,66.1,51.0
