# Lexical Baseline

In [1]:
from google.colab import drive
# Mount point for Google Drive inside the Colab runtime.
ROOT = '/content/drive'
# Mount Drive at ROOT (same path the literal previously repeated here).
drive.mount(ROOT)

Mounted at /content/drive


In [1]:
import os
import sys
from os.path import join 
# Directory one level above the current working directory; put it on the
# import path (once) so modules located there can be imported.
parent_dr = os.path.dirname(os.getcwd())
if parent_dr not in sys.path:
    sys.path.append(parent_dr)

In [2]:
# Location of the project checkout on the mounted Drive.
repo_dir = '/content/drive/MyDrive/nlp_group_proj/metaphor-detection'
# Register the checkout on the import path, avoiding duplicate entries when
# the cell is re-run.
if repo_dir not in sys.path:
    sys.path.append(repo_dir)

In [3]:
import numpy as np
import random
from core.data.gao_data import *
from models.classification import lexical_baseline as lb

In [4]:
# Gao et al. data directory. This is a relative path, so it is resolved
# against whatever the working directory is at the time the files are opened.
data_dir = join("resources", "metaphor-in-context", "data")
print("Data directory: {}".format(data_dir))

Data directory: resources/metaphor-in-context/data


In [5]:
# Container for the experiment datasets rooted at data_dir (ExperimentData is
# presumably provided by the `core.data.gao_data` star import).
# NOTE(review): data_dir is a relative path and this cell runs before the
# notebook changes into the repository directory; this only works if the
# container resolves paths lazily at read time — confirm.
data_container = ExperimentData(data_dir)

In [6]:
PROJECT_PATH = join(repo_dir)
%cd "{PROJECT_PATH}"

/content/drive/MyDrive/nlp_group_proj/metaphor-detection


In [7]:
# Display the current working directory as a sanity check.
%pwd

'/content/drive/MyDrive/nlp_group_proj/metaphor-detection'

**Read in all data**

In [8]:
# Load every dataset into the container; a row count is printed per dataset.
# to_pandas=False presumably keeps the data as plain Python structures rather
# than DataFrames — confirm against ExperimentData.read_all_data.
data_container.read_all_data(to_pandas=False)

MOH formatted nrow: 1603
MOH train nrow: 1283
MOH test nrow: 160
MOH val nrow: 160
MOH-X formatted svo nrow: 647
MOH-X formatted svo cleaned nrow: 647
TroFi formatted_all3737 nrow: 3737
TroFi-X formatted svo nrow: 1444
VUA formatted nrow: 23113
VUA train nrow: 15516
VUA train augmented nrow: 116622
VUA train no val nrow: 12541
VUA test nrow: 5873
VUA val nrow: 1724
VUA_seq train nrow: 6323
VUA_seq test nrow: 2694
VUA_seq val nrow: 1550


In [10]:
# Sanity-check two split sizes, one per output line.
# NOTE(review): the second value is the VUA *sequence* test split, whereas the
# following cell converts `vua_formatted_test` — confirm which one is meant.
print(
    len(data_container.vua_formatted_train),
    len(data_container.vua_seq_formatted_test),
    sep="\n",
)

15516
2694


In [11]:
# Turn the VUA train and test splits into lists of tuples of the form
# ('verb', 'label'), applying the same converter to each split in order.
tupled_vua_train, tupled_vua_test = (
    lb.process_vua_to_tuple(split)
    for split in (data_container.vua_formatted_train,
                  data_container.vua_formatted_test)
)

In [13]:
# Fresh lexical-baseline model, to be fit on the VUA training tuples below.
vua_LB_model = lb.LexicalBaseline()

Initializing new Lexical Baseline model


In [14]:
# Fit step: create_CLS_Model() records the rate at which each verb is used
# metaphorically in the provided (verb, label) training data.
vua_LB_model.create_CLS_Model(tupled_vua_train)

In [15]:
# Predict step: CLS_predict() uses the recorded per-verb probabilities to
# predict verb metaphoricity on the provided test data. The predictions are
# kept on the model for evaluate() rather than returned here.
vua_LB_model.CLS_predict(tupled_vua_test)

In [16]:
# Scoring step: evaluate() compares the gold-standard labels with the
# predictions from CLS_predict() and prints accuracy, precision, recall and F1.
vua_LB_model.evaluate()


                Accuracy: 76.24723310063
                Precision: 68.22709163346613
                Recall: 38.898353208404316
                F1: 49.54792043399638
                


Cross Fold Validation on the MOH-X Data

In [17]:
# Number of MOH-X SVO examples available for cross-validation.
len(data_container.moh_x_formatted_svo)

647

In [9]:
# NOTE(review): the saved output of this cell is a NameError and its execution
# count is out of sequence with the surrounding cells, so it does not survive
# a Restart & Run All as recorded. The cells that follow perform the 10-fold
# cross-validation by hand instead. Confirm whether lb.lex_baseline_CV works
# on a fresh kernel; if not, fix the helper or remove this cell.
mohx_cv_results = lb.lex_baseline_CV(data_container.moh_x_formatted_svo)

NameError: ignored

In [17]:
# Convert the MOH-X SVO data to tuples (presumably the same ('verb', 'label')
# form used for VUA — confirm in lb.process_moh_x_to_tuple).
# Despite the `_train` suffix this holds the whole dataset; the
# cross-validation folds are sliced out of it later.
tupled_moh_x_train = lb.process_moh_x_to_tuple(data_container.moh_x_formatted_svo)
len(tupled_moh_x_train)

647

In [18]:
# Fold size for 10 folds, rounded up: adding (10 - 1) before the floor
# division yields ceil(len / 10) using integer arithmetic only.
lines_per_fold = (len(tupled_moh_x_train) + 9) // 10
print(lines_per_fold)

65


In [19]:
# Seed the global RNG with the same value as Gao et al.
random.seed(3)
# Shuffle a copy instead of the shared list: shuffling `tupled_moh_x_train`
# in place made this cell non-idempotent (re-running it reshuffled data that
# was already shuffled, so the folds and reported scores changed between
# runs even though the seed is fixed). The first-run permutation is unchanged.
shuffled_moh_x = list(tupled_moh_x_train)
random.shuffle(shuffled_moh_x)

# Cut the shuffled examples into 10 consecutive folds of `lines_per_fold`
# examples each (the last fold is shorter when the size is not a multiple).
n_folds = 10  # must stay in sync with the divisor used for `lines_per_fold`
ten_folds = [
    shuffled_moh_x[k * lines_per_fold: (k + 1) * lines_per_fold]
    for k in range(n_folds)
]
print(len(ten_folds))

# 10-fold cross-validation: each fold is held out once for evaluation while a
# fresh model is fit on the concatenation of the remaining folds.
PRFA_list = []
for i, raw_val_mohX in enumerate(ten_folds):
    # training data = every example from every fold except the held-out one
    raw_train_mohX = [
        example
        for j, fold in enumerate(ten_folds)
        if j != i
        for example in fold
    ]
    # make model, predict, and evaluate
    mohx_model = lb.LexicalBaseline()
    mohx_model.create_CLS_Model(raw_train_mohX)
    mohx_model.CLS_predict(raw_val_mohX)
    mohx_model.evaluate()
    PRFA_list.append([mohx_model.precision,
                      mohx_model.recall,
                      mohx_model.met_f1,
                      mohx_model.accuracy])

# One row per fold; columns are precision, recall, F1, accuracy.
PRFA = np.array(PRFA_list)
      

10
Initializing new Lexical Baseline model

                Accuracy: 44.61538461538461
                Precision: 34.78260869565217
                Recall: 27.586206896551722
                F1: 30.769230769230766
                
Initializing new Lexical Baseline model

                Accuracy: 49.23076923076923
                Precision: 50.0
                Recall: 33.333333333333336
                F1: 40.0
                
Initializing new Lexical Baseline model

                Accuracy: 49.23076923076923
                Precision: 43.47826086956522
                Recall: 33.333333333333336
                F1: 37.735849056603776
                
Initializing new Lexical Baseline model

                Accuracy: 40.0
                Precision: 29.41176470588235
                Recall: 15.625
                F1: 20.408163265306126
                
Initializing new Lexical Baseline model

                Accuracy: 56.92307692307692
                Precision: 57.142857142857146
  

In [20]:
# Average each metric column over the folds (axis 0 = one row per fold).
print('Prec., Recall, F1, Acc. for MOH-X: ', PRFA.mean(axis=0))

Prec., Recall, F1, Acc. for MOH-X:  [39.0868605  26.69819114 31.2878613  43.55583127]


Cross fold Validation on the TroFi data