# Lexical Baseline

In [1]:
from google.colab import drive
ROOT = '/content/drive'
drive.mount('/content/drive')

Mounted at /content/drive


In [1]:
import os
import sys
from os.path import join 
parent_dr = os.path.split(os.getcwd())[0]
if parent_dr not in sys.path:
    sys.path.append(parent_dr)

In [2]:
repo_dir = '/content/drive/MyDrive/nlp_group_proj/metaphor-detection'
if repo_dir not in sys.path:
    sys.path.append(repo_dir)
#join(repo_dir)

In [3]:
import numpy as np
import random
from core.data.gao_data import *
from models.classification import lexical_baseline as lb

In [4]:
# gao et all data directory
data_dir = os.path.join("resources", "metaphor-in-context", "data")
print(f"Data directory: {data_dir}")

Data directory: resources/metaphor-in-context/data


In [5]:
data_container = ExperimentData(data_dir)

In [6]:
PROJECT_PATH = join(repo_dir)
%cd "{PROJECT_PATH}"

/content/drive/MyDrive/nlp_group_proj/metaphor-detection


In [7]:
%pwd

'/content/drive/MyDrive/nlp_group_proj/metaphor-detection'

**Read in all data**

In [8]:
data_container.read_all_data(to_pandas=False)

MOH formatted nrow: 1603
MOH train nrow: 1283
MOH test nrow: 160
MOH val nrow: 160
MOH-X formatted svo nrow: 647
MOH-X formatted svo cleaned nrow: 647
TroFi formatted_all3737 nrow: 3737
TroFi-X formatted svo nrow: 1444
VUA formatted nrow: 23113
VUA train nrow: 15516
VUA train augmented nrow: 116622
VUA train no val nrow: 12541
VUA test nrow: 5873
VUA val nrow: 1724
VUA_seq train nrow: 6323
VUA_seq test nrow: 2694
VUA_seq val nrow: 1550


In [36]:
print(len(data_container.vua_formatted_train))
print(len(data_container.vua_seq_formatted_test))

15516
2694


In [37]:
# Convert the data into lists of tuples of the form: ('verb', 'label')
tupled_vua_train = lb.process_vua_to_tuple(data_container.vua_formatted_train)
tupled_vua_test = lb.process_vua_to_tuple(data_container.vua_formatted_test)

In [38]:
vua_LB_model = lb.LexicalBaseline()

Initializing new Lexical Baseline model


In [39]:
# create_CLS_Model() records rate at which each verb is used metaphorically
# in the provided data
vua_LB_model.create_CLS_Model(tupled_vua_train)

In [40]:
# CLS_predict() uses the recorded probabilities to predict verb metaphoricity on
# the provided test data
vua_LB_model.CLS_predict(tupled_vua_test)

In [41]:
# evaluate() uses the gold standard labels and the predictions from CLS_predict()
# to compute performance metrics
vua_LB_model.evaluate()


                Accuracy: 76.24723310063
                Precision: 68.22709163346613
                Recall: 38.898353208404316
                F1: 49.54792043399638
                


Cross Fold Validaton on the MOH-X Data

In [17]:
len(data_container.moh_x_formatted_svo)

647

In [10]:
tupled_moh_x_train = lb.process_moh_x_to_tuple(data_container.moh_x_formatted_svo)
len(tupled_moh_x_train)

647

In [11]:
mohx_cv_results = lb.lex_baseline_CV(tupled_moh_x_train)

Initializing new Lexical Baseline model

                Accuracy: 41.53846153846154
                Precision: 33.333333333333336
                Recall: 22.580645161290324
                F1: 26.923076923076923
                
Initializing new Lexical Baseline model

                Accuracy: 43.07692307692308
                Precision: 33.333333333333336
                Recall: 23.333333333333332
                F1: 27.45098039215686
                
Initializing new Lexical Baseline model

                Accuracy: 46.15384615384615
                Precision: 30.0
                Recall: 22.22222222222222
                F1: 25.53191489361702
                
Initializing new Lexical Baseline model

                Accuracy: 40.0
                Precision: 46.666666666666664
                Recall: 18.42105263157895
                F1: 26.41509433962264
                
Initializing new Lexical Baseline model

                Accuracy: 41.53846153846154
                Precision: 

In [30]:
avg_mohx_results = np.mean(mohx_cv_results, axis=0)
print('Prec., Recall, F1, Acc. for MOH-X: ', np.round(avg_mohx_results,1))

Prec., Recall, F1, Acc. for MOH-X:  [35.7 23.9 28.3 41.5]


Cross fold Validation on the TroFi data

In [13]:
tupled_trofi = lb.process_trofi_to_tuple(data_container.trofi_formatted_all)
trofi_cv_results = lb.lex_baseline_CV(tupled_trofi)

Initializing new Lexical Baseline model

                Accuracy: 72.19251336898395
                Precision: 76.06837606837607
                Recall: 53.93939393939394
                F1: 63.120567375886516
                
Initializing new Lexical Baseline model

                Accuracy: 69.25133689839572
                Precision: 66.66666666666667
                Recall: 54.43037974683544
                F1: 59.930313588850176
                
Initializing new Lexical Baseline model

                Accuracy: 73.52941176470588
                Precision: 74.21875
                Recall: 59.006211180124225
                F1: 65.7439446366782
                
Initializing new Lexical Baseline model

                Accuracy: 70.32085561497327
                Precision: 70.8955223880597
                Recall: 56.88622754491018
                F1: 63.12292358803986
                
Initializing new Lexical Baseline model

                Accuracy: 71.92513368983957
               

In [25]:
avg_trofi_results = np.mean(trofi_cv_results, axis=0)
print('Prec., Recall, F1, Acc. for TroFi: ',np.round(avg_trofi_results,1))

Prec., Recall, F1, Acc. for TroFi:  [72.6 55.3 62.8 71.5]


Macro-averaged F1

In [43]:
n_moh = len(data_container.moh_formatted_train)
n_trofi = len(data_container.trofi_formatted_all)
n_vua = len(data_container.vua_formatted_train)
n = n_moh + n_trofi + n_vua
n
macro_F1 = ((avg_trofi_results[2] * n_trofi) + (avg_mohx_results[2] * n_moh) \
            + (vua_LB_model.met_f1 * n_vua)) / n
macro_F1

50.6247224044731