In [1]:
#Data manipulation
import pandas as pd
import numpy as np
import string

In [2]:
#NLP nltk
import nltk
from nltk.corpus import wordnet as wn
from nltk.corpus import stopwords
stop_words = list(stopwords.words("english"))

In [3]:
#NLP spaCy
import spacy
trf = spacy.load('en_core_web_trf')
lg = spacy.load('en_core_web_lg')

In [4]:
#NLP word embeddings
import gensim
from gensim.test.utils import datapath
from gensim import utils
import gensim.models
from gensim.models import Word2Vec

import gensim.downloader as api
dataset = api.load("text8")
wv = Word2Vec(dataset)

In [5]:
#Cosine computation
import scipy as sp
from scipy.spatial import distance

In [6]:
#Creating useful functions/classes

def clean_synonyms(synsets):
    cleaned_synonyms = []
    no_stop_words = []
    syn_in_wv = []

    
    sep = '.'
    for s in synsets:
        synonym = s.name()
        cleaned_synonyms.append(synonym.split(sep, 1)[0])
    
    for word in cleaned_synonyms:
        if word not in stop_words:
            no_stop_words.append(word)
        if word in wv.wv.index_to_key:
            syn_in_wv.append(word)
    
    return syn_in_wv

class Sentence:
    
    def __init__(self, text = ''):
        self.text = text
        
        doc = trf(text)
        target = []
        context = []
                
        for token in doc:
            if token.pos_ == 'VERB':
                target.append(token.text)
            else:
                if token.text.isalpha() == True:
                    context.append(token.text)
                else:
                    pass
        
        self.target = target
        self.context = context

In [7]:
def metaphor_checker(sentence):
    
    synonyms = []
    syn_vec = []
    context_vec = []
    comparison = []
    
    target = Sentence(sentence).target
    context = Sentence(sentence).context
    
    direct_syn = wn.synsets(target[0], pos = 'v')
 
    for syn in direct_syn:
        lemmas = syn.lemmas()
        for l in lemmas:
            hyponyms = l.synset().hyponyms() # hyponym: "a word whose meaning is included in that of another word"
            if len(hyponyms) > 0:
                for h in hyponyms:
                    if h in synonyms:
                        pass
                    else:
                        synonyms.append(h)
    
    if len(synonyms) > 0:
        cleaned_synonyms = clean_synonyms(synonyms)
            
        for s in cleaned_synonyms:
            vec = wv.wv[s]
            syn_vec.append(vec)
    
        for word in context:
            if word not in stop_words:
                context_vec.append(wv.wv[word])
            else:
                pass
        
        context_mean = np.mean(context_vec, axis = 0)
    
        for vec in syn_vec:
            comparison.append(distance.cosine(context_mean, vec))
        
        comparison_array = np.array(comparison)
        
        best_fit = cleaned_synonyms[comparison.index(np.amax(comparison_array))] 
    
        best_fit_word = synonyms[comparison.index(np.amax(comparison_array))] #Identifying best fit word
    else:
        best_fit = target
    
    cosine = distance.cosine(wv.wv[best_fit], wv.wv[target])
    
    if cosine > 0.90:
        return sentence, target, 'LITERAL', cosine, best_fit
    else:
        return sentence, target, 'METAPHORICAL', cosine, best_fit

In [8]:
metaphor_checker('she read the newest book from her favorite young adult author.') ##Problem appears to be occuring w/identifying best fit word



('she read the newest book from her favorite young adult author.',
 ['read'],
 'LITERAL',
 1.0915144681930542,
 'strike')

In [9]:
metaphor_checker('she devoured the newest book from her favorite young adult author.')

('she devoured the newest book from her favorite young adult author.',
 ['devoured'],
 'METAPHORICAL',
 0,
 ['devoured'])

In [10]:
metaphor_checker('she devours the magazines from the old corner store.')

('she devours the magazines from the old corner store.',
 ['devours'],
 'METAPHORICAL',
 0,
 ['devours'])