# Prepare data

First we will create a dictionary of Semcor words, and look at them and their frequencies.


Next, we create a dataset from a subsample of SemCor, removing the most common and the least common words.


We limit this set in several ways:
    - only noun senses
    - at most 30 examples of each sense of a word
    - concrete nouns
    - no nominalizations, which tend to have eventive readings (we are interested in nouns denoting entities)

We begin by iterating through a randomly shuffled SemCor. We throw out every word that does not fit our criteria; for the words we keep, we then look at their senses.



At the end, we store a list of all of the words we have collected. For each item in the dictionary, we should know:
- the number of tokens
- the WordNet senses
- a list of the SemCor sentence indices of the tokens of each word.


In [1]:
import sys
sys.path.append("../lib/")

from nltk.corpus import semcor
from nltk.tree import Tree
import itertools
import random
import pandas as pd
import torch
from bert import *
import csv
from nltk.corpus.reader.wordnet import Lemma
from nltk.corpus import wordnet as wn
from nltk.stem import WordNetLemmatizer

import inflect
import os
from scipy.stats import spearmanr, pearsonr
import re


bert = BERTBase()


INFO:pytorch_pretrained_bert.modeling:loading archive file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased.tar.gz from cache at /Users/gabriellachronis/.pytorch_pretrained_bert/9c41111e2de84547a463fd39217199738d1e3deb72d4fec4399e6e241983c6f0.ae3cef932725ca7a30cdcb93fc6e09150a55e2a130ec7af63975a16c153ae2ba
INFO:pytorch_pretrained_bert.modeling:extracting archive file /Users/gabriellachronis/.pytorch_pretrained_bert/9c41111e2de84547a463fd39217199738d1e3deb72d4fec4399e6e241983c6f0.ae3cef932725ca7a30cdcb93fc6e09150a55e2a130ec7af63975a16c153ae2ba to temp dir /var/folders/9m/vzvx58rs51v_x5nm620fz4xr0000gn/T/tmpy4hhbilk
INFO:pytorch_pretrained_bert.modeling:Model config {
  "attention_probs_dropout_prob": 0.1,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "max_position_embeddings": 512,
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "type_vocab_size": 2,
  "vocab_size": 3

In [None]:
# Shared WordNet lemmatizer; get_senses_in_tagged_sentence reads this global.
lemmatizer = WordNetLemmatizer()
# Quick check of the lemmatizer on a sample word form.
lemmatizer.lemmatize("impressed")

In [None]:
"""
load semcor stats
"""

# Full corpus views (swap for the DEBUG ONLY block below to work on a small slice).
sents = semcor.sents()
# NOTE(review): the tag value is ' sem ' with surrounding spaces, which is not one of the
# values NLTK documents ("pos", "sem", "both"). The cells below call chunk.pos() and compare
# the result with 'NN', so they presumably need both POS and sense tags on every chunk --
# confirm that this value behaves like tag="both" in the installed NLTK version.
tagged_sents = semcor.tagged_sents( tag = ' sem ' )
words = semcor.words()


##########
# DEBUG ONLY
############

# Small slices of the same three views, for quick experiments.
# tagged_sents = semcor.tagged_sents( tag = ' sem ' )[:20]
# sents = semcor.sents()[:20]
# words = semcor.words()[:1000]





In [None]:
# Scratch cell: take the first 20 tagged sentences, pick the second chunk of the
# first sentence, and display its (token, tag) pairs.
lala = semcor.tagged_sents( tag = ' sem ' )[:20]
lala = lala[0][1]
lala.pos()

In [None]:
def get_senses_in_tagged_sentence(tagged_sentence):
    """
    Collect the sense-tagged chunks of one sense-tagged corpus sentence.

    Returns a list of (lemma, sense, pos) tuples: one tuple per (token, tag)
    pair reported by ``chunk.pos()``, for every chunk whose label is a WordNet
    ``Lemma``. ``lemma`` is the lowercased, space-joined text of the chunk run
    through the module-level ``lemmatizer``. Chunks with any other kind of
    label (function words don't have a sense) contribute nothing.
    """
    found = []
    for chunk in tagged_sentence:
        # text, lemma and tags are computed for every chunk, before the label check
        text = ' '.join(chunk.leaves()).lower()
        lemma = lemmatizer.lemmatize(text)
        tagged_tokens = chunk.pos()

        sense = chunk.label()
        if not isinstance(sense, Lemma):
            # no wordnet sense on this chunk
            continue

        found.extend((lemma, sense, pos) for _, pos in tagged_tokens)
    return found


In [None]:
# Show the first sentence, then the (lemma, sense, pos) tuples extracted from its tagged version.
print(sents[0])
get_senses_in_tagged_sentence(tagged_sents[0])

In [None]:
"""
little script on semcor_lexicon to see where we should cut off most and least frequent
"""

# `semcor_lexicon` is not defined in any earlier cell of this notebook, so build it here:
# a frequency table of the SemCor word forms.
# NOTE(review): lowercasing is an assumption (the sense index below also lowercases);
# adjust if the cut-offs were chosen on case-sensitive counts.
from collections import Counter

semcor_lexicon = Counter(word.lower() for word in words)

# the 200 most frequent word forms
print(semcor_lexicon.most_common(200))
# the n least frequent word forms, rarest first
n = 30000
print(semcor_lexicon.most_common()[:-n-1:-1])

# we want to keep words with a count < 600

# and with a count greater than > 10 (which is knocking off the l)

In [None]:
"""
get basic semcor stats
"""
# Print each label followed by the size of the corresponding corpus view.
for label, corpus_view in (("number of sentences:", sents), ("number of tokens:", words)):
    print(label)
    print(len(corpus_view))

# Step 2: Create Token Index


In [None]:
"""
Next step is to create an index of all of the tokens of a single lemma. 
So, we build a data structure with all of the word forms found in semcor. With each word form,
we store a list of all of the sentences containing it.
"""

class Vividict(dict):
    """A dict whose missing keys spring into existence as nested Vividicts."""

    def __missing__(self, key):
        # dict.__getitem__ calls this on a miss: store a fresh instance of the
        # same class under `key` and hand that instance back.
        child = type(self)()
        self[key] = child
        return child


#word_index = {}
# lemma -> str(sense) -> set of ids of the sentences in which that sense occurs
sense_index = Vividict()

# at most this many sentences are kept for any one sense
max_sentences_per_sense = 30

# visit the corpus in random order, so the cap keeps a random subsample of each sense
semcor_indices = list(range(len(sents)))
random.shuffle(semcor_indices)

# go through the dataset sentence by sentence
for sentence_id in semcor_indices:
    sent = tagged_sents[sentence_id]

    # Extract the senses once per sentence. This used to be repeated once for every
    # chunk of the sentence, which redid identical set insertions each time.
    for lemma, sense, pos in get_senses_in_tagged_sentence(sent):
        # keep only tokens tagged 'NN'
        if pos != 'NN':
            continue

        # senses are keyed by their string form; the first sighting creates the set
        sentence_ids = sense_index[lemma].setdefault(str(sense), set())
        # once a sense has enough sentences, further occurrences are ignored
        if len(sentence_ids) < max_sentences_per_sense:
            sentence_ids.add(sentence_id)
        
#     # we need to make sure we are collecting only those tokens which have semcor senses, or we make note of which ones do
    
#         # if this is our first time seeing this word, add it to the index and put the sentence id in the entry
#         if word not in word_index:
#             word = word.lower()
#             word_index[word] = {sentence_id}
#         # otherwise, add the sentence id to the entry for the word
#         else:
#             word_index[word].add(sentence_id)
            

In [None]:
"""
let's take a look at it
"""
import pprint
#pprint.pprint(sense_index, width=40)

In [None]:
"""
this is a little nonsense to figure out how to use nltk lemma types - - not needed for script
"""

# Two lemma objects looked up under different synsets but with the same lemma name 'return'.
render1 = wn.lemma('render.v.07.return')
render2 = wn.lemma('return.v.01.return')

# """
# Important point about NLTK WordNet lemmas: their representation is confusing, so be careful.
# I think equality / inequality are implemented in unexpected ways, because you get cases
# where they don't act like their display name.
# """

# A dict keyed by the Lemma objects themselves ...
dictz = {render1: "foo", render2: "bar"}
print(dictz)

# ... versus a dict keyed by their str() forms, printed for comparison.
dixt = {str(render1): "foo"}
dixt[str(render2)] = "bar"
print(dixt)


# (exact repeat of the three statements above)
dixt = {str(render1): "foo"}
dixt[str(render2)] = "bar"
print(dixt)

#re.findall(r"\.(.*?)\.", 'render.v.07.return')

In [None]:
"""
Now that we have our word index, we want to construct the evaluation dataset

for each word in the index, we want

We iterate through the words in the dictionary.
we shuffle these indices and access in random order. 
We go through the shuffled indices,
    and we check if we have collected < 50 of this sense.
    if not, we collect this token for the evaluation dataset

collection means:
    we construct a row of data like
        word lemma
        word sense
        token sentence
        
        
at the end we save the data in a csv file called 'semcor_wu_palmer_eval_data.csv'
"""

def get_sense_in_tagged_sentence(word, tagged_sentence):
    """
    Find the WordNet sense of `word` in a sense-tagged sentence.

    Scans the chunks in order and returns the label of the first chunk whose
    lowercased, space-joined text equals `word` and whose label is a `Lemma`.
    Chunks that match the word but carry no wordnet sense (function words
    don't) are passed over. Returns None if no chunk qualifies.
    """
    for chunk in tagged_sentence:
        text = ' '.join(chunk.leaves()).lower()
        if text != word:
            continue

        label = chunk.label()
        if isinstance(label, Lemma):
            return label

    # reached the end without finding a sense-tagged chunk for the word
    return None


def collect_tokens(indices, sents, tagged_sents, max_tokens=25):
    """
    Sample sentences for one sense and return them as plain strings.

    :indices: iterable of sentence ids (positions in `sents`)
    :sents: indexable collection of tokenized sentences (each a sequence of str)
    :tagged_sents: not used; kept so that existing callers keep working
    :max_tokens: maximum number of sentences to return. Defaults to 25, the
        limit that used to be hard-coded; None returns all of them.
    returns a list of space-joined sentence strings, in random order
    """
    # visit the sentences in random order, so the cap keeps a random subsample
    shuffled = list(indices)
    random.shuffle(shuffled)
    return [' '.join(sents[index]) for index in shuffled[:max_tokens]]


def collect_tokens_for_all_words_to_file(path, sense_path, sense_index, sents, tagged_sents):
    """
    Write one CSV row (lemma, sense, sentence) to `path` for every sentence
    that collect_tokens samples for each sense of each lemma in `sense_index`.

    :path: CSV file to (over)write with the sampled rows
    :sense_path: second file, opened for writing alongside `path`; nothing is
        written to it, so it ends up empty
    :sense_index: mapping lemma -> {sense: sentence ids}
    :sents, tagged_sents: passed through to collect_tokens

    Also prints every lemma and sense as it is processed.
    """
    with open(path, 'w', newline='') as csvfile, open(sense_path, 'w', newline='') as sensefile:
        writer = csv.writer(csvfile)

        for lemma, senses in sense_index.items():
            for sense, indices in senses.items():
                print(lemma)
                print(sense)

                sampled = collect_tokens(indices, sents, tagged_sents)
                writer.writerows((lemma, sense, token) for token in sampled)



In [None]:


# Write the sampled sentences for every (lemma, sense) in sense_index to the first CSV.
# The second path is opened for writing by the function but nothing is written to it.
collect_tokens_for_all_words_to_file('../data/semcor_wu_palmer_eval_datamcrae.csv', '../data/semcor_sense_counts_mcrae.csv', sense_index, sents, tagged_sents)

In [None]:
"""
models and save paths

 (mislabeled as buchanan.mc_rae but really mc_rae)
"""
# models = [
#     'trained_models/model.plsr.buchanan.mc_rae_real.allbuthomoyms.5k.300components.500max_iters',
#     'trained_models/model.plsr.buchanan.mc_rae_real.allbuthomoyms.1k.300components.500max_iters',
#     #'trained_models/model.plsr.buchanan.mc_rae_real.allbuthomoyms.glove.300components.300max_iters',
#     'trained_models/model.ffnn.buchanan.mc_rae_real.allbuthomoyms.5k.50epochs.0.5dropout.lr1e-4.hsize300',
#     'trained_models/model.ffnn.buchanan.mc_rae_real.allbuthomoyms.1k.50epochs.0.5dropout.lr1e-4.hsize300',
#     #'trained_models/model.ffnn.buchanan.mc_rae_real.allbuthomoyms.glove.50epochs.0.5dropout.lr1e-4.hsize300',
#     'trained_models/model.modabs.buchanan.mc_rae_real.allbuthomoyms.5k',
#     'trained_models/model.modabs.buchanan.mc_rae_real.allbuthomoyms.1k',
#     #'trained_models/model.modabs.buchanan.mc_rae_real.allbuthomoyms.glove'
# ]

# Paths of the saved models to evaluate. Each path is handed to torch.load, and its
# last path component is reused in the names of the result files. Commented-out
# entries are skipped.
models = [
    'trained_models/model.plsr.mc_rae_real.1k.50components.500max_iters',
    'trained_models/model.plsr.mc_rae_real.5k.100components.500max_iters',
    #'trained_models/model.plsr.mc_rae_real.glove.100components.300max_iters',
    'trained_models/model.ffnn.mc_rae_real.1k.50epochs.0.5dropout.lr1e-4.hsize300',
    'trained_models/model.ffnn.mc_rae_real.5k.50epochs.0.5dropout.lr1e-4.hsize300',
    #'trained_models/model.ffnn.mc_rae_real.glove.50epochs.0.5dropout.lr1e-4.hsize300',
    'trained_models/model.modabs.mc_rae_real.1k',
    'trained_models/model.modabs.mc_rae_real.5k',
    #'trained_models/model.modabs.mc_rae_real.glove'
    ]

In [None]:
def lemma_from_string(lemma_string):
    """
    Turn the string form of a WordNet lemma, e.g. "Lemma('dog.n.01.dog')",
    back into a Lemma object by looking its name up with wn.lemma.
    """
    # lemma_name_from_string does the same (' ... ') extraction, which copes
    # with names that contain a quote themselves, like o'clock
    return wn.lemma(lemma_name_from_string(lemma_string))

def lemma_name_from_string(lemma_string):
    """
    Extract the lemma name from the string form of a WordNet lemma, e.g.
    "Lemma('dog.n.01.dog')" -> "dog.n.01.dog".

    The pattern anchors on the surrounding (' and ') rather than on bare
    quotes, so names that contain a quote themselves (o'clock) come back
    whole. Only the first match is returned; IndexError if there is none.
    """
    matches = re.findall(r"\('(.*?)'\)", lemma_string)
    return matches[0]

In [None]:
"""
Now, we have our dataset that we want to analyze. We just need to do:

for each model we want to evaluate, run the following script:

open the file of data

read it in as a dataframe

for each of the unique words in that dataset

    we calculate pairwise distances between each otoken and every otehr token
    and construct a similarities dataset. 
    
    then we run correlations for that word???
    and store into a file
"""


def make_predictions(df, model, bert):
    """
    Predict one vector per row of `df`.

    :df: dataframe with columns word_form and context
    :model: object exposing predict_in_context(word_form, context, bert)
    :bert: passed through unchanged to the model
    returns a list holding the model's output for each row, in row order
    """
    return [
        model.predict_in_context(token.word_form, token.context, bert)
        for _, token in df.iterrows()
    ]

def get_pairwise_wu_palmer_data(model, df, bert, outfile):
    """
    For every word in `df`, compare all pairs of its tokens and append the
    results to the CSV file `outfile`.

    Each unordered pair of tokens of the same word is counted once and yields
      - cos_sim: 1 - cosine distance between the vectors that `model` predicts
        for the two tokens in context
      - wup_sim: Wu-Palmer similarity between the synsets of the two tokens'
        WordNet lemmas
    Pairs for which wup_similarity does not return a float are discarded.

    :model: object exposing predict_in_context (used through make_predictions)
    :df: dataframe with columns word_form, context and wn_lemma, where
        wn_lemma is the string form of a WordNet lemma
    :bert: passed through to the model
    :outfile: path of the CSV to append to. Rows are written WITHOUT a header
        line, in the order: row index, lemma, token_sense_1, token_sense_2,
        cos_sim, wup_sim, n_senses
    returns None
    """
    # NOTE(review): `cosine` is not imported in any visible cell of this notebook; it may
    # have arrived through `from bert import *`. Importing it here keeps the function
    # self-contained. scipy's cosine is a distance, hence the `1 -` below.
    from scipy.spatial.distance import cosine

    columns = ["lemma", "token_sense_1", "token_sense_2", "cos_sim", "wup_sim"]

    for word_number, word in enumerate(df.word_form.unique()):
        if word_number % 500 == 0:
            print("processed %s words" % word_number)

        # all the tokens of this word
        word_data = df[df.word_form == word]
        n_senses = len(word_data['wn_lemma'].unique())

        predictions = make_predictions(word_data, model, bert)

        # Parse each token's lemma once instead of once per pair. A word with a single
        # token has no pairs, so its lemma string is never parsed.
        lemmas = []
        if len(word_data) > 1:
            lemmas = [lemma_from_string(lemma_string) for lemma_string in word_data['wn_lemma']]

        # every pair (i, j) with i < j, in the order i ascending then j ascending
        vals_for_this_word = []
        tokens = zip(predictions, lemmas)
        for (vector_1, lemma_1), (vector_2, lemma_2) in itertools.combinations(tokens, 2):
            wup_sim = lemma_1.synset().wup_similarity(lemma_2.synset())

            # if we can't compute a similarity for these senses / recognize them, discard
            if not isinstance(wup_sim, float):
                continue

            cos_sim = 1 - cosine(vector_1, vector_2)
            vals_for_this_word.append((word, lemma_1, lemma_2, cos_sim, wup_sim))

        token_similarities = pd.DataFrame.from_records(vals_for_this_word, columns=columns)
        token_similarities['n_senses'] = n_senses

        # Append without a header: this is called once per word, so header=True put a
        # header line in front of every word's rows, and the cells that read this file
        # pass names= and would treat those lines as data.
        token_similarities.to_csv(outfile, mode='a', header=False)

    return None
        
        
def run_correlation(sense_similarities):
    """
    Correlate cosine similarity with Wu-Palmer similarity.

    :sense_similarities: dataframe with columns
        cos_sim
        wup_sim
      (None is tolerated)
    returns (pearson, pearson_p, spearman, spearman_p). All four are NaN when
    the input is None, when wup_sim takes exactly one distinct value, or when
    there are fewer than two rows.
    """
    nan = float("nan")
    undefined = (nan, nan, nan, nan)

    # not really sure why None shows up here, it should be impossible
    if sense_similarities is None:
        return undefined

    # a constant y value makes the correlation garbage; skip to avoid warnings
    if len(sense_similarities['wup_sim'].unique()) == 1:
        return undefined

    if len(sense_similarities) <= 1:
        return undefined

    cos_sims = sense_similarities['cos_sim']
    wup_sims = sense_similarities['wup_sim']

    pearson, pearson_p = pearsonr(cos_sims, wup_sims)
    spearman, spearman_p = spearmanr(cos_sims, wup_sims)
    return (pearson, pearson_p, spearman, spearman_p)

def plot_sims():
    """
    Scatter-plot Wu-Palmer similarity (x axis) against cosine similarity (y axis).

    NOTE(review): takes no arguments and reads a global `sense_similarities`,
    and uses `plt`; neither is defined or imported in the visible part of this
    notebook. Confirm where they come from before calling.
    """
    cos_sims = sense_similarities['cos_sim']
    wup_sims = sense_similarities['wup_sim']
    plt.scatter(wup_sims, cos_sims)
    plt.title("Wordnet similarity of homonymous senses plotted against cosine similarity of predicted vectors of two tokens in semantic feature space")
    plt.xlabel("Wu and Palmer Similarity")
    plt.ylabel("Cosine Similarity")
    plt.show()
        

# Evaluation data: one row per token, read without a header row.
# NOTE(review): collect_tokens_for_all_words_to_file writes its rows as
# (lemma, sense, sentence) and the cell above sends them to
# '../data/semcor_wu_palmer_eval_datamcrae.csv'. This reads a different path and names the
# columns (word_form, context, wn_lemma), i.e. with the sense LAST. Confirm that this file
# really has that column order before relying on wn_lemma.
df = pd.read_csv('semcor_wu_palmer_eval_data.csv', names = ["word_form", "context", "wn_lemma"])



# Write one pairwise-similarity CSV per model.
for save_path in models:
    print("****************************************")
    print("*** Evaluating %s model ***" % save_path)
    print("****************************************")
    # NOTE(review): torch.load unpickles the file -- only load model files you trust.
    model = torch.load(save_path)
    out_path = '../results/semcor_pairwise_data_' + os.path.split(save_path)[1] + '.csv'

    # remove results file if exists (get_pairwise_wu_palmer_data appends to it)
    if os.path.exists(out_path):
        os.remove(out_path)
    get_pairwise_wu_palmer_data(model, df, bert, out_path)


In [None]:
# Concreteness ratings, read as a tab-separated file.
# NOTE(review): absolute path on the author's machine -- adjust before running elsewhere.
brysbaert_filename = "/Users/gabriellachronis/data/Concreteness_ratings_Brysbaert_et_al_BRM.csv"
concreteness_df = pd.read_csv(brysbaert_filename, sep='\t')
# keep only the word and its "Conc.M" column (presumably the mean concreteness rating -- confirm)
concreteness_df= concreteness_df[["Word", "Conc.M"]]
# index by word so the table can be joined onto other data by word
concreteness_df = concreteness_df.set_index("Word")
concreteness_df.head()

In [None]:
"""
Now we need to go in and add abstractness value and 
bin number of senses into polysemy band
"""



import pandas as pd

cols = ["lemma", "token_sense_1", "token_sense_2", "cos_sim", "wup_sim", "n_senses"]
numeric_cols = ["cos_sim", "wup_sim", "n_senses"]

for save_path in models:
    print("****************************************")
    print("*** Saturating pairwise data for model: %s ***" % save_path)
    print("****************************************")
    model_name = os.path.split(save_path)[1]
    infile = '../results/semcor_pairwise_data_' + model_name + '.csv'
    outfile = '../results/saturated_semcor_pairwise_data_' + model_name + '.csv'

    csv_input = pd.read_csv(infile, names=cols)

    # The pairwise file is appended to once per word and can contain header lines mixed in
    # with the data. Drop them (a real token_sense_1 value is never the literal column name)
    # and restore the numeric dtypes those lines would have turned into strings.
    csv_input = csv_input[csv_input['token_sense_1'] != 'token_sense_1'].copy()
    csv_input[numeric_cols] = csv_input[numeric_cols].apply(pd.to_numeric)

    # add polysemy bin
    csv_input['wn_bin'] = pd.cut(csv_input.n_senses,
                        bins = [0, 2.1, 4.1, 6.1, 8.1, 10.1, 20.1, 50.1, 200], labels = False)

    # add POS columns: the part of speech is the first dot-delimited field of the sense string
    csv_input['sense1_pos'] = [re.findall(r"\.(.*?)\.", sense)[0] for sense in csv_input['token_sense_1']]
    csv_input['sense2_pos'] = [re.findall(r"\.(.*?)\.", sense)[0] for sense in csv_input['token_sense_2']]

    # replace the sense strings with the bare lemma names
    # (these assignments used to reference undefined names and raised NameError)
    csv_input['token_sense_1'] = [lemma_name_from_string(sense) for sense in csv_input['token_sense_1']]
    csv_input['token_sense_2'] = [lemma_name_from_string(sense) for sense in csv_input['token_sense_2']]

    # add concreteness
    csv_input = csv_input.join(concreteness_df, how = "left", on = "lemma")

    csv_input['conc_bin'] = pd.cut(csv_input['Conc.M'],
                        bins = [0, 2.3, 4.5, 10], labels = False)

    print(csv_input.head(20))
    csv_input.to_csv(outfile, index=False)
    
    #raise Exception("dewfieow")

In [None]:
# just a little test
# NOTE(review): the preceding cell is meant to overwrite token_sense_1 with the bare lemma
# name (the output of lemma_name_from_string), which no longer has the (' ') wrapper that
# lemma_from_string searches for -- check that this call still finds a match.
lemma_from_string(csv_input.iloc[0].token_sense_1)

In [None]:
"""
and now we make another dataset for each one with the correlations.

"""

def run_correlation(sense_similarities):
    """
    Pearson and Spearman correlation between the cos_sim and wup_sim columns.

    :sense_similarities: dataframe with columns
        cos_sim
        wup_sim
      or None
    returns the tuple (pearson, pearson_p, spearman, spearman_p); four NaNs
    are returned instead when there is nothing meaningful to correlate.
    """
    no_result = (float("nan"), float("nan"), float("nan"), float("nan"))

    # Checked left to right:
    #   - None should be impossible, but has been seen
    #   - a constant wup_sim makes the correlation garbage (and triggers warnings)
    #   - at least two rows are needed
    usable = (
        sense_similarities is not None
        and len(sense_similarities['wup_sim'].unique()) != 1
        and len(sense_similarities) > 1
    )
    if not usable:
        return no_result

    x = sense_similarities['cos_sim']
    y = sense_similarities['wup_sim']
    pearson, pearson_p = pearsonr(x, y)
    spearman, spearman_p = spearmanr(x, y)
    return (pearson, pearson_p, spearman, spearman_p)

def plot_sims():
    """
    Scatter-plot Wu-Palmer similarity (x axis) against cosine similarity (y axis).

    NOTE(review): takes no arguments and reads a global `sense_similarities`,
    and uses `plt`; neither is defined or imported in the visible part of this
    notebook. Confirm where they come from before calling.
    """
    cos_sims = sense_similarities['cos_sim']
    wup_sims = sense_similarities['wup_sim']
    plt.scatter(wup_sims, cos_sims)
    plt.title("Wordnet similarity of homonymous senses plotted against cosine similarity of predicted vectors of two tokens in semantic feature space")
    plt.xlabel("Wu and Palmer Similarity")
    plt.ylabel("Cosine Similarity")
    plt.show()
        
# i think this is obsolete 11/16/21
# df = pd.read_csv('semcor_wu_palmer_eval_data.csv', names = ["word_form", "context", "wn_lemma"])



# for save_path in models:
#     print("****************************************")
#     print("*** Evaluating %s model ***" % save_path)
#     print("****************************************")
#     model = torch.load(save_path)
#     run_stats = run_wu_palmer_analysis(model, df, bert)
#     #run_stats = run_correlation(similarities)
#     out_path = 'results/semcor_analysis_' + os.path.split(save_path)[1] + '.csv'
#     run_stats.to_csv(out_path)
#     print(run_stats)

In [204]:
for save_path in models:

    print("****************************************")
    print("*** Running correlation on  %s model ***" % save_path)
    print("****************************************")
    model_name = os.path.split(save_path)[1]
    infile = '../results/semcor_pairwise_data_' + model_name + '.csv'
    outfile = '../results/semcor_analysis_' + model_name + '.csv'

    # Columns are assigned by position; names beyond the columns present in the file
    # come back as all-NaN columns.
    names = ['index', 'lemma', 'token_sense_1', 'token_sense_2',  'cos_sim',  'wup_sim',  'n_senses', 'wn_bin', 'sense1_pos', 'sense2_pos',  'Conc.M',  'conc_bin']
    df = pd.read_csv(infile, names=names)

    # The pairwise file is appended to once per word and can contain header lines mixed in
    # with the data. Drop them (a real token_sense_1 value is never the literal column name)
    # and make sure the two similarity columns are numeric before correlating.
    df = df[df['token_sense_1'] != 'token_sense_1'].copy()
    df[['cos_sim', 'wup_sim']] = df[['cos_sim', 'wup_sim']].apply(pd.to_numeric)

    out_data = []
    for word in df.lemma.unique():
        word_data = df[df.lemma == word]

        # Count the senses on BOTH sides of the pairs: a sense carried only by the
        # second token of its pairs never shows up in token_sense_1.
        n_senses = len(set(word_data.token_sense_1) | set(word_data.token_sense_2))

        pearson, pearson_p, spearman, spearman_p = run_correlation(word_data)
        out_data.append((word, len(word_data), n_senses, pearson, pearson_p, spearman, spearman_p))

    # one row per word: number of pairs, number of senses, and the two correlations
    out_df = pd.DataFrame.from_records(out_data, columns = ['word', 'n', 'n_senses', 'pearson', 'pearson_p', 'spearman', 'spearman_p'] )
    out_df.to_csv(outfile)
    
    
    #for word in 
    #run_correlation()
    #cols = ["lemma", "token_sense_1", "token_sense_2", "cos_sim", "wup_sim", "n_senses"]
    #df = csv_input = pd.read_csv(infile, names=cols)
    
    

****************************************
*** Running correlation on  trained_models/model.plsr.mc_rae_real.1k.50components.500max_iters model ***
****************************************
****************************************
*** Running correlation on  trained_models/model.plsr.mc_rae_real.5k.100components.500max_iters model ***
****************************************
****************************************
*** Running correlation on  trained_models/model.ffnn.mc_rae_real.1k.50epochs.0.5dropout.lr1e-4.hsize300 model ***
****************************************
****************************************
*** Running correlation on  trained_models/model.ffnn.mc_rae_real.5k.50epochs.0.5dropout.lr1e-4.hsize300 model ***
****************************************
****************************************
*** Running correlation on  trained_models/model.modabs.mc_rae_real.1k model ***
****************************************
****************************************
*** Running correlation on 