## Files used in bertFuncs.py

In [None]:
from bertFuncs import analyzeWord, getBert
from createDims import createPolarDimension
import nltk
from nltk.corpus import wordnet as wn
import numpy as np
import pickle
import json
import string
import ast
import torch

## Creating the required lookup files

The lookup files are needed to set up the POLAR dimensions and to match these dimensions to word sense definitions and example sentences when analyzing the results.

The function ``create_lookup_files`` takes a list of lists as input. Each inner list contains a polar sense pair, where each word sense must be in WordNet-readable format (e.g. ``cold.a.01``) in order to automatically retrieve definitions and example sentences. All lookup files will be stored in the folder ``lookup_path``.

In [None]:
# helper functions

def get_name(antonym):
    """Return the first lemma name of the WordNet synset named ``antonym``.

    ``antonym`` is a WordNet synset identifier such as ``cold.a.01``.
    """
    lemma_names = wn.synset(antonym).lemma_names()
    return lemma_names[0]

def get_examples(antonym):
    """Return the WordNet example sentences of the synset named ``antonym``.

    Every punctuation character in a sentence is replaced by a space, and a
    single space is appended to the end of each sentence.
    """
    # translation table mapping each punctuation character to a space
    punct_to_space = str.maketrans(dict.fromkeys(string.punctuation, " "))

    cleaned = []
    for sentence in wn.synset(antonym).examples():
        # strip punctuation, then add a space after the sentence
        cleaned.append(sentence.translate(punct_to_space) + " ")
    return cleaned

In [None]:
def create_lookup_files(antonyms, lookup_path):
    """Create all lookup files for the given polar sense pairs.

    Args:
        antonyms: List of lists; each inner list is a pair of WordNet synset
            identifiers (e.g. ``['cold.a.01', 'hot.a.01']``).
        lookup_path: Prefix that every output file name is appended to by
            plain string concatenation, so a folder must end with a path
            separator (e.g. ``'antonyms/example/'``).

    Returns:
        None. If ``antonyms`` contains duplicate pairs, a message is printed
        and no file is written.

    The following files are written below ``lookup_path``:
        lookup_synset_dict.txt / .pkl          -- the antonym pairs themselves
        lookup_synset_definition.txt / .pkl    -- WordNet definitions per pair
        antonym_wordnet_example_sentences_readable_extended.txt
                                               -- examples keyed by pair and lemma name
        lookup_anto_example_dict.txt / .pkl    -- examples as nested lists
    """
    # identical rows (same pair in the same order) count as duplicates
    if len(np.unique(antonyms, axis=0)) != len(antonyms):
        print("Your antonym list contains duplicates. Please try again!")
        return

    # get all word sense definitions
    synset_defs = [[wn.synset(anto).definition() for anto in pair] for pair in antonyms]
    # get example sentences from wordnet
    examples_readable = {str(pair): {get_name(anto): get_examples(anto) for anto in pair} for pair in antonyms}
    examples_lookup = [[[get_name(anto), get_examples(anto)] for anto in pair] for pair in antonyms]

    # save -- every file goes to ``lookup_path``; the synset dict files used to
    # be written to the notebook-global ``out_path`` by mistake
    with open(lookup_path + 'lookup_synset_dict.txt', 'w') as t:
        t.write(json.dumps(antonyms, indent=4))
    with open(lookup_path + 'lookup_synset_dict.pkl', 'wb') as p:
        pickle.dump(antonyms, p)
    with open(lookup_path + 'lookup_synset_definition.txt', 'w') as t:
        t.write(json.dumps(synset_defs, indent=4))
    with open(lookup_path + 'lookup_synset_definition.pkl', 'wb') as p:
        pickle.dump(synset_defs, p)
    with open(lookup_path + 'antonym_wordnet_example_sentences_readable_extended.txt', 'w') as t:
        t.write(json.dumps(examples_readable, indent=4))
    with open(lookup_path + 'lookup_anto_example_dict.txt', 'w') as t:
        t.write(json.dumps(examples_lookup, indent=4))
    with open(lookup_path + 'lookup_anto_example_dict.pkl', 'wb') as p:
        pickle.dump(examples_lookup, p)
    return


## Example usage

Polar dimensions should be __given__ as a nested list of antonym pairs in WordNet representation (sense-annotated).

_Example:_   
`
[
    ['a_posteriori.a.01', 'a_priori.a.01'],
    ['abaxial.a.01', 'adaxial.a.01'],
    ['abridge.v.01', 'elaborate.v.01'],
    ...
]`

In [None]:
# Folder in which all lookup files will be stored. File paths are built by
# plain string concatenation (out_path + file name), so keep the trailing slash.
out_path = 'antonyms/example/'

In [None]:
# define 4 exemplary POLAR dimensions, one pair of WordNet senses per dimension
dims = [
    ['cold.a.01', 'hot.a.01'],
    ['bad.a.01', 'good.a.01'],
    ['intelligent.a.01', 'unintelligent.a.01'],
    ['capable.a.01', 'incapable.a.01'],
]

# write every lookup file for these dimensions into out_path
create_lookup_files(dims, out_path)

In [None]:
# get the embedding model and its tokenizer (getBert comes from bertFuncs.py)
tokenizer, model = getBert()

Create the POLAR matrix (for base change or projection) from a given set of antonyms. The antonyms and their example sentences are forwarded to an embedding model (here: BERT) from which the required embeddings and difference vectors are created. ``antonym_path`` specifies where the readable example sentence lookup file is currently stored. ``out_path`` specifies where the POLAR matrix should be stored.

The corresponding function can be found in ``createDims.py`` which is not part of the official SensePOLAR repo.

In [None]:
# Create the base change matrix from the readable example-sentence lookup
# file stored under out_path (this might take some time).
createPolarDimension(
    model,
    tokenizer,
    out_path=out_path,
    antonym_path=out_path + "antonym_wordnet_example_sentences_readable_extended.txt",
)

In [None]:
# base change does not work well with only a few dimensions
# -> compare with the projection variant (method="projection")
antonym_path = out_path + "polar_dimensions.pkl"
word = "school"
context = "school teaches you a lot of smart things"
analyzeWord(
    word,
    context,
    model=model,
    tokenizer=tokenizer,
    antonym_path=antonym_path,
    lookup_path=out_path,
    numberPolar=4,
    # method="projection",
)

In [None]:
# analyze "fire" in its context, this time with the projection method
antonym_path = out_path + "polar_dimensions.pkl"
word = "fire"
context = "the fire is burning"

analyzeWord(
    word,
    context,
    model=model,
    tokenizer=tokenizer,
    antonym_path=antonym_path,
    lookup_path=out_path,
    numberPolar=4,
    method="projection",
)