In [1]:
import ast
import json
import os
import pickle
import re
import string

import nltk
import numpy as np
import pandas as pd
import torch
from nltk.corpus import wordnet as wn

from bert_functions_v2 import analyzeWord, getBert
from createDims import createPolarDimension
from get_data_from_file import create_lookup_from_data_file,create_lookupFiles_out_of_adjectives_list_using_file

In [4]:
# helper functions

def get_name(antonym):
    """Return the first lemma name of the WordNet synset identified by ``antonym``.

    Args:
        antonym: Synset identifier accepted by ``wn.synset`` (e.g. ``'good.a.01'``).

    Returns:
        The first entry of the synset's ``lemma_names()`` list.
    """
    synset = wn.synset(antonym)
    lemma_names = synset.lemma_names()
    return lemma_names[0]

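# Superseded version of get_examples, kept for reference only: it returned every
# WordNet example sentence without checking that the sentence contains the word.
#def get_examples(antonym):
#    examples = wn.synset(antonym).examples()
#    # replace punctuation symbols with spaces
#    examples = [sent.translate(str.maketrans({k: " " for k in string.punctuation})) for sent in examples]
#    # add a space after each sentence
#    return ['{} '.format(sent) for sent in examples]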


def get_examples(antonym):
    """Return the WordNet example sentences of a synset that mention its first lemma.

    Args:
        antonym: Synset identifier accepted by ``wn.synset`` (e.g. ``'good.a.01'``).

    Returns:
        list[str]: The synset's example sentences that contain the first lemma
        name as a whole word (case-insensitive). In each kept sentence every
        character of ``string.punctuation`` is replaced by a space and a single
        trailing space is appended.
    """
    synset = wn.synset(antonym)

    # Escape the lemma so characters such as "." or "+" are matched literally
    # instead of being interpreted as regular-expression syntax.
    lemma = re.escape(str(synset.lemma_names()[0]))
    word_pattern = re.compile(r'\b' + lemma + r'\b', re.I)

    # Build the punctuation -> space table once instead of once per sentence.
    punctuation_to_space = str.maketrans({char: " " for char in string.punctuation})

    # NOTE(review): a lemma name containing "_" (presumably how WordNet spells
    # multi-word lemmas) will not match space-separated example text, so such a
    # synset yields no examples -- confirm whether that case needs handling.
    return [
        # keep only examples that contain the required word; add a space after each sentence
        '{} '.format(sentence.translate(punctuation_to_space))
        for sentence in synset.examples()
        if word_pattern.search(sentence) is not None
    ]

def create_lookup_files(antonyms, lookup_path):
    """Write the WordNet lookup files for a list of antonym pairs.

    Args:
        antonyms: List of pairs of synset identifiers accepted by ``wn.synset``
            (e.g. ``[['bad.a.01', 'good.a.01'], ...]``). Must not contain
            duplicate pairs.
        lookup_path: String prefix prepended to every output file name
            (e.g. ``'all_antonyms/'``). Its directory part is created if missing.

    Returns:
        None. If ``antonyms`` contains duplicate pairs a message is printed and
        nothing is written.

    Files written (all prefixed with ``lookup_path``):
        lookup_synset_dict.txt / .pkl        -- the antonym pairs themselves
        lookup_synset_definition.txt / .pkl  -- WordNet definition per synset
        antonym_wordnet_example_sentences_readable_extended.txt
                                             -- {str(pair): {name: examples}}
        lookup_anto_example_dict.txt / .pkl  -- [[[name, examples], ...], ...]
    """
    if len(np.unique(antonyms, axis=0)) != len(antonyms):
        print("Your antonym list contains duplicates. Please try again!")
        return

    # lookup_path is a plain string prefix, so only its directory part has to exist.
    target_dir = os.path.dirname(lookup_path)
    if target_dir:
        os.makedirs(target_dir, exist_ok=True)

    # get all word sense definitions
    synset_defs = [[wn.synset(anto).definition() for anto in pair] for pair in antonyms]

    # get example sentences from wordnet (looked up once per synset, reused below)
    named_examples = [
        [(get_name(anto), get_examples(anto)) for anto in pair]
        for pair in antonyms
    ]
    examples_readable = {
        str(pair): {name: sentences for name, sentences in entries}
        for pair, entries in zip(antonyms, named_examples)
    }
    examples_lookup = [
        [[name, sentences] for name, sentences in entries]
        for entries in named_examples
    ]

    # save -- every file goes under lookup_path (previously the synset dict was
    # written through the notebook-global `out_path` instead of the parameter)
    with open(lookup_path + 'lookup_synset_dict.txt', 'w') as t:
        json.dump(antonyms, t, indent=4)
    with open(lookup_path + 'lookup_synset_dict.pkl', 'wb') as p:
        pickle.dump(antonyms, p)
    with open(lookup_path + 'lookup_synset_definition.txt', 'w') as t:
        json.dump(synset_defs, t, indent=4)
    with open(lookup_path + 'lookup_synset_definition.pkl', 'wb') as p:
        pickle.dump(synset_defs, p)
    with open(lookup_path + 'antonym_wordnet_example_sentences_readable_extended.txt', 'w') as t:
        json.dump(examples_readable, t, indent=4)
    with open(lookup_path + 'lookup_anto_example_dict.txt', 'w') as t:
        json.dump(examples_lookup, t, indent=4)
    with open(lookup_path + 'lookup_anto_example_dict.pkl', 'wb') as p:
        pickle.dump(examples_lookup, p)
    return


In [5]:
# Antonym pairs that define the polar dimensions. Each entry is a two-element
# list of WordNet synset identifiers ("<lemma>.<pos>.<sense number>"); the pos
# tags used here are a, r and n (adjective, adverb, noun in NLTK's naming).
# Every identifier is resolved with wn.synset() by create_lookup_files, which
# also rejects the list if a pair appears twice.
dims=[['nasty.a.01', 'nice.a.01'],
 ['better.a.01', 'worse.a.01'],
 ['better.a.02', 'worse.a.02'],
 ['bad.a.01', 'good.a.01'],
 ['ill.a.01', 'well.a.01'],
 ['ill.r.01', 'well.r.01'],
 ['ill.r.02', 'well.r.06'],
 ['badly.r.07', 'well.r.10'],
 ['badly.r.08', 'well.r.11'],
 ['badly.r.10', 'well.r.13'],
 ['bloodless.a.02', 'bloody.a.01'],
 ['brave.n.02', 'timid.n.01'],
 ['brave.a.01', 'cowardly.a.01'],
 ['busy.a.01', 'idle.a.01'],
 ['cautious.a.01', 'incautious.a.01'],
 ['clean.a.01', 'dirty.a.01'],
 ['clean.a.07', 'dirty.a.04'],
 ['clean.a.08', 'dirty.a.02'],
 ['fairly.r.03', 'unfairly.r.01'],
 ['clear.a.11', 'cloudy.a.02'],
 ['crowded.a.01', 'uncrowded.a.01'],
 ['dead.n.01', 'living.n.02'],
 ['alive.a.01', 'dead.a.01'],
 ['dead.a.02', 'live.a.02'],
 ['different.a.01', 'same.a.02'],
 ['like.a.01', 'unlike.a.01'],
 ['distinct.a.02', 'indistinct.a.01'],
 ['eager.a.01', 'uneager.a.01'],
 ['discouraging.a.01', 'encouraging.a.01'],
 ['cheap.a.01', 'expensive.a.01'],
 ['faithful.a.01', 'unfaithful.a.01'],
 ['faithful.a.03', 'unfaithful.a.02'],
 ['coarse.a.01', 'fine.a.05'],
 ['frail.a.01', 'robust.a.01'],
 ['glorious.a.01', 'inglorious.a.02'],
 ['healthy.a.01', 'unhealthy.a.01'],
 ['reasonably.r.01', 'unreasonably.r.02'],
 ['lucky.a.02', 'unlucky.a.01'],
 ['imperfect.a.01', 'perfect.a.01'],
 ['real.a.01', 'unreal.a.01'],
 ['real.a.02', 'unreal.a.02'],
 ['nominal.a.04', 'real.a.06'],
 ['insubstantial.a.01', 'substantial.a.03'],
 ['poor.a.02', 'rich.a.01'],
 ['poor.a.04', 'rich.a.02'],
 ['lean.a.02', 'rich.a.07'],
 ['poor.a.03', 'rich.a.08'],
 ['glazed.a.03', 'unglazed.a.02'],
 ['intelligent.a.01', 'unintelligent.a.01'],
 ['tender.a.01', 'tough.a.01'],
 ['tender.a.06', 'tough.a.03'],
 ['grateful.a.01', 'ungrateful.a.01'],
 ['rested.a.01', 'tired.a.01'],
 ['beautiful.a.01', 'ugly.a.01'],
 ['right.n.07', 'wrong.n.01'],
 ['correct.a.01', 'incorrect.a.01'],
 ['right.a.04', 'wrong.a.02'],
 ['right.a.05', 'wrong.a.05'],
 ['correctly.r.01', 'incorrectly.r.02'],
 ['attractive.a.01', 'unattractive.a.01'],
 ['attractive.a.03', 'repulsive.a.02'],
 ['bad.n.01', 'good.n.03'],
 ['regretful.a.01', 'unregretful.a.01'],
 ['breakable.a.01', 'unbreakable.a.01'],
 ['calm.a.02', 'stormy.a.01'],
 ['clear.a.01', 'unclear.a.02'],
 ['clear.a.04', 'opaque.a.01'],

 ['comfortable.a.01', 'uncomfortable.a.02'],
 ['comfortable.a.02', 'uncomfortable.a.01'],
 ['dangerous.a.01', 'safe.a.01'],
 ['defeated.a.01', 'undefeated.a.01'],
 ['difficult.a.01', 'easy.a.01'],

 ['quickly.r.01', 'slowly.r.01'],
 ['energetic.a.01', 'lethargic.a.01'],
 ['evil.n.03', 'good.n.02'],
 ['foolish.a.01', 'wise.a.01'],
 # NOTE(review): hungry/thirsty do not look like opposites of each other -- confirm this pair is intended.
 ['hungry.a.01', 'thirsty.a.02'],
 ['important.a.01', 'unimportant.a.01'],
 ['guilty.a.01', 'innocent.a.01'],
 ['joyless.a.01', 'joyous.a.01'],
 ['heavy.a.01', 'light.a.01'],
 ['dark.a.02', 'light.a.02'],
 ['heavy.a.03', 'light.a.03'],
 ['heavy.a.02', 'light.a.04'],
 ['heavy.a.04', 'light.a.05'],
 ['dark.a.01', 'light.a.06'],
 ['heavy.a.08', 'light.a.13'],
 ['heavy.a.09', 'light.a.14'],
 ['long.a.01', 'short.a.01'],
 ['long.a.02', 'short.a.02'],
 ['long.a.05', 'short.a.06'],

 ['disobedient.a.01', 'obedient.a.01'],
 ['fancy.a.01', 'plain.a.02'],
 ['confident.a.01', 'diffident.a.02'],
 ['sparkling.a.02', 'still.a.05'],
 ['tame.a.02', 'wild.a.01'],
 ['tame.a.03', 'wild.a.02'],
 ['lax.a.03', 'tense.a.03'],
 ['thoughtful.a.02', 'thoughtless.a.01'],
 ['heedful.a.01', 'heedless.a.01'],
 ['unusual.a.01', 'usual.a.01'],
 ['familiar.a.02', 'strange.a.01'],
 ['black.a.01', 'white.a.01'],
 ['black.a.02', 'white.a.02'],
 ['bright.a.01', 'dull.a.02'],
 ['dimmed.a.01', 'undimmed.a.01'],
 ['careful.a.01', 'careless.a.01'],
 ['cheerful.a.01', 'depressing.a.01'],
 ['colorful.a.02', 'colorless.a.01'],
 ['colored.a.01', 'uncolored.a.01'],
 ['concerned.a.01', 'unconcerned.a.01'],
 ['cooperative.a.02', 'uncooperative.a.01'],
 ['curious.a.02', 'incurious.a.01'],
 ['compliant.a.01', 'defiant.a.01'],
 ['dull.a.01', 'lively.a.01'],
 ['dull.a.06', 'sharp.a.08'],
 ['dull.a.09', 'sharp.a.09'],
 ['dejected.a.01', 'elated.a.01'],
 ['enthusiastic.a.01', 'unenthusiastic.a.01'],
 ['fair.a.01', 'unfair.a.01'],
 ['friendly.a.01', 'unfriendly.a.02'],
 ['friendly.a.03', 'unfriendly.a.01'],
 ['friendly.a.04', 'hostile.a.02'],
 ['happy.a.01', 'unhappy.a.01'],
 ['impossible.a.01', 'possible.a.01'],
 ['kind.a.01', 'unkind.a.01'],
 ['open.a.01', 'shut.a.01'],
 ['closed.a.01', 'open.a.02'],
 ['closed.a.04', 'open.a.05'],
 ['covert.a.01', 'overt.a.01'],
 ['pleasant.a.01', 'unpleasant.a.01'],
 ['humble.a.02', 'proud.a.01'],
 ['foreign.a.02', 'native.a.01'],
 ['troubled.a.01', 'untroubled.a.01'],
 ['interested.a.01', 'uninterested.a.01']]

In [6]:
# Output prefix for every lookup file. The trailing slash matters: file names
# are appended to this string directly, without a path join.
out_path = 'all_antonyms/'

# Write the synset, definition and example-sentence lookup files for all pairs.
create_lookup_files(antonyms=dims, lookup_path=out_path)

In [7]:
# Load the tokenizer and the model through the project helper getBert().
# The log printed below shows the bert-base-uncased checkpoint being loaded
# into a BertModel; the "weights ... were not used" warning is part of that log.
tokenizer, model = getBert()
print("Model imported")

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Model imported


In [None]:
# Run the project helper createPolarDimension on the readable example-sentence
# file that create_lookup_files wrote under the same out_path prefix.
# NOTE(review): presumably this embeds the example sentences with the model and
# stores the resulting dimensions under out_path -- confirm in createDims.
createPolarDimension(
    model,
    tokenizer,
    out_path=out_path,
    antonym_path=out_path + "antonym_wordnet_example_sentences_readable_extended.txt",
)
print("dimensions created")

Start forwarding the Polar opposites ...
