In [1]:
import time, gensim, logging, os,sys, json
import numpy as np
import pandas as pd
from scipy.spatial import distance
from util import evaluate_word_analogies
# Send log records to stderr at DEBUG level so that the per-question
# evaluation messages (skipped lines, wrong predictions) appear in the cell outputs.
logging.basicConfig(stream=sys.stderr, level=logging.DEBUG)

First of all, we define some useful functions. 

In [2]:
def rownorm(matrix):
    '''Calculate the row norms of a matrix.

    Parameters
    ----------
    matrix : array-like with at least two dimensions
        The L2 norm is taken along axis 1, i.e. one norm per row of a 2-D matrix.

    Returns
    -------
    numpy.ndarray
        The L2 norms; a 2-D input of shape (n, d) gives a result of shape (n,).
    '''
    # A single vectorised reduction instead of one Python-level call per row.
    # Unlike np.apply_along_axis, this also accepts a matrix with zero rows.
    return np.linalg.norm(matrix, axis=1)

def gen_pos_neg(text):
    '''Generate positive and negative words given a text containing a Miller test.

    The text must consist of exactly four space-separated tokens "a b c d"
    (the analogy a : b :: c : d) with exactly one of them being the blank
    marker '_'. The three known words are split so that the two words on the
    complete side of "a + d = b + c" are the positives and the known word
    that shares a side with the blank is the negative.

    Parameters
    ----------
    text : str
        The question text, e.g. '_ speech coordinated movement'.

    Returns
    -------
    tuple or float
        (positives, negatives), where positives is a list of two words and
        negatives is a list of one word. np.nan is returned when the text
        cannot be parsed: it is not a string, it does not have exactly four
        tokens (e.g. it contains a phrase), or it does not contain exactly
        one blank.
    '''
    try:
        a, b, c, d = text.split(' ')
    except (AttributeError, TypeError, ValueError):
        # AttributeError/TypeError: not a plain string (e.g. a NaN cell);
        # ValueError: the text does not split into exactly four tokens.
        return np.nan
    # No blank means there is nothing to solve; several blanks are ambiguous.
    if [a, b, c, d].count('_') != 1:
        return np.nan
    if '_' in (a, d):
        # Blank is on the outer side (a + d), so b and c form the complete side.
        positives = [b, c]
        negatives = [d] if a == '_' else [a]
    else:
        # Blank is on the inner side (b + c), so a and d form the complete side.
        positives = [a, d]
        negatives = [c] if b == '_' else [b]
    return positives, negatives

def cosine_distance(positives, negatives, answer):
    '''Calculate the cosine distance given positive and negative words and a potential answer.

    The candidate answer is placed on the negative side: the sum of the
    unit-normalised vectors of `positives` is compared with the sum of the
    unit-normalised vectors of `negatives + [answer]`. The smaller the
    distance, the better the answer completes the analogy.

    Reads the notebook-level global `model` (the loaded Word2Vec model).

    Parameters
    ----------
    positives : list of str
        Words on the complete side of the analogy.
    negatives : list of str
        Known word(s) on the side that contains the blank.
    answer : str
        Candidate word for the blank.

    Returns
    -------
    float
        The cosine distance, or np.nan when any word is missing from the
        model vocabulary or `positives` is empty.
    '''
    wv = model.wv
    # vectors_norm is not saved with the model, so it is None on a freshly
    # loaded model until init_sims() runs. Without this guard every call
    # returned NaN unless an earlier cell (e.g. most_similar) had triggered
    # the normalisation as a side effect.
    if getattr(wv, 'vectors_norm', None) is None:
        wv.init_sims()
    try:
        pos = [wv.vocab[word].index for word in positives]
        neg = [wv.vocab[word].index for word in list(negatives) + [answer]]
    except (KeyError, TypeError):
        # KeyError: out-of-vocabulary word (including multi-word phrases);
        # TypeError: a value that is not a usable word (non-iterable/unhashable).
        return np.nan
    if not pos:
        return np.nan
    # Sum every vector on each side; for the two-positive / one-negative
    # questions used in this notebook this equals the pairwise sums.
    pos_vec = wv.vectors_norm[pos].sum(axis=0)
    neg_vec = wv.vectors_norm[neg].sum(axis=0)
    return distance.cosine(pos_vec, neg_vec)

def best_answer(positives, negatives, answers):
    '''Give the best answer given positive and negative words and several potential answers.

    Parameters
    ----------
    positives : list of str
        Words on the complete side of the analogy.
    negatives : list of str
        Known word(s) on the side that contains the blank.
    answers : list of str
        Candidate words for the blank.

    Returns
    -------
    int or float
        The position in `answers` of the candidate with the smallest cosine
        distance. np.nan is returned when `answers` is empty or when the
        distance of any candidate could not be computed.
    '''
    cos_dists = [cosine_distance(positives, negatives, answer) for answer in answers]
    # np.isnan instead of `np.nan in cos_dists`: the `in` test only recognises
    # NaN by object identity, so a NaN produced by a computation would slip
    # through and np.argmin would then report its position as the best answer.
    if len(cos_dists) == 0 or np.isnan(cos_dists).any():
        return np.nan
    return np.argmin(cos_dists)

Then, we load a word-embedding model. In this case, the model was trained based on Google US-English Ngrams in 2009. 

In [3]:
# `model` is a notebook-level global: cosine_distance() (defined above) reads it directly.
model = gensim.models.Word2Vec.load('/share/evansshare/sjia/2012/eng-us/models/2009.sg')

INFO:gensim.utils:loading Word2Vec object from /share/evansshare/sjia/2012/eng-us/models/2009.sg
DEBUG:smart_open.smart_open_lib:{'uri': '/share/evansshare/sjia/2012/eng-us/models/2009.sg', 'mode': 'rb', 'buffering': -1, 'encoding': None, 'errors': None, 'newline': None, 'closefd': True, 'opener': None, 'ignore_ext': False, 'transport_params': None}
INFO:gensim.utils:loading wv recursively from /share/evansshare/sjia/2012/eng-us/models/2009.sg.wv.* with mmap=None
INFO:gensim.utils:loading vectors from /share/evansshare/sjia/2012/eng-us/models/2009.sg.wv.vectors.npy with mmap=None
INFO:gensim.utils:setting ignored attribute vectors_norm to None
INFO:gensim.utils:loading vocabulary recursively from /share/evansshare/sjia/2012/eng-us/models/2009.sg.vocabulary.* with mmap=None
INFO:gensim.utils:loading trainables recursively from /share/evansshare/sjia/2012/eng-us/models/2009.sg.trainables.* with mmap=None
INFO:gensim.utils:loading syn1 from /share/evansshare/sjia/2012/eng-us/models/2009.s

Next, we run a standard analogy test. The test file is constructed from 150 practice Miller analogy questions. Here, we ask: given an analogy pair "A-B ~ C-D," does our word-embedding model give $v_B + v_C - v_A \approx v_D$? We are just using gensim's default test function.

In [4]:
# gensim's built-in evaluation; the result is (accuracy, per-section correct/incorrect lists).
model.wv.evaluate_word_analogies('/share/evansshare/sjia/analogy/MillerAnalogy.txt')

INFO:gensim.models.keyedvectors:Evaluating word analogies for top 300000 words in the model on /share/evansshare/sjia/analogy/MillerAnalogy.txt
DEBUG:smart_open.smart_open_lib:{'uri': '/share/evansshare/sjia/analogy/MillerAnalogy.txt', 'mode': 'rb', 'buffering': -1, 'encoding': None, 'errors': None, 'newline': None, 'closefd': True, 'opener': None, 'ignore_ext': False, 'transport_params': None}
DEBUG:gensim.models.keyedvectors:Skipping line #1 with OOV words: cellini puccini sculpture opera
INFO:gensim.models.keyedvectors:precomputing L2-norms of word weight vectors
DEBUG:gensim.models.keyedvectors:articulate speech coordinated movement: expected MOVEMENT, predicted CONTRACEPTION
DEBUG:gensim.models.keyedvectors:inception conclusion departure arrival: expected ARRIVAL, predicted CONCLUSIONS
DEBUG:gensim.models.keyedvectors:Skipping line #4 with OOV words: scintillating dullness boisterous calm
DEBUG:gensim.models.keyedvectors:elucidate clarity illuminate light: expected LIGHT, predicte

DEBUG:gensim.models.keyedvectors:Skipping line #77 with OOV words: ream paper bale hay
DEBUG:gensim.models.keyedvectors:Skipping line #78 with OOV words: perforation seal laceration skin
DEBUG:gensim.models.keyedvectors:argon element water compound: expected COMPOUND, predicted LIQUID
DEBUG:gensim.models.keyedvectors:probity guile industry laziness: expected LAZINESS, predicted INDUSTRIES
DEBUG:gensim.models.keyedvectors:turnip root potato stem: expected STEM, predicted SYRUP
INFO:gensim.models.keyedvectors:Skipping invalid line #82 in /share/evansshare/sjia/analogy/MillerAnalogy.txt
DEBUG:gensim.models.keyedvectors:Skipping line #83 with OOV words: amalgamate separate join rend
DEBUG:gensim.models.keyedvectors:Skipping line #84 with OOV words: carouse sedate enthuse lukewarm
DEBUG:gensim.models.keyedvectors:Skipping line #85 with OOV words: zealot passion quack deception
DEBUG:gensim.models.keyedvectors:keynes jung economics psychiatry: expected PSYCHIATRY, predicted SOCIOLOGY
DEBUG:g

(0.012987012987012988,
 [{'section': 'miller-practice-test',
   'correct': [('OBOE', 'FLUTE', 'CELLO', 'VIOLIN')],
   'incorrect': [('ARTICULATE', 'SPEECH', 'COORDINATED', 'MOVEMENT'),
    ('INCEPTION', 'CONCLUSION', 'DEPARTURE', 'ARRIVAL'),
    ('ELUCIDATE', 'CLARITY', 'ILLUMINATE', 'LIGHT'),
    ('PENURY', 'MONEY', 'STARVATION', 'SUSTENANCE'),
    ('ARABLE', 'LAND', 'NAVIGABLE', 'WATERWAY'),
    ('FURNACE', 'SLAG', 'FIRE', 'ASHES'),
    ('17', '19', '31', '37'),
    ('PITCH', 'LOUDNESS', 'WAVELENGTH', 'AMPLITUDE'),
    ('ELECTED', 'INAUGURATION', 'CONDEMNED', 'EXECUTION'),
    ('DIVIDEND', 'STOCKHOLDER', 'ROYALTY', 'AUTHOR'),
    ('ARCHIPELAGO', 'ISLAND', 'CONSTELLATION', 'STAR'),
    ('TWAIN', 'CLEMENS', 'ELIOT', 'EVANS'),
    ('AUTHORITARIAN', 'LENIENT', 'MISER', 'CHARITABLE'),
    ('ALLAY', 'SUSPICION', 'CALM', 'FEAR'),
    ('MENDEL', 'NEWTON', 'GENETICS', 'MECHANICS'),
    ('DIRECTORY', 'ADDRESS', 'DICTIONARY', 'SPELLING'),
    ('FOOT', 'BOOT', 'COUGH', 'BOUGH'),
    ('VIRULENT',

It seems that we only got one right, which is "oboe - flute ~ cello - violin." By default, gensim's test function only compares the fourth word with the best answer given the first three words. We can see that "violin" is indeed the best answer in this case.

In [5]:
# flute + cello - oboe: the nearest neighbours are the model's candidates for the fourth word.
model.wv.most_similar(positive = ['flute', 'cello'], negative = ['oboe'])

[('violin', 0.6638264060020447),
 ('clarinet', 0.6103678941726685),
 ('harpsichord', 0.5635097026824951),
 ('softball', 0.5557732582092285),
 ('cricket', 0.554303765296936),
 ('piano', 0.5542600154876709),
 ('bassoon', 0.5472120046615601),
 ('hockey', 0.5445802211761475),
 ('bass', 0.5420377850532532),
 ('sonatas', 0.5394444465637207)]

What if we want to relax the criterion so that a model passes a given test if the correct answer is among the top 5 best answers? gensim's default test function doesn't allow that. Here, I slightly modified gensim's source code and wrote a customized function to accomplish the task. The function is inside util.py. I've already imported it at the beginning of my notebook. By default, the function checks the top 5 best answers.

In [6]:
# Customized variant imported from util.py; topn is left at its default here.
evaluate_word_analogies(model.wv, '/share/evansshare/sjia/analogy/MillerAnalogy.txt')

INFO:root:Evaluating word analogies for top 300000 words in the model on /share/evansshare/sjia/analogy/MillerAnalogy.txt
DEBUG:smart_open.smart_open_lib:{'uri': '/share/evansshare/sjia/analogy/MillerAnalogy.txt', 'mode': 'rb', 'buffering': -1, 'encoding': None, 'errors': None, 'newline': None, 'closefd': True, 'opener': None, 'ignore_ext': False, 'transport_params': None}
DEBUG:root:Skipping line #1 with OOV words: cellini puccini sculpture opera
DEBUG:root:articulate speech coordinated movement: expected MOVEMENT, predicted CONTRACEPTION
DEBUG:root:articulate speech coordinated movement: expected MOVEMENT, predicted LOBBYING
DEBUG:root:articulate speech coordinated movement: expected MOVEMENT, predicted TYPIFIED
DEBUG:root:articulate speech coordinated movement: expected MOVEMENT, predicted KGB
DEBUG:root:articulate speech coordinated movement: expected MOVEMENT, predicted INITIATING
DEBUG:root:inception conclusion departure arrival: expected ARRIVAL, predicted CONCLUSIONS
DEBUG:root

DEBUG:root:Skipping line #31 with OOV words: boast language swagger gait
DEBUG:root:Skipping line #32 with OOV words: belittle disparage deride ridicule
DEBUG:root:virulent innocuous reprehensible praiseworthy: expected PRAISEWORTHY, predicted HYPNOTIZED
DEBUG:root:virulent innocuous reprehensible praiseworthy: expected PRAISEWORTHY, predicted BOTTLENECK
DEBUG:root:virulent innocuous reprehensible praiseworthy: expected PRAISEWORTHY, predicted UNJUSTIFIED
DEBUG:root:virulent innocuous reprehensible praiseworthy: expected PRAISEWORTHY, predicted CHARADE
DEBUG:root:virulent innocuous reprehensible praiseworthy: expected PRAISEWORTHY, predicted BLUFFING
DEBUG:root:Skipping line #34 with OOV words: rococo minimalist ornate unadorned
DEBUG:root:Skipping line #35 with OOV words: desecrate holy despoil beautiful
DEBUG:root:Skipping line #36 with OOV words: virtuoso accomplished philanthropist generous
DEBUG:root:emit mite evil vile: expected VILE, predicted SINFULNESS
DEBUG:root:emit mite evi

DEBUG:root:cottage mansion pond lake: expected LAKE, predicted BEACHED
DEBUG:root:Skipping line #65 with OOV words: hardy faulkner wessex yoknapatawpha
DEBUG:root:retaliate vengeful conciliate flexible: expected FLEXIBLE, predicted DESPISING
DEBUG:root:retaliate vengeful conciliate flexible: expected FLEXIBLE, predicted HAUGHTINESS
DEBUG:root:retaliate vengeful conciliate flexible: expected FLEXIBLE, predicted THEODORA
DEBUG:root:retaliate vengeful conciliate flexible: expected FLEXIBLE, predicted CHASTITY
DEBUG:root:retaliate vengeful conciliate flexible: expected FLEXIBLE, predicted ENAMOURED
DEBUG:root:Skipping line #67 with OOV words: sporadic continuous tranquil cacophonous
INFO:root:Skipping invalid line #68 in /share/evansshare/sjia/analogy/MillerAnalogy.txt
DEBUG:root:formulate hypothesis deduce theorem: expected THEOREM, predicted SUPPOSITION
DEBUG:root:formulate hypothesis deduce theorem: expected THEOREM, predicted COROLLARY
DEBUG:root:formulate hypothesis deduce theorem: ex

DEBUG:root:stanch flow damp oscillation: expected OSCILLATION, predicted FLOWING
DEBUG:root:stanch flow damp oscillation: expected OSCILLATION, predicted MOIST
DEBUG:root:Skipping line #102 with OOV words: proliferate number dilate size
DEBUG:root:somme waterloo 1916 1815: expected 1815, predicted 1876
DEBUG:root:somme waterloo 1916 1815: expected 1815, predicted 1856
DEBUG:root:somme waterloo 1916 1815: expected 1815, predicted 1882
DEBUG:root:somme waterloo 1916 1815: expected 1815, predicted 1926
DEBUG:root:somme waterloo 1916 1815: expected 1815, predicted 1814
DEBUG:root:corporeal body rational mind: expected MIND, predicted LEARNER
DEBUG:root:corporeal body rational mind: expected MIND, predicted FRAME
DEBUG:root:corporeal body rational mind: expected MIND, predicted ROBOT
DEBUG:root:corporeal body rational mind: expected MIND, predicted REFLECTIVE
DEBUG:root:corporeal body rational mind: expected MIND, predicted INTELLIGENT
DEBUG:root:Skipping line #105 with OOV words: venus min

DEBUG:root:portion dose food drug: expected DRUG, predicted MEAT
DEBUG:root:portion dose food drug: expected DRUG, predicted FOODS
DEBUG:root:portion dose food drug: expected DRUG, predicted CARBOHYDRATE
DEBUG:root:portion dose food drug: expected DRUG, predicted DOSAGE
DEBUG:root:essay thesis article topic: expected TOPIC, predicted PROSPECTUS
DEBUG:root:essay thesis article topic: expected TOPIC, predicted PURDUE
DEBUG:root:essay thesis article topic: expected TOPIC, predicted MIDWESTERN
DEBUG:root:essay thesis article topic: expected TOPIC, predicted DUMA
DEBUG:root:essay thesis article topic: expected TOPIC, predicted AUCKLAND
DEBUG:root:Skipping line #139 with OOV words: musician virtuoso bard poet
DEBUG:root:furtive behavior covert action: expected ACTION, predicted BEHAVIORS
DEBUG:root:furtive behavior covert action: expected ACTION, predicted DISRUPTIVE
DEBUG:root:furtive behavior covert action: expected ACTION, predicted EVALUATIONS
DEBUG:root:furtive behavior covert action: e

(0.05194805194805195,
 [{'section': 'miller-practice-test',
   'correct': [('MONOLOGUE', 'ACTOR', 'SOLO', 'SINGER'),
    ('HYPOTENUSE', 'PERIMETER', 'ARC', 'CIRCUMFERENCE'),
    ('OBOE', 'FLUTE', 'CELLO', 'VIOLIN'),
    ('UBIQUITOUS', 'EVERYWHERE', 'ETERNAL', 'FOREVER')],
   'incorrect': [('ARTICULATE', 'SPEECH', 'COORDINATED', 'MOVEMENT'),
    ('INCEPTION', 'CONCLUSION', 'DEPARTURE', 'ARRIVAL'),
    ('ELUCIDATE', 'CLARITY', 'ILLUMINATE', 'LIGHT'),
    ('PENURY', 'MONEY', 'STARVATION', 'SUSTENANCE'),
    ('ARABLE', 'LAND', 'NAVIGABLE', 'WATERWAY'),
    ('FURNACE', 'SLAG', 'FIRE', 'ASHES'),
    ('17', '19', '31', '37'),
    ('PITCH', 'LOUDNESS', 'WAVELENGTH', 'AMPLITUDE'),
    ('ELECTED', 'INAUGURATION', 'CONDEMNED', 'EXECUTION'),
    ('DIVIDEND', 'STOCKHOLDER', 'ROYALTY', 'AUTHOR'),
    ('ARCHIPELAGO', 'ISLAND', 'CONSTELLATION', 'STAR'),
    ('TWAIN', 'CLEMENS', 'ELIOT', 'EVANS'),
    ('AUTHORITARIAN', 'LENIENT', 'MISER', 'CHARITABLE'),
    ('ALLAY', 'SUSPICION', 'CALM', 'FEAR'),
    (

After relaxing the threshold, we get 3 more lines correct. For instance, in our ground truth, "monologue-actor ~ solo-singer." If we check with our word-embedding model, "singer" is the 5th best answer.

In [7]:
# actor + solo - monologue: list the nearest neighbours to see where the expected word ranks.
model.wv.most_similar(positive = ['actor', 'solo'], negative = ['monologue'])

[('actress', 0.5315026044845581),
 ('amateur', 0.5283392667770386),
 ('player', 0.5044155120849609),
 ('engineer', 0.4954182505607605),
 ('singer', 0.49373847246170044),
 ('sailor', 0.4915875792503357),
 ('playwright', 0.4897642731666565),
 ('artist', 0.48775380849838257),
 ('swordsman', 0.48742973804473877),
 ('musician', 0.4852411150932312)]

My customized function also allows us to further reduce the difficulty. We can ask it to consider top 10 best answers. And we'll get two more lines correct. 

In [8]:
# Same evaluation with a looser criterion: topn = 10 instead of the default.
evaluate_word_analogies(model.wv, '/share/evansshare/sjia/analogy/MillerAnalogy.txt', topn = 10)

INFO:root:Evaluating word analogies for top 300000 words in the model on /share/evansshare/sjia/analogy/MillerAnalogy.txt
DEBUG:smart_open.smart_open_lib:{'uri': '/share/evansshare/sjia/analogy/MillerAnalogy.txt', 'mode': 'rb', 'buffering': -1, 'encoding': None, 'errors': None, 'newline': None, 'closefd': True, 'opener': None, 'ignore_ext': False, 'transport_params': None}
DEBUG:root:Skipping line #1 with OOV words: cellini puccini sculpture opera
DEBUG:root:articulate speech coordinated movement: expected MOVEMENT, predicted CONTRACEPTION
DEBUG:root:articulate speech coordinated movement: expected MOVEMENT, predicted LOBBYING
DEBUG:root:articulate speech coordinated movement: expected MOVEMENT, predicted TYPIFIED
DEBUG:root:articulate speech coordinated movement: expected MOVEMENT, predicted KGB
DEBUG:root:articulate speech coordinated movement: expected MOVEMENT, predicted INITIATING
DEBUG:root:articulate speech coordinated movement: expected MOVEMENT, predicted VALIDATED
DEBUG:root:

DEBUG:root:dividend stockholder royalty author: expected AUTHOR, predicted SEISIN
DEBUG:root:dividend stockholder royalty author: expected AUTHOR, predicted WRONGDOING
DEBUG:root:dividend stockholder royalty author: expected AUTHOR, predicted PEDANT
DEBUG:root:dividend stockholder royalty author: expected AUTHOR, predicted ENCUMBRANCE
DEBUG:root:dividend stockholder royalty author: expected AUTHOR, predicted ARTIFICER
DEBUG:root:dividend stockholder royalty author: expected AUTHOR, predicted RESPECTABILITY
DEBUG:root:dividend stockholder royalty author: expected AUTHOR, predicted CENSURES
DEBUG:root:dividend stockholder royalty author: expected AUTHOR, predicted PERVERT
DEBUG:root:dividend stockholder royalty author: expected AUTHOR, predicted FUNCTIONARY
DEBUG:root:archipelago island constellation star: expected STAR, predicted HILLOCK
DEBUG:root:archipelago island constellation star: expected STAR, predicted SUPERIMPOSED
DEBUG:root:archipelago island constellation star: expected STAR

DEBUG:root:Skipping line #36 with OOV words: virtuoso accomplished philanthropist generous
DEBUG:root:emit mite evil vile: expected VILE, predicted SINFULNESS
DEBUG:root:emit mite evil vile: expected VILE, predicted THRALL
DEBUG:root:emit mite evil vile: expected VILE, predicted SHARER
DEBUG:root:emit mite evil vile: expected VILE, predicted VIRGINITY
DEBUG:root:emit mite evil vile: expected VILE, predicted MESSIAHSHIP
DEBUG:root:emit mite evil vile: expected VILE, predicted WORTHIEST
DEBUG:root:emit mite evil vile: expected VILE, predicted FRAILTY
DEBUG:root:emit mite evil vile: expected VILE, predicted COVETOUS
DEBUG:root:emit mite evil vile: expected VILE, predicted LUSTS
DEBUG:root:emit mite evil vile: expected VILE, predicted DELIGHTETH
DEBUG:root:homogeneous kind contemporary time: expected TIME, predicted SORT
DEBUG:root:homogeneous kind contemporary time: expected TIME, predicted CURIOSITIES
DEBUG:root:homogeneous kind contemporary time: expected TIME, predicted REFLECTIONS
DEB

INFO:root:Skipping invalid line #51 in /share/evansshare/sjia/analogy/MillerAnalogy.txt
DEBUG:root:diffident confidence unstable balance: expected BALANCE, predicted PRECISION
DEBUG:root:diffident confidence unstable balance: expected BALANCE, predicted IMPARTIALITY
DEBUG:root:diffident confidence unstable balance: expected BALANCE, predicted EFFICACIOUS
DEBUG:root:diffident confidence unstable balance: expected BALANCE, predicted PROBLEMATIC
DEBUG:root:diffident confidence unstable balance: expected BALANCE, predicted EFFICIENCY
DEBUG:root:diffident confidence unstable balance: expected BALANCE, predicted DISSATISFACTION
DEBUG:root:diffident confidence unstable balance: expected BALANCE, predicted DISRUPTIVE
DEBUG:root:diffident confidence unstable balance: expected BALANCE, predicted ECONOMICAL
DEBUG:root:diffident confidence unstable balance: expected BALANCE, predicted INTUITIVE
DEBUG:root:diffident confidence unstable balance: expected BALANCE, predicted FLEXIBILITY
DEBUG:root:Ski

DEBUG:root:hypotenuse perimeter arc circumference: expected CIRCUMFERENCE, predicted INTERSECTS
DEBUG:root:hypotenuse perimeter arc circumference: expected CIRCUMFERENCE, predicted CENTROID
DEBUG:root:frown displeasure fidget restlessness: expected RESTLESSNESS, predicted GRANTEE
DEBUG:root:frown displeasure fidget restlessness: expected RESTLESSNESS, predicted DEROGATION
DEBUG:root:frown displeasure fidget restlessness: expected RESTLESSNESS, predicted FAVOURITES
DEBUG:root:frown displeasure fidget restlessness: expected RESTLESSNESS, predicted HESITANCY
DEBUG:root:frown displeasure fidget restlessness: expected RESTLESSNESS, predicted RULINGS
DEBUG:root:frown displeasure fidget restlessness: expected RESTLESSNESS, predicted SUZERAIN
DEBUG:root:frown displeasure fidget restlessness: expected RESTLESSNESS, predicted REPARATION
DEBUG:root:frown displeasure fidget restlessness: expected RESTLESSNESS, predicted DICTATION
DEBUG:root:frown displeasure fidget restlessness: expected RESTLESSN

DEBUG:root:partisan cause sectarian sect: expected SECT, predicted CAUSES
DEBUG:root:partisan cause sectarian sect: expected SECT, predicted LAWFULNESS
DEBUG:root:partisan cause sectarian sect: expected SECT, predicted PERSUASIONS
DEBUG:root:partisan cause sectarian sect: expected SECT, predicted RINGLEADERS
DEBUG:root:partisan cause sectarian sect: expected SECT, predicted INSENSIBILITY
DEBUG:root:partisan cause sectarian sect: expected SECT, predicted LAWLESSNESS
DEBUG:root:partisan cause sectarian sect: expected SECT, predicted REDOUND
DEBUG:root:partisan cause sectarian sect: expected SECT, predicted SUZERAIN
DEBUG:root:partisan cause sectarian sect: expected SECT, predicted THEBANS
DEBUG:root:partisan cause sectarian sect: expected SECT, predicted RESISTS
DEBUG:root:Skipping line #97 with OOV words: calorie joule inch meter
DEBUG:root:peerless surpassed indisputable challenged: expected CHALLENGED, predicted UNDENIABLE
DEBUG:root:peerless surpassed indisputable challenged: expecte

DEBUG:root:incarceration prison sequestration isolation: expected ISOLATION, predicted WORKHOUSE
DEBUG:root:incarceration prison sequestration isolation: expected ISOLATION, predicted SOHO
DEBUG:root:incarceration prison sequestration isolation: expected ISOLATION, predicted PATROCLUS
DEBUG:root:incarceration prison sequestration isolation: expected ISOLATION, predicted CONSULSHIP
DEBUG:root:incarceration prison sequestration isolation: expected ISOLATION, predicted CONVENT
DEBUG:root:Skipping line #115 with OOV words: ingrate gratitude profligate frugality
DEBUG:root:Skipping line #116 with OOV words: hypocritical sincere flustered composed
DEBUG:root:Skipping line #117 with OOV words: malinger work shirk obligation
DEBUG:root:ubiquitous everywhere eternal forever: expected FOREVER, predicted EVERLASTING
DEBUG:root:ubiquitous everywhere eternal forever: expected FOREVER, predicted HEAVEN
DEBUG:root:ubiquitous everywhere eternal forever: expected FOREVER, predicted RIGHTEOUSNESS
DEBUG:

DEBUG:root:criteria data criterion datum: expected DATUM, predicted ASCII
DEBUG:root:criteria data criterion datum: expected DATUM, predicted REAGENT
DEBUG:root:Skipping line #132 with OOV words: bellicose aggression pliant tractability
DEBUG:root:choleric anger sanguine optimism: expected OPTIMISM, predicted RESENTMENT
DEBUG:root:choleric anger sanguine optimism: expected OPTIMISM, predicted SADNESS
DEBUG:root:choleric anger sanguine optimism: expected OPTIMISM, predicted EMOTION
DEBUG:root:choleric anger sanguine optimism: expected OPTIMISM, predicted EXCITEMENT
DEBUG:root:choleric anger sanguine optimism: expected OPTIMISM, predicted DISAPPOINTMENT
DEBUG:root:choleric anger sanguine optimism: expected OPTIMISM, predicted ANGUISH
DEBUG:root:choleric anger sanguine optimism: expected OPTIMISM, predicted INTENSE
DEBUG:root:choleric anger sanguine optimism: expected OPTIMISM, predicted FEELINGS
DEBUG:root:choleric anger sanguine optimism: expected OPTIMISM, predicted GRIEF
DEBUG:root:ch

(0.07792207792207792,
 [{'section': 'miller-practice-test',
   'correct': [('SULFUR', 'IODINE', 'YELLOW', 'PURPLE'),
    ('STANZA', 'POEM', 'CHAPTER', 'NOVEL'),
    ('MONOLOGUE', 'ACTOR', 'SOLO', 'SINGER'),
    ('HYPOTENUSE', 'PERIMETER', 'ARC', 'CIRCUMFERENCE'),
    ('OBOE', 'FLUTE', 'CELLO', 'VIOLIN'),
    ('UBIQUITOUS', 'EVERYWHERE', 'ETERNAL', 'FOREVER')],
   'incorrect': [('ARTICULATE', 'SPEECH', 'COORDINATED', 'MOVEMENT'),
    ('INCEPTION', 'CONCLUSION', 'DEPARTURE', 'ARRIVAL'),
    ('ELUCIDATE', 'CLARITY', 'ILLUMINATE', 'LIGHT'),
    ('PENURY', 'MONEY', 'STARVATION', 'SUSTENANCE'),
    ('ARABLE', 'LAND', 'NAVIGABLE', 'WATERWAY'),
    ('FURNACE', 'SLAG', 'FIRE', 'ASHES'),
    ('17', '19', '31', '37'),
    ('PITCH', 'LOUDNESS', 'WAVELENGTH', 'AMPLITUDE'),
    ('ELECTED', 'INAUGURATION', 'CONDEMNED', 'EXECUTION'),
    ('DIVIDEND', 'STOCKHOLDER', 'ROYALTY', 'AUTHOR'),
    ('ARCHIPELAGO', 'ISLAND', 'CONSTELLATION', 'STAR'),
    ('TWAIN', 'CLEMENS', 'ELIOT', 'EVANS'),
    ('AUTHORITAR

But the original Miller tests are multiple-choice questions. To make the test fair, we can also ask multiple-choice questions to our word-embedding model. 

In [9]:
# Multiple-choice version of the practice tests; the path is relative to the working directory.
fulltest = pd.read_csv('Miller/PracticeTests150.csv')

Our full tests look like this:

In [10]:
# Display the question table: the question text, options A-D and the index of the correct option.
fulltest

Unnamed: 0,text,A,B,C,D,correct_answer
0,_ puccini sculpture opera,cellini,rembrandt,wagner,petrarch,0
1,_ speech coordinated movement,predictive,rapid,prophetic,articulate,3
2,inception conclusion _ arrival,upshot,culmination,departure,escapade,2
3,scintillating dullness _ calm,erudite,boisterous,cautious,exalted,1
4,elucidate clarity illuminate _,memory,problem,oblivion,light,3
...,...,...,...,...,...,...
145,fetter _ gag speech,leg,movement,catacomb,law breaking,1
146,_ deadlock quandary dilemma,moratorium,impasse,exegesis,paradigm,1
147,catharsis emotion absolution _,malady,innocence,guilt,mourning,2
148,lyell dalton geology _,biology,chemistry,sociology,music,1


In the original tests, the missing word is represented as '_'. We can parse each text and determine the positive and negative words.

In [11]:
# Parse each question into (positives, negatives); unparseable texts become NaN.
fulltest['test'] = fulltest['text'].apply(gen_pos_neg)

Some original tests contain phrases. I haven't implemented a function for testing phrases. So, I'll skip those lines. 

In [12]:
# Keep only the questions that could be parsed. notna() is the idiomatic way
# to select non-missing rows; unary minus on a boolean mask is not.
validtest = fulltest[fulltest.test.notna()].copy()

Then we can use a function I defined at the beginning of the notebook to yield the best answer given all choices and also see whether the best answer is accurate.

In [13]:
# For every parsed question, score the four options (columns A-D) and store
# the position of the option with the smallest cosine distance.
validtest['best_answer'] = validtest.apply(
    lambda row: best_answer(*row['test'], row[['A', 'B', 'C', 'D']].tolist()),
    axis=1,
)

In [14]:
# Element-wise comparison; rows whose best_answer is NaN compare as False.
validtest['accurate'] = validtest['best_answer'].eq(validtest['correct_answer'])

My function is not able to generate an answer if any of the words in a line is not in the vocab of my word-embedding model. So, let's focus on rows with an answer.

In [15]:
# Rows for which an answer could be computed (notna() instead of negating the mask with unary minus).
validtest[validtest.best_answer.notna()]

Unnamed: 0,text,A,B,C,D,correct_answer,test,best_answer,accurate
1,_ speech coordinated movement,predictive,rapid,prophetic,articulate,3,"([speech, coordinated], [movement])",3.0,True
4,elucidate clarity illuminate _,memory,problem,oblivion,light,3,"([clarity, illuminate], [elucidate])",3.0,True
7,penury money starvation _,sustenance,infirmity,illness,care,0,"([money, starvation], [penury])",0.0,True
8,arable land _ waterway,impenetrable,navigable,fertile,shallow,1,"([arable, waterway], [land])",0.0,False
11,17 19 _ 37,39,36,34,31,3,"([17, 37], [19])",0.0,False
14,pitch _ wavelength amplitude,sound,timbre,loudness,color,2,"([pitch, amplitude], [wavelength])",1.0,False
15,elected _ condemned execution,graduation,inauguration,dismissal,exhibition,1,"([elected, execution], [condemned])",1.0,True
16,dividend stockholder _ author,patent,royalty,wage,interest,1,"([dividend, author], [stockholder])",1.0,True
17,archipelago island constellation _,hamlet,zodiac,sea,star,3,"([island, constellation], [archipelago])",1.0,False
20,twain clemens eliot _,george,mary,bronte,evans,3,"([clemens, eliot], [twain])",3.0,True


And the average accuracy is:

In [16]:
# Share of correct answers among the rows that received an answer.
validtest[validtest.best_answer.notna()].accurate.mean()

0.4117647058823529