In [65]:
# Import the necessary libraries/modules
import gensim
import cardgen as cg
import gs_probdist as gspd
import pandas as pd

In [3]:
# Load the pre-trained embeddings: word2vec-format vectors read from a binary file (binary=True).
# The path is relative, so the .bin file has to sit in the notebook's working directory.
model = gensim.models.KeyedVectors.load_word2vec_format('GoogleNews-vectors-negative300.bin', binary=True)

In [4]:
# Prepare the probability distribution that is later passed to gen_many_cards() as prob_dist.
# read_in_categorised() is called without arguments, so its data source is decided inside gs_probdist.
# NOTE(review): presumably goldstd_data holds per-category frequencies from the gold standard - confirm in gs_probdist.
goldstd_data = gspd.read_in_categorised()
category_prob_dist = gspd.freq_dist_to_prob_dist(goldstd_data)

In [5]:
def gen_many_cards(list_of_mws, prob_dist, model):
    """
    Builds one card per main word (MW) by calling card_generator() on each of them.

    Args:
        list_of_mws: a list of strings, each representing a main word to generate.
        prob_dist: probability distribution of categories, output of freq_dist_to_prob_dist()
        model: gensim word2vec embeddings model
    Returns:
        A dictionary where each key is a MW and each value is the entry stored
        under that MW in the card returned by card_generator() (a list of TWs).
    """

    # card_generator() returns a mapping that is indexed by the MW itself;
    # only that entry is kept, so all cards end up merged into one flat dictionary.
    return {
        main_word: cg.card_generator(main_word, prob_dist, model)[main_word]
        for main_word in list_of_mws
    }

In [68]:
# Main words (MWs) to generate evaluation cards for.
mws = [
    'coffee', 'cake', 'uncle', 'mother', 'book',
    'nose', 'summer', 'heart', 'baby', 'pencil',
    'flower', 'happy', 'house', 'love', 'wife',
    'toilet', 'glasses', 'shoe', 'hello', 'game',
]

# Generate one card per MW and display the resulting dictionary.
dict_of_cards = gen_many_cards(mws, category_prob_dist, model)
dict_of_cards

{'coffee': ['chocolate', 'cappuccino', 'java', 'espresso', 'latté'],
 'cake': ['dessert', 'biscuit', 'frosting', 'brownie', 'pastry'],
 'uncle': ['cousin', 'grandfather', 'father', 'nephew', 'brother'],
 'mother': ['beget', 'granddaughter', 'daughter', 'aunt', 'son'],
 'book': ['koran', 'memoir', 'autobiography', 'hardback', 'paperback'],
 'nose': ['forehead', 'finger', 'schnozz', 'cheekbone', 'schnoz'],
 'summer': ['week', 'weekend', 'autumn', 'springtime', 'month'],
 'heart': ['center', 'content', 'coronary', 'arrhythmia', 'artery'],
 'baby': ['girl', 'newborn', 'infant', 'triplet', 'toddler'],
 'pencil': ['notepad', 'Crayolas', 'crayon', 'ballpoint', 'eraser'],
 'flower': ['bloom', 'angiosperm', 'orchid', 'tulip', 'blossom'],
 'happy': ['ecstatic', 'disappointed', 'excited', 'glad', 'delighted'],
 'house': ['firm', 'accommodate', 'bungalow', 'residence', 'mansion'],
 'love': ['sleep_together', 'care_for', 'affection', 'passion', 'cherish'],
 'wife': ['woman', 'fiancee', 'girlfriend'

In [71]:
# Create a CSV file of dict_of_cards for later evaluation.
# Each MW becomes one column; wrapping every list in a Series lets pandas
# align the entries by position when building the frame.
df = pd.DataFrame(
    {main_word: pd.Series(card_words) for main_word, card_words in dict_of_cards.items()}
)

# index=False keeps the row index out of the file, so it holds only the MW columns.
df.to_csv("evaluation_cards.csv", index=False, encoding='utf-8')

In [72]:
# Show the CSV file: read it back from disk and display the resulting DataFrame.

eval_cards = pd.read_csv("evaluation_cards.csv")
eval_cards

Unnamed: 0,coffee,cake,uncle,mother,book,nose,summer,heart,baby,pencil,flower,happy,house,love,wife,toilet,glasses,shoe,hello,game
0,chocolate,dessert,cousin,beget,koran,forehead,week,center,girl,notepad,bloom,ecstatic,firm,sleep_together,woman,gutter,field_glass,slipper,greeting,plot
1,cappuccino,biscuit,grandfather,granddaughter,memoir,finger,weekend,content,newborn,Crayolas,angiosperm,disappointed,accommodate,care_for,fiancee,plumbing_fixture,sunglass,sandal,goodbye,conspiracy
2,java,frosting,father,daughter,autobiography,schnozz,autumn,coronary,infant,crayon,orchid,excited,bungalow,affection,girlfriend,commode,trifocals,footwear,greet,matchup
3,espresso,brownie,nephew,aunt,hardback,cheekbone,springtime,arrhythmia,triplet,ballpoint,tulip,glad,residence,passion,fiancée,loo,spectacle,sneaker,goodnight,opener
4,latté,pastry,brother,son,paperback,schnoz,month,artery,toddler,eraser,blossom,delighted,mansion,cherish,husband,restroom,eyeglass,footware,g'day,tournament
