In [1]:
from gensim.models import word2vec

# Stream the corpus lazily: LineSentence yields one whitespace-split token
# list per line of the file (presumably one shopping basket per line).
sentences = word2vec.LineSentence(source='groceries.txt')

In [2]:
# Train skip-gram (sg=1) item embeddings on the corpus. min_count=1 keeps
# every item, however rare; iter=500 makes 500 passes over the data.
# NOTE(review): gensim trains with several worker threads by default, so the
# scores recorded below may not reproduce exactly on a re-run; consider
# workers=1 if exact reproducibility matters — confirm before changing.
model = word2vec.Word2Vec(
    sentences=sentences,
    sg=1,
    min_count=1,
    iter=500,
)

In [3]:
# Ten items whose embeddings are closest to 'pork', with similarity scores.
model.wv.most_similar(positive=['pork'], topn=10)

[('onions', 0.35001248121261597),
 ('turkey', 0.34213584661483765),
 ('yogurt', 0.3391532003879547),
 ('butter', 0.32306376099586487),
 ('sausage', 0.3135703206062317),
 ('chicken', 0.3089650571346283),
 ('beef', 0.30136048793792725),
 ('curd', 0.2895019054412842),
 ('citrus_fruit', 0.2771354913711548),
 ('root_vegetables', 0.27564483880996704)]

In [4]:
# Ten items whose embeddings are closest to 'beef', with similarity scores.
model.wv.most_similar(positive=['beef'], topn=10)

[('butter', 0.4024900794029236),
 ('onions', 0.3838678002357483),
 ('pip_fruit', 0.37157630920410156),
 ('meat', 0.33756184577941895),
 ('other_vegetables', 0.32065755128860474),
 ('chicken', 0.316648006439209),
 ('tropical_fruit', 0.3147188425064087),
 ('yogurt', 0.3107911944389343),
 ('whole_milk', 0.30543917417526245),
 ('pork', 0.301360547542572)]

In [5]:
# Top-10 items the model predicts for a context of beer and red/blush wine.
model.predict_output_word(
    context_words_list=['bottled_beer', 'red/blush_wine'],
    topn=10,
)

[('liquor', 0.076620705),
 ('prosecco', 0.030791236),
 ('liquor_(appetizer)', 0.027123762),
 ('sparkling_wine', 0.02171228),
 ('tea', 0.019144028),
 ('rum', 0.018046917),
 ('brandy', 0.017771726),
 ('white_wine', 0.015121205),
 ('nut_snack', 0.014966749),
 ('canned_fish', 0.014825281)]

In [6]:
# Top-10 items the model predicts for a context of hamburger meat and soda.
model.predict_output_word(
    context_words_list=['hamburger_meat', 'soda'],
    topn=10,
)

[('Instant_food_products', 0.022627866),
 ('pasta', 0.018009944),
 ('canned_vegetables', 0.01685342),
 ('mayonnaise', 0.015356236),
 ('meat_spreads', 0.015302317),
 ('sweet_spreads', 0.0151319755),
 ('sauces', 0.013697033),
 ('spread_cheese', 0.012117581),
 ('dish_cleaner', 0.011641624),
 ('frozen_potato_products', 0.011515584)]

In [7]:
# Top-10 items the model predicts for a context of ham and white bread.
model.predict_output_word(
    context_words_list=['ham', 'white_bread'],
    topn=10,
)

[('processed_cheese', 0.039820198),
 ('frozen_potato_products', 0.019321121),
 ('sweet_spreads', 0.018111335),
 ('spread_cheese', 0.01726797),
 ('potato_products', 0.016594684),
 ('soft_cheese', 0.014807216),
 ('hard_cheese', 0.014403561),
 ('specialty_cheese', 0.013614273),
 ('sound_storage_medium', 0.013440796),
 ('condensed_milk', 0.01304468)]

In [8]:
# Top-10 predictions for a larger, four-item context (vegetables and dairy).
model.predict_output_word(
    context_words_list=[
        'root_vegetables',
        'other_vegetables',
        'whole_milk',
        'yogurt',
    ],
    topn=10,
)

[('herbs', 0.015105391),
 ('turkey', 0.014365919),
 ('rice', 0.01316431),
 ('frozen_fish', 0.0130173275),
 ('specialty_cheese', 0.012956117),
 ('mayonnaise', 0.012815042),
 ('roll_products_', 0.0125050405),
 ('soft_cheese', 0.012039127),
 ('packaged_fruit/vegetables', 0.011681836),
 ('onions', 0.010940962)]

In [9]:
# Top-10 items the model predicts for a context of curd and sugar.
model.predict_output_word(
    context_words_list=['curd', 'sugar'],
    topn=10,
)

[('flour', 0.025570681),
 ('pudding_powder', 0.02354227),
 ('roll_products_', 0.01748644),
 ('cereals', 0.015268401),
 ('baking_powder', 0.015201763),
 ('salt', 0.014872063),
 ('frozen_dessert', 0.014219358),
 ('rice', 0.013953114),
 ('curd_cheese', 0.013767071),
 ('jam', 0.013077002)]

In [10]:
# Top-10 items the model predicts for a context of soda and salty snack.
model.predict_output_word(
    context_words_list=['soda', 'salty_snack'],
    topn=10,
)

[('popcorn', 0.02322538),
 ('nut_snack', 0.019715898),
 ('make_up_remover', 0.013758556),
 ('cake_bar', 0.013404904),
 ('canned_vegetables', 0.012790451),
 ('instant_coffee', 0.012667293),
 ('kitchen_towels', 0.012493766),
 ('tidbits', 0.011753058),
 ('sweet_spreads', 0.011741863),
 ('Instant_food_products', 0.011501943)]

In [11]:
# Top-10 items the model predicts for a context of sugar and baking powder.
model.predict_output_word(
    context_words_list=['sugar', 'baking_powder'],
    topn=10,
)

[('flour', 0.030803043),
 ('pudding_powder', 0.026207035),
 ('rice', 0.022308681),
 ('vinegar', 0.01965483),
 ('specialty_fat', 0.01958772),
 ('soups', 0.017477596),
 ('cocoa_drinks', 0.016962666),
 ('cooking_chocolate', 0.01643515),
 ('meat_spreads', 0.015689712),
 ('salt', 0.0156014785)]

In [12]:
from collections import Counter
import itertools

# Count how many sentences contain each unordered 3-item combination.
# Items are sorted first so every triple has one canonical (alphabetical)
# key regardless of the order in which the items appear in a sentence.
# A generator feeds Counter directly, so the full list of triples is never
# held in memory at once.
tri_counter = Counter(
    triple
    for basket in sentences
    for triple in itertools.combinations(sorted(basket), 3)
)

In [13]:
# Thresholds for keeping a (pair -> item) prediction.
MIN_PROBABILITY = 0.02   # minimum predicted probability of the item
MIN_TRIPLE_COUNT = 10    # minimum co-occurrence count of pair + item in tri_counter

# Every unordered pair of vocabulary items is used as a two-item context.
pairs = itertools.combinations(model.wv.vocab.keys(), 2)

# For each pair, keep the predicted items that are both probable enough and
# observed together with the pair often enough. The tri_counter key is the
# alphabetically sorted triple, matching how tri_counter was built.
candidate_rules = (
    (pair, item, prob)
    for pair in pairs
    for item, prob in model.predict_output_word(pair)
    if prob >= MIN_PROBABILITY
    and tri_counter[tuple(sorted(pair + (item,)))] >= MIN_TRIPLE_COUNT
)

# Most probable predictions first; ties keep their generation order.
sorted(candidate_rules, key=lambda rule: rule[2], reverse=True)


[(('bottled_beer', 'red/blush_wine'), 'liquor', 0.076620705),
 (('bottled_beer', 'liquor'), 'red/blush_wine', 0.0712179),
 (('white_bread', 'ham'), 'processed_cheese', 0.039820198),
 (('red/blush_wine', 'liquor'), 'bottled_beer', 0.031292748),
 (('sugar', 'baking_powder'), 'flour', 0.030803043),
 (('sugar', 'whipped/sour_cream'), 'flour', 0.029322423),
 (('margarine', 'sugar'), 'flour', 0.027827),
 (('beef', 'root_vegetables'), 'herbs', 0.02740662),
 (('curd', 'sugar'), 'flour', 0.025570681),
 (('flour', 'sugar'), 'baking_powder', 0.025403246),
 (('tropical_fruit', 'root_vegetables'), 'turkey', 0.025329975),
 (('whole_milk', 'ham'), 'processed_cheese', 0.024535457),
 (('rolls/buns', 'hamburger_meat'), 'Instant_food_products', 0.02427808),
 (('flour', 'baking_powder'), 'sugar', 0.023779714),
 (('tropical_fruit', 'white_bread'), 'processed_cheese', 0.023528077),
 (('sugar', 'root_vegetables'), 'flour', 0.023394365),
 (('soda', 'salty_snack'), 'popcorn', 0.02322538),
 (('whole_milk', 'sug