In [1]:
import pandas as pd
import numpy as np
import tqdm
import json
import networkx as nx
import matplotlib.pyplot as plt
import collections

### load data

In [2]:
cleaned_recipes = json.load(open("./../generated/high_score_repr_recipes.json"))
repr_per_id = json.load(open("./../generated/high_score_key_representative.json"))

In [3]:
cleaned_recipes[0]

['usda_id=11251',
 'black olife',
 'grape tomato',
 'usda_id=11215',
 'usda_id=11981',
 'purple onion',
 'usda_id=2075',
 'usda_id=16056',
 'usda_id=1019']

#### create graph

In [4]:
def name_ingredients(recipe) :
    
    named_ingredients = []
    
    for r in recipe :
        if r[:8] == "usda_id=" :
            named_ingredients.append(repr_per_id[r[8:]])
            
        else :
            named_ingredients.append(r)
            
    return named_ingredients
            

def ingredients_tuples(recipe) :
    
    tuples = []
    for ing1 in range(len(recipe) - 1) :
        for ing2 in range(ing1 + 1, len(recipe)) :
            if recipe[ing1] < recipe[ing2] :
                tuples.append((recipe[ing1], recipe[ing2]))
                
    return tuples
                
asso_counter = collections.Counter()
for recipe in tqdm.tqdm(cleaned_recipes) :
        asso_counter.update(ingredients_tuples(name_ingredients(recipe)))
            

100%|██████████| 96529/96529 [00:01<00:00, 50814.75it/s]


In [5]:
asso_counter.most_common()[271299]

(('buckwheat flour', 'salt & pepper'), 1)

In [6]:
ing_graph = nx.Graph()

for item in asso_counter.most_common() :
    ing_graph.add_edge(item[0][0],item[0][1],weight=item[1])

In [7]:
len(ing_graph.nodes)

4618

In [8]:
def nb_connections(from_ing, to_ing) :
    try :
        return ing_graph[from_ing][to_ing]['weight']
    except :
        return 0
    
def nb_asso(ing) :
    return sum([ing_graph[ing][c]['weight'] for c in ing_graph[ing]])
    
def nb_outgoing_edges(ing) :
    try :
        return ing_graph.degree[ing]
    except :
        return 0
    
def max_association(ing) :
    return max([ing_graph[ing][c]['weight'] for c in ing_graph[ing]])


#compute number of associations dictionnary
nb_assos = dict()
for ing in tqdm.tqdm(ing_graph.nodes) : 
    nb_assos[ing] = nb_asso(ing)


100%|██████████| 4618/4618 [00:00<00:00, 7558.92it/s]


#### define compatibility

In [9]:
def friendship(ing1, ing2) :
    ing1_likes_ing2 = (nb_connections(ing1, ing2) / nb_assos[ing1])
    ing2_likes_ing1 = (nb_connections(ing2, ing1) / nb_assos[ing2])
    friendship     =  max(ing1_likes_ing2, ing2_likes_ing1)
    return friendship


def recipe_compatibility(ingredient, recipe) :
    if ingredient in recipe : 
        return 0    
    else :         
        return sum([friendship(ingredient, ing) for ing in recipe]) / len(recipe)
    
def swap_candidates(replace_index, recipe) :
    if (replace_index < 0) or (replace_index >= len(recipe)) :
        raise AttributeError("replace")
    

#save compatibilities
compatibilities = dict()
for ing1 in tqdm.tqdm(ing_graph.nodes) :    
    per_ing = dict()
    
    for ing2 in ing_graph.nodes :
        per_ing[ing2] = friendship(ing1, ing2)
        
    compatibilities[ing1] = per_ing
        
json.dump(compatibilities, open("./../generated/high_score_ing_friendship.json", 'w'))

100%|██████████| 4618/4618 [01:01<00:00, 74.59it/s]


In [13]:
friendships = json.load(open("./../generated/high_score_ing_friendship.json"))

In [11]:
friendship("egg", "salt")

0.06702144475472892