In [1]:
import pandas as pd
import numpy as np
import os
# If the kernel was not started in the 'food-pairing' directory, step up one
# level so the relative paths used later (e.g. results/, data/) are resolved
# from the parent directory.
_cwd = os.getcwd()
if os.path.basename(_cwd) != 'food-pairing':
    os.chdir(os.path.dirname(_cwd))

from utils.data_loading import read_foods, read_molecules

from itertools import combinations
import networkx as nx
from tqdm import tqdm

from collections import Counter
from more_itertools import collapse

In [10]:
def merge_duplicate_keys(input_dict):
    """Combine the values of repeated keys with ``+``.

    A ``dict`` can never hold the same key twice, so with mapping-only input
    the merge branch was unreachable and the function merely returned a
    shallow copy. It now also accepts any iterable of ``(key, value)`` pairs
    (for example several ``dict.items()`` lists concatenated), where repeated
    keys can actually occur.

    Args:
        input_dict: A mapping, or an iterable of ``(key, value)`` pairs.

    Returns:
        A new ``dict`` in first-seen key order. Values of a repeated key are
        combined with ``+`` in the order they are encountered.
    """
    pairs = input_dict.items() if hasattr(input_dict, "items") else input_dict
    merged_dict = {}
    for key, value in pairs:
        if key in merged_dict:
            # Rebind instead of ``+=`` so a mutable first value (e.g. a list)
            # owned by the caller is not extended in place.
            merged_dict[key] = merged_dict[key] + value
        else:
            merged_dict[key] = value
    return merged_dict

In [11]:
# Molecule overlap between two foods
def find_shared_molecules(food1, food2, molecules_to_include, fm_dict):
    """Return the molecules found in both foods that are also allowed.

    Args:
        food1: Key of the first food in ``fm_dict``.
        food2: Key of the second food in ``fm_dict``.
        molecules_to_include: Iterable of molecule ids allowed in the result.
        fm_dict: Maps a food to an iterable of its molecule ids.

    Returns:
        A ``set`` with the ids present for both foods and in
        ``molecules_to_include``.
    """
    common = set(fm_dict[food1]) & set(fm_dict[food2])
    return common.intersection(molecules_to_include)

In [12]:
def generate_results_graph(target, nodes_dict, df):
    """Build a NetworkX ``MultiGraph`` linking ``target`` to each of its pairings.

    Args:
        target: Name of the food every edge is attached to.
        nodes_dict: Maps each paired food to the value stored as the ``weight``
            of its edge to ``target``.
        df: DataFrame with a ``'food'`` and a ``'category'`` column. The first
            row matching a food supplies that node's ``nodes_cats`` attribute.

    Returns:
        The graph, with one node per key of ``nodes_dict`` plus ``target``
        (``nodes_cats='target'``) and one weighted edge per ``nodes_dict`` item.

    Raises:
        IndexError: If a food other than ``target`` has no row in ``df``.
    """
    G = nx.MultiGraph()

    # Add nodes for each food
    for food in nodes_dict:
        if food == target:
            food_cat = 'target'
        else:
            categories = df.loc[df['food'] == food, 'category'].values
            if len(categories) == 0:
                # Same exception type as indexing the empty array, but it names the food.
                raise IndexError(f"food {food!r} not found in df['food']")
            food_cat = categories[0]
        G.add_node(food, nodes_cats=food_cat)

    # When target is not a key of nodes_dict, add_edge would create its node
    # implicitly and without the 'nodes_cats' attribute, so add it explicitly.
    # Placed after the loop to keep the node order of the implicit creation.
    G.add_node(target, nodes_cats='target')

    # One edge per paired food, carrying that food's score as the weight
    for food, weight in nodes_dict.items():
        G.add_edge(food, target, weight=weight)

    return G

In [13]:
# Load the molecule and food tables through the project loaders.
flavor_molecules = read_molecules()
food_df = read_foods()

# Food names and a food -> 'foodb_ids' lookup taken straight from the table columns.
foods = food_df['food'].values.tolist()
food_molecule_dict = {
    food: ids for food, ids in zip(food_df['food'], food_df['foodb_ids'])
}

# Flatten the 'foodb_ids' column and count how often each id occurs
# (presumably one list of molecule ids per food; verify against read_foods).
molecules = collapse(food_df['foodb_ids'].values.tolist())
c = Counter(molecules)
# Keep only the ids that occur fewer than 100 times.
molecules_to_include = [molecule for molecule, count in c.items() if count < 100]

In [14]:
# Target foods; each one has a results/{food}_pairings.csv file read below.
FOODS = [
    'tomato',
    'pepper',
    'onion',
    'cinnamon',
]
# Columns of that file holding the pairings suggested by each method.
METHODS = [
    'Cooccurences',
    'Nearest neighbors',
    'Panther similarity',
]

In [16]:
# For every target food: collect the pairings of the first three methods into
# one {food: similarity} dict, build the graph and write it as GEXF.
for target in FOODS:
    target_df = pd.read_csv(f"results/{target}_pairings.csv", index_col=None)

    # One {paired food: similarity} dict per method.
    # NOTE(review): every method column is zipped with the same 'Similarity'
    # column - confirm this is intended rather than a per-method score.
    results_all = []
    for method in METHODS:
        paired_foods = target_df[method].values.tolist()
        scores = target_df['Similarity'].values.tolist()
        results_all.append(dict(zip(paired_foods, scores)))

    # Merge in method order; a food listed by several methods keeps the value
    # from the last method that lists it.
    z = {}
    for method_results in (results_all[0], results_all[1], results_all[2]):
        z.update(method_results)

    print(z)
    G = generate_results_graph(target, z, food_df)
    nx.write_gexf(G, f"networks/{target}.gexf")
        

{'tea': 1.0, 'beer': 0.7644230769230769, 'soybean': 0.6538461538461539, 'red wine': 0.6586538461538461, 'blackberry': 0.6490384615384616, 'potato': 0.6538461538461539, 'angelica': 0.6538461538461539, 'mango': 0.6490384615384616, 'apple': 0.6442307692307693, 'corn': 0.6394230769230769, 'rhubarb': 1.0, 'beans': 0.7644230769230769, 'red currant': 0.6826923076923077, 'kiwifruit': 0.6586538461538461, 'pecans': 0.6586538461538461, 'green beans': 0.6538461538461539, 'litchi': 0.6538461538461539, 'buckwheat': 0.6490384615384616, 'brussels sprout': 0.6442307692307693, 'artichoke': 0.6394230769230769, 'achilleas': 0.6826923076923077, 'fruits': 0.6586538461538461, 'evergreen blackberry': 0.6586538461538461, 'cognac brandy': 0.6538461538461539, 'black tea': 0.6442307692307693, 'rum': 0.6394230769230769}
{'capsicum': 0.7509025270758123, 'pepper (spice)': 1.0, 'yam': 0.7509025270758123, 'ginger': 0.740072202166065, 'green bell pepper': 0.740072202166065, 'red bell pepper': 0.7184115523465704, 'yello

## Evaluate with Recipe1M
___

In [2]:
import ast
def string_to_list(string: str) -> list:
    """Parse the textual form of a list (e.g. ``"['salt', 'pepper']"``).

    The text is first evaluated as a Python literal. If that fails, the first
    and last characters are dropped, the rest is split on ``', '`` and single
    quotes are stripped from both ends of each piece.

    Args:
        string: Text to parse.

    Returns:
        The evaluated literal (expected to be a list), or the list of pieces
        produced by the fallback split.
    """
    try:
        return ast.literal_eval(string)
    # Only the errors literal_eval raises for malformed input trigger the
    # fallback; a bare ``except`` also swallowed KeyboardInterrupt/SystemExit.
    except (ValueError, TypeError, SyntaxError, MemoryError, RecursionError):
        return [s.strip("'") for s in string[1:-1].split(', ')]

# Read the semicolon-separated recipe table and turn the textual
# 'Ingredients' column into real Python lists.
recipes_df = pd.read_csv('data/recipe_ingredients.csv', sep=';', index_col=None)
parsed_ingredients = recipes_df['Ingredients'].apply(string_to_list)
recipes_df['Ingredients'] = parsed_ingredients
# Plain list of ingredient lists, one entry per row of the table.
recipes = parsed_ingredients.tolist()

In [3]:
# Function to normalize ingredient names
def normalize_ingredient(ingredient):
    """Return a lower-cased ingredient name with one trailing ``'s'`` removed.

    ``'tomato'`` is mapped to ``'tomatoe'`` so that it compares equal to the
    result for ``'tomatoes'``. The name is lower-cased *before* that check;
    otherwise ``'Tomato'`` became ``'tomato'`` while ``'Tomatoes'`` became
    ``'tomatoe'`` and the two never matched.

    Args:
        ingredient: Ingredient name.

    Returns:
        The normalized name.
    """
    ingredient = ingredient.lower()
    if ingredient == 'tomato':
        ingredient = 'tomatoe'
    if ingredient.endswith('s'):
        ingredient = ingredient[:-1]  # Remove trailing 's' for plural forms
    return ingredient

In [4]:
def check_pairs_in_ingredients_lists(ingredients_lists, normalized_pairs):
    """Report which ingredient pairs occur together in at least one list.

    Args:
        ingredients_lists: Iterable of ingredient lists; every ingredient is
            passed through ``normalize_ingredient`` before comparison.
        normalized_pairs: Iterable of 2-tuples of already normalized names.

    Returns:
        A tuple ``(pair_found, count)``: ``pair_found`` maps each pair to
        ``True``/``False`` and ``count`` is the number of pairs marked ``True``.
    """
    pair_found = dict.fromkeys(normalized_pairs, False)

    for ingredients in ingredients_lists:
        normalized = list(map(normalize_ingredient, ingredients))
        for first, second in combinations(normalized, 2):
            forward = (first, second)
            backward = (second, first)
            # A pair counts in either order; the order as listed is tried first.
            if forward in pair_found:
                pair_found[forward] = True
            elif backward in pair_found:
                pair_found[backward] = True

    return pair_found, sum(pair_found.values())


In [5]:
# Target foods; each one has a results_fin/{food}.txt table.
FOODS = [
    'tomato',
    'pepper',
    'onion',
    'cinnamon',
]
# Column names of those tables, one per pairing method being compared.
METHODS = [
    'Co-occurrences',
    'Nearest Neighbors',
    'Panther',
    'node2vec',
    'FlavorGraph',
    'FlavorDB',
]

In [16]:
# For every (method, food) combination, record how many of the suggested
# pairings occur together in at least one recipe.
valid_df = pd.DataFrame(index=METHODS, columns=FOODS)
for method in METHODS:
    for food in FOODS:
        target_df = pd.read_csv(f"results_fin/{food}.txt", index_col=None)
        food_pairings = target_df[method].dropna().tolist()

        normalized_food = normalize_ingredient(food)
        pairings = [
            (normalized_food, normalize_ingredient(pair)) for pair in food_pairings
        ]

        pairs_found, pairs_count = check_pairs_in_ingredients_lists(recipes, pairings)
        print(pairs_found)
        # NOTE(review): the divisor is fixed at 10 although dropna() and
        # duplicate pairs can leave fewer than 10 entries - confirm intended.
        valid_df.loc[method, food] = pairs_count/10

{('tomatoe', 'tea'): True, ('tomatoe', 'beer'): True, ('tomatoe', 'soybean'): True, ('tomatoe', 'red wine'): True, ('tomatoe', 'blackberry'): False, ('tomatoe', 'potato'): True, ('tomatoe', 'angelica'): True, ('tomatoe', 'mango'): True, ('tomatoe', 'apple'): True, ('tomatoe', 'corn'): True}
{('pepper', 'capsicum'): True, ('pepper', 'pepper (spice)'): False, ('pepper', 'yam'): True, ('pepper', 'ginger'): True, ('pepper', 'green bell pepper'): True, ('pepper', 'red bell pepper'): True, ('pepper', 'yellow bell pepper'): True, ('pepper', 'orange bell pepper'): True, ('pepper', 'soybean'): True, ('pepper', 'tea'): True}
{('onion', 'garden onion'): False, ('onion', 'welsh onion'): False, ('onion', 'green onion'): True, ('onion', 'red onion'): True, ('onion', 'garlic'): True, ('onion', 'beer'): True, ('onion', 'tea'): True, ('onion', 'soybean'): True, ('onion', 'peanut'): True}
{('cinnamon', 'pepper'): True, ('cinnamon', 'ginger'): True, ('cinnamon', 'ceylon cinnamon'): False, ('cinnamon', 'c

In [15]:
# Show the validation table filled by the loop above: rows are methods,
# columns are foods, values are the number of pairings found divided by 10.
valid_df

Unnamed: 0,tomato,pepper,onion,cinnamon
Co-occurrences,0.9,0.9,0.7,0.8
Nearest Neighbors,0.6,0.9,0.3,0.7
Panther,0.8,0.7,0.6,0.5
node2vec,0.6,0.6,0.4,0.3
FlavorGraph,1.0,1.0,1.0,1.0
FlavorDB,1.0,0.9,0.9,0.8


## Pairings compliance
___

In [6]:
import plotly.express as px
import plotly.io as pio
from plotly.subplots import make_subplots
import plotly.graph_objects as go

In [3]:
FOODS = [
    'tomato',
    'pepper',
    'onion',
    'cinnamon',
]
# Methods whose "<method> Similarity" column is plotted below;
# 'FlavorGraph' and 'FlavorDB' are left out.
MY_METHODS = [
    'Cooccurences',
    'Nearest neighbors',
    'Panther',
    'node2vec',
]

In [4]:
def plot_line(food, method, name):
    """Plot one method's similarity scores for ``food`` and save the figure.

    Args:
        food: Used to locate ``results/100_{food}_pairings.csv``.
        method: Prefix of the ``"<method> Similarity"`` column to plot; also
            used as the figure title.
        name: File stem of the PNG written to ``images/{name}.png``.
    """
    target_df = pd.read_csv(f"results/100_{food}_pairings.csv", index_col=None)
    similarities = target_df[f"{method} Similarity"].values.tolist()

    # x is simply the position of each score in the column.
    fig = px.line(
        x=range(0, len(similarities)),
        y=similarities,
        labels={'x': 'entry', 'y': 'similarity'},
    )
    fig.update_layout(
        title=f"{method}",
        height=400,
        width=700,
        showlegend=False,
        font=dict(family="CMU Serif", size=14),
    )

    # Options for the figure's image-export button ('format' is one of
    # png, svg, jpeg, webp).
    image_button_options = {'format': 'png', 'height': 600, 'width': 900, 'scale': 6}
    fig.show(config={'toImageButtonOptions': image_button_options})

    pio.write_image(fig, f"images/{name}.png", scale=6, width=900, height=500)

In [34]:
# 2x2 grid of panels titled with the entries of MY_METHODS, with shared x and y axes.
fig = make_subplots(
    rows=2,
    cols=2,
    shared_xaxes=True,
    shared_yaxes=True,
    horizontal_spacing=0.1,
    vertical_spacing=0.15,
    subplot_titles=MY_METHODS,
)

In [40]:
# Draw each method's similarity curve for one food into the 2x2 grid held by
# `fig`, then show the figure and save it.
#
# make_subplots assigns subplot_titles left-to-right, top-to-bottom, so the
# panels are filled in the same row-major order. Filling column by column put
# the second and third methods' curves under each other's titles.
#
# Re-running this cell without re-creating `fig` adds the traces a second time.
food = 'pepper'
# The table does not depend on the method, so it is read once.
target_df = pd.read_csv(f"results/100_{food}_pairings.csv", index_col=None)
for position, method in enumerate(MY_METHODS):
    row, col = position // 2 + 1, position % 2 + 1
    similarities = target_df[f"{method} Similarity"].values.tolist()
    indexes = list(range(0, len(similarities)))
    # add_trace(row=, col=) replaces the deprecated append_trace.
    fig.add_trace(
        go.Scatter(x=indexes, y=similarities, mode='lines'),
        row=row, col=col,
    )
    # Axis titles only on the outer edge: bottom row for x, left column for y.
    if row == 2:
        fig.update_xaxes(title_text='entry', row=row, col=col)
    if col == 1:
        fig.update_yaxes(title_text='similarity score', row=row, col=col, range=[0,1])

# Single layout call; width=900 is the value that took effect when the two
# separate calls set 800 and then 900.
fig.update_layout(
    font=dict(
        family="CMU Serif",
        size=14,
    ),
    template='ggplot2',
    height=600,
    width=900,
    showlegend=False,
    margin=dict(l=20, r=20, t=20, b=20),
)
fig.show()
pio.write_image(fig, "images/scores.png", scale=6, width=800, height=600)

In [12]:
# Pairwise agreement between methods: intersection-over-union of the pairing
# sets two methods propose for a food, averaged over all foods.
iou_df = pd.DataFrame(columns=METHODS, index=METHODS)

# Each food's table is the same for every method pair, so read it once.
pairings_by_food = {
    food: pd.read_csv(f"results_fin/{food}.txt", index_col=None) for food in FOODS
}

for i in METHODS:
    for j in METHODS:
        iou = 0
        for food in FOODS:
            target_df = pairings_by_food[food]
            # Drop missing entries (as the validation cell does) so that NaN
            # is not counted as a pairing shared by two methods.
            set1 = set(target_df[i].dropna().tolist())
            set2 = set(target_df[j].dropna().tolist())
            union = set1 | set2
            # Two empty sets contribute 0 instead of dividing by zero.
            if union:
                iou = iou + len(set1 & set2) / len(union)
        # Average over the foods actually used rather than a hard-coded 4.
        iou_df.loc[i, j] = iou / len(FOODS)

In [13]:
# Show the method-by-method table of intersection-over-union values
# computed in the preceding cell.
iou_df

Unnamed: 0,Co-occurrences,Nearest Neighbors,Panther,node2vec,FlavorGraph,FlavorDB
Co-occurrences,1.0,0.162393,0.389194,0.221895,0.117325,0.225658
Nearest Neighbors,0.162393,1.0,0.040936,0.158991,0.088816,0.054094
Panther,0.389194,0.040936,1.0,0.201389,0.026316,0.222222
node2vec,0.221895,0.158991,0.201389,1.0,0.103436,0.112831
FlavorGraph,0.117325,0.088816,0.026316,0.103436,1.0,0.116594
FlavorDB,0.225658,0.054094,0.222222,0.112831,0.116594,1.0
