# DME project - Best match
This notebook contains the best-match function, which suggests ingredients to pair together based on how often they co-occur in the recipes of each cuisine. It uses the `recipes.csv` data file from Bellosi (2011).

In [1]:
# Install required packages
!pip install -q -r requirements.txt

In [2]:
# Display plots inline
%matplotlib inline

# Data libraries

import pandas as pd
import numpy as np
import geopandas as gp

# Plotting libraries
import matplotlib.pyplot as plt
import matplotlib.lines as mlines
import seaborn as sns

# Plotting defaults: figure size (in inches) and resolution applied to every figure
plt.rcParams.update({
    'figure.figsize': (8, 5),
    'figure.dpi': 80,
})

# Misc libraries
import os
import time
from datetime import date

# sklearn modules
from sklearn.model_selection import train_test_split

# Helper script
import get_recipe as gr

## Import datasets

In [11]:
# Get datasets through the project helper `get_recipe`.
# `recipes_df` carries a 'cuisine' column (used below for stratification and filtering) and
# `cuisines_df` a 'cuisine_name' column.
# NOTE(review): the remaining columns of `recipes_df` are presumably one indicator per ingredient - confirm in get_recipe.py

recipes_df, cuisines_df = gr.get_data()

In [16]:
# Cuisine names as a plain list; later cells use the position of a name in this list
# as the value to look up in the 'cuisine' column of the recipes.
cuisine_names = cuisines_df["cuisine_name"].to_list()

In [5]:
# Split data into train / validation / test, stratified by cuisine.
# Step 1: hold out 30% of the recipes; the remaining 70% is the training set.
train, holdout = train_test_split(
    recipes_df,
    test_size=0.3,
    stratify=recipes_df['cuisine'],
    random_state=42,
)
# Step 2: one third of the held-out recipes becomes the test set, the rest is validation.
val, test = train_test_split(
    holdout,
    test_size=(1/3),
    stratify=holdout['cuisine'],
    random_state=42,
)

## Best match function

In [13]:
df_to_use = train # choice of the dataframe to use, full data or train set


def _cooccurrence_matrix(ingredients):
  """Build the normalised co-occurrence table for the recipes of one cuisine.

  Parameters
  ----------
  ingredients : pd.DataFrame
      One row per recipe and one numeric column per ingredient; a value of 1
      marks the ingredient as used in the recipe.
      NOTE(review): presumably every column is a 0/1 indicator - confirm.

  Returns
  -------
  pd.DataFrame
      One row per ingredient (positional RangeIndex, same order as the columns),
      one column per ingredient plus a trailing 'occurrence' column.
      Entry (i, j) is the column-j total over the recipes that use ingredient i,
      divided by the number of such recipes; the diagonal is 0.
      'occurrence' is the number of recipes that use ingredient i.
      Ingredients that are never used keep an all-zero row.
  """
  values = ingredients.to_numpy(dtype=float)
  uses = values == 1                      # recipe x ingredient: True where the ingredient is used
  counts = uses.sum(axis=0)               # number of recipes using each ingredient
  # Missing values contribute 0 to the totals, as they do in a pandas column sum
  filled = np.where(np.isnan(values), 0.0, values)
  # Row i = column totals restricted to the recipes that use ingredient i
  totals = uses.T.astype(float) @ filled
  np.fill_diagonal(totals, 0)             # an ingredient is not a match for itself
  # Normalise each row by its recipe count; rows of unused ingredients stay at 0
  rates = np.divide(totals, counts[:, None],
                    out=np.zeros_like(totals), where=counts[:, None] > 0)
  # Labels come from the ingredient columns themselves, so they stay aligned with the
  # rows wherever the 'cuisine' column sat in the source dataframe
  return pd.DataFrame(np.column_stack([rates, counts]),
                      columns=[*ingredients.columns, 'occurrence'])


occurrence_dict = {}
for c, names in enumerate(cuisine_names): # for each cuisine we create a matrix of occurrence
  # NOTE(review): assumes the 'cuisine' column stores the position of the cuisine in `cuisine_names` - confirm
  only_one_cuisine = df_to_use[df_to_use['cuisine']==c].drop('cuisine', axis=1)
  occurrence_dict[names] = _cooccurrence_matrix(only_one_cuisine)

In [14]:
# For each cuisine and each ingredient, keep the most common match(es).
#
# best_match[cuisine][ingredient] is either
#   -1                      when no other ingredient ever appears with it, or
#   {"matches":  labels of the ingredient(s) with the highest co-occurrence rate (ties kept),
#    "power":    that highest rate, as a float,
#    "relative": number of recipes of the cuisine that use the ingredient}
from collections import defaultdict

best_match = defaultdict(dict)

for names in cuisine_names: #for each cuisine
  corr_matrix = occurrence_dict[names]
  n_ingredients = corr_matrix.shape[0]
  for i in range(n_ingredients): #for each ingredient
    ingredient_matches = corr_matrix.columns[i]
    # Only the ingredient columns; the trailing 'occurrence' column is excluded
    rates = corr_matrix.iloc[i, :n_ingredients]
    my_max = rates.max()
    if my_max==0:
      best_match[names][ingredient_matches] = -1
    else:
      matches = rates.index[rates==my_max].tolist()
      best_match[names][ingredient_matches] = {
          "matches": matches,
          # The matrix stores rates (totals divided by the recipe count), so int()
          # would truncate almost every value to 0; keep the rate itself
          "power": float(my_max),
          # Scalar lookup: int() on a one-element Series is deprecated in recent pandas
          "relative": int(corr_matrix['occurrence'].iloc[i]),
      }

In [15]:
# Ingredients listed here are excluded from the suggested matches
list_of_common_ingredients = ['salt']
# Minimum number of recipes an ingredient must appear in to be reported
threshold = 11

example_matches = defaultdict(dict)

for cuisine in cuisine_names:
  for ingredient, info in best_match[cuisine].items():
    # -1 marks an ingredient without any match
    if info == -1:
      continue
    # Skip ingredients seen in too few recipes
    if info['relative'] < threshold:
      continue
    suggestions = [match for match in info['matches'] if match not in list_of_common_ingredients]
    if suggestions:
      example_matches[cuisine][ingredient] = suggestions

# Show the most common matches of each cuisine
for cuisine in cuisine_names:
  print(cuisine)
  print(example_matches[cuisine])

Chinese
{'bean_sprouts': ['soy_sauce'], 'beef': ['soy_sauce'], 'broccoli': ['soy_sauce'], 'broth': ['soy_sauce'], 'brown_sugar': ['soy_sauce'], 'canola_oil': ['soy_sauce'], 'carrot': ['soy_sauce'], 'cashew_nut': ['soy_sauce'], 'celery': ['soy_sauce'], 'chicken': ['soy_sauce'], 'chicken_stock': ['green_onion'], 'chile_pepper': ['soy_sauce'], 'chili_sauce': ['garlic'], 'cilantro': ['garlic'], 'cornstarch': ['soy_sauce'], 'egg': ['soy_sauce'], 'five_spice_powder': ['soy_sauce'], 'flour': ['soy_sauce'], 'garlic': ['soy_sauce'], 'garlic_powder': ['soy_sauce'], 'ginger': ['soy_sauce'], 'green_onion': ['soy_sauce'], 'hoisin_sauce': ['soy_sauce'], 'honey': ['soy_sauce'], 'ketchup': ['soy_sauce'], 'lemon_juice': ['soy_sauce'], 'mushroom': ['soy_sauce'], 'olive_oil': ['soy_sauce'], 'onion': ['soy_sauce'], 'orange_juice': ['soy_sauce'], 'oyster_sauce': ['soy_sauce'], 'pea': ['soy_sauce'], 'peanut_oil': ['soy_sauce'], 'pepper': ['soy_sauce'], 'pineapple': ['soy_sauce'], 'pork': ['soy_sauce'], 'red