In [1]:
import numpy as np
import pandas as pd

In [2]:
# Load the cleaned ulam (dish) dataset and show its (rows, columns) dimensions.
ulam_df = pd.read_csv(filepath_or_buffer="ulam_cleaned.csv")
ulam_df.shape

(117, 7)

In [3]:
# Preview the first five recipes to get a feel for the columns.
ulam_df.head(n=5)

Unnamed: 0,ulam_titles,ingredients,ulam_descriptions,ulam_instructions,ulam_credits,ulam_instruction_link,num_ingredients
0,talunan,chicken|vinegar|ginger|garlic|fish sauce|black...,Talunan is a unique Filipino dish that is usua...,1. Marinate chicken in 2 tablespoons of Datu P...,NutriAsia,https://nutriasia.com/recipes/talunang-adobong...,8
1,filipino spaghetti,noodles|beef|hot dog|banana ketchup|sugar,Filipino spaghetti is a dish that has slowly b...,"1. In a pot over medium heat, cook spaghetti i...",Lalaine Manalo\r,https://www.kawalingpinoy.com/filipino-style-s...,5
2,tokneneng,eggs|annatto powder|flour|salt|black pepper,Tokneneng is a traditional Filipino dish that'...,1. Put the cornstarch in a container and dredg...,Vanjo Merano,https://panlasangpinoy.com/tokneneng-filipino-...,5
3,bukayo,coconut|sugar,The sweet bukayo is a traditional Filipino del...,"1. In a large saucepan, combine Muscovado suga...",Ed Joven,https://www.pinoyrecipe.net/bukayo-recipe/,2
4,papaitan,offal|onion|ginger|garlic|black pepper|salt|tl...,The rich and flavorful papaitan is a popular F...,1.\tHeat a cooking pot and pour-in 4 cups of w...,Vanjo Merano,https://panlasangpinoy.com/papaitan-recip/,7


In [4]:
from sklearn.feature_extraction.text import CountVectorizer 

# Build a bag-of-words over the pipe-delimited ingredient strings: every
# "|"-separated token is treated as one vocabulary term (no further
# tokenisation, lower-casing or stripping is applied by the analyzer).

cv = CountVectorizer(analyzer=lambda text: text.split('|'))
cv.fit(ulam_df['ingredients'])
print(f"Vocabulary size: {len(cv.vocabulary_)}")

# Encode every recipe as a row of ingredient counts and preview it as a
# dataframe indexed by dish title.

bow = cv.transform(ulam_df['ingredients'])
bow_preview = pd.DataFrame(
    bow.toarray(),
    index=ulam_df['ulam_titles'],
    columns=cv.get_feature_names_out(),
)
bow_preview.head()

Vocabulary size: 122


Unnamed: 0_level_0,annatto powder,baking powder,banana ketchup,bananas,bangus,bay leaf,beef,bell pepper,bitter melon,black pepper,...,tlaola serrano chili pepper,tocino,tofu,tomato,tomato sauce,tuna,vanilla,vinegar,water spinach,yam
ulam_titles,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
talunan,0,0,0,0,0,1,0,0,0,1,...,0,0,0,0,0,0,0,1,0,0
filipino spaghetti,0,0,1,0,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
tokneneng,1,0,0,0,0,0,0,0,0,1,...,0,0,0,0,0,0,0,0,0,0
bukayo,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
papaitan,0,0,0,0,0,0,0,0,0,1,...,1,0,0,0,0,0,0,0,0,0


In [5]:
from sklearn.feature_extraction.text import TfidfTransformer

# Re-weight the BoW counts with TfidfTransformer.
# NOTE: use_idf=False switches the IDF term off, so despite the "tfidf"
# naming each row is just the L2-normalised term-frequency vector.

tfidf = TfidfTransformer(use_idf=False)
tfidf.fit(bow)
tfidf_matrix = tfidf.transform(bow)
print(f'tfidf_matrix.shape: {tfidf_matrix.shape}')

tfidf_matrix.shape: (117, 122)


In [6]:
from scipy import sparse

# apply larger weights to these ingredients (MAIN)

common_main_ingredients = (
    'bangus', 'beef', 'chicken', 'goat', 'offal', 'oxtail',
    'pork', 'sardines', 'seafood', 'squid', 'tuna',
    'tapa (philippines)', 'hot dog', 'tocino', 'spam'      # silog main variants
)


# Resolve each main ingredient to its column index through the fitted
# vocabulary. Names that are absent from the vocabulary are reported and
# skipped instead of failing with an opaque IndexError.
ingredients = cv.get_feature_names_out()
missing_main_ingredients = [
    name for name in common_main_ingredients if name not in cv.vocabulary_
]
if missing_main_ingredients:
    print("Main ingredients not in vocabulary (skipped): {}".format(
        ", ".join(missing_main_ingredients)))
keyword_list = [
    int(cv.vocabulary_[name])
    for name in common_main_ingredients
    if name in cv.vocabulary_
]

# customize weights to main ingredients

MULTIPLIER = 1.5
# Start from a fresh transform of `bow` rather than from `tfidf_matrix`, so
# re-running this cell does not compound the multiplier on already boosted
# values.
my_matrix = tfidf.transform(bow).toarray()
if keyword_list:
    # Scaling whole columns is equivalent to scaling only the positive
    # entries: the remaining entries are zero and stay zero.
    my_matrix[:, keyword_list] *= MULTIPLIER


# tf-idf matrix representation final
# (only these stored recipe vectors are boosted; query vectors produced by
# `tfidf.transform` elsewhere in the notebook are not)
tfidf_matrix = sparse.csr_matrix(my_matrix)
pd.DataFrame(tfidf_matrix.toarray(), columns=cv.get_feature_names_out(), index=ulam_df['ulam_titles'])

Unnamed: 0_level_0,annatto powder,baking powder,banana ketchup,bananas,bangus,bay leaf,beef,bell pepper,bitter melon,black pepper,...,tlaola serrano chili pepper,tocino,tofu,tomato,tomato sauce,tuna,vanilla,vinegar,water spinach,yam
ulam_titles,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
talunan,0.000000,0.0,0.000000,0.0,0.0,0.353553,0.00000,0.0,0.0,0.353553,...,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.353553,0.0,0.0
filipino spaghetti,0.000000,0.0,0.447214,0.0,0.0,0.000000,0.67082,0.0,0.0,0.000000,...,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0
tokneneng,0.447214,0.0,0.000000,0.0,0.0,0.000000,0.00000,0.0,0.0,0.447214,...,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0
bukayo,0.000000,0.0,0.000000,0.0,0.0,0.000000,0.00000,0.0,0.0,0.000000,...,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0
papaitan,0.000000,0.0,0.000000,0.0,0.0,0.000000,0.00000,0.0,0.0,0.377964,...,0.377964,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
chicken adobo,0.000000,0.0,0.000000,0.0,0.0,0.333333,0.00000,0.0,0.0,0.333333,...,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.333333,0.0,0.0
pork adobo,0.000000,0.0,0.000000,0.0,0.0,0.333333,0.00000,0.0,0.0,0.333333,...,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.333333,0.0,0.0
squid adobo,0.000000,0.0,0.000000,0.0,0.0,0.333333,0.00000,0.0,0.0,0.333333,...,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.333333,0.0,0.0
offal sisig,0.000000,0.0,0.000000,0.0,0.0,0.000000,0.00000,0.0,0.0,0.353553,...,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.353553,0.0,0.0


In [7]:
from sklearn.neighbors import NearestNeighbors

# Index the weighted recipe vectors for cosine-distance neighbour search;
# the trailing expression displays the fitted estimator.
model = NearestNeighbors(metric="cosine").fit(tfidf_matrix)
model

NearestNeighbors(metric='cosine')

In [8]:
# Sample query: vectorise a pipe-delimited ingredient string exactly like the
# training data, then look up its 10 nearest recipes.

query = "chicken|soy sauce|garlic|bay leaf"
query_bow = cv.transform([query])
query_tfidf = tfidf.transform(query_bow)
distance, indices = model.kneighbors(query_tfidf, n_neighbors=10)
ulam_df.iloc[indices.ravel()]

Unnamed: 0,ulam_titles,ingredients,ulam_descriptions,ulam_instructions,ulam_credits,ulam_instruction_link,num_ingredients
112,chicken adobo,chicken|vinegar|bay leaf|garlic|salt|oil|black...,Adobo is the closest thing to a national dish ...,1. Slice the garlic into thin pieces. Set asid...,Vanjo Merano,https://panlasangpinoy.com/pinakamasarap-na-po...,9
89,chicken isaw,chicken|ketchup|vinegar|garlic|onion|hot peppe...,Isaw is a popular Filipino street food dish co...,1. Pour the water in a cooking pot and bring t...,Vanjo Merano,https://panlasangpinoy.com/filipino-street-foo...,7
0,talunan,chicken|vinegar|ginger|garlic|fish sauce|black...,Talunan is a unique Filipino dish that is usua...,1. Marinate chicken in 2 tablespoons of Datu P...,NutriAsia,https://nutriasia.com/recipes/talunang-adobong...,8
101,chicken silog,chicken|eggs|garlic|rice,Silog refers to a group of Filipino dishes tha...,1. Pour water in a small cooking pot. Let boil...,Vanjo Merano,https://panlasangpinoy.com/hotsilog-hotdog-sin...,4
53,inasal na manok,chicken|vinegar|lemongrass|garlic|ginger,Inasal na manok is a unique Filipino grilled c...,1. Chop the lemongrass and crush. Combine all ...,Vanjo Merano,https://panlasangpinoy.com/inasal/,5
114,squid adobo,squid|vinegar|bay leaf|garlic|salt|oil|black p...,Adobo is the closest thing to a national dish ...,1. Slice the garlic into thin pieces. Set asid...,Vanjo Merano,https://panlasangpinoy.com/pinakamasarap-na-po...,9
113,pork adobo,pork|vinegar|bay leaf|garlic|salt|oil|black pe...,Adobo is the closest thing to a national dish ...,1. Slice the garlic into thin pieces. Set asid...,Vanjo Merano,https://panlasangpinoy.com/pinakamasarap-na-po...,9
92,chicken tinola,chicken|ginger|tlaola serrano chili pepper|fis...,The term tinola refers to a vast group of hear...,"1. In a pot over medium heat, heat oil. Add on...",Lalaine Manalo,https://www.kawalingpinoy.com/tinolang-palaka/,6
46,tinolang manok,chicken|papaya|tlaola serrano chili pepper|gin...,Tinolang manok or chicken tinola is a nourishi...,"1. In a pot over medium heat, heat oil. Add on...",Lalaine Manalo,https://www.kawalingpinoy.com/tinolang-manok-c...,6
34,lugaw,rice|chicken|fish sauce|ginger|garlic|scallion...,Lugaw is a Filipino rice porridge that is trad...,"1.\tIn a pot, heat the cooking oil then saute ...",Vanjo Merano,https://panlasangpinoy.com/arroz-caldo/,7


In [9]:
# List only the dish names of the neighbours, in the order kneighbors
# returned them.
x = ulam_df.iloc[indices.flatten()]
for title in x['ulam_titles']:
    print(title)

chicken adobo
chicken isaw
talunan
chicken silog
inasal na manok
squid adobo
pork adobo
chicken tinola
tinolang manok
lugaw


In [12]:

def get_recipe_recommendations(ingredients, n_neighbors=20):

    """Get similar recipes based on given ingredients.

    Parameters
    ----------
    ingredients : str
        Pipe-delimited ingredient names, e.g. ``"chicken|garlic|bay leaf"``.
    n_neighbors : int, default 20
        Maximum number of recipes to return. Clamped to the number of
        recipes the neighbour model was fitted on, so a small dataset does
        not make ``kneighbors`` raise.

    Returns
    -------
    pandas.DataFrame
        Rows of ``ulam_df`` in the order returned by ``model.kneighbors``.
        Empty (same columns) when none of the given ingredients is part of
        the fitted vocabulary.
    """

    ingredients_bow = cv.transform([ingredients])
    if ingredients_bow.nnz == 0:
        # No known ingredient: the query vector is all zeros, so a cosine
        # neighbour lookup would not reflect any real similarity.
        return ulam_df.iloc[0:0]

    ingredients_tfidf = tfidf.transform(ingredients_bow)
    n_neighbors = min(n_neighbors, model.n_samples_fit_)
    indices = model.kneighbors(ingredients_tfidf, n_neighbors=n_neighbors, return_distance=False)
    return ulam_df.iloc[indices.flatten()]


def filter_recommendations(ingredient_arr, allergen_arr):

    """Tell whether a recipe is free of the user's allergens.

    Parameters
    ----------
    ingredient_arr : str
        Pipe-delimited ingredient names of one recipe.
    allergen_arr : iterable of str, or str
        Allergen names to avoid. A single string is treated as one allergen.

    Returns
    -------
    bool
        ``True`` when the recipe shares no ingredient with the allergens
        (keep it), ``False`` otherwise (filter it out).
    """

    if isinstance(allergen_arr, str):
        allergen_arr = [allergen_arr]

    # Set disjointness is correct even when either side contains duplicates
    # or the allergen list is empty, which the previous
    # np.intersect1d(..., assume_unique=True) call did not guarantee.
    recipe_ingredients = set(ingredient_arr.split("|"))
    return recipe_ingredients.isdisjoint(allergen_arr)
    

# sample user interface
# Both prompts take comma-separated values. Tokens are trimmed, lower-cased
# and blank tokens are dropped, so an empty answer yields an empty list
# rather than [''].

my_ingredients = [item.strip().lower() for item in input("Enter Ingredients: ").split(',')]
my_allergens = [item.strip().lower() for item in input("Enter Allergen(s): ").split(',')]
my_ingredients = [ingredient for ingredient in my_ingredients if ingredient]
my_allergens = [allergen for allergen in my_allergens if allergen]
my_ingredients = "|".join(my_ingredients)
recommendations = get_recipe_recommendations(my_ingredients)


# filter the recommendations based on user allergens

if my_allergens:
    mask = recommendations.ingredients.apply(filter_recommendations, allergen_arr=my_allergens)
    # astype(bool) keeps this a boolean mask even when there are no rows.
    recommendations = recommendations.loc[mask.astype(bool)]


# check top recommendations

print("\nTop Recommendations:")
print("=" * 50)
if recommendations.empty:
    print("No matching recipes found.")
# enumerate(start=1) numbers every row; the previous zip against
# range(1, len(...)) silently dropped the last recommendation.
for n, (_, ulam) in enumerate(recommendations.iterrows(), start=1):
    print(f"{n}. {ulam.ulam_titles.title()}")
    # Some credits carry a trailing carriage return from the source data.
    print(f"Ulam credits: {ulam.ulam_credits.strip().title()}")
    print("")
    print("Ingredients: ", ulam.ingredients.replace("|", ","))
    print()
    print("Description: \n\n", ulam.ulam_descriptions.strip())
    print()
    print("Instructions: \n\n", ulam.ulam_instructions.lstrip())
    print("=" * 50)

Enter Ingredients: chicken, soy sauce, garlic, bay leaf
Enter Allergen(s): 

Top Recommendations:
1. Chicken Adobo
Ulam credits: Vanjo Merano

Ingredients:  chicken,vinegar,bay leaf,garlic,salt,oil,black pepper,sugar,soy sauce

Description: 

 Adobo is the closest thing to a national dish in the Philippines, consisting of seared and browned chunks of meat, seafood, fruit, or vegetables mixed with white vinegar or soy sauce (or both), bay leaves, garlic, salt, sugar, oil, and black pepper.

The combination of these ingredients is left to simmer over low heat, resulting in succulent, juicy, and tender ingredients covered in thick, rich, and savory sauce. Adobo got its name from the Spanish word adobar, meaning marinade or pickling sauce.

Instructions: 

 1. Slice the garlic into thin pieces. Set aside.
2. Start making the garlic chips by heating the cooking oil in pan. Add all the garlic. Continue cooking using low heat while stirring occasionally until the garlic turns golden brown and