# Final Modeling

In [1]:
import numpy as np
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import OneHotEncoder
from scipy.sparse import hstack
import re
import ast
import joblib

In [3]:
# Load data
def load_data(path):
    """Load a CSV file into a pandas DataFrame.

    Parameters
    ----------
    path : str or file-like
        Location of the CSV data; forwarded unchanged to ``pd.read_csv``.

    Returns
    -------
    pandas.DataFrame
        The parsed contents of the file.
    """
    return pd.read_csv(path)

In [5]:
# Read the filtered recipe dataset from disk through the load_data helper.
recipe_df_filtered = load_data('recipe_df_filtered_2.csv')

In [7]:
# Count the missing values in every column of the loaded frame.
recipe_df_filtered.isnull().sum()

title                       0
link                        0
source                      0
serving_size                0
ingredient_counter          0
Meal_Type                   0
ingredient_list             9
preparation_instructions    0
ingredient_measures         0
dtype: int64

In [9]:
# Discard the rows that have no ingredient list. Rebinding the name yields the
# same frame as an in-place drop and keeps the cell easy to chain.
recipe_df_filtered = recipe_df_filtered.dropna(subset=['ingredient_list'])

In [11]:
# Re-count missing values per column to confirm the drop removed them.
recipe_df_filtered.isnull().sum()

title                       0
link                        0
source                      0
serving_size                0
ingredient_counter          0
Meal_Type                   0
ingredient_list             0
preparation_instructions    0
ingredient_measures         0
dtype: int64

In [64]:
# Display the (rows, columns) shape of the frame.
recipe_df_filtered.shape

(60808, 9)

In [60]:
# Write the frame to a comma-separated file with '\n' line endings and without
# the index column.
recipe_df_filtered.to_csv(
    'recipe_df_filtered_3.csv',
    sep=',',
    lineterminator='\n',
    index=False,
)

In [27]:
# Function to vectorize ingredients list

def vectorizer(sampled_data, column):
    """Fit a bag-of-words model on one text column.

    Parameters
    ----------
    sampled_data : pandas.DataFrame or mapping
        Container holding the text; only ``sampled_data[column]`` is read.
    column : str
        Key of the column whose strings are tokenised and counted.

    Returns
    -------
    tuple
        ``(matrix, fitted_vectorizer)``: the result of ``fit_transform`` on
        the column, and the ``CountVectorizer`` instance that produced it.
    """
    # A distinct local name keeps the function's own name unshadowed.
    count_vectorizer = CountVectorizer()
    term_counts = count_vectorizer.fit_transform(sampled_data[column])
    return term_counts, count_vectorizer

In [27]:
# NOTE(review): within this notebook, `meal_type_selected` is only assigned in
# a later cell (alongside `user_input`), so running the cells top to bottom on
# a fresh kernel raises NameError here. The displayed value comes from
# out-of-order execution.
meal_type_selected

'dinner'

In [29]:
# Fit the bag-of-words model on the ingredient lists.
# The fitted model is bound to `vectorizer_instance` (the name the later
# joblib.dump cell saves) rather than to `vectorizer`, which would overwrite
# the helper function and make this cell fail when re-run.
ingredients_matrix, vectorizer_instance = vectorizer(recipe_df_filtered, 'ingredient_list')

In [28]:
# Save the sparse ingredient count matrix to disk with joblib.
# joblib.dump returns the list of file names written, which is the cell output.
joblib.dump(ingredients_matrix, 'ingredients_matrix.pkl')

['ingredients_matrix.pkl']

In [32]:
# Save the fitted vectorizer for later use.
# NOTE(review): `vectorizer_instance` must already be bound by the cell that
# calls vectorizer(...); confirm that before running this cell.
joblib.dump(vectorizer_instance, 'vectorizer.pkl')

['vectorizer.pkl']

In [63]:
# Encode meal type: one-hot encode the Meal_Type column into a sparse
# indicator matrix (one column per distinct meal type).
meal_type_encoder = OneHotEncoder(sparse_output=True)
meal_type_matrix = meal_type_encoder.fit_transform(
    recipe_df_filtered.loc[:, ['Meal_Type']]
)

In [13]:
def create_combined_matrix(data):
    """Build the combined ingredient + meal-type feature matrix.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame with an ``'ingredient_list'`` column of strings and a
        ``'Meal_Type'`` column of category labels.

    Returns
    -------
    tuple
        ``(combined_matrix, vectorizer, encoder)`` where ``combined_matrix``
        is a CSR sparse matrix whose leading columns are the ingredient token
        counts and whose trailing columns are the one-hot meal-type
        indicators, ``vectorizer`` is the fitted ``CountVectorizer`` and
        ``encoder`` is the fitted ``OneHotEncoder``.
    """
    # Vectorize the ingredients. Read from the `data` argument, not from a
    # notebook-level global, so the function works on any frame passed in.
    ingredient_vectorizer = CountVectorizer()
    ingredients_matrix = ingredient_vectorizer.fit_transform(data['ingredient_list'])

    # One-hot encode meal types (sparse output keeps memory use low)
    encoder = OneHotEncoder(sparse_output=True)
    meal_type_matrix = encoder.fit_transform(data[['Meal_Type']])

    # Combine both matrices column-wise. CSR is requested explicitly because
    # hstack's default output format is not guaranteed, and callers select
    # rows from the result.
    combined_matrix = hstack([ingredients_matrix, meal_type_matrix], format='csr')

    return combined_matrix, ingredient_vectorizer, encoder

In [15]:
# Renumber the rows 0..n-1 (the old index is discarded) so that index labels
# coincide with row positions.
recipe_df_filtered = recipe_df_filtered.reset_index(drop=True)

In [17]:
# Build the combined feature matrix together with its fitted vectorizer and
# encoder.
# NOTE(review): this rebinds the name `vectorizer`, which earlier in the
# notebook is the helper function of the same name; that function is no longer
# callable after this cell runs.
combined_matrix, vectorizer, encoder = create_combined_matrix(recipe_df_filtered)

In [19]:
# Save the three artifacts returned by create_combined_matrix to disk.
# Only the return value of the last dump call is shown as the cell output.
joblib.dump(combined_matrix, 'combined_matrix.pkl')
joblib.dump(vectorizer, 'ingredients_vectorizer.pkl')
joblib.dump(encoder, 'meal_type_encoder.pkl')

['meal_type_encoder.pkl']

In [21]:
# Test: reload the saved artifacts and run a sample recommendation query

In [23]:
# Reload the combined feature matrix from the file written above.
combined_matrix = joblib.load('combined_matrix.pkl')

In [25]:
# Reload the fitted ingredient vectorizer from disk.
loaded_vectorizer = joblib.load('ingredients_vectorizer.pkl')

In [27]:
# Reload the fitted meal-type encoder from disk.
loaded_encoder = joblib.load('meal_type_encoder.pkl')

In [29]:
# Sample query: a one-element list holding the comma-separated ingredients
# (the vectorizer's transform is called on this list), plus the meal type used
# to filter recipes.
user_input = ['beef, carrots, onion, potato']
meal_type_selected = 'dinner'

In [31]:
# Encode the query ingredients with the already-fitted vectorizer
# (transform only, no refitting).
user_ingredients_vector = loaded_vectorizer.transform(user_input)

In [33]:
# One-hot encode the selected meal type with the already-fitted encoder.
# The encoder was fitted on a DataFrame column named 'Meal_Type', so the query
# is passed as a one-row frame with that same column name; a bare nested list
# carries no feature names and makes scikit-learn emit a feature-name warning.
meal_type_vector = loaded_encoder.transform(
    pd.DataFrame({'Meal_Type': [meal_type_selected]})
)



In [35]:
# Concatenate the query's ingredient and meal-type vectors column-wise, in the
# same order (ingredients first, then meal type) used by create_combined_matrix.
user_vector = hstack([user_ingredients_vector, meal_type_vector])

In [37]:
# Keep only the recipes whose Meal_Type equals the selected meal type.
filtered_recipes = recipe_df_filtered.loc[
    recipe_df_filtered['Meal_Type'].eq(meal_type_selected)
]

In [39]:
# Select the feature-matrix rows that belong to the filtered recipes.
# Matrix rows are positional, so the frame's index labels are first translated
# into row positions; this stays correct even if the frame's index is not a
# clean 0..n-1 range. tocsr() guarantees a format that supports row indexing.
row_positions = recipe_df_filtered.index.get_indexer(filtered_recipes.index)
filtered_matrix = combined_matrix.tocsr()[row_positions]

In [41]:
# Cosine similarity between the query vector and every filtered recipe row.
# The result is indexed as similarities[0] in the next step.
similarities = cosine_similarity(user_vector, filtered_matrix)

In [43]:
# Positions of the five highest similarity scores, best match first: sort
# ascending, keep the last five, then reverse them.
ascending_order = np.argsort(similarities[0])
top_indices = ascending_order[-5:][::-1]

In [45]:
# top_indices are positions within the filtered frame, hence iloc (positional)
# rather than loc (label-based).
recommended_recipes = filtered_recipes.iloc[top_indices]

In [47]:
# Print the recommended recipe titles as a bulleted list, best match first.
for title in recommended_recipes['title']:
    print(f"- {title}")

- Tender Pot Roast
- Mother's Lentil Soup Recipe
- Lo-Cal, Low-Fat Hot Dog Stir-Fry
- Easy Pattie Dish
- Speedy Shepherd'S Pie


In [None]:
# Same as above but displays the dataframe with more information

In [53]:
# Pair every filtered recipe with its similarity score, exposing the recipe
# details under display-friendly column names.
sim_df = (
    filtered_recipes[
        [
            'title',
            'ingredient_list',
            'serving_size',
            'preparation_instructions',
            'ingredient_measures',
        ]
    ]
    .rename(
        columns={
            'title': 'recipe',
            'ingredient_list': 'ingredients list',
            'serving_size': 'serving size',
            'preparation_instructions': 'instructions',
            'ingredient_measures': 'measures',
        }
    )
    .assign(similarity=np.array(similarities).squeeze())
)

In [55]:
# Rank recipes by similarity, highest first, and keep the top five.
top_similar_recipes = sim_df.sort_values(by='similarity', ascending=False).head(5)

In [57]:
# Display the five best matches with their details and similarity scores.
top_similar_recipes

Unnamed: 0,recipe,ingredients list,serving size,instructions,measures,similarity
36624,Tender Pot Roast,"beef, onion, carrots, potato, onion",5,Moisten both sides of the roast with water and...,"3 lb. beef arm roast, Microshake onion and gar...",0.948683
53282,Mother's Lentil Soup Recipe,"beef, onion, dry lentils, potato, carrots",4,"Brown meat in a large saucepan., Add in salt, ...","1 1/2 pound beef (chuck), cut into sm. pcs, 1 ...",0.845154
41632,"Lo-Cal, Low-Fat Hot Dog Stir-Fry","potato, zucchini, carrots, onion",1,"Spray skillet or wok with Pam., Heat 1 teaspoo...","4 oz. baked potato, 1 c. zucchini, 1/2 c. carr...",0.8
22826,Easy Pattie Dish,"hamburger, potato, carrots, onion",6,"Put patties in foil., Top with potatoes, onion...","6 hamburger patties, 1 large potato, sliced, c...",0.8
29895,Speedy Shepherd'S Pie,"lean ground beef, onion, carrots, beef broth, ...",4,"In 10-inch nonstick skillet, cook beef over me...","12 oz. lean ground beef, 1/2 c. chopped onion,...",0.774597
