# Recipe Recommendation Web App

## 1. Cleaning Web-Scraped Data 

In [5]:
import ast

import numpy as np
import pandas as pd

In [6]:
# Read the recipes scraped from allrecipes.com and tally the missing
# values in every column so we know what needs cleaning.
df = pd.read_csv('recipes.csv')
na_counts = df.isnull().sum()

print(na_counts)

category              0
title                 2
total_time          560
prep_time           638
cook_time          2538
ingredients           0
serving_info       2819
nutrition_facts       0
rating                0
recipe_link           0
dtype: int64


In [7]:
# Keep only the recipes that actually list ingredients; the scraper wrote the
# placeholder text "No ingredients listed" for the others.
has_ingredients = df['ingredients'].ne("No ingredients listed")

# Renumber the surviving rows from 0 so the index has no gaps.
df_cleaned = df.loc[has_ingredients].reset_index(drop=True)

df_cleaned.head()

Unnamed: 0,category,title,total_time,prep_time,cook_time,ingredients,serving_info,nutrition_facts,rating,recipe_link
0,Buffalo Chicken Dips,Buffalo Chicken Cheese Ball,8 hrs 10 mins,10 mins,,"2 (8 ounce) packages cream cheese, at room tem...",Original recipe (1X) yields 12 servings,"{'Calories': '182', 'Fat': '16g', 'Carbs': '2g...",5.0,https://www.allrecipes.com/recipe/282972/buffa...
1,Bulgogi,Tri-Tip Bulgogi,4 hrs 40 mins,30 mins,10 mins,"1 pear, 1/2 onion, 1 thumb-sized piece fresh...",Original recipe (1X) yields 6 to 8 servings,"{'Calories': '700', 'Fat': '33g', 'Carbs': '33...",No rating available,https://www.allrecipes.com/tri-tip-bulgogi-rec...
2,Air Fryer Recipes,Air Fryer Turkey Breast,1 hr,10 mins,40 mins,"1 tablespoon finely chopped fresh rosemary, 1 ...",Original recipe (1X) yields 6 servings,"{'Calories': '263', 'Fat': '10g', 'Carbs': '0g...",4.9,https://www.allrecipes.com/recipe/275372/air-f...
3,Bruschetta,Hunt's® Easy Tomato Bruschetta,27 mins,15 mins,12 mins,"1 (18-inch) French baguette, cut into 3/4-inc...",Original recipe (1X) yields 8 servings,"{'Calories': '196', 'Fat': '8g', 'Carbs': '26g...",4.7,https://www.allrecipes.com/recipe/256046/hunts...
4,Broccoli Salads,Broccoli Crunch Salad,15 mins,15 mins,,"1 cup mayonnaise, 1/4 cup apple cider vinegar,...",Original recipe (1X) yields 8 servings,"{'Calories': '371', 'Fat': '29g', 'Carbs': '24...",5.0,https://www.allrecipes.com/broccoli-crunch-sal...


In [8]:
# Extract nutrition facts into separate columns.
# Each cell holds a dict written out as text (e.g. "{'Calories': '182', ...}").
# The text comes from a web scrape, so parse it with ast.literal_eval, which only
# accepts Python literals, instead of eval, which would execute any expression
# that happened to be embedded in the CSV.
nutrition_records = df_cleaned['nutrition_facts'].apply(ast.literal_eval)

# Build the frame in one step (one column per dict key) and keep the original
# row index so the concat below lines up row-for-row.
nutrition_df = pd.DataFrame(nutrition_records.tolist(), index=df_cleaned.index)
nutrition_df = nutrition_df.rename(columns=str.lower)

# Merge the extracted nutrition facts back into the main DataFrame
df_cleaned = pd.concat([df_cleaned, nutrition_df], axis=1).drop(columns=['nutrition_facts'])

df_cleaned.head()

Unnamed: 0,category,title,total_time,prep_time,cook_time,ingredients,serving_info,rating,recipe_link,calories,fat,carbs,protein
0,Buffalo Chicken Dips,Buffalo Chicken Cheese Ball,8 hrs 10 mins,10 mins,,"2 (8 ounce) packages cream cheese, at room tem...",Original recipe (1X) yields 12 servings,5.0,https://www.allrecipes.com/recipe/282972/buffa...,182,16g,2g,8g
1,Bulgogi,Tri-Tip Bulgogi,4 hrs 40 mins,30 mins,10 mins,"1 pear, 1/2 onion, 1 thumb-sized piece fresh...",Original recipe (1X) yields 6 to 8 servings,No rating available,https://www.allrecipes.com/tri-tip-bulgogi-rec...,700,33g,33g,66g
2,Air Fryer Recipes,Air Fryer Turkey Breast,1 hr,10 mins,40 mins,"1 tablespoon finely chopped fresh rosemary, 1 ...",Original recipe (1X) yields 6 servings,4.9,https://www.allrecipes.com/recipe/275372/air-f...,263,10g,0g,40g
3,Bruschetta,Hunt's® Easy Tomato Bruschetta,27 mins,15 mins,12 mins,"1 (18-inch) French baguette, cut into 3/4-inc...",Original recipe (1X) yields 8 servings,4.7,https://www.allrecipes.com/recipe/256046/hunts...,196,8g,26g,6g
4,Broccoli Salads,Broccoli Crunch Salad,15 mins,15 mins,,"1 cup mayonnaise, 1/4 cup apple cider vinegar,...",Original recipe (1X) yields 8 servings,5.0,https://www.allrecipes.com/broccoli-crunch-sal...,371,29g,24g,8g


In [9]:
# Pull the first number that follows "yields" out of the serving text.
# Rows with no serving text (NaN) or no match simply stay NaN.
servings_text = df_cleaned['serving_info'].str.extract(
    r'yields (\d+)(?: servings)?', expand=False
)

# Turn the captured digits into numbers; the column ends up float64 because
# it still contains NaN for the recipes without serving information.
df_cleaned['serving_info'] = pd.to_numeric(servings_text, errors='coerce')

df_cleaned.head()

Unnamed: 0,category,title,total_time,prep_time,cook_time,ingredients,serving_info,rating,recipe_link,calories,fat,carbs,protein
0,Buffalo Chicken Dips,Buffalo Chicken Cheese Ball,8 hrs 10 mins,10 mins,,"2 (8 ounce) packages cream cheese, at room tem...",12.0,5.0,https://www.allrecipes.com/recipe/282972/buffa...,182,16g,2g,8g
1,Bulgogi,Tri-Tip Bulgogi,4 hrs 40 mins,30 mins,10 mins,"1 pear, 1/2 onion, 1 thumb-sized piece fresh...",6.0,No rating available,https://www.allrecipes.com/tri-tip-bulgogi-rec...,700,33g,33g,66g
2,Air Fryer Recipes,Air Fryer Turkey Breast,1 hr,10 mins,40 mins,"1 tablespoon finely chopped fresh rosemary, 1 ...",6.0,4.9,https://www.allrecipes.com/recipe/275372/air-f...,263,10g,0g,40g
3,Bruschetta,Hunt's® Easy Tomato Bruschetta,27 mins,15 mins,12 mins,"1 (18-inch) French baguette, cut into 3/4-inc...",8.0,4.7,https://www.allrecipes.com/recipe/256046/hunts...,196,8g,26g,6g
4,Broccoli Salads,Broccoli Crunch Salad,15 mins,15 mins,,"1 cup mayonnaise, 1/4 cup apple cider vinegar,...",8.0,5.0,https://www.allrecipes.com/broccoli-crunch-sal...,371,29g,24g,8g


In [10]:
# Recipes without a rating carry the placeholder text "No rating available";
# blank those cells out (NaN) so they read as missing values.
is_unrated = df_cleaned['rating'] == "No rating available"
df_cleaned['rating'] = df_cleaned['rating'].mask(is_unrated, np.nan)

df_cleaned.head()

Unnamed: 0,category,title,total_time,prep_time,cook_time,ingredients,serving_info,rating,recipe_link,calories,fat,carbs,protein
0,Buffalo Chicken Dips,Buffalo Chicken Cheese Ball,8 hrs 10 mins,10 mins,,"2 (8 ounce) packages cream cheese, at room tem...",12.0,5.0,https://www.allrecipes.com/recipe/282972/buffa...,182,16g,2g,8g
1,Bulgogi,Tri-Tip Bulgogi,4 hrs 40 mins,30 mins,10 mins,"1 pear, 1/2 onion, 1 thumb-sized piece fresh...",6.0,,https://www.allrecipes.com/tri-tip-bulgogi-rec...,700,33g,33g,66g
2,Air Fryer Recipes,Air Fryer Turkey Breast,1 hr,10 mins,40 mins,"1 tablespoon finely chopped fresh rosemary, 1 ...",6.0,4.9,https://www.allrecipes.com/recipe/275372/air-f...,263,10g,0g,40g
3,Bruschetta,Hunt's® Easy Tomato Bruschetta,27 mins,15 mins,12 mins,"1 (18-inch) French baguette, cut into 3/4-inc...",8.0,4.7,https://www.allrecipes.com/recipe/256046/hunts...,196,8g,26g,6g
4,Broccoli Salads,Broccoli Crunch Salad,15 mins,15 mins,,"1 cup mayonnaise, 1/4 cup apple cider vinegar,...",8.0,5.0,https://www.allrecipes.com/broccoli-crunch-sal...,371,29g,24g,8g


In [11]:
from recipe_functions import convert_to_minutes

In [12]:
# Derive a numeric total_time_mins column by running convert_to_minutes
# over every value of the free-text total_time column.
df_cleaned = df_cleaned.assign(
    total_time_mins=df_cleaned['total_time'].apply(convert_to_minutes)
)

df_cleaned.head()

Unnamed: 0,category,title,total_time,prep_time,cook_time,ingredients,serving_info,rating,recipe_link,calories,fat,carbs,protein,total_time_mins
0,Buffalo Chicken Dips,Buffalo Chicken Cheese Ball,8 hrs 10 mins,10 mins,,"2 (8 ounce) packages cream cheese, at room tem...",12.0,5.0,https://www.allrecipes.com/recipe/282972/buffa...,182,16g,2g,8g,490.0
1,Bulgogi,Tri-Tip Bulgogi,4 hrs 40 mins,30 mins,10 mins,"1 pear, 1/2 onion, 1 thumb-sized piece fresh...",6.0,,https://www.allrecipes.com/tri-tip-bulgogi-rec...,700,33g,33g,66g,280.0
2,Air Fryer Recipes,Air Fryer Turkey Breast,1 hr,10 mins,40 mins,"1 tablespoon finely chopped fresh rosemary, 1 ...",6.0,4.9,https://www.allrecipes.com/recipe/275372/air-f...,263,10g,0g,40g,60.0
3,Bruschetta,Hunt's® Easy Tomato Bruschetta,27 mins,15 mins,12 mins,"1 (18-inch) French baguette, cut into 3/4-inc...",8.0,4.7,https://www.allrecipes.com/recipe/256046/hunts...,196,8g,26g,6g,27.0
4,Broccoli Salads,Broccoli Crunch Salad,15 mins,15 mins,,"1 cup mayonnaise, 1/4 cup apple cider vinegar,...",8.0,5.0,https://www.allrecipes.com/broccoli-crunch-sal...,371,29g,24g,8g,15.0


In [13]:
# The text-based time columns are no longer needed: sorting and filtering
# only use total_time_mins from here on.
text_time_columns = ['total_time', 'prep_time', 'cook_time']
df_cleaned = df_cleaned.drop(columns=text_time_columns)

df_cleaned.head()

Unnamed: 0,category,title,ingredients,serving_info,rating,recipe_link,calories,fat,carbs,protein,total_time_mins
0,Buffalo Chicken Dips,Buffalo Chicken Cheese Ball,"2 (8 ounce) packages cream cheese, at room tem...",12.0,5.0,https://www.allrecipes.com/recipe/282972/buffa...,182,16g,2g,8g,490.0
1,Bulgogi,Tri-Tip Bulgogi,"1 pear, 1/2 onion, 1 thumb-sized piece fresh...",6.0,,https://www.allrecipes.com/tri-tip-bulgogi-rec...,700,33g,33g,66g,280.0
2,Air Fryer Recipes,Air Fryer Turkey Breast,"1 tablespoon finely chopped fresh rosemary, 1 ...",6.0,4.9,https://www.allrecipes.com/recipe/275372/air-f...,263,10g,0g,40g,60.0
3,Bruschetta,Hunt's® Easy Tomato Bruschetta,"1 (18-inch) French baguette, cut into 3/4-inc...",8.0,4.7,https://www.allrecipes.com/recipe/256046/hunts...,196,8g,26g,6g,27.0
4,Broccoli Salads,Broccoli Crunch Salad,"1 cup mayonnaise, 1/4 cup apple cider vinegar,...",8.0,5.0,https://www.allrecipes.com/broccoli-crunch-sal...,371,29g,24g,8g,15.0


In [14]:
# Give the free-text columns pandas' dedicated string dtype.
columns_to_convert = ['category', 'title', 'ingredients', 'recipe_link']
df_cleaned = df_cleaned.astype({name: 'string' for name in columns_to_convert})

# The nutrition columns still hold text such as "16g". For each one: view the
# values as text, strip everything except digits and the decimal point, parse
# the remainder as a number (unparseable values become NaN) and store it as
# float64.
numeric_columns = ['calories', 'fat', 'carbs', 'protein']
for name in numeric_columns:
    digits_only = df_cleaned[name].astype(str).str.replace(r'[^\d.]+', '', regex=True)
    df_cleaned[name] = pd.to_numeric(digits_only, errors='coerce').astype('float64')

# Verify the changes
print(df_cleaned.dtypes)


category           string[python]
title              string[python]
ingredients        string[python]
serving_info              float64
rating                     object
recipe_link        string[python]
calories                  float64
fat                       float64
carbs                     float64
protein                   float64
total_time_mins           float64
dtype: object


## 2. Storing Cleaned Data in SQLite Database

In [15]:
import sqlite3

# Connect to the SQLite database file (created on first use).
conn = sqlite3.connect('recipes.db')
try:
    # Save the cleaned DataFrame to a table called 'recipes', replacing any
    # table left over from a previous run; the DataFrame index is not stored.
    df_cleaned.to_sql('recipes', conn, if_exists='replace', index=False)
finally:
    # Always release the database handle, even if the write fails, so the
    # file is not left locked for the cells that follow.
    conn.close()

In [16]:
# Verify data was stored properly by reading a few rows back.
conn = sqlite3.connect('recipes.db')
try:
    # Query 10 rows from the table
    query = "SELECT * FROM recipes LIMIT 10;"
    df_from_db = pd.read_sql(query, conn)
finally:
    # Close the connection whether or not the query succeeded.
    conn.close()

print(df_from_db)

               category                                  title  \
0  Buffalo Chicken Dips            Buffalo Chicken Cheese Ball   
1               Bulgogi                        Tri-Tip Bulgogi   
2     Air Fryer Recipes                Air Fryer Turkey Breast   
3            Bruschetta         Hunt's® Easy Tomato Bruschetta   
4       Broccoli Salads                  Broccoli Crunch Salad   
5               Burgers         Almost White Castle Hamburgers   
6               Burgers                      Air Fryer Burgers   
7                Brines                 Mesquite Chicken Brine   
8                Brines  Peppered Maple and Thyme Turkey Brine   
9    Breakfast Burritos         Slow Cooker Breakfast Burritos   

                                         ingredients  serving_info rating  \
0  2 (8 ounce) packages cream cheese, at room tem...          12.0    5.0   
1  1  pear, 1/2  onion, 1 thumb-sized piece fresh...           6.0   None   
2  1 tablespoon finely chopped fresh rosem

## 3. Data Filtering and Dash Interface Implementation

In [19]:
from dash import Dash, html, dcc, callback, Output, Input, State, callback_context
import sqlite3
import random
import pandas as pd

# import filtering functions
from recipe_functions import get_random_recipes, filter_by_preferences, exclude_restricted_items, apply_dietary_restrictions, filter_by_ranges, convert_rating_to_stars


# Initialize Dash app
app = Dash()


def _text_field(label, component_id, placeholder):
    """Return a labelled, full-width text input wrapped in its own Div."""
    return html.Div([
        html.Label(label),
        dcc.Input(id=component_id, type='text', placeholder=placeholder,
                  style={'width': '100%', 'padding': '10px'}),
    ], style={'margin-bottom': '10px'})


def _range_row(label, prefix):
    """Return one "<label> [Min] [Max]" row; the inputs get ids '<prefix>-min' and '<prefix>-max'."""
    return html.Div([
        html.Label(label, style={'width': '140px', 'text-align': 'right', 'margin-right': '10px'}),
        dcc.Input(id=f'{prefix}-min', type='number', placeholder='Min',
                  style={'margin-right': '10px', 'width': '70px'}),
        dcc.Input(id=f'{prefix}-max', type='number', placeholder='Max',
                  style={'width': '70px'}),
    ], style={'display': 'flex', 'align-items': 'center', 'margin-bottom': '10px'})


def _centered_button(text, component_id):
    """Return a green action button centred inside its own Div."""
    return html.Div([
        html.Button(text, id=component_id,
                    style={'text-align': 'center', 'background-color': '#4CAF50', 'color': 'white',
                           'padding': '10px 20px', 'border': 'none', 'cursor': 'pointer'}),
    ], style={'text-align': 'center', 'padding': '5px'})


# Dietary preferences dropdown
_dietary_dropdown = html.Div([
    html.Label("Dietary Preferences:"),
    dcc.Dropdown(
        id='dietary-preferences-dropdown',
        options=[
            {'label': label, 'value': value}
            for label, value in (
                ('Vegetarian', 'vegetarian'),
                ('Vegan', 'vegan'),
                ('Gluten-Free', 'gluten-free'),
                ('Keto', 'keto'),
            )
        ],
        placeholder="Select dietary preferences",
        multi=True,
        style={'cursor': 'pointer'}
    )
], style={'margin-bottom': '10px'})

# Nutrition ranges, laid out as two side-by-side columns
_nutrition_ranges = html.Div([
    # Left side: Calorie and Protein Range
    html.Div([
        _range_row("Calorie Range:", 'calorie'),
        _range_row("Protein Range:", 'protein'),
    ], style={'width': '45%', 'display': 'inline-block'}),

    # Right side: Carbs and Fat Range
    html.Div([
        _range_row("Carbs Range:", 'carbs'),
        _range_row("Fat Range:", 'fat'),
    ], style={'width': '45%', 'display': 'inline-block', 'margin-left': '5px'}),
], style={'display': 'flex', 'justify-content': 'space-between', 'align-items': 'flex-start'})

# Cooking time slider
_cooking_time = html.Div([
    html.Label("Maximum Cooking Time (minutes):"),
    dcc.Slider(
        id='cooking-time-slider',
        min=10,
        max=150,
        step=5,
        value=30,
        marks={minutes: str(minutes) for minutes in (10, 30, 60, 90, 120, 150)},
        tooltip={"placement": "bottom", "always_visible": False}
    ),
], style={'margin-bottom': '10px'})

# Max Search Results
_max_results = html.Div([
    html.Label("Max Search Results:", style={'margin-right': '10px', 'font-weight': 'bold'}),
    dcc.Input(id='search-input', type='number', min=1, max=25, step=1, value=10,
              style={'width': '70px'}),
], style={'display': 'flex', 'align-items': 'center', 'margin-bottom': '10px'})

# App layout: the sections above, stacked top to bottom
app.layout = html.Div([
    html.H1("Recipe Recommendation System", style={'text-align': 'center', 'color': '#4CAF50'}),

    html.Hr(),

    # Ingredients input
    _text_field("Enter Food Preference:", 'ingredients-input', 'e.g., chicken, garlic, olive oil'),

    # Dietary restrictions
    _text_field("Enter Any Dietary Restrictions:", 'restrictions-input', 'e.g., milk, peanuts, sesame'),

    _dietary_dropdown,
    _nutrition_ranges,
    _cooking_time,
    _max_results,

    # Submit button
    _centered_button('Search Recipes', 'search-button'),

    # Surprise me button
    _centered_button('Surprise Me', 'surprise-me'),

    html.Hr(),

    # Placeholder for displaying results
    html.Div(id='recipe-results')
])


# Callbacks for functionality
def _recipe_card(row):
    """Build the Div that displays one recipe (a row of the 'recipes' table)."""
    ingredients_text = row['ingredients'] if isinstance(row['ingredients'], str) else ''

    # Show every non-empty ingredient. Testing the stripped text (rather than
    # indexing its first character) avoids an IndexError on blank entries such
    # as the one produced by a trailing ", ".
    ingredient_items = [
        html.Li(ingredient.strip())
        for ingredient in ingredients_text.split(', ')
        if ingredient.strip()
    ]

    return html.Div([
        html.H3(row['title'], style={'color': '#4CAF50'}),
        html.P(f"Category: {row['category']}"),
        html.P(f"Calories: {row['calories']} | Protein: {row['protein']}g | Carbs: {row['carbs']}g | Fat: {row['fat']}g (Per Serving)"),
        html.P(f"Rating: {convert_rating_to_stars(row['rating'])}"),
        html.P(f"Cooking Time: {row['total_time_mins']} minutes"),
        html.Div([
            html.Span("Ingredients:", style={'font-weight': 'bold'}),
            html.Ul(ingredient_items, style={'margin-top': '10px'})
        ]),
        html.P(f"Servings: {int(row['serving_info']) if pd.notnull(row['serving_info']) else 'N/A'}"),
        html.A("View Recipe", href=row['recipe_link'], target="_blank", style={'color': '#4CAF50', 'text-decoration': 'underline'})
    ], style={'margin-bottom': '20px'})


@app.callback(
    Output('recipe-results', 'children'),
    [Input('search-button', 'n_clicks'), Input('surprise-me', 'n_clicks')],
    State('ingredients-input', 'value'),
    State('restrictions-input', 'value'),
    State('dietary-preferences-dropdown', 'value'),
    # Dash hands State values to the function positionally, so the order below
    # must mirror the parameter order of recommend_recipes:
    # calories, protein, fat, carbs.
    State('calorie-min', 'value'),
    State('calorie-max', 'value'),
    State('protein-min', 'value'),
    State('protein-max', 'value'),
    State('fat-min', 'value'),
    State('fat-max', 'value'),
    State('carbs-min', 'value'),
    State('carbs-max', 'value'),
    State('cooking-time-slider', 'value'),
    State('search-input', 'value')
)
def recommend_recipes(search_clicks, surprise_clicks, ingredients, restrictions, dietary_preferences,
                      cal_min, cal_max, prot_min, prot_max, fat_min, fat_max, carbs_min, carbs_max, 
                      max_time, max_results):
    """Render the recipe list for whichever button was clicked.

    Args:
        search_clicks, surprise_clicks: click counters of the two buttons; both
            are falsy until a button has been pressed.
        ingredients, restrictions: free-text values of the two text inputs.
        dietary_preferences: values selected in the dietary dropdown.
        cal_min ... carbs_max: bounds typed into the nutrition range inputs;
            a bound is None when its field is empty.
        max_time: value of the cooking-time slider.
        max_results: value of the "Max Search Results" input.

    Returns:
        An html.Div holding either a prompt, a "no matches" message, or one
        card per selected recipe.
    """
    prompt = html.Div(
        "Enter your preferences and click 'Search Recipes.'", 
        style={'text-align': 'center', 'padding': '5px'}
    )

    # Check if no buttons have been clicked
    if not search_clicks and not surprise_clicks:
        return prompt

    # Identify which button triggered the callback
    ctx = callback_context
    button_id = ctx.triggered[0]['prop_id'].split('.')[0]

    # Proceed only if a button was actually clicked
    if button_id not in ['search-button', 'surprise-me']:
        return prompt

    # The number input reports None when it is empty or holds an invalid value;
    # fall back to the layout's default of 10 instead of returning every row.
    if max_results is None:
        max_results = 10

    # Load the recipes and release the connection straight away, so it is
    # closed even if one of the filters below raises.
    conn = sqlite3.connect('recipes.db')
    try:
        df = pd.read_sql("SELECT * FROM recipes", conn)
    finally:
        conn.close()

    if button_id == 'surprise-me':
        # "Surprise Me" button clicked
        df = get_random_recipes(df, max_results)
    else:
        # "Search Recipes" button clicked

        # 1. Filter by dietary preferences
        if dietary_preferences:
            df = apply_dietary_restrictions(dietary_preferences, df)

        # 2. Exclude restricted items
        if restrictions:
            df = exclude_restricted_items(restrictions, df)

        # 3. Filter by desired ingredients
        if ingredients:
            df = filter_by_preferences(ingredients, df)

        # 4. Apply numeric range filters
        df = filter_by_ranges(df, cal_min, cal_max, prot_min, prot_max, fat_min, fat_max, carbs_min, carbs_max, max_time)

        # Shuffle the DataFrame after applying filters
        df = df.sample(frac=1).reset_index(drop=True)  # Shuffle rows

        # Sort by rating and limit results. The rating column is stored as
        # text, so compare it numerically; unrated recipes sort last.
        df = df.sort_values(
            'rating',
            ascending=False,
            key=lambda ratings: pd.to_numeric(ratings, errors='coerce'),
        ).head(max_results)

    # Handle empty results
    if df.empty:
        return html.Div(
            "No recipes matched your preferences!", 
            style={'text-align': 'center', 'padding': '5px', 'color': '#fc0345'}
        )

    return html.Div([_recipe_card(row) for _, row in df.iterrows()])

if __name__ == '__main__':
    # Prefer app.run(); run_server() is the older, deprecated spelling and is
    # only used as a fallback on Dash versions that do not provide run().
    start_server = getattr(app, 'run', None) or app.run_server
    start_server(port=8060, debug=True)
