In [1]:
import networkx as nx 
import pandas as pd
import json
import ast

In [2]:
df = pd.DataFrame(pd.read_csv('recipe_details.csv'))

In [3]:
df.columns

Index(['item_name', 'info_json'], dtype='object')

In [6]:
details = ast.literal_eval(df['info_json'].iloc[1015])

In [7]:
details

{'Dietary Category': 'non-veg',
 'Specific Cuisine': 'Indonesian',
 'Ingredients': ['beef',
  'chicken',
  'pork',
  'fish balls',
  'noodles',
  'soy sauce',
  'oyster sauce',
  'garlic',
  'ginger'],
 'Preparation Time': 30}

In [8]:
def clean_and_parse_json(json_str):
    try:
        if "}{".encode() in json_str.encode():
            fixed_json_str = f"[{json_str.replace('}{', '},{')}]"
            parsed_data = json.loads(fixed_json_str)
            return parsed_data[0]
        else:
            return json.loads(json_str)
    except json.JSONDecodeError as e:
        return None  # Return None for invalid JSON


#### Creating the graph with each node having info related to the recipe including its:
1. Cuisine type
2. Dieteray Preference
3. Ingredients Needed
4. Preparation time for the dish

#### And the edges are created by playing on the weight of the edge or the importance of the relationship between the nodes. Similarity between every 2 nodes is accounted on the 4 factors mentioned in the details above


In [10]:
# Initialise the graph 
graph = nx.Graph()
valid_node_count = 0  

# Adding recipe nodes to the graph
for idx, row in df.iterrows():
    details = clean_and_parse_json(row["info_json"])
    if details:  
        try:
            graph.add_node(
                row["item_name"],
                ingredients=details["Ingredients"],
                cuisine=details["Specific Cuisine"],
                time_taken=details["Preparation Time"],
                dietary_preference=details.get("Dietary category"),
            )
            valid_node_count += 1
        except KeyError as e:
            print(f"Missing key in row {idx}: {e}")
            print(f"Row data: {details}")
    else:
        print(f"Error in row number {idx}")

print(f"total valid nodes: {valid_node_count}")

total valid nigga nodes: 1016


In [11]:
for i, row_i in df.iterrows():
    for j, row_j in df.iterrows():
        if i != j:
            try:
                details_i = clean_and_parse_json(row_i["info_json"])
                details_j = clean_and_parse_json(row_j["info_json"])

                if not details_i or not details_j:
                    raise ValueError(f"Unable to fix JSON for {row_i['item_name']} or {row_j['item_name']}")

            except (json.JSONDecodeError, ValueError) as e:
                print(f"JSON decode error for {row_i['item_name']} or {row_j['item_name']}: {e}")

                continue

            # Ingredient Similarity
            shared_ingredients = set(details_i.get("Ingredients", [])) & set(details_j.get("Ingredients", []))
            ingredient_similarity = len(shared_ingredients)

            # Similarity in the preparation time
            prep_time_i = int(details_i.get("Preparation Time", 0)) 
            prep_time_j = int(details_j.get("Preparation Time", 0)) 
            prep_time_similarity = abs(prep_time_i - prep_time_j) <= 10

            # Similarity in the cuising
            cuisine_similarity = details_i.get("Specific Cuisine", "") == details_j.get("Specific Cuisine", "")

            # Similarity in the dietary preferences
            dietary_similarity = details_i.get("Dietary category", "") == details_j.get("Dietary category", "")

            # Calculate similarity score
            similarity_score = (
                ingredient_similarity +
                (1 if prep_time_similarity else 0) +
                (1 if cuisine_similarity else 0) +
                (1 if dietary_similarity else 0)
            )

            # Add edge to graph if similarity score > 0
            if similarity_score > 0:
                graph.add_edge(
                    row_i["item_name"], 
                    row_j["item_name"],
                    weight=similarity_score  
                )

# Output graph edges with weights
for u, v, attrs in graph.edges(data=True):
    print(f"Edge between {u} and {v}, Weight: {attrs['weight']}")

Edge between Air Fryer Recipes and Allrecipes Allstar Recipes, Weight: 4
Edge between Air Fryer Recipes and Angel Food Cakes, Weight: 2
Edge between Air Fryer Recipes and Antipasti, Weight: 2
Edge between Air Fryer Recipes and Appetizers and Snacks, Weight: 2
Edge between Air Fryer Recipes and Apple Pie, Weight: 1
Edge between Air Fryer Recipes and Applesauce, Weight: 2
Edge between Air Fryer Recipes and Artichoke Dips, Weight: 4
Edge between Air Fryer Recipes and Bagels, Weight: 2
Edge between Air Fryer Recipes and Baked Beans, Weight: 2
Edge between Air Fryer Recipes and Banana Breads, Weight: 2
Edge between Air Fryer Recipes and Bar Cookies, Weight: 2
Edge between Air Fryer Recipes and Beef Recipes, Weight: 3
Edge between Air Fryer Recipes and Beef Stews, Weight: 1
Edge between Air Fryer Recipes and Beef Stroganoff, Weight: 3
Edge between Air Fryer Recipes and Beef Tenderloin, Weight: 2
Edge between Air Fryer Recipes and Biscotti, Weight: 1
Edge between Air Fryer Recipes and Biscuit

#### Once the graph is ready, we can start finding relevant nodes from the graph based on the user preferences selected in the preferences section in the frontend

In [21]:
import networkx as nx
import pandas as pd
import json

class RecipeRecommendationSystem:
    def __init__(self, dataframe):
        self.df = dataframe
        self.graph = nx.Graph()
        self._clean_and_parse_json = self._create_json_cleaner()
        self._build_recipe_graph()

    def _create_json_cleaner(self):
        def clean_and_parse_json(json_str):
            try:
                # Remove any leading/trailing whitespace and newlines
                json_str = json_str.strip()
                
                # Handle potential encoding issues
                json_str = json_str.encode('utf-8', errors='ignore').decode('utf-8')
                
                # Parse JSON
                details = json.loads(json_str)
                
                # Normalize keys
                details = {k.replace(' ', '_').lower(): v for k, v in details.items()}
                
                return details
            except (json.JSONDecodeError, AttributeError) as e:
                print(f"JSON parsing error: {e}")
                return None
        
        return clean_and_parse_json

    def _build_recipe_graph(self):
        for i, row_i in self.df.iterrows():
            for j, row_j in self.df.iterrows():
                if i != j:
                    try:
                        details_i = self._clean_and_parse_json(row_i["info_json"])
                        details_j = self._clean_and_parse_json(row_j["info_json"])

                        if not details_i or not details_j:
                            continue

                        # Ingredient Similarity
                        shared_ingredients = set(details_i.get("ingredients", [])) & set(details_j.get("ingredients", []))
                        ingredient_similarity = len(shared_ingredients)

                        # Preparation Time Similarity
                        prep_time_i = int(details_i.get("preparation_time", 0)) 
                        prep_time_j = int(details_j.get("preparation_time", 0)) 
                        prep_time_similarity = abs(prep_time_i - prep_time_j) <= 10

                        # Cuisine Similarity
                        cuisine_similarity = details_i.get("specific_cuisine", "") == details_j.get("specific_cuisine", "")

                        # Dietary Similarity
                        dietary_similarity = details_i.get("dietary_category", "") == details_j.get("dietary_category", "")

                        # Calculate Similarity Score
                        similarity_score = (
                            ingredient_similarity +
                            (1 if prep_time_similarity else 0) +
                            (1 if cuisine_similarity else 0) +
                            (1 if dietary_similarity else 0)
                        )

                        # Add Edge if Similarity Score > 0
                        if similarity_score > 0:
                            # Store recipe details as node attributes
                            self.graph.add_node(row_i["item_name"], **details_i)
                            self.graph.add_node(row_j["item_name"], **details_j)
                            
                            self.graph.add_edge(
                                row_i["item_name"], 
                                row_j["item_name"],
                                weight=similarity_score  
                            )

                    except Exception as e:
                        print(f"Error processing {row_i['item_name']} or {row_j['item_name']}: {e}")

    def recommend_recipes(self, user_preferences, top_n=5):
        recommendations = []
        
        for recipe_name in self.graph.nodes():
            try:
                recipe_data = self.graph.nodes[recipe_name]
                
                # Calculate Matching Scores
                ingredient_match = len(
                    set(user_preferences.get('ingredients', [])) & 
                    set(recipe_data.get('ingredients', []))
                ) / len(set(recipe_data.get('ingredients', [])))
                
                cuisine_match = user_preferences.get('cuisine', '') == recipe_data.get('specific_cuisine', '')
                dietary_match = user_preferences.get('dietary_preference', '') == recipe_data.get('dietary_category', '')
                
                time_match = abs(
                    user_preferences.get('available_time', 0) - 
                    recipe_data.get('preparation_time', 0)
                ) <= 15

                # Calculate Final Score
                final_score = (
                    0.4 * ingredient_match + 
                    0.3 * cuisine_match + 
                    0.2 * dietary_match + 
                    0.1 * time_match
                ) * 100

                recommendations.append({
                    'recipe_name': recipe_name,
                    'match_score': round(final_score, 2),
                    'details': {
                        'ingredients_match': f"{ingredient_match * 100:.1f}%",
                        'cuisine_match': f"{cuisine_match * 100:.1f}%",
                        'dietary_match': f"{dietary_match * 100:.1f}%",
                        'time_match': f"{time_match * 100:.1f}%",
                        'recipe_details': recipe_data
                    }
                })
            except Exception as e:
                print(f"Error processing recommendation for {recipe_name}: {e}")
        
        # Sort and return top recommendations
        return sorted(recommendations, key=lambda x: x['match_score'], reverse=True)[:top_n]

# Example Usage
def main():
    # Load your DataFrame
    df = pd.read_csv('recipe_details.csv')
    
    # Initialize Recommendation System
    recommender = RecipeRecommendationSystem(df)
    
    # Generate Recommendations
    user_preferences = {
        'ingredients': ['chicken', 'onion'],
        'cuisine': 'Indonesian',
        'dietary_preference': 'non-veg',
        'available_time': 30
    }
    
    recommendations = recommender.recommend_recipes(user_preferences)
    
    # Print Recommendations
    for rec in recommendations:
        print(f"Recipe: {rec['recipe_name']}")
        print(f"Match Score: {rec['match_score']}%")
        print("Matching Details:")
        for key, value in rec['details'].items():
            print(f"  {key}: {value}")
        print("\n")

if __name__ == '__main__':
    main()

Recipe: Nasi Goreng
Match Score: 68.0%
Matching Details:
  ingredients_match: 20.0%
  cuisine_match: 100.0%
  dietary_match: 100.0%
  time_match: 100.0%
  recipe_details: {'dietary_category': 'non-veg', 'specific_cuisine': 'Indonesian', 'ingredients': ['rice', 'chicken', 'vegetables', 'soy sauce', 'kecap manis'], 'preparation_time': 30}


Recipe: Mie Ayam
Match Score: 68.0%
Matching Details:
  ingredients_match: 20.0%
  cuisine_match: 100.0%
  dietary_match: 100.0%
  time_match: 100.0%
  recipe_details: {'dietary_category': 'non-veg', 'specific_cuisine': 'Indonesian', 'ingredients': ['noodles', 'chicken', 'soy sauce', 'garlic', 'ginger'], 'preparation_time': 30}


Recipe: Mie Kocok
Match Score: 68.0%
Matching Details:
  ingredients_match: 20.0%
  cuisine_match: 100.0%
  dietary_match: 100.0%
  time_match: 100.0%
  recipe_details: {'dietary_category': 'non-veg', 'specific_cuisine': 'Indonesian', 'ingredients': ['noodles', 'chicken', 'vegetables', 'soy sauce', 'oyster sauce'], 'preparati

In [19]:
# def validate_recipe(recipe_name, recipe_data):
#     """Validate if a recipe is properly formatted and complete."""
#     generic_names = {'dinner', 'lunch', 'breakfast', 'meal', 'food', 'snack'}
#     if recipe_name.lower() in generic_names:
#         return False
    
#     required_fields = {
#         'ingredients': lambda x: isinstance(x, list) and len(x) >= 2,
#         'time_taken': lambda x: str(x).isdigit() and int(x) > 0,
#         'cuisine': lambda x: isinstance(x, str) and len(x) > 0
#     }
    
#     return all(
#         field in recipe_data and check(recipe_data[field])
#         for field, check in required_fields.items()
#     )



# def calculate_ingredient_match_score(available_ingredients, recipe_ingredients):
#     """Calculate how many of the recipe ingredients are available to the user."""
#     available_set = set(ingredient.lower() for ingredient in available_ingredients)
#     recipe_set = set(ingredient.lower() for ingredient in recipe_ingredients)
#     matching_ingredients = recipe_set.intersection(available_set)
#     return len(matching_ingredients) / len(recipe_set) if recipe_set else 0

# def calculate_time_match_score(available_time, recipe_time):
#     """Calculate how well the recipe time matches user's available time."""
#     try:
#         recipe_time = int(recipe_time)
#         if recipe_time <= available_time:
#             # Perfect if recipe takes less time than available
#             return 1.0
#         else:
#             # Decreasing score based on how much longer it takes
#             return max(0, 1 - (recipe_time - available_time) / available_time)
#     except (ValueError, TypeError):
#         return 0

# def calculate_cuisine_match_score(preferred_cuisine, recipe_cuisine):
#     """Calculate cuisine match score."""
#     if not preferred_cuisine or not recipe_cuisine:
#         return 0.5
#     return 1.0 if preferred_cuisine.lower() == recipe_cuisine.lower() else 0

# def calculate_dietary_match_score(dietary_preferences, recipe_dietary):
#     """Calculate dietary preference match score."""
#     if not dietary_preferences or not recipe_dietary:
#         return 0.5
#     return 1.0 if dietary_preferences.lower() == recipe_dietary.lower() else 0

# def recommend_recipes(graph, user_preferences, top_n=10):
#     """
#     Recommend recipes based on user preferences and graph structure.
    
#     Parameters:
#     - graph: NetworkX graph containing recipe nodes
#     - user_preferences: dict containing:
#         - available_ingredients: list of ingredients user has
#         - available_time: int (minutes)
#         - preferred_cuisine: str
#         - dietary_preferences: str
#     - top_n: number of recommendations to return
    
#     Returns:
#     - list of tuples (recipe_name, score, details)
#     """
#     recommendations = []
    
#     # Weights for different factors
#     weights = {
#         'ingredients': 0.5,
#         'time': 0.3,
#         'cuisine': 0.1,
#         'dietary': 0.1
#     }
    
#     for recipe_name, recipe_data in graph.nodes(data=True):
#         try:
#             # Calculate individual scores
#             ingredient_score = calculate_ingredient_match_score(
#                 user_preferences['available_ingredients'],
#                 recipe_data.get('ingredients', [])
#             )
            
#             time_score = calculate_time_match_score(
#                 user_preferences['available_time'],
#                 recipe_data.get('time_taken', 0)
#             )
            
#             cuisine_score = calculate_cuisine_match_score(
#                 user_preferences['preferred_cuisine'],
#                 recipe_data.get('cuisine', '')
#             )
            
#             dietary_score = calculate_dietary_match_score(
#                 user_preferences['dietary_preferences'],
#                 recipe_data.get('dietary_preference', '')
#             )
            
#             # Calculate weighted total score
#             total_score = (
#                 weights['ingredients'] * ingredient_score +
#                 weights['time'] * time_score +
#                 weights['cuisine'] * cuisine_score +
#                 weights['dietary'] * dietary_score
#             )
            
#             # Get connected recipes (similar recipes) from graph
#             connected_recipes = list(graph.neighbors(recipe_name))
#             connectivity_bonus = len(connected_recipes) * 0.05  # Small bonus for well-connected recipes
            
#             final_score = total_score + min(connectivity_bonus, 0.2)  # Cap the bonus at 0.2
            
#             # Store recommendation with details
#             recommendation_details = {
#                 'ingredients_score': round(ingredient_score, 2),
#                 'time_score': round(time_score, 2),
#                 'cuisine_score': round(cuisine_score, 2),
#                 'dietary_score': round(dietary_score, 2),
#                 'connectivity_score': round(connectivity_bonus, 2),
#                 'recipe_data': recipe_data
#             }
            
#             recommendations.append((recipe_name, final_score, recommendation_details))
            
#         except Exception as e:
#             print(f"Error processing recipe {recipe_name}: {e}")
#             continue
    
#     # Sort by score and get top N recommendations
#     recommendations.sort(key=lambda x: x[1], reverse=True)
#     return recommendations[:top_n]

# # Example usage:
# def get_personalized_recommendations(graph, available_ingredients, available_time, 
#                                    preferred_cuisine=None, dietary_preferences=None):
#     """
#     Wrapper function to get personalized recommendations with simplified input.
#     """
#     user_preferences = {
#         'available_ingredients': available_ingredients,
#         'available_time': available_time,
#         'preferred_cuisine': preferred_cuisine,
#         'dietary_preferences': dietary_preferences
#     }
    
#     recommendations = recommend_recipes(graph, user_preferences)
    
#     # Format and return results
#     formatted_recommendations = []
#     for recipe_name, score, details in recommendations:
#         formatted_recommendations.append({
#             'recipe_name': recipe_name,
#             'match_score': round(score * 100, 1),
#             'details': {
#                 'ingredients_match': f"{details['ingredients_score'] * 100:.1f}%",
#                 'time_match': f"{details['time_score'] * 100:.1f}%",
#                 'cuisine_match': f"{details['cuisine_score'] * 100:.1f}%",
#                 'dietary_match': f"{details['dietary_score'] * 100:.1f}%",
#                 'recipe_time': details['recipe_data'].get('time_taken', 'Unknown'),
#                 'cuisine': details['recipe_data'].get('cuisine', 'Not specified'),
#                 'dietary_type': details['recipe_data'].get('dietary_preference', 'Not specified'),
#                 'required_ingredients': details['recipe_data'].get('ingredients', [])
#             }
#         })
    
#     return formatted_recommendations

In [16]:
recommendations = get_personalized_recommendations(
    graph=graph,
    available_ingredients=['chicken','lemon', 'onion' ],
    available_time=30,
    preferred_cuisine='Italian',
    dietary_preferences='Non-vegetarian'
)

In [17]:
recommendations

[{'recipe_name': 'Quesadillas',
  'match_score': 75.0,
  'details': {'ingredients_match': '40.0%',
   'time_match': '100.0%',
   'cuisine_match': '0.0%',
   'dietary_match': '50.0%',
   'recipe_time': 20,
   'cuisine': 'Mexican',
   'dietary_type': None,
   'required_ingredients': ['tortilla',
    'chicken',
    'onion',
    'cheese',
    'salsa']}},
 {'recipe_name': 'Dinner',
  'match_score': 73.3,
  'details': {'ingredients_match': '17.0%',
   'time_match': '100.0%',
   'cuisine_match': '100.0%',
   'dietary_match': '50.0%',
   'recipe_time': 30,
   'cuisine': 'Italian',
   'dietary_type': None,
   'required_ingredients': ['chicken',
    'onions',
    'garlic',
    'olive oil',
    'tomato sauce',
    'pasta']}},
 {'recipe_name': 'Ravioli',
  'match_score': 73.3,
  'details': {'ingredients_match': '17.0%',
   'time_match': '100.0%',
   'cuisine_match': '100.0%',
   'dietary_match': '50.0%',
   'recipe_time': 30,
   'cuisine': 'Italian',
   'dietary_type': None,
   'required_ingredien

#### Generating a tailored message for the user giving him/her recipe recommendations. The insights are passed through an LLM call for the response to be more structured and defined


In [None]:
import cohere  # type: ignore

co = cohere.ClientV2("Q8mY5CRKzgiNqzIMjeT1Ed24tSukj0bFcn8lbtp7")

def get_structured_response(recipe_insights):

    system_message = system_message = """ You are an expert Chef who has been in the Cullinary Industry for over 30 years.
                        I want you to structure the recipe insights into a well-structured paragraoh(not too long) and also
                          provide precise steps to prepare the dish in the stipulated time period mentioned  ."""

    messages= [{'role': 'system', 'content': system_message},
               {'role': 'user', 'content': recipe_insights}]
    
    response= co.chat(model='command-r-plus-08-2024', messages=messages)
    return response.message.content[0].text
