# Recipes dataset (Kaggle link):
https://www.kaggle.com/datasets/michau96/recipes-from-reddit

In [2]:
import pandas as pd

# Reading the dataframe :

In [3]:
# Load the recipes CSV. 'latin' is a Python codec alias for Latin-1, so the file
# is decoded as Latin-1 rather than pandas' default UTF-8.
# NOTE(review): presumably chosen because UTF-8 decoding failed on this export — confirm.
df = pd.read_csv('Recipes.csv',encoding='latin')

In [4]:
# Preview the first rows to check that the columns were parsed as expected.
df.head()

Unnamed: 0,date,num_comments,title,user,comment,n_char
0,2021-02-23,28,Easy Tiramisu (Raw-Egg-Free) Recipe,yvonnemeetsfood,"This is by no means ""authentic"" tiramisu in th...",2044
1,2021-02-23,45,I was inspired by the Thai Ramen posted here e...,BushyEyes,"Thank you for the great idea, u/oliveyoil!\r\n...",3490
2,2021-02-22,14,Homemade Cinnamon Rolls Recipe,pangibear,**Homemade Cinnamon Rolls**\r\n\r\nLayers of s...,3432
3,2021-02-22,1,Classic Creme Brulee at home with only 4 ingre...,Doesntpayfullprice,\r\n\r\nOne of my favorite at home dessert re...,1639
4,2021-02-22,19,Thai Ramen with Tumeric Shrimp,oliveyoil,I am obsessed with making different ramen dish...,1270


# The element to extract information from

In [6]:
# Display one complete comment (row 1) to see the raw, free-form recipe text
# that the extraction prompt will have to deal with.
df.comment[1]

'Thank you for the great idea, u/oliveyoil!\r\n\r\n**Recipe here originally:** [**Spicy Thai Shrimp Noodle Soup**](https://www.triedandtruerecipe.com/2021/02/23/spicy-thai-shrimp-noodle-soup/)\r\n\r\nThis spicy Thai shrimp noodle soup is an explosion of heat and rich flavor paired with beautifully charred greens.\r\n\r\nPrep Time: 15 minutes\r\n\r\nCook Time: 30 minutes\r\n\r\nTotal Time: 45 minutes\r\n\r\nServings: 4\r\n\r\nCalories: 542kcal\r\n\r\nAuthor: Kylie Perrotti\r\n\r\nEquipment\r\n\r\n* Large pot\r\n* Medium pot\r\n* Sheet pan\r\n* Strainer or sieve\r\n\r\nIngredientsBroth and Noodles:\r\n\r\n* 2 tablespoons neutral oil for frying\r\n* 1 yellow onion peeled and thinly sliced\r\n* 4 cloves garlic peeled and minced\r\n* 1-3 Thai chili peppers trimmed and minced (adjust quantity based on spice preference)\r\n* 4 scallions white parts minced, green parts thinly sliced on an angle\r\n* 2 tablespoons Thai curry paste red, green, or yellow\r\n* 6 cups chicken stock\r\n* 1 tablespoo

# Processing 

In [8]:
import pandas as pd
import google.generativeai as genai
from typing import Dict, List

def create_recipe_prompt(comment: str) -> str:
    """Build the extraction prompt sent to the LLM for one recipe.

    The prompt embeds the raw recipe text, lists eleven numbered questions and
    ends with the numbered answer template the model is told to follow.

    Args:
        comment: Free-form recipe text to analyse.

    Returns:
        The assembled prompt string.
    """
    # Part 1: the instructions followed by the recipe text itself.
    header = f"""Analyze this recipe and answer the following questions. Provide ONLY the answers in a simple format.
    If information is not available, respond with 'N/A'.

    Recipe text:
    {comment}

"""

    # Part 2: the questions; their numbering matches the answer template below.
    questions = """    Questions:
    1. What is the recipe name?
    2. What is the main category of this recipe (dessert, main dish, etc.)?
    3. What are the key ingredients (list up to 5 main ones)?
    4. How many total ingredients are listed?
    5. What is the preparation time mentioned (if any)?
    6. What is the difficulty level (easy, medium, hard) based on the instructions?
    7. Are there any dietary notes (vegetarian, vegan, etc.)?
    8. What special equipment is needed (if any)?
    9. What is the primary cooking method (baking, frying, no-cook, etc.)?
    10. Are there any specific temperature requirements mentioned?
    11. How is it prepared (steps to follow)?

"""

    # Part 3: the exact response layout the model is asked to reproduce.
    answer_template = """    Format your response exactly like this:
    1. [Recipe Name]
    2. [Category]
    3. [Key Ingredients]
    4. [Total Ingredients Count]
    5. [Prep Time]
    6. [Difficulty]
    7. [Dietary Notes]
    8. [Equipment]
    9. [Cooking Method]
    10. [Temperature Requirements]
    11. [Preparing_method]
    """

    return header + questions + answer_template

def parse_llm_response(response: str) -> Dict:
    """Parse the LLM's numbered answer list into a structured dictionary.

    Each answer is expected on a line of the form ``N. value`` where ``N`` is
    the question number (1-11). The answer is stored under the N-th key, so
    blank lines or a leading sentence before answer 1 do not shift the fields.

    Only the leading ``N.`` marker is removed: an answer that itself contains
    ". " (for example numbered preparation steps) is kept in full. A line that
    does not start a new, higher-numbered answer is treated as a continuation
    of the current answer and appended to it on a new line.

    Args:
        response: Raw text returned by the model.

    Returns:
        Dict mapping field names to answer strings, in question order.
        Questions the model did not answer are simply absent.
    """
    import re  # local import: `re` is not among this notebook's imports

    keys = ['recipe_name', 'category', 'key_ingredients', 'ingredient_count',
            'prep_time', 'difficulty', 'dietary_notes', 'equipment',
            'cooking_method', 'temperature', 'Preparing_method']

    # "N." optionally followed by whitespace and the answer text.
    numbered_line = re.compile(r'^(\d+)\.(?:\s+(.*))?$')

    collected: Dict[int, List[str]] = {}
    current = 0  # question number being filled; 0 means "none yet"

    for raw_line in response.strip().split('\n'):
        line = raw_line.strip()
        if not line:
            continue

        match = numbered_line.match(line)
        number = int(match.group(1)) if match else 0

        # A numbered line opens a new field only when its number moves forward
        # and is a valid question number. Otherwise (e.g. step "1." inside
        # answer 11) it belongs to the answer currently being collected.
        if match and current < number <= len(keys):
            current = number
            collected[current] = [match.group(2) or '']
        elif current:
            collected[current].append(line)
        # Text before the first numbered answer is ignored.

    parsed_data = {}
    for number in sorted(collected):
        value = '\n'.join(collected[number]).strip()
        # The prompt's template wraps placeholders in [..]; drop them if echoed.
        parsed_data[keys[number - 1]] = value.strip('[]').strip()

    return parsed_data

def process_recipes(df: pd.DataFrame, api_key: str, max_rows: int = 3) -> pd.DataFrame:
    """
    Process recipes using Google's Gemini Pro and return structured data.

    For each of the first ``max_rows`` rows, the ``comment`` text is turned
    into a prompt, sent to the model, and the parsed answers are combined with
    the row's original metadata. Rows that fail (API error, unparsable
    response) are reported with ``print`` and skipped, so one bad row does not
    abort the whole run.

    Args:
        df: Input DataFrame with recipe data; must provide the columns
            ``comment``, ``date``, ``num_comments``, ``title`` and ``user``.
        api_key: Google API key for Gemini Pro
        max_rows: Maximum number of rows to process

    Returns:
        DataFrame with the original metadata columns first, followed by the
        extracted recipe fields. If no row could be processed, an empty
        DataFrame that has only the metadata columns is returned.
    """
    # Configure Gemini Pro
    genai.configure(api_key=api_key)
    model = genai.GenerativeModel('gemini-pro')

    # Metadata carried over unchanged from the input row.
    original_cols = ['date', 'num_comments', 'title', 'user']

    # One dict per successfully processed row; the frame is built once at the end.
    results = []

    for idx, row in df.head(max_rows).iterrows():
        prompt = create_recipe_prompt(row['comment'])

        try:
            response = model.generate_content(prompt)
            parsed_data = parse_llm_response(response.text)

            # Add original metadata
            parsed_data.update({col: row[col] for col in original_cols})

            results.append(parsed_data)

        except Exception as e:
            # Deliberate best-effort: report the failing row and keep going.
            print(f"Error processing row {idx}: {e}")
            continue

    # With no successful rows, DataFrame(results) has no columns at all and the
    # column selection below would raise KeyError; return an empty, well-formed
    # frame instead.
    if not results:
        return pd.DataFrame(columns=original_cols)

    results_df = pd.DataFrame(results)

    # Reorder columns to put original metadata first
    other_cols = [col for col in results_df.columns if col not in original_cols]
    return results_df[original_cols + other_cols]

# Getting new df with new columns

In [9]:
# Your Google API key.
# Read it from the GOOGLE_API_KEY environment variable so the real key is never
# saved inside the notebook; the placeholder is only used when the variable is unset.
import os

api_key = os.environ.get('GOOGLE_API_KEY', 'YOUR_API_KEY')

# Process the recipes
results_df = process_recipes(df, api_key)

In [10]:
# Inspect the extracted fields next to the original post metadata.
results_df.head(3)

Unnamed: 0,date,num_comments,title,user,recipe_name,category,key_ingredients,ingredient_count,prep_time,difficulty,dietary_notes,equipment,cooking_method,temperature,Preparing_method
0,2021-02-23,28,Easy Tiramisu (Raw-Egg-Free) Recipe,yvonnemeetsfood,Easy Tiramisu Recipe (no raw eggs!),Dessert,"Lady fingers, mascarpone, coffee, rum, cocoa p...",11,,Easy,,Hand mixer,No-cook,,See recipe method in the provided text
1,2021-02-23,45,I was inspired by the Thai Ramen posted here e...,BushyEyes,Spicy Thai Shrimp Noodle Soup,Main Dish,"Vermicelli noodles, chicken stock, shrimp, oni...",36,15 minutes,Easy,,"Sheet pan, strainer or sieve","Broiling, simmering",,"Cook broth aromatics, simmer broth, marinate s..."
2,2021-02-22,14,Homemade Cinnamon Rolls Recipe,pangibear,Homemade Cinnamon Rolls,Dessert,"Flour, Yeast, Milk, Brown Sugar, Cinnamon",14,,Easy,,"Stand mixer (Optional), Silicone baking mats (...",Baking,350 degrees Fahrenheit,1


# The retrieval

In [1]:
import pandas as pd
import google.generativeai as genai
from typing import Dict, List, Tuple, Optional
import json
from functools import lru_cache

class RecipeRetriever:
    """Answer natural-language questions about a saved recipe table.

    A query runs in three steps: the model turns the question into a filter
    dictionary, the filters are applied to the table (at most five rows are
    kept), and the model writes an answer from the rows that remain.
    """

    def __init__(self, api_key: str, data_path: str):
        """
        Initialize the recipe retriever with API key and data path.

        Args:
            api_key: Google API key for Gemini Pro
            data_path: Path to the saved recipe DataFrame
        """
        # Configure Gemini
        genai.configure(api_key=api_key)
        self.model = genai.GenerativeModel('gemini-pro')

        # Load the table once and precompute the column summary used in prompts.
        # SECURITY: read_pickle unpickles the file, and unpickling can execute
        # arbitrary code; only point data_path at files you wrote yourself.
        self.df = pd.read_pickle(data_path)
        self.column_info = self._get_column_info()

    def _get_column_info(self) -> str:
        """Describe each column with a sample value taken from the first row.

        Sample values longer than 100 characters are cut and suffixed with
        '...'. An empty table yields empty sample values instead of failing.
        """
        columns = self.df.columns.tolist()
        if len(self.df) == 0:
            # No first row to sample from.
            sample_values = {col: '' for col in columns}
        else:
            sample_values = {col: self.df[col].iloc[0] for col in columns}

        preview = {}
        for col, val in sample_values.items():
            text = str(val)
            preview[col] = text[:100] + '...' if len(text) > 100 else text

        return f"""DataFrame columns and sample values:
        {json.dumps(preview, indent=2)}"""

    # NOTE(review): lru_cache on a method keys on `self` as well, so the cache
    # keeps every instance it has seen alive until the entry is evicted.
    @lru_cache(maxsize=128)
    def _get_filter_query(self, user_query: str) -> str:
        """
        Get filter query from LLM based on user question.

        Results are cached per exact question string, so asking the identical
        question again does not trigger another API call.

        Returns:
            The model's raw reply (expected to be a Python dict literal).
        """
        prompt = f"""Based on this user question: "{user_query}"
        And these available columns: {self.column_info}
        
        Generate a Python dictionary with filtering conditions that would help answer the question.
        Use only exact match conditions or 'contains' string operations.
        
        Return ONLY a Python dictionary like this example:
        {{'column_name': 'value'}} for exact match
        or {{'column_name': {{'contains': 'value'}}}} for partial match
        
        If no relevant filters apply, return an empty dictionary {{}}.
        
        RESPOND WITH ONLY THE DICTIONARY, NO OTHER TEXT."""

        response = self.model.generate_content(prompt)

        return response.text.strip()

    @staticmethod
    def _parse_filter_dict(filter_str: str) -> Dict:
        """Convert the model's reply into a filter dictionary without executing it.

        The text between the first '{' and the last '}' is parsed, so markdown
        code fences or stray words around the dictionary are tolerated.

        Raises:
            ValueError: if no dictionary literal is found or the literal is
                not a dict.
            SyntaxError: if the extracted text is not valid Python syntax.
        """
        import ast  # local import: `ast` is not among this notebook's imports

        start = filter_str.find('{')
        end = filter_str.rfind('}')
        if start == -1 or end < start:
            raise ValueError(f"No dictionary found in model response: {filter_str!r}")

        # SECURITY: the reply is model-generated text influenced by the user's
        # question. literal_eval accepts Python literals only, unlike eval(),
        # which would run whatever expression the reply contained.
        parsed = ast.literal_eval(filter_str[start:end + 1])
        if not isinstance(parsed, dict):
            raise ValueError(f"Expected a dictionary of filters, got {type(parsed).__name__}")
        return parsed

    def _apply_filters(self, filter_dict: Dict) -> pd.DataFrame:
        """Apply filters to DataFrame based on filter dictionary.

        Args:
            filter_dict: Maps a column name either to a value (exact match) or
                to ``{'contains': text}`` (case-insensitive substring match).
                Unknown column names are ignored.

        Returns:
            At most the first five matching rows; the first five rows of the
            whole table when ``filter_dict`` is empty.
        """
        filtered_df = self.df.copy()

        if not filter_dict:
            return filtered_df.head(5)  # Return top 5 if no filters

        for col, condition in filter_dict.items():
            if col not in filtered_df.columns:
                continue

            if isinstance(condition, dict) and 'contains' in condition:
                column = filtered_df[col]
                needle = str(condition['contains'])
                # regex=False: the needle comes from the model and must be
                # matched literally ("c++" or "(" are not valid patterns).
                # astype(str) lets numeric columns be searched too; notna()
                # stops missing values from matching via their text form.
                mask = column.notna() & column.astype(str).str.contains(
                    needle, case=False, na=False, regex=False)
                filtered_df = filtered_df[mask]
            else:
                filtered_df = filtered_df[filtered_df[col] == condition]

        return filtered_df.head(5)

    def _get_answer(self, user_query: str, filtered_data: pd.DataFrame) -> str:
        """Generate answer based on filtered data and user query.

        Returns a fixed "nothing found" message without calling the model when
        ``filtered_data`` is empty.
        """
        if filtered_data.empty:
            return "I couldn't find any recipes matching your criteria."

        data_str = filtered_data.to_string()
        prompt = f"""Question: {user_query}

        Available recipe data:
        {data_str}

        Please provide a helpful answer based on this data. If the data doesn't contain enough information to answer the question properly, please say so.
        Focus on being accurate and concise. Include specific details from the recipes when relevant."""

        response = self.model.generate_content(prompt)
        return response.text.strip()

    def query(self, user_query: str) -> Tuple[str, Optional[pd.DataFrame]]:
        """
        Process user query and return answer with optional filtered DataFrame.

        Args:
            user_query: User's question about recipes

        Returns:
            Tuple of (answer string, filtered DataFrame or None). When any
            step fails, the answer is an apology containing the error text
            and the DataFrame is None.
        """
        try:
            # Get and parse filter conditions
            filter_str = self._get_filter_query(user_query)
            filter_dict = self._parse_filter_dict(filter_str)

            # Apply filters
            filtered_df = self._apply_filters(filter_dict)

            # Generate answer
            answer = self._get_answer(user_query, filtered_df)

            return answer, filtered_df

        except Exception as e:
            return f"Sorry, I encountered an error: {str(e)}", None

# Function to save the DataFrame
def save_recipe_data(df: pd.DataFrame, path: str):
    """Write ``df`` to ``path`` in pandas' pickle format.

    Args:
        df: Table to persist.
        path: Destination file path.
    """
    pd.to_pickle(df, path)

# Testing 

In [39]:
# Persist the extracted table first: RecipeRetriever loads its data from a
# pickle file rather than taking a DataFrame directly.
# NOTE(review): the recorded output below lists row labels 8 and 10, which the
# default max_rows=3 of process_recipes cannot produce — presumably results_df
# was regenerated with a larger max_rows before this cell ran; confirm.
save_recipe_data(results_df, 'recipe_data.pkl')

# Initialize the retriever
retriever = RecipeRetriever(api_key, 'recipe_data.pkl')

# Query the system
question = "suggest for me a list of Dessert"
# query() returns the generated answer plus the rows it was based on.
answer, filtered_data = retriever.query(question)

print("Answer:", answer)
# filtered_data is None when query() hit an error; `answer` then carries the error text.
if filtered_data is not None:
    print("\nRelevant Recipes:")
    print(filtered_data[['title', 'recipe_name', 'difficulty']])

Answer: - Easy Tiramisu (Raw-Egg-Free) Recipe
- Homemade Cinnamon Rolls Recipe
- Creme Brulee
- Soft and Chewy Snickerdoodle Cookie Bars
- Almond Sandies

Relevant Recipes:
                                                title  \
0                 Easy Tiramisu (Raw-Egg-Free) Recipe   
2                      Homemade Cinnamon Rolls Recipe   
3   Classic Creme Brulee at home with only 4 ingre...   
8            Soft and Chewy Snickerdoodle Cookie Bars   
10                                     Almond Sandies   

                            recipe_name difficulty  
0   Easy Tiramisu Recipe (no raw eggs!)       Easy  
2               Homemade Cinnamon Rolls     Medium  
3                          Creme Brulee       Easy  
8                                   N/A       Easy  
10                       Almond Sandies       Easy  
