In [None]:
import re
import pickle
import warnings
import logging
from collections import Counter, defaultdict
from typing import List, Dict, Tuple, Optional, Union

# Data Science & Plotting
import pandas as pd
import numpy as np
from scipy import stats
from scipy.stats import spearmanr, kendalltau

# Machine Learning & NLP
import torch
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import normalize
from sentence_transformers import SentenceTransformer
from tqdm import tqdm

# NLTK
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer, PorterStemmer

# GUI (Tkinter)
import tkinter as tk
from tkinter import ttk, scrolledtext, messagebox
from tkinter import font as tkfont

# Configuration & Downloads
warnings.filterwarnings('ignore')

# Setup logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

# NLTK Downloads - with check to avoid re-downloading
def download_nltk_resources():
    """Download required NLTK resources if not already present."""
    resources = [
        ('tokenizers/punkt', 'punkt'),
        ('corpora/stopwords', 'stopwords'),
        ('corpora/wordnet', 'wordnet'),
        ('taggers/averaged_perceptron_tagger', 'averaged_perceptron_tagger'),
        ('tokenizers/punkt_tab', 'punkt_tab')
    ]
    
    for path, name in resources:
        try:
            nltk.data.find(path)
        except LookupError:
            print(f"Downloading {name}...")
            nltk.download(name, quiet=True)

download_nltk_resources()

# Test mode
TEST_MODE = True       # Set to False for full dataset
SAMPLE_SIZE = 5000     # Number of recipes when TEST_MODE is True

# =============================================================================
# DATA LOADING
# =============================================================================

print("=" * 70)
print("LOADING DATA")
print("=" * 70)

recipes_df = pd.read_csv("RAW_recipes.csv")
interactions_df = pd.read_csv("RAW_interactions.csv")
if TEST_MODE:
    recipes_df = recipes_df.sample(n=min(SAMPLE_SIZE, len(recipes_df)), random_state=42)
    interactions_df = interactions_df[interactions_df['recipe_id'].isin(recipes_df['id'])]
print(f"Recipes: {len(recipes_df):,} | Interactions: {len(interactions_df):,}")


print(f"Recipes dataset: {recipes_df.shape[0]:,} recipes, {recipes_df.shape[1]} features")
print(f"Interactions dataset: {interactions_df.shape[0]:,} interactions, {interactions_df.shape[1]} features")

# Preview recipe structure
print("\nRecipe columns:", list(recipes_df.columns))
print("\nSample recipe entry:")
print(recipes_df.iloc[0])

# =============================================================================
# DATA MERGING
# =============================================================================

print("\n" + "=" * 70)
print("MERGING DATA")
print("=" * 70)

# Join recipes with user interactions
merged_data = recipes_df.merge(
    interactions_df,
    how="inner",
    left_on="id",
    right_on="recipe_id"
)

# Remove duplicate column after merge
if "recipe_id" in merged_data.columns:
    merged_data = merged_data.drop(columns=["recipe_id"])

print(f"Merged dataset size: {merged_data.shape[0]:,} rows, {merged_data.shape[1]} columns")

# Check for missing data
null_summary = merged_data.isnull().sum()
null_present = null_summary[null_summary > 0]
print("\nColumns with missing values:")
if len(null_present) > 0:
    for col, count in null_present.items():
        pct = (count / len(merged_data)) * 100
        print(f"  {col}: {count:,} ({pct:.2f}%)")
else:
    print("  None found")

# =============================================================================
# NUTRITIONAL DATA EXTRACTION
# =============================================================================

print("\n" + "=" * 70)
print("EXTRACTING NUTRITIONAL DATA")
print("=" * 70)

nutrition_columns = ["calories", "total_fat", "sugar", "sodium", "protein", "saturated_fat", "carbohydrates"]

def parse_nutrition_string(nutrition_str: str) -> List[float]:
    """Extract numerical values from nutrition string."""
    try:
        cleaned = str(nutrition_str).strip("[]")
        values = [float(v.strip()) for v in cleaned.split(",")]
        if len(values) != 7:
            logger.warning(f"Unexpected nutrition values count: {len(values)}")
            return [np.nan] * 7
        return values
    except (ValueError, AttributeError) as e:
        logger.debug(f"Failed to parse nutrition string: {nutrition_str[:50]}... Error: {e}")
        return [np.nan] * 7

# Apply parsing and create new columns
nutrition_values = merged_data["nutrition"].apply(parse_nutrition_string)
nutrition_df = pd.DataFrame(nutrition_values.tolist(), columns=nutrition_columns, index=merged_data.index)

# Merge with dataframe
merged_data = pd.concat([merged_data, nutrition_df], axis=1)

print("Nutritional features added:")
print(merged_data[nutrition_columns].describe().round(2))

# =============================================================================
# RECIPE CHARACTERISTICS ANALYSIS
# =============================================================================

print("\n" + "=" * 70)
print("ANALYZING RECIPE CHARACTERISTICS")
print("=" * 70)

# Get unique recipes for analysis
unique_recipes = merged_data.drop_duplicates(subset=["id"])
print(f"Unique recipes in dataset: {len(unique_recipes):,}")

# =============================================================================
# INGREDIENT ANALYSIS
# =============================================================================

def extract_ingredients_list(ing_string: str) -> List[str]:
    """Parse ingredient string into list."""
    try:
        cleaned = str(ing_string).replace("[", "").replace("]", "").replace("'", "")
        ingredients = [i.strip().lower() for i in cleaned.split(",") if i.strip()]
        return ingredients
    except (AttributeError, TypeError):
        return []

# Extract all ingredients
all_ingredients = []
for ing_str in unique_recipes["ingredients"]:
    all_ingredients.extend(extract_ingredients_list(ing_str))

# Count frequency
ingredient_counts = Counter(all_ingredients)
top_ingredients = pd.DataFrame(
    ingredient_counts.most_common(20),
    columns=["ingredient", "count"]
)

print("\nTop 20 most common ingredients:")
print(top_ingredients.to_string(index=False))

# =============================================================================
# TAG ANALYSIS
# =============================================================================

def extract_tags_list(tag_string: str) -> List[str]:
    """Parse tag string into list."""
    try:
        cleaned = str(tag_string).replace("[", "").replace("]", "").replace("'", "")
        tags = [t.strip().lower() for t in cleaned.split(",") if t.strip()]
        return tags
    except (AttributeError, TypeError):
        return []

# Extract all tags
all_tags = []
for tag_str in unique_recipes["tags"]:
    all_tags.extend(extract_tags_list(tag_str))

# Count frequency
tag_counts = Counter(all_tags)
top_tags = pd.DataFrame(
    tag_counts.most_common(30),
    columns=["tag", "frequency"]
)

print("\nTop 30 most common tags:")
print(top_tags.to_string(index=False))

# Semantic tags for analysis
semantic_tags = ["comfort-food", "healthy", "quick", "easy", "romantic", "vegetarian", 
                 "low-carb", "dessert", "dinner", "lunch", "breakfast", "italian",
                 "mexican", "asian", "mediterranean", "summer", "winter", "holiday"]

print("\nSemantic tags present in dataset:")
for tag in semantic_tags:
    count = tag_counts.get(tag, 0)
    if count > 0:
        print(f"  ✓ {tag}: {count:,} recipes")

# =============================================================================
# CORPUS CREATION
# =============================================================================

print("\n" + "=" * 70)
print("CREATING SEARCH CORPUS")
print("=" * 70)

# Create corpus dataframe with unique recipes
corpus = unique_recipes[["id", "name", "tags", "description", "ingredients", "steps"]].copy()

# Clean each text field
text_columns = ["name", "tags", "description", "ingredients", "steps"]

for col in text_columns:
    corpus[col + "_clean"] = (
        corpus[col]
        .fillna("")
        .astype(str)
        .str.replace("[", "", regex=False)
        .str.replace("]", "", regex=False)
        .str.replace("'", "", regex=False)
        .str.replace('"', "", regex=False)
        .str.lower()
        .str.replace(r"\s+", " ", regex=True)
        .str.strip()
    )
    print(f"  Cleaned: {col}")

# Combine all text into single document per recipe
corpus["document"] = (
    corpus["name_clean"] + " " +
    corpus["tags_clean"] + " " +
    corpus["description_clean"] + " " +
    corpus["ingredients_clean"] + " " +
    corpus["steps_clean"]
)

# Final cleaning
corpus["document"] = (
    corpus["document"]
    .str.replace(r"\s+", " ", regex=True)
    .str.strip()
)

# Add word count
corpus["word_count"] = corpus["document"].str.split().str.len()

# Remove recipes with empty documents
initial_count = len(corpus)
corpus = corpus[corpus["document"].str.len() > 10].copy()
print(f"\nRemoved {initial_count - len(corpus)} recipes with insufficient text")
print(f"Final corpus size: {len(corpus):,} recipes")

# =============================================================================
# EXPORT CORPUS
# =============================================================================

print("\n" + "=" * 70)
print("EXPORTING CORPUS")
print("=" * 70)

# Save full corpus with metadata
corpus_export = corpus[["id", "name", "tags_clean", "document", "word_count"]].copy()
corpus_export.columns = ["recipe_id", "recipe_name", "tags", "document", "word_count"]
corpus_export.to_csv("search_corpus.csv", index=False)
print(f"Saved corpus to 'search_corpus.csv'")

# Save recipe metadata
metadata = unique_recipes[["id", "name", "minutes", "n_ingredients", "n_steps", "description"]].copy()
metadata.columns = ["recipe_id", "recipe_name", "cooking_time", "num_ingredients", "num_steps", "description"]
metadata.to_csv("recipe_metadata.csv", index=False)
print(f"Saved metadata to 'recipe_metadata.csv'")

# =============================================================================
# CORPUS QUALITY CHECK
# =============================================================================

print("\n" + "=" * 70)
print("CORPUS QUALITY CHECK")
print("=" * 70)

test_queries = [
    "comfort food",
    "healthy dinner",
    "quick breakfast",
    "romantic dinner",
    "vegetarian lunch",
    "low carb",
    "summer dessert",
    "holiday cookies"
]

print("Checking corpus coverage for potential search queries:\n")
for query in test_queries:
    query_terms = query.lower().split()
    mask = pd.Series([True] * len(corpus), index=corpus.index)
    for term in query_terms:
        mask = mask & corpus["document"].str.contains(term, regex=False)
    matches = mask.sum()
    print(f"  '{query}': {matches:,} potential matches")

# =============================================================================
# LOAD CORPUS FOR SEARCH ENGINES
# =============================================================================

print("\n" + "=" * 70)
print("LOADING CORPUS FOR SEARCH ENGINES")
print("=" * 70)

corpus_df = pd.read_csv("search_corpus.csv")
metadata_df = pd.read_csv("recipe_metadata.csv")

print(f"Loaded {len(corpus_df):,} recipes")
print(f"Average document length: {corpus_df['word_count'].mean():.0f} words")

# =============================================================================
# TEXT PREPROCESSING CLASS
# =============================================================================

class TextPreprocessor:
    """
    Advanced text preprocessor for recipe documents.
    Handles tokenization, stopword removal, and lemmatization.
    """
    
    def __init__(self, 
                 remove_stopwords: bool = True, 
                 use_lemmatization: bool = True,
                 use_stemming: bool = False,
                 min_word_length: int = 2,
                 custom_stopwords: Optional[set] = None):
        """
        Initialize preprocessor with configuration options.
        
        Args:
            remove_stopwords: Whether to remove English stopwords
            use_lemmatization: Whether to apply lemmatization
            use_stemming: Whether to apply stemming (alternative to lemmatization)
            min_word_length: Minimum word length to keep
            custom_stopwords: Additional domain-specific stopwords
        """
        self.remove_stopwords = remove_stopwords
        self.use_lemmatization = use_lemmatization
        self.use_stemming = use_stemming
        self.min_word_length = min_word_length
        
        # Initialize NLTK tools
        self.lemmatizer = WordNetLemmatizer() if use_lemmatization else None
        self.stemmer = PorterStemmer() if use_stemming else None
        
        # Build stopword set
        self.stopwords = set(stopwords.words('english')) if remove_stopwords else set()
        
        # Add recipe-specific stopwords
        recipe_stopwords = {
            'cup', 'cups', 'tablespoon', 'tablespoons', 'teaspoon', 'teaspoons',
            'tbsp', 'tsp', 'oz', 'ounce', 'ounces', 'pound', 'pounds', 'lb', 'lbs',
            'inch', 'inches', 'minute', 'minutes', 'hour', 'hours',
            'medium', 'large', 'small', 'fresh', 'chopped', 'minced', 'diced',
            'add', 'place', 'put', 'make', 'use', 'take', 'get', 'set',
            'recipe', 'recipes', 'ingredient', 'ingredients', 'step', 'steps',
            'one', 'two', 'three', 'four', 'five', 'six', 'time', 'preparation',
            'optional', 'needed', 'taste', 'degree', 'degrees'
        }
        self.stopwords.update(recipe_stopwords)
        
        if custom_stopwords:
            self.stopwords.update(custom_stopwords)
    
    def preprocess(self, text: str) -> str:
        """
        Full preprocessing pipeline for a single document.
        
        Args:
            text: Raw text string
            
        Returns:
            Preprocessed text string
        """
        if pd.isna(text) or not isinstance(text, str) or not text.strip():
            return ""
        
        # Convert to lowercase
        text = text.lower()
        
        # Remove special characters and digits
        text = re.sub(r'[^a-zA-Z\s]', ' ', text)
        
        # Tokenize
        try:
            tokens = word_tokenize(text)
        except Exception as e:
            logger.debug(f"Tokenization failed: {e}")
            tokens = text.split()
        
        # Process tokens
        processed_tokens = []
        for token in tokens:
            if len(token) < self.min_word_length:
                continue
            
            if self.remove_stopwords and token in self.stopwords:
                continue
            
            if self.use_lemmatization and self.lemmatizer:
                token = self.lemmatizer.lemmatize(token, pos='v')
                token = self.lemmatizer.lemmatize(token, pos='n')
            elif self.use_stemming and self.stemmer:
                token = self.stemmer.stem(token)
            
            processed_tokens.append(token)
        
        return ' '.join(processed_tokens)
    
    def preprocess_batch(self, texts: Union[List[str], pd.Series], show_progress: bool = True) -> List[str]:
        """
        Preprocess a batch of documents.
        
        Args:
            texts: Iterable of text strings
            show_progress: Whether to show progress updates
            
        Returns:
            List of preprocessed text strings
        """
        # Convert to list to ensure len() works
        if isinstance(texts, pd.Series):
            texts = texts.tolist()
        else:
            texts = list(texts)
        
        processed = []
        total = len(texts)
        
        iterator = tqdm(texts, desc="Preprocessing", disable=not show_progress)
        for text in iterator:
            processed.append(self.preprocess(text))
        
        return processed


# =============================================================================
# TF-IDF SEARCH ENGINE CLASS
# =============================================================================

class TFIDFSearchEngine:
    """
    TF-IDF based recipe search engine.
    Supports both unigrams and n-grams for better phrase matching.
    """
    
    def __init__(self, 
                 ngram_range: Tuple[int, int] = (1, 2),
                 max_features: int = 50000,
                 min_df: int = 2,
                 max_df: float = 0.95,
                 sublinear_tf: bool = True):
        """
        Initialize the TF-IDF search engine.
        """
        self.ngram_range = ngram_range
        self.max_features = max_features
        
        self.vectorizer = TfidfVectorizer(
            ngram_range=ngram_range,
            max_features=max_features,
            min_df=min_df,
            max_df=max_df,
            sublinear_tf=sublinear_tf,
            dtype=np.float32
        )
        
        self.preprocessor = TextPreprocessor(
            remove_stopwords=True,
            use_lemmatization=True,
            use_stemming=False
        )
        
        self.tfidf_matrix = None
        self.document_ids = None
        self.id_to_index: Dict[int, int] = {}  # Fast lookup
        self.is_fitted = False
        
    def fit(self, documents: Union[List[str], pd.Series], 
            document_ids: Optional[List] = None, 
            preprocess: bool = True) -> None:
        """
        Fit the TF-IDF model on a corpus of documents.
        """
        print("    Fitting TF-IDF model...")
        
        # Convert to list
        if isinstance(documents, pd.Series):
            documents = documents.tolist()
        else:
            documents = list(documents)
        
        # Store document IDs
        if document_ids is not None:
            self.document_ids = list(document_ids)
        else:
            self.document_ids = list(range(len(documents)))
        
        # Build fast lookup dictionary
        self.id_to_index = {doc_id: idx for idx, doc_id in enumerate(self.document_ids)}
        
        # Preprocess documents
        if preprocess:
            print("    Preprocessing documents...")
            processed_docs = self.preprocessor.preprocess_batch(documents, show_progress=True)
        else:
            processed_docs = documents
        
        # Fit and transform TF-IDF
        print("    Computing TF-IDF matrix...")
        self.tfidf_matrix = self.vectorizer.fit_transform(processed_docs)
        
        self.is_fitted = True
        
        # Report statistics
        vocab_size = len(self.vectorizer.vocabulary_)
        n_docs = self.tfidf_matrix.shape[0]
        sparsity = 1.0 - (self.tfidf_matrix.nnz / (n_docs * vocab_size))
        
        print(f"\n    TF-IDF Model Statistics:")
        print(f"        Documents: {n_docs:,}")
        print(f"        Vocabulary size: {vocab_size:,}")
        print(f"        N-gram range: {self.ngram_range}")
        print(f"        Matrix shape: {self.tfidf_matrix.shape}")
        print(f"        Matrix sparsity: {sparsity:.2%}")
        print(f"        Non-zero elements: {self.tfidf_matrix.nnz:,}")
        
    def search(self, query: str, top_k: int = 10, preprocess: bool = True) -> List[Tuple[int, float]]:
        """
        Search for recipes matching a query.
        
        Returns:
            List of tuples (document_id, similarity_score)
        """
        if not self.is_fitted:
            raise ValueError("Model not fitted. Call fit() first.")
        
        # Validate query
        if not query or not query.strip():
            logger.warning("Empty query provided")
            return []
        
        # Preprocess query
        if preprocess:
            processed_query = self.preprocessor.preprocess(query)
        else:
            processed_query = query
        
        # Check if query has any valid terms after preprocessing
        if not processed_query.strip():
            logger.warning(f"Query '{query}' is empty after preprocessing")
            return []
        
        # Transform query to TF-IDF vector
        query_vector = self.vectorizer.transform([processed_query])
        
        # Compute cosine similarities
        similarities = cosine_similarity(query_vector, self.tfidf_matrix).flatten()
        
        # Get top-k results
        top_indices = np.argsort(similarities)[::-1][:top_k]
        
        results = []
        for idx in top_indices:
            doc_id = self.document_ids[idx]
            score = float(similarities[idx])
            if score > 0:  # Only include results with positive similarity
                results.append((doc_id, score))
        
        return results
    
    def get_query_terms(self, query: str, preprocess: bool = True) -> Dict:
        """Get the terms from a query that exist in the vocabulary."""
        if preprocess:
            processed_query = self.preprocessor.preprocess(query)
        else:
            processed_query = query
        
        query_terms = processed_query.split()
        vocabulary = set(self.vectorizer.vocabulary_.keys())
        
        matched = [t for t in query_terms if t in vocabulary]
        unmatched = [t for t in query_terms if t not in vocabulary]
        
        matched_ngrams = []
        for n in range(2, self.ngram_range[1] + 1):
            for i in range(len(query_terms) - n + 1):
                ngram = ' '.join(query_terms[i:i+n])
                if ngram in vocabulary:
                    matched_ngrams.append(ngram)
        
        return {
            'original_query': query,
            'processed_query': processed_query,
            'matched_terms': matched,
            'matched_ngrams': matched_ngrams,
            'unmatched_terms': unmatched
        }
    
    def get_top_terms_for_document(self, doc_id: int, top_k: int = 10) -> List[Tuple[str, float]]:
        """Get the highest weighted TF-IDF terms for a document."""
        if doc_id not in self.id_to_index:
            raise ValueError(f"Document ID {doc_id} not found")
        
        idx = self.id_to_index[doc_id]
        feature_names = self.vectorizer.get_feature_names_out()
        doc_vector = self.tfidf_matrix[idx].toarray().flatten()
        
        top_indices = np.argsort(doc_vector)[::-1][:top_k]
        
        return [(feature_names[i], float(doc_vector[i])) for i in top_indices if doc_vector[i] > 0]
    
    def save(self, filepath: str) -> None:
        """Save the fitted model to disk."""
        with open(filepath, 'wb') as f:
            pickle.dump({
                'vectorizer': self.vectorizer,
                'tfidf_matrix': self.tfidf_matrix,
                'document_ids': self.document_ids,
                'id_to_index': self.id_to_index,
                'preprocessor': self.preprocessor,
                'ngram_range': self.ngram_range,
                'max_features': self.max_features
            }, f)
        print(f"    Model saved to {filepath}")
    
    def load(self, filepath: str) -> None:
        """Load a fitted model from disk."""
        with open(filepath, 'rb') as f:
            data = pickle.load(f)
        
        self.vectorizer = data['vectorizer']
        self.tfidf_matrix = data['tfidf_matrix']
        self.document_ids = data['document_ids']
        self.id_to_index = data.get('id_to_index', {doc_id: idx for idx, doc_id in enumerate(self.document_ids)})
        self.preprocessor = data['preprocessor']
        self.ngram_range = data['ngram_range']
        self.max_features = data['max_features']
        self.is_fitted = True
        
        print(f"    Model loaded from {filepath}")


# =============================================================================
# UTILITY FUNCTIONS
# =============================================================================

def display_search_results(results: List[Tuple[int, float]], 
                          metadata_df: pd.DataFrame, 
                          corpus_df: pd.DataFrame, 
                          query: str, 
                          show_snippet: bool = True) -> None:
    """Display search results in a formatted way."""
    print(f"\n{'='*70}")
    print(f"SEARCH RESULTS FOR: '{query}'")
    print(f"{'='*70}")
    
    if not results:
        print("No results found.")
        return
    
    for rank, (recipe_id, score) in enumerate(results, 1):
        meta_row = metadata_df[metadata_df['recipe_id'] == recipe_id]
        corpus_row = corpus_df[corpus_df['recipe_id'] == recipe_id]
        
        if len(meta_row) == 0:
            continue
            
        meta = meta_row.iloc[0]
        
        print(f"\n[{rank}] {meta['recipe_name']}")
        print(f"    Score: {score:.4f}")
        print(f"    Cooking Time: {meta['cooking_time']} min | "
              f"Ingredients: {meta['num_ingredients']} | "
              f"Steps: {meta['num_steps']}")
        
        if show_snippet and len(corpus_row) > 0:
            doc = corpus_row.iloc[0]['document']
            snippet = doc[:200] + "..." if len(doc) > 200 else doc
            print(f"    Preview: {snippet}")
        
        if pd.notna(meta['description']) and str(meta['description']) != 'nan':
            desc = str(meta['description'])[:150]
            print(f"    Description: {desc}...")


def analyze_query_matching(search_engine: TFIDFSearchEngine, query: str) -> None:
    """Analyze how a query is being matched by the TF-IDF model."""
    analysis = search_engine.get_query_terms(query)
    
    print(f"\n--- Query Analysis for: '{query}' ---")
    print(f"Processed query: '{analysis['processed_query']}'")
    print(f"Matched unigrams: {analysis['matched_terms']}")
    print(f"Matched n-grams: {analysis['matched_ngrams']}")
    print(f"Unmatched terms: {analysis['unmatched_terms']}")


# =============================================================================
# BUILD TF-IDF SEARCH ENGINE
# =============================================================================

print("\n" + "=" * 70)
print("BUILDING TF-IDF SEARCH ENGINE")
print("=" * 70)

tfidf_engine = TFIDFSearchEngine(
    ngram_range=(1, 2),
    max_features=50000,
    min_df=3,
    max_df=0.90,
    sublinear_tf=True
)

tfidf_engine.fit(
    documents=corpus_df['document'],
    document_ids=corpus_df['recipe_id'].tolist(),
    preprocess=True
)

tfidf_engine.save("tfidf_search_engine.pkl")

# =============================================================================
# TEST TF-IDF ENGINE
# =============================================================================

print("\n" + "=" * 70)
print("TESTING TF-IDF ENGINE")
print("=" * 70)

test_queries = [
    "chocolate cake",
    "pasta carbonara",
    "chicken soup",
    "comfort food for a rainy day",
    "healthy dinner after gym",
    "quick and easy breakfast",
    "romantic dinner for two",
    "light summer salad",
]

for query in test_queries[:4]:  # Test first 4 queries
    analyze_query_matching(tfidf_engine, query)
    results = tfidf_engine.search(query, top_k=3)
    display_search_results(results, metadata_df, corpus_df, query, show_snippet=False)
    print("\n" + "-" * 70)

print("\n" + "=" * 70)
print("TF-IDF ENGINE READY")
print("=" * 70)

Downloading wordnet...
LOADING DATA
Recipes: 5,000 | Interactions: 23,121
Recipes dataset: 5,000 recipes, 12 features
Interactions dataset: 23,121 interactions, 5 features

Recipe columns: ['name', 'id', 'minutes', 'contributor_id', 'submitted', 'tags', 'nutrition', 'n_steps', 'steps', 'description', 'ingredients', 'n_ingredients']

Sample recipe entry:
name                                    crab filled crescent snacks
id                                                            94947
minutes                                                          70
contributor_id                                               111448
submitted                                                2004-07-03
tags              ['time-to-make', 'course', 'main-ingredient', ...
nutrition                      [69.2, 3.0, 9.0, 6.0, 5.0, 4.0, 3.0]
n_steps                                                          16
steps             ['heat over to 375 degrees', 'spray large cook...
description               found 

Preprocessing: 100%|██████████| 5000/5000 [00:03<00:00, 1379.94it/s]


    Computing TF-IDF matrix...

    TF-IDF Model Statistics:
        Documents: 5,000
        Vocabulary size: 31,561
        N-gram range: (1, 2)
        Matrix shape: (5000, 31561)
        Matrix sparsity: 99.57%
        Non-zero elements: 681,755
    Model saved to tfidf_search_engine.pkl

TESTING TF-IDF ENGINE

--- Query Analysis for: 'chocolate cake' ---
Processed query: 'chocolate cake'
Matched unigrams: ['chocolate', 'cake']
Matched n-grams: ['chocolate cake']
Unmatched terms: []

SEARCH RESULTS FOR: 'chocolate cake'

[1] chocolate  cake
    Score: 0.3028
    Cooking Time: 35 min | Ingredients: 9 | Steps: 4
    Description: this is our family's favorite chocolate cake.  
the addition of coffee makes a rich tasting cake. also the oil instead of butter is healthier. i some...

[2] rich chocolate kahlua bundt cake
    Score: 0.2546
    Cooking Time: 55 min | Ingredients: 7 | Steps: 5
    Description: i checked the other kahlua cakes, and none were like our ours. so... here's anothe

In [46]:
# =============================================================================
# FAISS AVAILABILITY CHECK
# =============================================================================

try:
    import faiss
    FAISS_AVAILABLE = True
    print("FAISS is available for fast similarity search")
except ImportError:
    FAISS_AVAILABLE = False
    print("Note: FAISS not installed. Using sklearn for similarity search.")

# Check for GPU availability
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {device}")
if device == "cuda":
    print(f"GPU: {torch.cuda.get_device_name(0)}")


# =============================================================================
# DOCUMENT PREPARER CLASS
# =============================================================================

class DocumentPreparer:
    """
    Prepares recipe documents for embedding.
    Different from TF-IDF preprocessing - preserves semantic meaning.
    """
    
    def __init__(self, max_length: int = 512):
        """
        Args:
            max_length: Maximum document length (in words) to prevent 
                       truncation issues with transformer models
        """
        self.max_length = max_length
    
    def prepare_document(self, row: pd.Series) -> str:
        """
        Create an optimized document representation for embedding.
        
        Args:
            row: DataFrame row with recipe data
            
        Returns:
            Formatted document string
        """
        parts = []
        
        # Recipe name (most important)
        recipe_name = row.get('recipe_name', '')
        if pd.notna(recipe_name) and str(recipe_name).strip():
            name = str(recipe_name).strip()
            parts.append(f"Recipe: {name}")
        
        # Tags (crucial for semantic matching)
        tags = row.get('tags', '')
        if pd.notna(tags) and str(tags).strip():
            tags_clean = str(tags).strip().replace(',', ', ')
            parts.append(f"Tags: {tags_clean}")
        
        # Full document content
        document = row.get('document', '')
        if pd.notna(document) and str(document).strip():
            doc = str(document).strip()
            words = doc.split()
            if len(words) > self.max_length:
                doc = ' '.join(words[:self.max_length])
            parts.append(doc)
        
        return ' '.join(parts)
    
    def prepare_batch(self, df: pd.DataFrame, show_progress: bool = True) -> List[str]:
        """
        Prepare a batch of documents.
        
        Args:
            df: DataFrame with recipe data
            show_progress: Whether to show progress bar
            
        Returns:
            List of prepared document strings
        """
        prepared = []
        iterator = tqdm(df.iterrows(), total=len(df), desc="Preparing documents", disable=not show_progress)
        
        for idx, row in iterator:
            prepared.append(self.prepare_document(row))
        
        return prepared


# =============================================================================
# EMBEDDINGS SEARCH ENGINE CLASS
# =============================================================================

class EmbeddingsSearchEngine:
    """
    Neural embeddings-based recipe search engine using Sentence Transformers.
    Captures semantic meaning of queries and documents.
    """
    
    # Available models (from smaller/faster to larger/better)
    AVAILABLE_MODELS = {
        'mini': 'all-MiniLM-L6-v2',           # 80MB, fast, good quality
        'mpnet': 'all-mpnet-base-v2',          # 420MB, best quality
        'distilbert': 'all-distilroberta-v1',  # 290MB, good balance
        'minilm-l12': 'all-MiniLM-L12-v2',     # 120MB, better than L6
        'multi': 'paraphrase-multilingual-MiniLM-L12-v2'  # Multilingual
    }
    
    def __init__(self, 
                 model_name: str = 'mini', 
                 use_faiss: bool = True, 
                 batch_size: int = 64):
        """
        Initialize the embeddings search engine.
        
        Args:
            model_name: Key from AVAILABLE_MODELS or full HuggingFace model name
            use_faiss: Whether to use FAISS for fast similarity search
            batch_size: Batch size for encoding documents
        """
        # Resolve model name
        if model_name in self.AVAILABLE_MODELS:
            self.model_path = self.AVAILABLE_MODELS[model_name]
        else:
            self.model_path = model_name
        
        self.batch_size = batch_size
        self.use_faiss = use_faiss and FAISS_AVAILABLE
        
        # Load the sentence transformer model
        print(f"    Loading model: {self.model_path}")
        self.model = SentenceTransformer(self.model_path, device=device)
        
        # Get embedding dimension
        self.embedding_dim = self.model.get_sentence_embedding_dimension()
        print(f"    Embedding dimension: {self.embedding_dim}")
        
        # Storage
        self.embeddings: Optional[np.ndarray] = None
        self.document_ids: Optional[List] = None
        self.id_to_index: Dict[int, int] = {}  # Fast O(1) lookup
        self.faiss_index = None
        self.is_fitted = False
        
        # Document preparer
        self.doc_preparer = DocumentPreparer(max_length=256)
    
    def fit(self, 
            documents: Union[pd.DataFrame, List[str]], 
            document_ids: Optional[List] = None, 
            show_progress: bool = True) -> None:
        """
        Compute embeddings for all documents in the corpus.
        
        Args:
            documents: List/Series of document strings or DataFrame with recipe data
            document_ids: Optional list of document identifiers
            show_progress: Whether to show progress bar
        """
        print("\n    Computing document embeddings...")
        
        # Handle DataFrame input
        if isinstance(documents, pd.DataFrame):
            print("    Preparing documents from DataFrame...")
            doc_list = self.doc_preparer.prepare_batch(documents, show_progress=show_progress)
            if document_ids is None and 'recipe_id' in documents.columns:
                document_ids = documents['recipe_id'].tolist()
        elif isinstance(documents, pd.Series):
            doc_list = documents.tolist()
        else:
            doc_list = list(documents)
        
        # Store document IDs
        if document_ids is not None:
            self.document_ids = list(document_ids)
        else:
            self.document_ids = list(range(len(doc_list)))
        
        # Build fast lookup dictionary - O(1) instead of O(n)
        self.id_to_index = {doc_id: idx for idx, doc_id in enumerate(self.document_ids)}
        
        # Compute embeddings in batches
        print(f"    Encoding {len(doc_list):,} documents...")
        self.embeddings = self.model.encode(
            doc_list,
            batch_size=self.batch_size,
            show_progress_bar=show_progress,
            convert_to_numpy=True,
            normalize_embeddings=True  # L2 normalize for cosine similarity
        )
        
        print(f"    Embeddings shape: {self.embeddings.shape}")
        
        # Build FAISS index for fast search
        if self.use_faiss:
            self._build_faiss_index()
        
        self.is_fitted = True
        
        # Report statistics
        print(f"\n    Embeddings Search Engine Statistics:")
        print(f"        Documents: {len(self.document_ids):,}")
        print(f"        Embedding dimension: {self.embedding_dim}")
        print(f"        Total memory: {self.embeddings.nbytes / 1024**2:.1f} MB")
        print(f"        Using FAISS: {self.use_faiss}")
    
    def _build_faiss_index(self) -> None:
        """Build FAISS index for fast approximate nearest neighbor search."""
        print("    Building FAISS index...")
        
        # Use Inner Product index (equivalent to cosine similarity for normalized vectors)
        self.faiss_index = faiss.IndexFlatIP(self.embedding_dim)
        
        # Add vectors to index
        self.faiss_index.add(self.embeddings.astype(np.float32))
        
        print(f"    FAISS index built with {self.faiss_index.ntotal} vectors")
    
    def encode_query(self, query: str) -> np.ndarray:
        """
        Encode a query string into an embedding vector.
        
        Args:
            query: Query string
            
        Returns:
            Normalized embedding vector
        """
        embedding = self.model.encode(
            query,
            convert_to_numpy=True,
            normalize_embeddings=True
        )
        return embedding
    
    def search(self, query: str, top_k: int = 10) -> List[Tuple[int, float]]:
        """
        Search for recipes matching a query using semantic similarity.
        
        Args:
            query: Search query string
            top_k: Number of results to return
            
        Returns:
            List of tuples (document_id, similarity_score)
        """
        if not self.is_fitted:
            raise ValueError("Model not fitted. Call fit() first.")
        
        # Validate query
        if not query or not query.strip():
            logger.warning("Empty query provided")
            return []
        
        # Encode query
        query_embedding = self.encode_query(query)
        
        if self.use_faiss:
            # FAISS search
            query_embedding = query_embedding.reshape(1, -1).astype(np.float32)
            scores, indices = self.faiss_index.search(query_embedding, top_k)
            
            results = []
            for idx, score in zip(indices[0], scores[0]):
                if idx != -1:  # FAISS returns -1 for empty results
                    doc_id = self.document_ids[idx]
                    results.append((doc_id, float(score)))
        else:
            # Sklearn cosine similarity
            query_embedding = query_embedding.reshape(1, -1)
            similarities = cosine_similarity(query_embedding, self.embeddings).flatten()
            
            # Get top-k results
            top_indices = np.argsort(similarities)[::-1][:top_k]
            
            results = []
            for idx in top_indices:
                doc_id = self.document_ids[idx]
                score = float(similarities[idx])
                results.append((doc_id, score))
        
        return results
    
    def search_batch(self, queries: List[str], top_k: int = 10) -> Dict[str, List[Tuple[int, float]]]:
        """
        Search for multiple queries at once (more efficient).
        
        Args:
            queries: List of query strings
            top_k: Number of results per query
            
        Returns:
            Dictionary mapping queries to their results
        """
        if not self.is_fitted:
            raise ValueError("Model not fitted. Call fit() first.")
        
        # Filter empty queries
        valid_queries = [q for q in queries if q and q.strip()]
        if not valid_queries:
            return {q: [] for q in queries}
        
        # Encode all queries
        query_embeddings = self.model.encode(
            valid_queries,
            batch_size=self.batch_size,
            convert_to_numpy=True,
            normalize_embeddings=True
        )
        
        results = {}
        
        if self.use_faiss:
            scores, indices = self.faiss_index.search(
                query_embeddings.astype(np.float32), top_k
            )
            
            for i, query in enumerate(valid_queries):
                query_results = []
                for idx, score in zip(indices[i], scores[i]):
                    if idx != -1:
                        doc_id = self.document_ids[idx]
                        query_results.append((doc_id, float(score)))
                results[query] = query_results
        else:
            similarities = cosine_similarity(query_embeddings, self.embeddings)
            
            for i, query in enumerate(valid_queries):
                top_indices = np.argsort(similarities[i])[::-1][:top_k]
                query_results = [
                    (self.document_ids[idx], float(similarities[i][idx]))
                    for idx in top_indices
                ]
                results[query] = query_results
        
        # Add empty results for invalid queries
        for query in queries:
            if query not in results:
                results[query] = []
        
        return results
    
    def get_similar_recipes(self, recipe_id: int, top_k: int = 10) -> List[Tuple[int, float]]:
        """
        Find recipes similar to a given recipe.
        
        Args:
            recipe_id: ID of the recipe to find similar items for
            top_k: Number of similar recipes to return
            
        Returns:
            List of tuples (recipe_id, similarity_score)
        """
        if not self.is_fitted:
            raise ValueError("Model not fitted. Call fit() first.")
        
        # Use O(1) lookup instead of O(n)
        if recipe_id not in self.id_to_index:
            raise ValueError(f"Recipe ID {recipe_id} not found in index")
        
        idx = self.id_to_index[recipe_id]
        recipe_embedding = self.embeddings[idx].reshape(1, -1)
        
        if self.use_faiss:
            scores, indices = self.faiss_index.search(
                recipe_embedding.astype(np.float32), top_k + 1
            )
            # Skip the first result (the recipe itself)
            results = []
            for i, s in zip(indices[0], scores[0]):
                if i != -1 and i != idx:  # Skip self
                    results.append((self.document_ids[i], float(s)))
            results = results[:top_k]
        else:
            similarities = cosine_similarity(recipe_embedding, self.embeddings).flatten()
            # Set self-similarity to -1 to exclude it
            similarities[idx] = -1
            top_indices = np.argsort(similarities)[::-1][:top_k]
            results = [
                (self.document_ids[i], float(similarities[i]))
                for i in top_indices
            ]
        
        return results
    
    def compute_query_document_similarity(self, query: str, recipe_id: int) -> float:
        """
        Compute similarity between a query and a specific recipe.
        
        Args:
            query: Query string
            recipe_id: ID of the recipe
            
        Returns:
            Similarity score
        """
        if not self.is_fitted:
            raise ValueError("Model not fitted. Call fit() first.")
        
        if not query or not query.strip():
            return 0.0
        
        # Use O(1) lookup
        if recipe_id not in self.id_to_index:
            raise ValueError(f"Recipe ID {recipe_id} not found in index")
        
        query_embedding = self.encode_query(query).reshape(1, -1)
        
        idx = self.id_to_index[recipe_id]
        doc_embedding = self.embeddings[idx].reshape(1, -1)
        
        similarity = cosine_similarity(query_embedding, doc_embedding)[0][0]
        return float(similarity)
    
    def get_embedding(self, recipe_id: int) -> np.ndarray:
        """
        Get the embedding vector for a specific recipe.
        
        Args:
            recipe_id: ID of the recipe
            
        Returns:
            Embedding vector
        """
        if not self.is_fitted:
            raise ValueError("Model not fitted. Call fit() first.")
        
        if recipe_id not in self.id_to_index:
            raise ValueError(f"Recipe ID {recipe_id} not found in index")
        
        idx = self.id_to_index[recipe_id]
        return self.embeddings[idx].copy()
    
    def save(self, filepath: str) -> None:
        """Save the fitted model data to disk (not the transformer model itself)."""
        with open(filepath, 'wb') as f:
            pickle.dump({
                'embeddings': self.embeddings,
                'document_ids': self.document_ids,
                'id_to_index': self.id_to_index,
                'model_path': self.model_path,
                'embedding_dim': self.embedding_dim,
                'use_faiss': self.use_faiss
            }, f)
        print(f"    Embeddings saved to {filepath}")
    
    def load(self, filepath: str) -> None:
        """Load embeddings from disk."""
        with open(filepath, 'rb') as f:
            data = pickle.load(f)
        
        # Check for model mismatch
        saved_model_path = data.get('model_path')
        if saved_model_path and saved_model_path != self.model_path:
            raise ValueError(
                f"Model mismatch: embeddings were created with '{saved_model_path}' "
                f"but current engine uses '{self.model_path}'. "
                f"Initialize the engine with model_name='{saved_model_path}' or recompute embeddings."
            )
        
        self.embeddings = data['embeddings']
        self.document_ids = data['document_ids']
        self.id_to_index = data.get('id_to_index', {doc_id: idx for idx, doc_id in enumerate(self.document_ids)})
        self.embedding_dim = data['embedding_dim']
        
        # Restore use_faiss from saved state, but still check FAISS availability
        saved_use_faiss = data.get('use_faiss', True)
        self.use_faiss = saved_use_faiss and FAISS_AVAILABLE
        
        # Rebuild FAISS index if needed
        if self.use_faiss:
            self._build_faiss_index()
        
        self.is_fitted = True
        print(f"    Embeddings loaded from {filepath}")

# =============================================================================
# UTILITY FUNCTIONS FOR EMBEDDINGS RESULTS
# =============================================================================

def display_search_results_embeddings(results: List[Tuple[int, float]], 
                                      metadata_df: pd.DataFrame, 
                                      corpus_df: pd.DataFrame, 
                                      query: str, 
                                      engine_name: str = "EMBEDDINGS") -> None:
    """Display search results in a formatted way."""
    print(f"\n{'='*70}")
    print(f"{engine_name} RESULTS FOR: '{query}'")
    print(f"{'='*70}")
    
    if not results:
        print("No results found.")
        return
    
    for rank, (recipe_id, score) in enumerate(results, 1):
        meta_row = metadata_df[metadata_df['recipe_id'] == recipe_id]
        
        if len(meta_row) == 0:
            continue
            
        meta = meta_row.iloc[0]
        
        print(f"\n[{rank}] {meta['recipe_name']}")
        print(f"    Similarity Score: {score:.4f}")
        print(f"    Cooking Time: {meta['cooking_time']} min | "
              f"Ingredients: {meta['num_ingredients']} | "
              f"Steps: {meta['num_steps']}")
        
        # Show tags if available
        corpus_row = corpus_df[corpus_df['recipe_id'] == recipe_id]
        if len(corpus_row) > 0:
            tags = corpus_row.iloc[0].get('tags', '')
            if pd.notna(tags) and str(tags) != 'nan':
                tags_preview = str(tags)[:100]
                print(f"    Tags: {tags_preview}...")
        
        # Show description
        if pd.notna(meta['description']) and str(meta['description']) != 'nan':
            desc = str(meta['description'])[:150]
            print(f"    Description: {desc}...")


# =============================================================================
# BUILD EMBEDDINGS SEARCH ENGINE
# =============================================================================

print("\n" + "=" * 70)
print("BUILDING EMBEDDINGS SEARCH ENGINE")
print("=" * 70)

embeddings_engine = EmbeddingsSearchEngine(
    model_name='mini',  # all-MiniLM-L6-v2
    use_faiss=FAISS_AVAILABLE,
    batch_size=64
)

embeddings_engine.fit(
    documents=corpus_df,
    document_ids=corpus_df['recipe_id'].tolist(),
    show_progress=True
)

embeddings_engine.save("embeddings_search_engine.pkl")


# =============================================================================
# TEST EMBEDDINGS ENGINE
# =============================================================================

print("\n" + "=" * 70)
print("TESTING EMBEDDINGS ENGINE")
print("=" * 70)

test_queries_embeddings = [
    # Simple keyword queries
    "chocolate cake",
    "pasta carbonara",
    "chicken soup",
    
    # Semantic/high-level queries
    "comfort food for a rainy day",
    "healthy dinner after gym",
    "quick and easy breakfast",
    "romantic dinner for two",
    "light summer salad",
    "warm winter soup",
    "kid friendly lunch",
    "low carb vegetarian",
    
    # Abstract/mood-based queries
    "something sweet and indulgent",
    "meal prep for the week",
    "impressive dish for guests",
    "nostalgic childhood favorite"
]

# Test first few queries
for query in test_queries_embeddings[:5]:
    results = embeddings_engine.search(query, top_k=3)
    display_search_results_embeddings(results, metadata_df, corpus_df, query)
    print("\n" + "-" * 70)


# =============================================================================
# SAVE RESULTS FOR COMPARISON
# =============================================================================

print("\n" + "=" * 70)
print("SAVING SEARCH RESULTS FOR COMPARISON")
print("=" * 70)

comparison_queries = [
    "comfort food for a rainy day",
    "healthy dinner after gym",
    "quick and easy breakfast",
    "romantic dinner for two",
    "light summer salad",
    "vegetarian protein rich",
    "decadent chocolate dessert",
    "mediterranean diet lunch"
]

# Get TF-IDF results
tfidf_results = {}
for query in comparison_queries:
    results = tfidf_engine.search(query, top_k=20)
    tfidf_results[query] = results

with open("tfidf_search_results.pkl", "wb") as f:
    pickle.dump(tfidf_results, f)
print("TF-IDF search results saved")

# Get Embeddings results
embeddings_results = {}
for query in comparison_queries:
    results = embeddings_engine.search(query, top_k=20)
    embeddings_results[query] = results

with open("embeddings_search_results.pkl", "wb") as f:
    pickle.dump(embeddings_results, f)
print("Embeddings search results saved")


# =============================================================================
# EMBEDDING SPACE ANALYSIS
# =============================================================================

print("\n" + "=" * 70)
print("EMBEDDING SPACE ANALYSIS")
print("=" * 70)

analysis_queries = [
    "healthy dinner",
    "nutritious meal",
    "diet food",
    "comfort food",
    "hearty meal",
    "cozy dinner"
]

# Encode queries
query_embeddings = embeddings_engine.model.encode(
    analysis_queries,
    normalize_embeddings=True
)

# Compute pairwise similarities
query_similarities = cosine_similarity(query_embeddings)

print("\nQuery-to-Query Similarity Matrix:")
print(f"{'':20}", end="")
for q in analysis_queries:
    print(f"{q[:15]:>16}", end="")
print()

for i, q1 in enumerate(analysis_queries):
    print(f"{q1[:20]:20}", end="")
    for j, q2 in enumerate(analysis_queries):
        print(f"{query_similarities[i][j]:16.3f}", end="")
    print()


# =============================================================================
# FIND SIMILAR RECIPES DEMO
# =============================================================================

print("\n" + "=" * 70)
print("SIMILAR RECIPES DEMO")
print("=" * 70)

sample_recipe_id = corpus_df.iloc[0]['recipe_id']
sample_meta = metadata_df[metadata_df['recipe_id'] == sample_recipe_id]

if len(sample_meta) > 0:
    sample_recipe_name = sample_meta.iloc[0]['recipe_name']
    print(f"\nFinding recipes similar to: '{sample_recipe_name}' (ID: {sample_recipe_id})")
    
    similar_recipes = embeddings_engine.get_similar_recipes(sample_recipe_id, top_k=5)
    
    print("\nSimilar recipes:")
    for rank, (recipe_id, score) in enumerate(similar_recipes, 1):
        meta_row = metadata_df[metadata_df['recipe_id'] == recipe_id]
        if len(meta_row) > 0:
            name = meta_row.iloc[0]['recipe_name']
            print(f"  {rank}. {name} (similarity: {score:.4f})")
else:
    print("Could not find sample recipe for demo")


# =============================================================================
# PREPARE RECIPE DATA FOR GUI
# =============================================================================

print("\n" + "=" * 70)
print("PREPARING DATA FOR GUI")
print("=" * 70)

recipe_data = metadata_df.copy()

# Add tags from corpus to metadata
recipe_data = recipe_data.merge(
    corpus_df[['recipe_id', 'tags']], 
    on='recipe_id', 
    how='left'
)

# Validate merge
if len(recipe_data) == 0:
    raise ValueError("Recipe data merge failed - no records found")

print(f"Recipe data prepared: {len(recipe_data):,} recipes")
print(f"Columns: {list(recipe_data.columns)}")


print("\n" + "=" * 70)
print("EMBEDDINGS ENGINE COMPLETE")
print("=" * 70)

2026-01-13 18:53:20,033 - INFO - Load pretrained SentenceTransformer: all-MiniLM-L6-v2


FAISS is available for fast similarity search
Using device: cpu

BUILDING EMBEDDINGS SEARCH ENGINE
    Loading model: all-MiniLM-L6-v2
    Embedding dimension: 384

    Computing document embeddings...
    Preparing documents from DataFrame...


Preparing documents: 100%|██████████| 5000/5000 [00:00<00:00, 32789.42it/s]

    Encoding 5,000 documents...





Batches:   0%|          | 0/79 [00:00<?, ?it/s]

    Embeddings shape: (5000, 384)
    Building FAISS index...
    FAISS index built with 5000 vectors

    Embeddings Search Engine Statistics:
        Documents: 5,000
        Embedding dimension: 384
        Total memory: 7.3 MB
        Using FAISS: True
    Embeddings saved to embeddings_search_engine.pkl

TESTING EMBEDDINGS ENGINE


Batches:   0%|          | 0/1 [00:00<?, ?it/s]


EMBEDDINGS RESULTS FOR: 'chocolate cake'

[1] heavenly chocolate cake
    Similarity Score: 0.6097
    Cooking Time: 50 min | Ingredients: 8 | Steps: 11
    Tags: 60-minutes-or-less, time-to-make, course, preparation, occasion, for-large-groups, desserts, oven, e...
    Description: this is a sinfully rich cake that would be wonderful for a birthday party. i found it in our local newspaper years ago and have make it numrous times....

[2] mimi s double rich chocolate cake  from a cake mix
    Similarity Score: 0.5909
    Cooking Time: 80 min | Ingredients: 6 | Steps: 9
    Tags: time-to-make, course, preparation, desserts, cakes, dietary, 4-hours-or-less...
    Description: this is a fantastic cake starting with a cake mix and so easy to do.  i bake it in a angel food pan and it comes out great.  frost it with a butter fr...

[3] paradise cake
    Similarity Score: 0.5908
    Cooking Time: 30 min | Ingredients: 10 | Steps: 15
    Tags: 30-minutes-or-less, time-to-make, course, main-in

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


EMBEDDINGS RESULTS FOR: 'pasta carbonara'

[1] eggless spaghetti carbonara   california pizza kitchen
    Similarity Score: 0.5743
    Cooking Time: 15 min | Ingredients: 7 | Steps: 9
    Tags: bacon, 15-minutes-or-less, time-to-make, course, main-ingredient, preparation, main-dish, pasta, por...
    Description: got this recipe from their cookbook....

[2] easy carbonara
    Similarity Score: 0.5490
    Cooking Time: 30 min | Ingredients: 9 | Steps: 6
    Tags: 30-minutes-or-less, time-to-make, course, main-ingredient, cuisine, preparation, occasion, for-1-or-...
    Description: my favourite pasta recipe which is easy to prepare, delicious and low in fat!...

[3] spaghetti sauce to die for
    Similarity Score: 0.4891
    Cooking Time: 145 min | Ingredients: 15 | Steps: 13
    Tags: weeknight, time-to-make, course, main-ingredient, cuisine, preparation, occasion, healthy, main-dish...
    Description: this is a hearty (meaty) yet tangy spaghetti sauce that i have tweaked to almost p

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


EMBEDDINGS RESULTS FOR: 'chicken soup'

[1] chicken and beef meatball soup
    Similarity Score: 0.6574
    Cooking Time: 45 min | Ingredients: 7 | Steps: 16
    Tags: 60-minutes-or-less, time-to-make, course, main-ingredient, cuisine, preparation, occasion, soups-ste...
    Description: this dutch  recipe is my mother's. she was making this in holland from scratch during the war. in 1951 we came to america and she continued making it ...

[2] jan s comforting  yummy but naughty chicken wild rice soup
    Similarity Score: 0.6026
    Cooking Time: 135 min | Ingredients: 9 | Steps: 4
    Tags: weeknight, time-to-make, course, main-ingredient, cuisine, preparation, occasion, north-american, so...
    Description: this soup is to die for! my friend jan makes it for potlucks and i can never get enough. it is the best, yummiest, soup around. good for those cold wi...

[3] savory meatball soup
    Similarity Score: 0.5986
    Cooking Time: 380 min | Ingredients: 22 | Steps: 5
    Tags: occa

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


EMBEDDINGS RESULTS FOR: 'comfort food for a rainy day'

[1] napa dave s mexican chicken soup
    Similarity Score: 0.4021
    Cooking Time: 20 min | Ingredients: 11 | Steps: 7
    Tags: 30-minutes-or-less, time-to-make, course, main-ingredient, cuisine, preparation, north-american, hea...
    Description: a delightful combination of ingredients and easy to make as well. and if you are feeling under the weather, this will cure what ails you (just ask you...

[2] chinese melon soup  dong gwah jong
    Similarity Score: 0.3911
    Cooking Time: 45 min | Ingredients: 11 | Steps: 7
    Tags: 60-minutes-or-less, time-to-make, course, main-ingredient, cuisine, preparation, occasion, north-ame...
    Description: great summer soup. it sounds really weird, but it's so good. dh says absolutely not to leave out the watermelon because it really adds to the flavor o...

[3] indian chicken with vegetables  murgh subji wala
    Similarity Score: 0.3907
    Cooking Time: 55 min | Ingredients: 14 | St

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


EMBEDDINGS RESULTS FOR: 'healthy dinner after gym'

[1] marmie s delicious asain flavor  low cal  low fat vegetable soup
    Similarity Score: 0.4221
    Cooking Time: 45 min | Ingredients: 14 | Steps: 9
    Tags: 60-minutes-or-less, time-to-make, course, main-ingredient, cuisine, preparation, occasion, for-large...
    Description: amazingly delicious, low calorie, low fat, somewhat spicy, oriental flavor diet soup!  a really nice change from the tomatoe based diet soups.  
this...

[2] apple pie in a bowl
    Similarity Score: 0.4201
    Cooking Time: 5 min | Ingredients: 3 | Steps: 3
    Tags: 15-minutes-or-less, time-to-make, course, main-ingredient, preparation, for-1-or-2, low-protein, hea...
    Description: this is something i like to eat for breakfast on a cold day. i keep the ingredients on hand at work so i can heat this up fresh and start the day off ...

[3] sophie s super easy chickie pea and mato salad
    Similarity Score: 0.4187
    Cooking Time: 15 min | Ingredients:

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Embeddings search results saved

EMBEDDING SPACE ANALYSIS


Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Query-to-Query Similarity Matrix:
                      healthy dinner nutritious meal       diet food    comfort food     hearty meal     cozy dinner
healthy dinner                 1.000           0.731           0.568           0.536           0.643           0.630
nutritious meal                0.731           1.000           0.673           0.554           0.660           0.442
diet food                      0.568           0.673           1.000           0.526           0.525           0.291
comfort food                   0.536           0.554           0.526           1.000           0.618           0.582
hearty meal                    0.643           0.660           0.525           0.618           1.000           0.526
cozy dinner                    0.630           0.442           0.291           0.582           0.526           1.000

SIMILAR RECIPES DEMO

Finding recipes similar to: 'crab filled crescent snacks' (ID: 94947)

Similar recipes:
  1. california crab melt (similari

In [47]:
# =============================================================================
# =============================================================================
# EVALUATION SYSTEM
# =============================================================================
# =============================================================================

print("=" * 70)
print("EVALUATION SYSTEM")
print("=" * 70)

# =============================================================================
# PROXY RELEVANCE GENERATOR
# =============================================================================

class ProxyRelevanceGenerator:
    """
    Generates proxy relevance judgments for evaluation.
    Since we don't have human annotations, we use multiple signals
    to estimate relevance.
    """
    
    def __init__(self, corpus_df: pd.DataFrame, metadata_df: pd.DataFrame):
        """
        Initialize the relevance generator.
        
        Args:
            corpus_df: DataFrame with recipe documents and tags
            metadata_df: DataFrame with recipe metadata
        """
        self.corpus_df = corpus_df.copy()
        self.metadata_df = metadata_df.copy()
        
        # Precompute lowercase versions for matching
        self.corpus_df['tags_lower'] = self.corpus_df['tags'].fillna('').astype(str).str.lower()
        self.corpus_df['document_lower'] = self.corpus_df['document'].fillna('').astype(str).str.lower()
        self.corpus_df['name_lower'] = self.corpus_df['recipe_name'].fillna('').astype(str).str.lower()
        
        self.metadata_df['name_lower'] = self.metadata_df['recipe_name'].fillna('').astype(str).str.lower()
        self.metadata_df['desc_lower'] = self.metadata_df['description'].fillna('').astype(str).str.lower()
        
        # Create lookup dictionaries for fast access
        # Much faster using to_dict
        self.corpus_lookup = self.corpus_df.set_index('recipe_id')[
            ['tags_lower', 'document_lower', 'name_lower']
        ].to_dict('index')

        self.metadata_lookup = self.metadata_df.set_index('recipe_id')[
            ['name_lower', 'desc_lower']
        ].to_dict('index')
        
        
        print(f"    Initialized relevance generator with {len(self.corpus_lookup)} recipes")
    
    def _tokenize_query(self, query: str) -> List[str]:
        """Tokenize and clean query."""
        query = query.lower()
        # Remove common stopwords for matching
        stopwords_set = {'a', 'an', 'the', 'for', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'of', 'is', 'it'}
        tokens = re.findall(r'\b[a-z]+\b', query)
        tokens = [t for t in tokens if t not in stopwords_set and len(t) > 2]
        return tokens
    
    def _compute_term_overlap(self, query_terms: List[str], text: str) -> float:
        """Compute the proportion of query terms found in text."""
        if not query_terms or not text:
            return 0.0
        
        matches = sum(1 for term in query_terms if term in text)
        return matches / len(query_terms)
    
    def compute_relevance_score(self, query: str, recipe_id: int) -> Dict:
        """
        Compute a multi-signal relevance score for a query-recipe pair.
        
        Args:
            query: Search query
            recipe_id: Recipe ID
            
        Returns:
            Dictionary with relevance signals and overall score
        """
        query_terms = self._tokenize_query(query)
    
        # Ensure recipe_id is a Python int
        recipe_id = int(recipe_id)
    
        # Get recipe data
        corpus_data = self.corpus_lookup.get(recipe_id, {})
        meta_data = self.metadata_lookup.get(recipe_id, {})
        
        if not corpus_data and not meta_data:
            return {'overall_score': 0.0, 'binary_relevant': False, 'graded_relevance': 0}
        
        # Signal 1: Tag matching (highest weight - tags are curated)
        tags = corpus_data.get('tags_lower', '')
        tag_score = self._compute_term_overlap(query_terms, tags)
        
        # Signal 2: Recipe name matching (high weight)
        name = meta_data.get('name_lower', '') or corpus_data.get('name_lower', '')
        name_score = self._compute_term_overlap(query_terms, name)
        
        # Signal 3: Description matching (medium weight)
        desc = meta_data.get('desc_lower', '')
        desc_score = self._compute_term_overlap(query_terms, desc)
        
        # Signal 4: Full document matching (lower weight - more noisy)
        doc = corpus_data.get('document_lower', '')
        doc_score = self._compute_term_overlap(query_terms, doc)
        
        # Compute weighted overall score
        weights = {
            'tag': 0.40,
            'name': 0.30,
            'desc': 0.20,
            'doc': 0.10
        }
        
        overall_score = (
            weights['tag'] * tag_score +
            weights['name'] * name_score +
            weights['desc'] * desc_score +
            weights['doc'] * doc_score
        )
        
        # Binary relevance threshold
        binary_relevant = (
            tag_score >= 0.5 or 
            name_score >= 0.5 or 
            overall_score >= 0.3
        )
        
        return {
            'tag_score': tag_score,
            'name_score': name_score,
            'desc_score': desc_score,
            'doc_score': doc_score,
            'overall_score': overall_score,
            'binary_relevant': binary_relevant,
            'graded_relevance': self._to_graded_relevance(overall_score)
        }
    
    def _to_graded_relevance(self, score: float) -> int:
        """Convert continuous score to graded relevance (0-3)."""
        if score >= 0.6:
            return 3  # Highly relevant
        elif score >= 0.4:
            return 2  # Relevant
        elif score >= 0.2:
            return 1  # Partially relevant
        else:
            return 0  # Not relevant
    
    def generate_relevance_judgments(self, 
                                      query: str, 
                                      candidate_ids: List[int]) -> Dict[int, Dict]:
        """
        Generate relevance judgments for a set of candidate recipes.
        
        Args:
            query: Search query
            candidate_ids: List of recipe IDs to judge
            
        Returns:
            Dictionary mapping recipe_id to relevance scores
        """
        judgments = {}
        for recipe_id in candidate_ids:
            judgments[recipe_id] = self.compute_relevance_score(query, recipe_id)
        return judgments
    
    def get_pseudo_relevant_set(self, 
                                 query: str, 
                                 top_k: int = 100,
                                 min_score: float = 0.3,
                                 sample_size: int = 5000,
                                 random_state: int=42) -> List[int]:
        """
        Get a set of pseudo-relevant recipes for a query using text matching.
        Uses sampling for efficiency on large datasets.
        
        Args:
            query: Search query
            top_k: Maximum number of relevant recipes to return
            min_score: Minimum relevance score threshold
            sample_size: Number of recipes to sample for evaluation
            
        Returns:
            List of relevant recipe IDs
        """
        query_terms = self._tokenize_query(query)
        
        if not query_terms:
            return []
        
        # Sample recipes for efficiency
        all_recipe_ids = list(self.corpus_lookup.keys())
        if len(all_recipe_ids) > sample_size:
            rng = np.random.RandomState(random_state)
            sampled_ids = rng.choice(all_recipe_ids, size=sample_size, replace=False)
        else:
            sampled_ids = all_recipe_ids
        
        relevant_recipes = []
        
        for recipe_id in sampled_ids:
            score_data = self.compute_relevance_score(query, recipe_id)
            if score_data['overall_score'] >= min_score:
                relevant_recipes.append((recipe_id, score_data['overall_score']))
        
        # Sort by score and return top-k
        relevant_recipes.sort(key=lambda x: x[1], reverse=True)
        return [r[0] for r in relevant_recipes[:top_k]]
    
    def get_relevant_set_fast(self, 
                               query: str, 
                               retrieved_ids: List[int],
                               min_score: float = 0.25) -> set:
        """
        Get relevant set from retrieved candidates only (faster for evaluation).
        
        Args:
            query: Search query
            retrieved_ids: List of retrieved recipe IDs to evaluate
            min_score: Minimum relevance score threshold
            
        Returns:
            Set of relevant recipe IDs
        """
        query_terms = self._tokenize_query(query)
        
        if not query_terms:
            return set()
        
        relevant = set()
        for recipe_id in retrieved_ids:
            score_data = self.compute_relevance_score(query, recipe_id)
            if score_data['binary_relevant'] or score_data['overall_score'] >= min_score:
                relevant.add(recipe_id)
        
        return relevant


# =============================================================================
# EVALUATION METRICS
# =============================================================================

class SearchEngineEvaluator:
    """
    Comprehensive evaluation metrics for search engines.
    """
    
    def __init__(self, relevance_generator: ProxyRelevanceGenerator):
        """
        Initialize evaluator.
        
        Args:
            relevance_generator: ProxyRelevanceGenerator instance
        """
        self.relevance_generator = relevance_generator
    
    def precision_at_k(self, 
                       retrieved_ids: List[int], 
                       relevant_ids: set, 
                       k: int) -> float:
        """
        Compute Precision@K.
        
        Precision@K = (# of relevant docs in top-K) / K
        """
        if k <= 0:
            return 0.0
        
        retrieved_at_k = retrieved_ids[:k]
        relevant_retrieved = sum(1 for doc_id in retrieved_at_k if doc_id in relevant_ids)
        
        return relevant_retrieved / k
    
    def recall_at_k(self, 
                    retrieved_ids: List[int], 
                    relevant_ids: set, 
                    k: int) -> float:
        """
        Compute Recall@K.
        
        Recall@K = (# of relevant docs in top-K) / (total # of relevant docs)
        """
        if not relevant_ids:
            return 0.0
        
        retrieved_at_k = retrieved_ids[:k]
        relevant_retrieved = sum(1 for doc_id in retrieved_at_k if doc_id in relevant_ids)
        
        return relevant_retrieved / len(relevant_ids)
    
    def average_precision(self, 
                          retrieved_ids: List[int], 
                          relevant_ids: set) -> float:
        """
        Compute Average Precision (AP).
        
        AP = (1/R) * Σ(Precision@k * rel(k))
        where R is total relevant docs and rel(k) is 1 if doc at rank k is relevant
        """
        if not relevant_ids:
            return 0.0
        
        num_relevant = 0
        sum_precision = 0.0
        
        for k, doc_id in enumerate(retrieved_ids, 1):
            if doc_id in relevant_ids:
                num_relevant += 1
                precision_at_k = num_relevant / k
                sum_precision += precision_at_k
        
        if num_relevant == 0:
            return 0.0
        
        return sum_precision / len(relevant_ids)
    
    def reciprocal_rank(self, 
                        retrieved_ids: List[int], 
                        relevant_ids: set) -> float:
        """
        Compute Reciprocal Rank (RR).
        
        RR = 1 / (rank of first relevant document)
        """
        for rank, doc_id in enumerate(retrieved_ids, 1):
            if doc_id in relevant_ids:
                return 1.0 / rank
        return 0.0
    
    def dcg_at_k(self, 
                 retrieved_ids: List[int], 
                 relevance_scores: Dict[int, int], 
                 k: int) -> float:
        """
        Compute Discounted Cumulative Gain (DCG) at K.
        
        DCG@K = Σ(rel_i / log2(i + 1)) for i = 1 to K
        """
        dcg = 0.0
        for i, doc_id in enumerate(retrieved_ids[:k], 1):
            rel = relevance_scores.get(doc_id, 0)
            dcg += rel / np.log2(i + 1)
        return dcg
    
    def ndcg_at_k(self, 
                  retrieved_ids: List[int], 
                  relevance_scores: Dict[int, int], 
                  k: int) -> float:
        """
        Compute Normalized Discounted Cumulative Gain (NDCG) at K.
        
        NDCG@K = DCG@K / IDCG@K
        where IDCG is the ideal DCG (perfect ranking)
        """
        # Compute DCG
        dcg = self.dcg_at_k(retrieved_ids, relevance_scores, k)
        
        # Compute IDCG (ideal ranking)
        ideal_ranking = sorted(relevance_scores.values(), reverse=True)[:k]
        idcg = 0.0
        for i, rel in enumerate(ideal_ranking, 1):
            idcg += rel / np.log2(i + 1)
        
        if idcg == 0:
            return 0.0
        
        return dcg / idcg
    
    def f1_at_k(self, 
                retrieved_ids: List[int], 
                relevant_ids: set, 
                k: int) -> float:
        """
        Compute F1 score at K.
        
        F1@K = 2 * (Precision@K * Recall@K) / (Precision@K + Recall@K)
        """
        p = self.precision_at_k(retrieved_ids, relevant_ids, k)
        r = self.recall_at_k(retrieved_ids, relevant_ids, k)
        
        if p + r == 0:
            return 0.0
        
        return 2 * (p * r) / (p + r)
    
    def evaluate_single_query(self, 
                               query: str, 
                               retrieved_results: List[Tuple[int, float]], 
                               k_values: List[int] = [5, 10, 20]) -> Dict:
        """
        Evaluate a single query's results.
        
        Args:
            query: Search query
            retrieved_results: List of (recipe_id, score) tuples
            k_values: List of K values for metrics
            
        Returns:
            Dictionary of evaluation metrics
        """
        retrieved_ids = [r[0] for r in retrieved_results]
        
        if not retrieved_ids:
            # Return zeros if no results
            results = {
                'query': query,
                'num_retrieved': 0,
                'num_relevant_in_corpus': 0,
                'reciprocal_rank': 0.0,
                'average_precision': 0.0
            }
            for k in k_values:
                results[f'precision@{k}'] = 0.0
                results[f'recall@{k}'] = 0.0
                results[f'f1@{k}'] = 0.0
                results[f'ndcg@{k}'] = 0.0
            return results
        
        # Get relevant set from retrieved candidates (faster than full corpus scan)
        relevant_ids = self.relevance_generator.get_relevant_set_fast(
            query, retrieved_ids, min_score=0.25
        )
        
        # Also check a sample for better recall estimation
        sample_relevant = self.relevance_generator.get_pseudo_relevant_set(
            query, top_k=50, min_score=0.25, sample_size=2000, random_state=2
        )
        relevant_ids.update(sample_relevant)
        
        # Get graded relevance for NDCG
        judgments = self.relevance_generator.generate_relevance_judgments(
            query, retrieved_ids
        )
        graded_relevance = {
            rid: j['graded_relevance'] for rid, j in judgments.items()
        }
        
        # Compute metrics
        results = {
            'query': query,
            'num_retrieved': len(retrieved_ids),
            'num_relevant_in_corpus': len(relevant_ids),
            'reciprocal_rank': self.reciprocal_rank(retrieved_ids, relevant_ids),
            'average_precision': self.average_precision(retrieved_ids, relevant_ids)
        }
        
        # Compute metrics at different K values
        for k in k_values:
            results[f'precision@{k}'] = self.precision_at_k(retrieved_ids, relevant_ids, k)
            results[f'recall@{k}'] = self.recall_at_k(retrieved_ids, relevant_ids, k)
            results[f'f1@{k}'] = self.f1_at_k(retrieved_ids, relevant_ids, k)
            results[f'ndcg@{k}'] = self.ndcg_at_k(retrieved_ids, graded_relevance, k)
        
        return results
    
    def evaluate_engine(self, 
                        search_engine, 
                        queries: List[str], 
                        top_k: int = 20,
                        k_values: List[int] = [5, 10, 20],
                        engine_name: str = "Engine") -> pd.DataFrame:
        """
        Evaluate a search engine across multiple queries.
        
        Args:
            search_engine: Search engine with search() method
            queries: List of queries to evaluate
            top_k: Number of results to retrieve per query
            k_values: List of K values for metrics
            engine_name: Name of the engine for reporting
            
        Returns:
            DataFrame with evaluation results
        """
        print(f"\n    Evaluating {engine_name}...")
        
        all_results = []
        
        for query in tqdm(queries, desc=f"    {engine_name}"):
            try:
                # Get search results
                search_results = search_engine.search(query, top_k=top_k)
                
                # Evaluate
                metrics = self.evaluate_single_query(query, search_results, k_values)
                metrics['engine'] = engine_name
                all_results.append(metrics)
                
            except Exception as e:
                print(f"    Warning: Error evaluating query '{query}': {e}")
                # Add a row with zeros for failed queries
                failed_metrics = {
                    'query': query,
                    'engine': engine_name,
                    'num_retrieved': 0,
                    'num_relevant_in_corpus': 0,
                    'reciprocal_rank': 0.0,
                    'average_precision': 0.0
                }
                for k in k_values:
                    failed_metrics[f'precision@{k}'] = 0.0
                    failed_metrics[f'recall@{k}'] = 0.0
                    failed_metrics[f'f1@{k}'] = 0.0
                    failed_metrics[f'ndcg@{k}'] = 0.0
                all_results.append(failed_metrics)
        
        if not all_results:
            print(f"    Warning: No results for {engine_name}")
            # Return empty dataframe with correct columns
            columns = ['query', 'engine', 'num_retrieved', 'num_relevant_in_corpus',
                      'reciprocal_rank', 'average_precision']
            for k in k_values:
                columns.extend([f'precision@{k}', f'recall@{k}', f'f1@{k}', f'ndcg@{k}'])
            return pd.DataFrame(columns=columns)
        
        return pd.DataFrame(all_results)


# =============================================================================
# ENGINE COMPARISON
# =============================================================================

class EngineComparator:
    """
    Statistical comparison between search engines.
    """
    
    def __init__(self, evaluator: SearchEngineEvaluator):
        """
        Initialize comparator.
        
        Args:
            evaluator: SearchEngineEvaluator instance
        """
        self.evaluator = evaluator
    
    def compare_engines(self, 
                        engine1, 
                        engine2, 
                        queries: List[str],
                        engine1_name: str = "TF-IDF",
                        engine2_name: str = "Embeddings",
                        top_k: int = 20,
                        k_values: List[int] = [5, 10, 20]) -> Dict:
        """
        Compare two search engines across multiple queries.
        """
        print("\n" + "=" * 70)
        print("ENGINE COMPARISON")
        print("=" * 70)
        
        # Evaluate both engines
        results1 = self.evaluator.evaluate_engine(
            engine1, queries, top_k, k_values, engine1_name
        )
        results2 = self.evaluator.evaluate_engine(
            engine2, queries, top_k, k_values, engine2_name
        )
        
        # Debug: print shape and columns
        print(f"\n    {engine1_name} results shape: {results1.shape}")
        print(f"    {engine2_name} results shape: {results2.shape}")
        
        if results1.empty or results2.empty:
            print("    Warning: One or both engines returned no results!")
            return {
                'individual_results': pd.concat([results1, results2], ignore_index=True),
                'engine1_name': engine1_name,
                'engine2_name': engine2_name,
                'queries': queries,
                'summary': {},
                'statistical_tests': {},
                'overlap_analysis': {}
            }
        
        # Combine results
        all_results = pd.concat([results1, results2], ignore_index=True)
        
        # Compute aggregate statistics
        comparison = {
            'individual_results': all_results,
            'engine1_name': engine1_name,
            'engine2_name': engine2_name,
            'queries': queries,
            'summary': {},
            'statistical_tests': {},
            'overlap_analysis': {}
        }
        
        # Compute summary statistics
        metric_columns = [col for col in results1.columns 
                         if col not in ['query', 'engine', 'num_retrieved', 'num_relevant_in_corpus']]
        
        summary = {}
        for metric in metric_columns:
            if metric in results1.columns and metric in results2.columns:
                summary[metric] = {
                    engine1_name: {
                        'mean': float(results1[metric].mean()),
                        'std': float(results1[metric].std()),
                        'median': float(results1[metric].median()),
                        'min': float(results1[metric].min()),
                        'max': float(results1[metric].max())
                    },
                    engine2_name: {
                        'mean': float(results2[metric].mean()),
                        'std': float(results2[metric].std()),
                        'median': float(results2[metric].median()),
                        'min': float(results2[metric].min()),
                        'max': float(results2[metric].max())
                    }
                }
        comparison['summary'] = summary
        
        # Statistical significance tests
        print("\n    Running statistical tests...")
        stat_tests = {}
        for metric in metric_columns:
            if metric not in results1.columns or metric not in results2.columns:
                continue
                
            values1 = results1[metric].values
            values2 = results2[metric].values
            
            # Ensure same length
            min_len = min(len(values1), len(values2))
            values1 = values1[:min_len]
            values2 = values2[:min_len]
            
            # Paired t-test
            try:
                t_stat, t_pvalue = stats.ttest_rel(values1, values2)
            except Exception:
                t_stat, t_pvalue = np.nan, np.nan
            
            # Wilcoxon signed-rank test (non-parametric)
            try:
                # Check if there's any variance
                diff = values1 - values2
                if np.all(diff == 0):
                    w_stat, w_pvalue = np.nan, np.nan
                else:
                    w_stat, w_pvalue = stats.wilcoxon(values1, values2)
            except Exception:
                w_stat, w_pvalue = np.nan, np.nan
            
            stat_tests[metric] = {
                'paired_ttest': {'statistic': float(t_stat) if not np.isnan(t_stat) else None, 
                                'pvalue': float(t_pvalue) if not np.isnan(t_pvalue) else None},
                'wilcoxon': {'statistic': float(w_stat) if not np.isnan(w_stat) else None, 
                            'pvalue': float(w_pvalue) if not np.isnan(w_pvalue) else None},
                'engine1_wins': int(np.sum(values1 > values2)),
                'engine2_wins': int(np.sum(values2 > values1)),
                'ties': int(np.sum(values1 == values2))
            }
        
        comparison['statistical_tests'] = stat_tests
        
        # Overlap analysis
        print("\n    Computing overlap analysis...")
        overlap_analysis = self._compute_overlap_analysis(
            engine1, engine2, queries, top_k, engine1_name, engine2_name
        )
        comparison['overlap_analysis'] = overlap_analysis
        
        return comparison
    
    def _compute_overlap_analysis(self, 
                                   engine1, 
                                   engine2, 
                                   queries: List[str],
                                   top_k: int,
                                   engine1_name: str,
                                   engine2_name: str) -> Dict:
        """Compute result overlap between engines."""
        overlaps = []
        rank_correlations = []
        
        for query in queries:
            try:
                results1 = engine1.search(query, top_k=top_k)
                results2 = engine2.search(query, top_k=top_k)
                
                ids1 = set(r[0] for r in results1)
                ids2 = set(r[0] for r in results2)
                
                # Jaccard overlap
                intersection = len(ids1 & ids2)
                union = len(ids1 | ids2)
                jaccard = intersection / union if union > 0 else 0
                
                overlaps.append({
                    'query': query,
                    'overlap_count': intersection,
                    'jaccard_similarity': jaccard,
                    f'{engine1_name}_unique': len(ids1 - ids2),
                    f'{engine2_name}_unique': len(ids2 - ids1)
                })
                
                # Rank correlation for common items
                common_ids = list(ids1 & ids2)
                if len(common_ids) >= 3:
                    rank1 = {r[0]: i for i, r in enumerate(results1)}
                    rank2 = {r[0]: i for i, r in enumerate(results2)}
                    
                    ranks1 = [rank1[id_] for id_ in common_ids]
                    ranks2 = [rank2[id_] for id_ in common_ids]
                    
                    try:
                        spearman_corr, _ = spearmanr(ranks1, ranks2)
                        kendall_corr, _ = kendalltau(ranks1, ranks2)
                        
                        rank_correlations.append({
                            'query': query,
                            'spearman': spearman_corr,
                            'kendall': kendall_corr,
                            'num_common': len(common_ids)
                        })
                    except Exception:
                        pass
            
            except Exception as e:
                print(f"    Warning: Overlap analysis failed for '{query}': {e}")
                continue
        
        overlap_df = pd.DataFrame(overlaps) if overlaps else pd.DataFrame()
        rank_corr_df = pd.DataFrame(rank_correlations) if rank_correlations else pd.DataFrame()
        
        return {
            'per_query_overlap': overlap_df,
            'rank_correlations': rank_corr_df,
            'mean_jaccard': float(overlap_df['jaccard_similarity'].mean()) if len(overlap_df) > 0 else 0,
            'mean_overlap_count': float(overlap_df['overlap_count'].mean()) if len(overlap_df) > 0 else 0,
            'mean_spearman': float(rank_corr_df['spearman'].mean()) if len(rank_corr_df) > 0 else np.nan,
            'mean_kendall': float(rank_corr_df['kendall'].mean()) if len(rank_corr_df) > 0 else np.nan
        }
    
    def print_comparison_report(self, comparison: Dict) -> None:
        """Print a formatted comparison report."""
        engine1 = comparison['engine1_name']
        engine2 = comparison['engine2_name']
        
        print("\n" + "=" * 70)
        print("COMPARISON REPORT")
        print("=" * 70)
        
        print(f"\nEngines: {engine1} vs {engine2}")
        print(f"Number of queries: {len(comparison['queries'])}")
        
        summary = comparison['summary']
        stat_tests = comparison['statistical_tests']
        
        if not summary:
            print("\nNo evaluation results available.")
            return
        
        # Summary statistics
        print("\n" + "-" * 70)
        print("SUMMARY STATISTICS")
        print("-" * 70)
        
        key_metrics = ['precision@10', 'recall@10', 'ndcg@10', 'reciprocal_rank', 'average_precision']
        
        print(f"\n{'Metric':<25} {engine1:>20} {engine2:>20} {'Winner':>12}")
        print("-" * 77)
        
        for metric in key_metrics:
            if metric in summary:
                mean1 = summary[metric][engine1]['mean']
                mean2 = summary[metric][engine2]['mean']
                winner = engine1 if mean1 > mean2 else engine2 if mean2 > mean1 else "Tie"
                print(f"{metric:<25} {mean1:>20.4f} {mean2:>20.4f} {winner:>12}")
        
        # Statistical tests
        print("\n" + "-" * 70)
        print("STATISTICAL SIGNIFICANCE (p-value < 0.05 is significant)")
        print("-" * 70)
        
        print(f"\n{'Metric':<25} {'t-test p-val':>15} {'Wilcoxon p-val':>15} {'Significant?':>15}")
        print("-" * 70)
        
        for metric in key_metrics:
            if metric in stat_tests:
                t_pval = stat_tests[metric]['paired_ttest']['pvalue']
                w_pval = stat_tests[metric]['wilcoxon']['pvalue']
                
                t_pval_str = f"{t_pval:.4f}" if t_pval is not None else "N/A"
                w_pval_str = f"{w_pval:.4f}" if w_pval is not None else "N/A"
                
                significant = "No"
                if t_pval is not None and t_pval < 0.05:
                    significant = "Yes"
                elif w_pval is not None and w_pval < 0.05:
                    significant = "Yes"
                
                print(f"{metric:<25} {t_pval_str:>15} {w_pval_str:>15} {significant:>15}")
        
        # Win/Loss analysis
        print("\n" + "-" * 70)
        print("WIN/LOSS ANALYSIS")
        print("-" * 70)
        
        print(f"\n{'Metric':<25} {engine1 + ' Wins':>15} {engine2 + ' Wins':>15} {'Ties':>10}")
        print("-" * 65)
        
        for metric in key_metrics:
            if metric in stat_tests:
                e1_wins = stat_tests[metric]['engine1_wins']
                e2_wins = stat_tests[metric]['engine2_wins']
                ties = stat_tests[metric]['ties']
                print(f"{metric:<25} {e1_wins:>15} {e2_wins:>15} {ties:>10}")
        
        # Overlap analysis
        print("\n" + "-" * 70)
        print("OVERLAP ANALYSIS")
        print("-" * 70)
        
        overlap = comparison['overlap_analysis']
        print(f"\nMean Jaccard Similarity: {overlap['mean_jaccard']:.4f}")
        print(f"Mean Overlap Count (top-20): {overlap['mean_overlap_count']:.1f}")
        if not np.isnan(overlap['mean_spearman']):
            print(f"Mean Spearman Rank Correlation: {overlap['mean_spearman']:.4f}")
            print(f"Mean Kendall Rank Correlation: {overlap['mean_kendall']:.4f}")
        
        print("\n" + "=" * 70)
        print("END OF COMPARISON REPORT")
        print("=" * 70)
    
    def get_comparison_dataframe(self, comparison: Dict) -> pd.DataFrame:
        """Convert comparison results to a summary DataFrame."""
        rows = []
        
        engine1 = comparison['engine1_name']
        engine2 = comparison['engine2_name']
        summary = comparison['summary']
        stat_tests = comparison['statistical_tests']
        
        for metric in summary.keys():
            t_pval = None
            w_pval = None
            e1_wins = 0
            e2_wins = 0
            
            if metric in stat_tests:
                t_pval = stat_tests[metric]['paired_ttest']['pvalue']
                w_pval = stat_tests[metric]['wilcoxon']['pvalue']
                e1_wins = stat_tests[metric]['engine1_wins']
                e2_wins = stat_tests[metric]['engine2_wins']
            
            row = {
                'metric': metric,
                f'{engine1}_mean': summary[metric][engine1]['mean'],
                f'{engine1}_std': summary[metric][engine1]['std'],
                f'{engine2}_mean': summary[metric][engine2]['mean'],
                f'{engine2}_std': summary[metric][engine2]['std'],
                'difference': summary[metric][engine2]['mean'] - summary[metric][engine1]['mean'],
                't_test_pvalue': t_pval,
                'wilcoxon_pvalue': w_pval,
                f'{engine1}_wins': e1_wins,
                f'{engine2}_wins': e2_wins
            }
            rows.append(row)
        
        return pd.DataFrame(rows)


# =============================================================================
# RUN EVALUATION
# =============================================================================

print("\n" + "=" * 70)
print("RUNNING EVALUATION")
print("=" * 70)

# Initialize relevance generator
print("\nInitializing proxy relevance generator...")
relevance_generator = ProxyRelevanceGenerator(corpus_df, metadata_df)

# Initialize evaluator
evaluator = SearchEngineEvaluator(relevance_generator)

# Define evaluation queries - mix of different types
evaluation_queries = [
    # Simple keyword queries
    "chocolate cake",
    "pasta carbonara",
    "chicken soup",
    "banana bread",
    "grilled salmon",
    "beef stew",
    "apple pie",
    "caesar salad",
    
    # Semantic/conceptual queries
    "comfort food for a rainy day",
    "healthy dinner after gym",
    "quick and easy breakfast",
    "romantic dinner for two",
    "light summer salad",
    "warm winter soup",
    "kid friendly lunch",
    "low carb vegetarian",
    
    # Cuisine-based queries
    "italian pasta dish",
    "mexican dinner",
    "asian stir fry",
    "mediterranean lunch",
    
    # Occasion-based queries
    "holiday dessert",
    "birthday cake",
    "party appetizer",
    "sunday brunch",
    
    # Mood/attribute queries
    "something sweet and indulgent",
    "quick weeknight meal",
    "impressive dish for guests",
    "healthy meal prep"
]

print(f"\nEvaluating {len(evaluation_queries)} queries...")

# Initialize comparator
comparator = EngineComparator(evaluator)

# Run comparison
comparison_results = comparator.compare_engines(
    engine1=tfidf_engine,
    engine2=embeddings_engine,
    queries=evaluation_queries,
    engine1_name="TF-IDF",
    engine2_name="Embeddings",
    top_k=20,
    k_values=[5, 10, 20]
)

# Print comparison report
comparator.print_comparison_report(comparison_results)

# Get comparison DataFrame
comparison_df = comparator.get_comparison_dataframe(comparison_results)
print("\nComparison Summary DataFrame:")
print(comparison_df.to_string(index=False))

# Save evaluation results
comparison_df.to_csv("evaluation_comparison.csv", index=False)
comparison_results['individual_results'].to_csv("evaluation_individual_results.csv", index=False)
print("\nEvaluation results saved to CSV files")

# Save full comparison results
with open("evaluation_results.pkl", "wb") as f:
    pickle.dump(comparison_results, f)
print("Full evaluation results saved to 'evaluation_results.pkl'")


# =============================================================================
# INDIVIDUAL ENGINE EVALUATION SUMMARY
# =============================================================================

print("\n" + "=" * 70)
print("INDIVIDUAL ENGINE EVALUATION SUMMARY")
print("=" * 70)

individual_results = comparison_results['individual_results']

# Check if results are valid
if individual_results.empty or 'engine' not in individual_results.columns:
    print("\nWarning: No evaluation results available.")
else:
    for engine_name in ['TF-IDF', 'Embeddings']:
        engine_results = individual_results[individual_results['engine'] == engine_name]
        
        if engine_results.empty:
            print(f"\n{engine_name} Engine: No results")
            continue
        
        print(f"\n{engine_name} Engine:")
        print("-" * 40)
        
        metrics_to_show = ['precision@5', 'precision@10', 'recall@10', 'ndcg@10', 'reciprocal_rank', 'average_precision']
        
        for metric in metrics_to_show:
            if metric in engine_results.columns:
                mean_val = engine_results[metric].mean()
                std_val = engine_results[metric].std()
                print(f"  {metric:<20}: {mean_val:.4f} (±{std_val:.4f})")


# =============================================================================
# QUERY-TYPE ANALYSIS
# =============================================================================

print("\n" + "=" * 70)
print("QUERY-TYPE ANALYSIS")
print("=" * 70)

# Categorize queries
query_categories = {
    'keyword': [
        "chocolate cake", "pasta carbonara", "chicken soup", "banana bread",
        "grilled salmon", "beef stew", "apple pie", "caesar salad"
    ],
    'semantic': [
        "comfort food for a rainy day", "healthy dinner after gym",
        "quick and easy breakfast", "romantic dinner for two",
        "light summer salad", "warm winter soup", "kid friendly lunch",
        "low carb vegetarian"
    ],
    'cuisine': [
        "italian pasta dish", "mexican dinner", "asian stir fry", "mediterranean lunch"
    ],
    'occasion': [
        "holiday dessert", "birthday cake", "party appetizer", "sunday brunch"
    ],
    'mood': [
        "something sweet and indulgent", "quick weeknight meal",
        "impressive dish for guests", "healthy meal prep"
    ]
}

if not individual_results.empty and 'engine' in individual_results.columns:
    print("\nPerformance by Query Type (NDCG@10):")
    print("-" * 60)
    print(f"{'Query Type':<15} {'TF-IDF':>15} {'Embeddings':>15} {'Winner':>12}")
    print("-" * 60)
    
    for category, queries in query_categories.items():
        cat_results = individual_results[individual_results['query'].isin(queries)]
        
        if cat_results.empty:
            print(f"{category:<15} {'N/A':>15} {'N/A':>15} {'N/A':>12}")
            continue
        
        tfidf_results = cat_results[cat_results['engine'] == 'TF-IDF']
        embed_results = cat_results[cat_results['engine'] == 'Embeddings']
        
        tfidf_ndcg = tfidf_results['ndcg@10'].mean() if not tfidf_results.empty else 0
        embed_ndcg = embed_results['ndcg@10'].mean() if not embed_results.empty else 0
        
        winner = "TF-IDF" if tfidf_ndcg > embed_ndcg else "Embeddings" if embed_ndcg > tfidf_ndcg else "Tie"
        
        print(f"{category:<15} {tfidf_ndcg:>15.4f} {embed_ndcg:>15.4f} {winner:>12}")
else:
    print("\nNo results available for query-type analysis.")


EVALUATION SYSTEM

RUNNING EVALUATION

Initializing proxy relevance generator...
    Initialized relevance generator with 5000 recipes

Evaluating 28 queries...

ENGINE COMPARISON

    Evaluating TF-IDF...


    TF-IDF: 100%|██████████| 28/28 [00:00<00:00, 40.86it/s]



    Evaluating Embeddings...


    Embeddings:   0%|          | 0/28 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

    Embeddings:   7%|▋         | 2/28 [00:00<00:01, 15.55it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

    Embeddings:  14%|█▍        | 4/28 [00:00<00:01, 12.43it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

    Embeddings:  21%|██▏       | 6/28 [00:00<00:01, 12.62it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

    Embeddings:  29%|██▊       | 8/28 [00:00<00:01, 14.75it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

    Embeddings:  36%|███▌      | 10/28 [00:00<00:01, 12.69it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

    Embeddings:  43%|████▎     | 12/28 [00:00<00:01, 11.58it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

    Embeddings:  54%|█████▎    | 15/28 [00:01<00:00, 14.72it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

    Embeddings:  61%|██████    | 17/28 [00:01<00:00, 14.00it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

    Embeddings:  68%|██████▊   | 19/28 [00:01<00:00, 12.40it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

    Embeddings:  75%|███████▌  | 21/28 [00:01<00:00, 13.51it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

    Embeddings:  82%|████████▏ | 23/28 [00:01<00:00, 14.44it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

    Embeddings:  89%|████████▉ | 25/28 [00:01<00:00, 12.74it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

    Embeddings:  96%|█████████▋| 27/28 [00:02<00:00, 13.64it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

    Embeddings: 100%|██████████| 28/28 [00:02<00:00, 13.52it/s]



    TF-IDF results shape: (28, 18)
    Embeddings results shape: (28, 18)

    Running statistical tests...

    Computing overlap analysis...


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


COMPARISON REPORT

Engines: TF-IDF vs Embeddings
Number of queries: 28

----------------------------------------------------------------------
SUMMARY STATISTICS
----------------------------------------------------------------------

Metric                                  TF-IDF           Embeddings       Winner
-----------------------------------------------------------------------------
precision@10                            0.9250               0.7893       TF-IDF
recall@10                               0.1616               0.1338       TF-IDF
ndcg@10                                 0.8523               0.7255       TF-IDF
reciprocal_rank                         0.9821               0.8158       TF-IDF
average_precision                       0.2920               0.2296       TF-IDF

----------------------------------------------------------------------
STATISTICAL SIGNIFICANCE (p-value < 0.05 is significant)
----------------------------------------------------------------------



In [48]:
# =============================================================================
# SIMPLE GUI APPLICATION
# =============================================================================

print("\n" + "=" * 70)
print("INITIALIZING SIMPLE GUI")
print("=" * 70)


class SimpleRecipeSearchApp:
    """Simple GUI for Recipe Search Engine."""
    
    def __init__(self, root):
        self.root = root
        self.root.title("Recipe Search Engine")
        self.root.geometry("800x600")
        
        # ----- Search Frame -----
        search_frame = tk.Frame(root, pady=10)
        search_frame.pack(fill=tk.X, padx=10)
        
        # Search entry
        self.search_entry = tk.Entry(search_frame, font=('Arial', 12), width=50)
        self.search_entry.pack(side=tk.LEFT, padx=(0, 10))
        self.search_entry.insert(0, "comfort food")
        
        # Engine selection
        self.engine_var = tk.StringVar(value="Embeddings")
        tk.Radiobutton(search_frame, text="TF-IDF", variable=self.engine_var, value="TF-IDF").pack(side=tk.LEFT)
        tk.Radiobutton(search_frame, text="Embeddings", variable=self.engine_var, value="Embeddings").pack(side=tk.LEFT)
        
        # Search button
        self.search_btn = tk.Button(search_frame, text="Search", font=('Arial', 11), command=self.search)
        self.search_btn.pack(side=tk.LEFT, padx=10)
        
        # ----- Results Area -----
        self.results_text = scrolledtext.ScrolledText(root, font=('Arial', 10), wrap=tk.WORD)
        self.results_text.pack(fill=tk.BOTH, expand=True, padx=10, pady=10)
        
        # Bind Enter key
        self.root.bind('<Return>', lambda e: self.search())
    
    def search(self):
        """Perform search and display results."""
        query = self.search_entry.get().strip()
        
        if not query:
            self.results_text.delete('1.0', tk.END)
            self.results_text.insert(tk.END, "Please enter a search query.")
            return
        
        # Clear results
        self.results_text.delete('1.0', tk.END)
        self.results_text.insert(tk.END, "Searching...\n")
        self.root.update()
        
        try:
            # Get results based on selected engine
            if self.engine_var.get() == "TF-IDF":
                results = tfidf_engine.search(query, top_k=10)
                engine_name = "TF-IDF"
            else:
                results = embeddings_engine.search(query, top_k=10)
                engine_name = "Embeddings"
            
            # Display results
            self.results_text.delete('1.0', tk.END)
            self.results_text.insert(tk.END, f"=== {engine_name} Results for: '{query}' ===\n\n")
            
            if not results:
                self.results_text.insert(tk.END, "No results found.")
                return
            
            for i, (recipe_id, score) in enumerate(results, 1):
                # Get recipe info
                meta = metadata_df[metadata_df['recipe_id'] == recipe_id]
                
                if len(meta) == 0:
                    continue
                
                meta = meta.iloc[0]
                name = meta['recipe_name']
                time = meta['cooking_time']
                ingredients = meta['num_ingredients']
                
                # Display
                self.results_text.insert(tk.END, f"{i}. {name}\n")
                self.results_text.insert(tk.END, f"   Score: {score:.4f} | Time: {time} min | Ingredients: {ingredients}\n\n")
        
        except Exception as e:
            self.results_text.delete('1.0', tk.END)
            self.results_text.insert(tk.END, f"Error: {str(e)}")


# Launch the app
if __name__ == "__main__":
    root = tk.Tk()
    app = SimpleRecipeSearchApp(root)
    root.mainloop()


INITIALIZING SIMPLE GUI


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

In [49]:
query = "easy meal for kids"

# TF-IDF Results
tfidf_results = tfidf_engine.search(query, top_k=5)
display_search_results(tfidf_results, metadata_df, corpus_df, query, show_snippet=False)

# Embeddings Results
embed_results = embeddings_engine.search(query, top_k=5)
display_search_results_embeddings(embed_results, metadata_df, corpus_df, query, engine_name="EMBEDDINGS")


SEARCH RESULTS FOR: 'easy meal for kids'

[1] cheesy chicken sandwiches
    Score: 0.2589
    Cooking Time: 20 min | Ingredients: 5 | Steps: 5
    Description: a quick and easy meal and a kid pleaser!...

[2] chicken pesto pasta
    Score: 0.1039
    Cooking Time: 30 min | Ingredients: 7 | Steps: 9
    Description: found this recipe on allrecipes.com. a very simple recipe for a tasty, satisfying pasta meal. you can make your own homemade pesto, or use store-bough...

[3] chili mock carne
    Score: 0.1019
    Cooking Time: 60 min | Ingredients: 16 | Steps: 9
    Description: yes, it's a vegetarian chili con carne using tvp instead of beef. i actually served it up to my family and my dad and brother didn't really seem to no...

[4] sara s veggie stromboli from scratch  using your bread machine
    Score: 0.0958
    Cooking Time: 40 min | Ingredients: 18 | Steps: 15
    Description: with a picture! an easy, healthy, tasty vegetarian filled bread roll that has tangy italian flavors and m

Batches:   0%|          | 0/1 [00:00<?, ?it/s]


EMBEDDINGS RESULTS FOR: 'easy meal for kids'

[1] beef  rice  peas and carrots one dish meal
    Similarity Score: 0.5787
    Cooking Time: 20 min | Ingredients: 5 | Steps: 7
    Tags: 30-minutes-or-less, time-to-make, course, preparation, healthy, 5-ingredients-or-less, main-dish, ea...
    Description: quick, simple supper.  my 3 year old ate 6 servings, so i guess its kid friendly too. :)  i used a very lean beef that didn't require draining, if you...

[2] sophie s super easy chickie pea and mato salad
    Similarity Score: 0.5478
    Cooking Time: 15 min | Ingredients: 12 | Steps: 4
    Tags: 15-minutes-or-less, time-to-make, course, main-ingredient, cuisine, preparation, low-protein, health...
    Description: a refreshing, flavorful salad with just a hint of smokiness.  my 3 year old and i were looking to make a quick and healthy lunch with what we had on h...

[3] chicken nuggets parmesan
    Similarity Score: 0.5409
    Cooking Time: 10 min | Ingredients: 4 | Steps: 4
    Tag