<a href="https://colab.research.google.com/github/darlon31/FlavorGraph/blob/HybridSystem/Full_Hybrid_System_Test.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
# CELL 1: Setup and Dependencies
# This cell relies on IPython shell magics (`!cmd`) and the `google.colab`
# package, so it only runs inside a Colab/Jupyter kernel.
!pip install -q transformers torch networkx pandas numpy scikit-learn matplotlib seaborn pickle5 tqdm

# Mount Google Drive to access local files
from google.colab import drive
drive.mount('/content/drive')

# Create directory structure
!mkdir -p /content/FlavorGraph/input
!mkdir -p /content/FlavorGraph/output

# Copy files from local to Colab
# NOTE: a failing `cp` only prints "cannot stat ..." and the cell keeps going;
# the missing file then surfaces later as a FileNotFoundError when the
# embeddings are loaded. Check that both Drive paths exist before running.
!cp "/content/drive/MyDrive/FlavorGraph/FlavorGraph/output/kitchenette_embeddings.pkl" "/content/FlavorGraph/output/"
!cp "/content/drive/MyDrive/FlavorGraph/FlavorGraph/input/dict_ingr2cate - Top300+FDB400+HyperFoods104=616.csv" "/content/FlavorGraph/input/"

import os
import sys
import torch
import pickle5 as pickle
import numpy as np
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt
import seaborn as sns
from transformers import AutoTokenizer, T5ForConditionalGeneration
from sklearn.metrics.pairwise import cosine_similarity
from tqdm.notebook import tqdm
import time
import logging

# Configure the root logger once for the whole notebook (INFO and above,
# timestamped), then grab a logger named after this module for all cells.
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
logger = logging.getLogger(__name__)

# CELL 2: Data Loading and Configuration
class Config:
    """Central configuration: file locations, eagerly loaded data, model settings.

    NOTE: the embeddings pickle and the category CSV are read while this class
    body executes, so merely defining the class performs file I/O. Any loading
    error is logged and then re-raised unchanged.

    Attributes set by the loading steps:
        EMBEDDINGS: object unpickled from ``EMBEDDINGS_FILE``.
        CATEGORIES_DF: DataFrame read from ``CATEGORIES_FILE``.
        CATEGORIES: dict built from the CSV's 'ingredient' and 'category' columns.
    """

    # Base paths
    BASE_DIR = '/content/FlavorGraph'
    INPUT_DIR = os.path.join(BASE_DIR, "input")
    OUTPUT_DIR = os.path.join(BASE_DIR, "output")

    # File paths
    EMBEDDINGS_FILE = os.path.join(OUTPUT_DIR, "kitchenette_embeddings.pkl")
    CATEGORIES_FILE = os.path.join(INPUT_DIR, "dict_ingr2cate - Top300+FDB400+HyperFoods104=616.csv")

    # Load embeddings
    # SECURITY NOTE: unpickling can execute arbitrary code contained in the
    # file. Only point EMBEDDINGS_FILE at a pickle you produced or trust.
    try:
        logger.info(f"Loading embeddings from {EMBEDDINGS_FILE}")
        with open(EMBEDDINGS_FILE, 'rb') as f:
            EMBEDDINGS = pickle.load(f)
        # Names bound in a class body become class attributes; drop the closed
        # handle so it does not linger as `Config.f`.
        del f
        logger.info(f"Successfully loaded embeddings with {len(EMBEDDINGS)} ingredients")
    except Exception as e:
        logger.error(f"Error loading embeddings: {str(e)}")
        raise

    # Load categories
    try:
        logger.info(f"Loading categories from {CATEGORIES_FILE}")
        CATEGORIES_DF = pd.read_csv(CATEGORIES_FILE)
        # If an ingredient appears more than once, the last row wins.
        CATEGORIES = dict(zip(CATEGORIES_DF['ingredient'], CATEGORIES_DF['category']))
        logger.info(f"Successfully loaded categories for {len(CATEGORIES)} ingredients")
    except Exception as e:
        logger.error(f"Error loading categories: {str(e)}")
        raise

    # Model Configuration
    MODEL_NAME = "flax-community/t5-recipe-generation"
    MODEL_CACHE_DIR = os.path.join(BASE_DIR, "model_cache")

# CELL 3: FlavorGraph Testing
class FlavorGraphTester:
    """Sanity checks for the ingredient embeddings and category table.

    Both mappings are taken from ``Config`` at construction time:
    ``embeddings`` maps ingredient name -> vector and ``categories`` maps
    ingredient name -> category label.
    """

    def __init__(self):
        self.embeddings = Config.EMBEDDINGS
        self.categories = Config.CATEGORIES

    def test_embedding_coverage(self):
        """Log embedding/category statistics and plot the category distribution.

        Empty inputs are tolerated: with no embeddings the dimension is
        reported as 0, and with no categories the plot is skipped.
        """
        logger.info("Testing FlavorGraph Embeddings:")
        n_ingredients = len(self.embeddings)
        n_categories = len(set(self.categories.values()))
        # A bare next() would raise StopIteration on an empty mapping.
        first_vector = next(iter(self.embeddings.values()), None)
        embedding_dim = first_vector.shape[0] if first_vector is not None else 0

        logger.info(f"Total ingredients: {n_ingredients}")
        logger.info(f"Total categories: {n_categories}")
        logger.info(f"Embedding dimension: {embedding_dim}")

        # Category distribution
        if not self.categories:
            # Nothing to plot; skip the chart rather than fail on empty data.
            logger.warning("No categories loaded; skipping category distribution plot")
            return

        category_dist = pd.Series(list(self.categories.values())).value_counts()
        plt.figure(figsize=(15, 5))
        category_dist.plot(kind='bar')
        plt.title('Ingredient Category Distribution')
        plt.xticks(rotation=45)
        plt.tight_layout()
        plt.show()

    def test_ingredient_similarity(self, test_ingredients):
        """Find and log the five most similar ingredients for each query.

        Args:
            test_ingredients: iterable of ingredient names to look up.

        Returns:
            dict mapping each query found in the embeddings to a list of up to
            five ``(ingredient, cosine_similarity)`` pairs, best first. Queries
            missing from the embeddings are logged as warnings and omitted.
        """
        logger.info("\nTesting Ingredient Similarities:")

        # Stack every vector once so each query needs a single similarity
        # call instead of one call per ingredient pair.
        names = list(self.embeddings)
        matrix = None
        if names:
            matrix = np.vstack(
                [np.asarray(self.embeddings[name]).reshape(1, -1) for name in names]
            )

        results = {}
        for ing in test_ingredients:
            if ing not in self.embeddings:
                logger.warning(f"Ingredient '{ing}' not found in embeddings")
                continue

            # Compute similarities against all ingredients at once
            base_emb = np.asarray(self.embeddings[ing]).reshape(1, -1)
            scores = cosine_similarity(base_emb, matrix)[0]
            similarities = [
                (name, score) for name, score in zip(names, scores) if name != ing
            ]

            # Get top 5 similar ingredients (stable sort keeps mapping order on ties)
            top_similar = sorted(similarities, key=lambda x: x[1], reverse=True)[:5]
            results[ing] = top_similar

            logger.info(f"\nTop 5 ingredients similar to {ing}:")
            for similar_ing, score in top_similar:
                category = self.categories.get(similar_ing, "Unknown")
                logger.info(f"- {similar_ing} ({category}): {score:.3f}")

        return results

# CELL 4: ChefTransformer Testing
class ChefTransformerTester:
    """Loads the recipe-generation model named in ``Config`` and exercises it."""

    # Structural markers expected in the model's raw output; they are kept
    # through decoding so `_parse_recipe` can split on them.
    _SECTION_TOKEN = "<section>"
    _ITEM_TOKEN = "<sep>"

    def __init__(self):
        logger.info("Initializing ChefTransformer...")
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        # Config declares a model cache directory; pass it so downloads land there.
        self.tokenizer = AutoTokenizer.from_pretrained(
            Config.MODEL_NAME, cache_dir=Config.MODEL_CACHE_DIR
        )
        self.model = T5ForConditionalGeneration.from_pretrained(
            Config.MODEL_NAME, cache_dir=Config.MODEL_CACHE_DIR
        ).to(self.device)
        logger.info(f"ChefTransformer initialized on {self.device}")

    def test_recipe_generation(self, ingredient_sets):
        """Generate and log one recipe per ingredient list.

        Args:
            ingredient_sets: iterable of ingredient-name lists.

        Returns:
            dict keyed by ``tuple(ingredients)`` with the parsed 'recipe' and
            the 'generation_time' in seconds; failed cases are omitted.
        """
        logger.info("\nTesting Recipe Generation:")

        results = {}
        for i, ingredients in enumerate(ingredient_sets, 1):
            logger.info(f"\nTest Case {i}: {ingredients}")

            # Generate recipe
            start_time = time.time()
            recipe = self.generate_recipe(ingredients)
            generation_time = time.time() - start_time

            if recipe:
                results[tuple(ingredients)] = {
                    'recipe': recipe,
                    'generation_time': generation_time
                }

                logger.info(f"Generation time: {generation_time:.2f}s")
                logger.info(f"Title: {recipe['title']}")
                logger.info("Ingredients:")
                for ing in recipe['ingredients']:
                    logger.info(f"- {ing}")
                logger.info("Directions:")
                for j, step in enumerate(recipe['directions'], 1):
                    logger.info(f"{j}. {step}")
            else:
                logger.error("Recipe generation failed")

        return results

    def generate_recipe(self, ingredients):
        """Generate a recipe for the given ingredients.

        Args:
            ingredients: list of ingredient names.

        Returns:
            dict with 'title', 'ingredients' and 'directions', or None when the
            list is empty, generation raises, or the output cannot be parsed.
        """
        if not ingredients:
            logger.error("Error in recipe generation: no ingredients provided")
            return None

        try:
            # The model card's reference code prefixes inputs with "items: ".
            input_text = f"items: {', '.join(ingredients)}"
            inputs = self.tokenizer(input_text, return_tensors="pt",
                                  max_length=512, truncation=True).to(self.device)

            with torch.no_grad():
                outputs = self.model.generate(
                    inputs.input_ids,
                    attention_mask=inputs.attention_mask,
                    max_length=512,
                    min_length=64,
                    do_sample=True,
                    top_k=50,
                    top_p=0.9,
                    temperature=0.7,
                    num_return_sequences=1
                )

            # Decode without dropping special tokens, then strip everything
            # except the structural markers, so the section/item boundaries
            # survive for parsing regardless of how the tokenizer registers them.
            recipe_text = self.tokenizer.decode(outputs[0], skip_special_tokens=False)
            structural = (self._SECTION_TOKEN, self._ITEM_TOKEN)
            for token in self.tokenizer.all_special_tokens:
                if token not in structural:
                    recipe_text = recipe_text.replace(token, "")
            return self._parse_recipe(recipe_text)

        except Exception as e:
            # Best-effort: callers treat None as "generation failed".
            logger.error(f"Error in recipe generation: {str(e)}")
            return None

    def _parse_recipe(self, recipe_text):
        """Split generated text into title, ingredients and directions.

        The text is expected to contain ``title:``, ``ingredients:`` and
        ``directions:`` labels. ``<section>`` markers and newlines separate
        sections; ``<sep>`` markers and newlines separate list items. Only the
        first occurrence of each label starts a section.

        Args:
            recipe_text: decoded model output.

        Returns:
            dict with 'title' (str), 'ingredients' (list of str) and
            'directions' (list of str); missing sections are empty. Returns
            None when the text is empty or contains none of the labels.
        """
        import re

        if not recipe_text or not recipe_text.strip():
            return None

        text = recipe_text.replace(self._SECTION_TOKEN, "\n")
        label_re = re.compile(r"\b(title|ingredients|directions)\s*:", re.IGNORECASE)

        # Record where each section label first appears.
        markers = []
        seen = set()
        for match in label_re.finditer(text):
            label = match.group(1).lower()
            if label not in seen:
                seen.add(label)
                markers.append((label, match.start(), match.end()))
        if not markers:
            return None

        # A section runs from the end of its label to the start of the next one.
        sections = {}
        for idx, (label, _start, end) in enumerate(markers):
            stop = markers[idx + 1][1] if idx + 1 < len(markers) else len(text)
            sections[label] = text[end:stop]

        item_re = re.compile(re.escape(self._ITEM_TOKEN) + r"|\n")

        def split_items(chunk):
            return [part.strip() for part in item_re.split(chunk) if part.strip()]

        recipe = {
            'title': " ".join(sections.get('title', "").split()),
            'ingredients': split_items(sections.get('ingredients', "")),
            'directions': split_items(sections.get('directions', "")),
        }
        if not (recipe['title'] or recipe['ingredients'] or recipe['directions']):
            return None
        return recipe

# CELL 5: Hybrid System Testing
class HybridSystemTester:
    """Combines embedding-based ingredient suggestions with recipe generation."""

    def __init__(self, flavor_graph=None, chef_transformer=None):
        """Create the tester.

        Args:
            flavor_graph: optional existing ``FlavorGraphTester`` to reuse;
                a new one is created when omitted.
            chef_transformer: optional existing ``ChefTransformerTester`` to
                reuse (avoids loading the model a second time); a new one is
                created when omitted.
        """
        self.flavor_graph = flavor_graph if flavor_graph is not None else FlavorGraphTester()
        self.chef_transformer = (
            chef_transformer if chef_transformer is not None else ChefTransformerTester()
        )

    def test_hybrid_system(self, test_cases):
        """Run the similarity -> enhancement -> generation pipeline per test case.

        Args:
            test_cases: iterable of ingredient-name lists.

        Returns:
            dict keyed by ``tuple(base_ingredients)``; each value holds
            'similar_ingredients', 'enhanced_ingredients' (a set),
            'original_recipe' and 'enhanced_recipe'.
        """
        logger.info("\nTesting Hybrid Recipe System:")

        results = {}
        for i, base_ingredients in enumerate(test_cases, 1):
            logger.info(f"\nTest Case {i}: {base_ingredients}")

            # Step 1: FlavorGraph Analysis
            logger.info("\nStep 1: Analyzing ingredient compatibility...")
            similar_ingredients = self.flavor_graph.test_ingredient_similarity(base_ingredients)

            # Create enhanced ingredient list. Build it as an ordered,
            # de-duplicated list so the prompt sent to the model is
            # deterministic (iterating a set of strings is not).
            ordered_ingredients = list(dict.fromkeys(base_ingredients))
            for ing, similar in similar_ingredients.items():
                if not similar:
                    # No neighbours (e.g. a single-entry embedding table).
                    continue
                top_match = similar[0][0]  # Add top similar ingredient
                if top_match not in ordered_ingredients:
                    ordered_ingredients.append(top_match)
            enhanced_ingredients = set(ordered_ingredients)

            # Step 2: Recipe Generation
            logger.info("\nStep 2: Generating recipes...")
            logger.info("Original ingredients recipe:")
            original_recipe = self.chef_transformer.generate_recipe(base_ingredients)

            logger.info("\nEnhanced ingredients recipe:")
            enhanced_recipe = self.chef_transformer.generate_recipe(ordered_ingredients)

            case_result = {
                'similar_ingredients': similar_ingredients,
                'enhanced_ingredients': enhanced_ingredients,
                'original_recipe': original_recipe,
                'enhanced_recipe': enhanced_recipe
            }
            results[tuple(base_ingredients)] = case_result

            # Step 3: Performance Analysis
            self._analyze_performance(base_ingredients, case_result)

        return results

    def _analyze_performance(self, base_ingredients, result):
        """Log ingredient-count and recipe-length comparisons for one test case.

        Args:
            base_ingredients: the original ingredient list.
            result: the per-case dict built by ``test_hybrid_system``.
        """
        logger.info("\nStep 3: Performance Analysis")

        # Ingredient Enhancement Analysis
        n_original = len(base_ingredients)
        n_enhanced = len(result['enhanced_ingredients'])
        # Guard the ratio against an empty ingredient list.
        enhancement_ratio = n_enhanced / n_original if n_original else 0.0

        logger.info("Ingredient Enhancement:")
        logger.info(f"- Original ingredients: {n_original}")
        logger.info(f"- Enhanced ingredients: {n_enhanced}")
        logger.info(f"- Enhancement ratio: {enhancement_ratio:.2f}")

        # Recipe Comparison (only when both generations succeeded)
        if result['original_recipe'] and result['enhanced_recipe']:
            orig_steps = len(result['original_recipe']['directions'])
            enhanced_steps = len(result['enhanced_recipe']['directions'])

            logger.info("\nRecipe Comparison:")
            logger.info(f"- Original recipe steps: {orig_steps}")
            logger.info(f"- Enhanced recipe steps: {enhanced_steps}")

# CELL 6: Run Tests
def run_comprehensive_tests(test_ingredients=None):
    """Run the FlavorGraph, ChefTransformer and hybrid tests in sequence.

    Args:
        test_ingredients: optional list of ingredient-name lists to test;
            a built-in set of four combinations is used when omitted.

    Returns:
        dict with 'chef_transformer_results' and 'hybrid_results'.
    """
    # Test cases
    if test_ingredients is None:
        test_ingredients = [
            ["chicken", "rice", "garlic"],
            ["pasta", "tomato", "basil"],
            ["beef", "potato", "carrot"],
            ["salmon", "lemon", "dill"]
        ]

    # 1. Test FlavorGraph
    logger.info("=== FlavorGraph Tests ===")
    fg_tester = FlavorGraphTester()
    fg_tester.test_embedding_coverage()

    # 2. Test ChefTransformer
    logger.info("\n=== ChefTransformer Tests ===")
    # The hybrid tester builds its own ChefTransformerTester; reuse that
    # instance here so the model is loaded only once.
    hybrid_tester = HybridSystemTester()
    ct_tester = hybrid_tester.chef_transformer
    ct_results = ct_tester.test_recipe_generation(test_ingredients)

    # 3. Test Hybrid System
    logger.info("\n=== Hybrid System Tests ===")
    hybrid_results = hybrid_tester.test_hybrid_system(test_ingredients)

    return {
        'chef_transformer_results': ct_results,
        'hybrid_results': hybrid_results
    }

# Run all tests
# Executes immediately when the cell runs; `results` holds the dict returned
# by run_comprehensive_tests() for inspection in later cells.
results = run_comprehensive_tests()

Mounted at /content/drive
cp: cannot stat '/content/drive/MyDrive/FlavorGraph/FlavorGraph/output/kitchenette_embeddings.pkl': No such file or directory
cp: cannot stat '/content/drive/MyDrive/FlavorGraph/FlavorGraph/input/dict_ingr2cate - Top300+FDB400+HyperFoods104=616.csv': No such file or directory


ERROR:__main__:Error loading embeddings: [Errno 2] No such file or directory: '/content/FlavorGraph/output/kitchenette_embeddings.pkl'


FileNotFoundError: [Errno 2] No such file or directory: '/content/FlavorGraph/output/kitchenette_embeddings.pkl'