<a href="https://colab.research.google.com/github/darlon31/FlavorGraph/blob/HybridSystem/Full_Hybrid_System_Test.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [76]:
# CELL 1: Setup and Dependencies
!pip install -q transformers torch networkx pandas numpy scikit-learn matplotlib seaborn tqdm gdown

import os
import pickle
import numpy as np
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt
import seaborn as sns
import torch
from transformers import AutoTokenizer, T5ForConditionalGeneration
from sklearn.metrics.pairwise import cosine_similarity
from tqdm.notebook import tqdm
import time
import logging

# Set up logging
# force=True removes any handlers already attached to the root logger before
# configuring it. Without it, basicConfig() is a silent no-op whenever the root
# logger already has handlers (common in hosted notebook kernels), and the
# INFO-level messages emitted throughout this notebook never appear.
logging.basicConfig(level=logging.INFO,
                    format='%(asctime)s - %(levelname)s - %(message)s',
                    force=True)
logger = logging.getLogger(__name__)

In [77]:
# CELL 2: Mount Google Drive and Setup
# Colab-only import: this cell cannot run outside a Google Colab runtime.
from google.colab import drive
# force_remount=True is passed so that re-running this cell re-mounts the drive
# (presumably avoiding an "already mounted" failure - confirm against Colab docs).
drive.mount('/content/drive', force_remount=True)

# Make sure we're in the content directory and clean up
os.chdir('/content')
# Remove any previous checkout so the `git clone` in CELL 3 starts from a clean slate.
!rm -rf /content/FlavorGraph
logger.info("Environment prepared")

Mounted at /content/drive


In [78]:
# CELL 3: Clone Repository and Check Structure
# Clone the repository (into ./FlavorGraph, relative to the current working
# directory, which CELL 2 set to /content)
!git clone https://github.com/darlon31/FlavorGraph.git
# NOTE(review): this message is logged unconditionally; the exit status of the
# `git clone` command above is not checked.
logger.info("FlavorGraph repository cloned successfully")

# Check the actual structure
print("\nChecking actual directory structure:")
!ls -R /content/FlavorGraph

# Change to the FlavorGraph directory
# Relative path: relies on the working directory still being /content.
os.chdir('FlavorGraph')
logger.info(f"Current working directory: {os.getcwd()}")

Cloning into 'FlavorGraph'...
remote: Enumerating objects: 340, done.[K
remote: Counting objects: 100% (91/91), done.[K
remote: Compressing objects: 100% (57/57), done.[K
remote: Total 340 (delta 47), reused 57 (delta 33), pack-reused 249 (from 1)[K
Receiving objects: 100% (340/340), 20.85 MiB | 20.99 MiB/s, done.
Resolving deltas: 100% (195/195), done.

Checking actual directory structure:
/content/FlavorGraph:
images	input  LICENSE	output	README.md  src

/content/FlavorGraph/images:
embeddings.png	flavorgraph2vec.png  flavorgraph.png

/content/FlavorGraph/input:
'dict_ingr2cate - Top300+FDB400+HyperFoods104=616.csv'	 node_classification_hub.csv
 edges_191120.csv					 nodes_191120.csv

/content/FlavorGraph/output:
 kitchenette_embeddings.pkl  'place output files here'

/content/FlavorGraph/src:
dataloader.py  graph2vec.py  main.py  model.py	parser.py  plotter.py  utils.py  walkers.py


In [79]:
# CELL 4: Configuration and Path Verification
class Config:
    """Configuration class for the FlavorGraph Hybrid Recipe System.

    Collects every path and tunable used by the notebook in one place.

    Args:
        base_dir: Optional root directory of the FlavorGraph checkout. When
            omitted, the ``FLAVORGRAPH_BASE_DIR`` environment variable is used
            if set; otherwise the built-in Colab / local defaults apply.

    Attributes (all set in ``__init__``):
        IN_COLAB: True when running inside a Google Colab IPython kernel.
        BASE_DIR, INPUT_DIR, OUTPUT_DIR, SRC_DIR: repository directories.
        NODES_FILE, EDGES_FILE, CATEGORIES_FILE, NODE_CLASSIFICATION_FILE,
        EMBEDDING_FILE: data files inside INPUT_DIR / OUTPUT_DIR.
        EMBEDDING_DIM, MOLECULAR_DIM, MODEL_NAME, MODEL_CACHE_DIR: model settings.
        GENERATION_CONFIG: keyword settings for text generation.
        SIMILARITY_THRESHOLD, MAX_SIMILAR_INGREDIENTS: hybrid settings.
    """

    def __init__(self, base_dir=None):
        # Detect environment. get_ipython() only exists inside IPython, so guard
        # the lookup: plain `python` must not crash with NameError here.
        try:
            self.IN_COLAB = 'google.colab' in str(get_ipython())
        except NameError:
            self.IN_COLAB = False

        # Base paths: explicit argument > environment variable > built-in default
        if base_dir is None:
            base_dir = os.environ.get("FLAVORGRAPH_BASE_DIR")
        if base_dir:
            self.BASE_DIR = base_dir
        elif self.IN_COLAB:
            self.BASE_DIR = "/content/FlavorGraph"
        else:
            self.BASE_DIR = "c:/Users/dario/FlavorGraph"

        # Directory structure - using the correct paths as shown in the repository
        self.INPUT_DIR = os.path.join(self.BASE_DIR, "input")
        self.OUTPUT_DIR = os.path.join(self.BASE_DIR, "output")
        self.SRC_DIR = os.path.join(self.BASE_DIR, "src")

        # Data files with exact names from the repository
        self.NODES_FILE = os.path.join(self.INPUT_DIR, "nodes_191120.csv")
        self.EDGES_FILE = os.path.join(self.INPUT_DIR, "edges_191120.csv")
        self.CATEGORIES_FILE = os.path.join(self.INPUT_DIR, "dict_ingr2cate - Top300+FDB400+HyperFoods104=616.csv")
        self.NODE_CLASSIFICATION_FILE = os.path.join(self.INPUT_DIR, "node_classification_hub.csv")
        self.EMBEDDING_FILE = os.path.join(self.OUTPUT_DIR, "kitchenette_embeddings.pkl")

        # Model settings
        self.EMBEDDING_DIM = 300
        self.MOLECULAR_DIM = 881
        self.MODEL_NAME = "flax-community/t5-recipe-generation"
        self.MODEL_CACHE_DIR = os.path.join(self.BASE_DIR, "model_cache")

        # Generation settings
        self.GENERATION_CONFIG = {
            "max_length": 512,
            "min_length": 64,
            "no_repeat_ngram_size": 3,
            "do_sample": True,
            "top_k": 60,
            "top_p": 0.95,
            "temperature": 0.7
        }

        # Hybrid settings
        self.SIMILARITY_THRESHOLD = 0.7
        self.MAX_SIMILAR_INGREDIENTS = 3

    def verify_paths(self):
        """Verify all paths exist and log their status.

        Logs one line per configured directory/file through the module-level
        ``logger`` (INFO when present, WARNING when missing).

        Returns:
            bool: True only if every configured path exists.
        """
        paths = {
            'BASE_DIR': self.BASE_DIR,
            'INPUT_DIR': self.INPUT_DIR,
            'OUTPUT_DIR': self.OUTPUT_DIR,
            'SRC_DIR': self.SRC_DIR,
            'NODES_FILE': self.NODES_FILE,
            'EDGES_FILE': self.EDGES_FILE,
            'CATEGORIES_FILE': self.CATEGORIES_FILE,
            'NODE_CLASSIFICATION_FILE': self.NODE_CLASSIFICATION_FILE,
            'EMBEDDING_FILE': self.EMBEDDING_FILE
        }

        all_exist = True
        logger.info("\nVerifying paths:")
        for name, path in paths.items():
            if os.path.exists(path):
                if os.path.isfile(path):
                    # Files additionally report their size in KB
                    size = os.path.getsize(path)
                    logger.info(f"{name}: ✓ ({path}) - Size: {size/1024:.2f} KB")
                else:
                    logger.info(f"{name}: ✓ ({path}) - Directory")
            else:
                logger.warning(f"{name}: ✗ ({path}) - Not found")
                all_exist = False
        return all_exist

# Create and verify configuration
config = Config()
logger.info(f"Running in Colab: {config.IN_COLAB}")
logger.info(f"Current working directory: {os.getcwd()}")
logger.info(f"Base directory: {config.BASE_DIR}")

if not config.verify_paths():
    logger.warning("Some required paths are missing!")
else:
    logger.info("All paths verified successfully!")

# Print out all available files in input and output directories for verification
print("\nInput directory contents:")
!ls -l {config.INPUT_DIR}
print("\nOutput directory contents:")
!ls -l {config.OUTPUT_DIR}


Input directory contents:
total 5392
-rw-r--r-- 1 root root   16219 Nov 30 14:30 'dict_ingr2cate - Top300+FDB400+HyperFoods104=616.csv'
-rw-r--r-- 1 root root 5155973 Nov 30 14:30  edges_191120.csv
-rw-r--r-- 1 root root    1484 Nov 30 14:30  node_classification_hub.csv
-rw-r--r-- 1 root root  343416 Nov 30 14:30  nodes_191120.csv

Output directory contents:
total 8644
-rw-r--r-- 1 root root 8845756 Nov 30 14:30  kitchenette_embeddings.pkl
-rw-r--r-- 1 root root      25 Nov 30 14:30 'place output files here'


In [80]:
# CELL 5: Setup Hybrid System Structure
# Create hybrid_system directory
!mkdir -p /content/FlavorGraph/hybrid_system

# Create necessary Python files
hybrid_files = {
    'config.py': '''import os
import logging

logging.basicConfig(level=logging.INFO,
                   format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

class Config:
    """Configuration class for the FlavorGraph Hybrid Recipe System"""

    def __init__(self):
        # Detect environment
        self.IN_COLAB = 'google.colab' in str(get_ipython())

        # Base paths
        if self.IN_COLAB:
            self.BASE_DIR = "/content/FlavorGraph"
        else:
            self.BASE_DIR = "c:/Users/dario/FlavorGraph"

        # Directory structure
        self.INPUT_DIR = os.path.join(self.BASE_DIR, "input")
        self.OUTPUT_DIR = os.path.join(self.BASE_DIR, "output")
        self.SRC_DIR = os.path.join(self.BASE_DIR, "src")

        # Data files
        self.NODES_FILE = os.path.join(self.INPUT_DIR, "nodes_191120.csv")
        self.EDGES_FILE = os.path.join(self.INPUT_DIR, "edges_191120.csv")
        self.CATEGORIES_FILE = os.path.join(self.INPUT_DIR, "dict_ingr2cate - Top300+FDB400+HyperFoods104=616.csv")
        self.NODE_CLASSIFICATION_FILE = os.path.join(self.INPUT_DIR, "node_classification_hub.csv")
        self.EMBEDDING_FILE = os.path.join(self.OUTPUT_DIR, "kitchenette_embeddings.pkl")

        # Model settings
        self.EMBEDDING_DIM = 300
        self.MOLECULAR_DIM = 881
        self.MODEL_NAME = "flax-community/t5-recipe-generation"
        self.MODEL_CACHE_DIR = os.path.join(self.BASE_DIR, "model_cache")

        # Generation settings
        self.GENERATION_CONFIG = {
            "max_length": 512,
            "min_length": 64,
            "no_repeat_ngram_size": 3,
            "do_sample": True,
            "top_k": 60,
            "top_p": 0.95,
            "temperature": 0.7
        }

        # Hybrid settings
        self.SIMILARITY_THRESHOLD = 0.7
        self.MAX_SIMILAR_INGREDIENTS = 3

    def verify_paths(self):
        """Verify all paths exist and log their status"""
        paths = {
            'BASE_DIR': self.BASE_DIR,
            'INPUT_DIR': self.INPUT_DIR,
            'OUTPUT_DIR': self.OUTPUT_DIR,
            'SRC_DIR': self.SRC_DIR,
            'NODES_FILE': self.NODES_FILE,
            'EDGES_FILE': self.EDGES_FILE,
            'CATEGORIES_FILE': self.CATEGORIES_FILE,
            'NODE_CLASSIFICATION_FILE': self.NODE_CLASSIFICATION_FILE,
            'EMBEDDING_FILE': self.EMBEDDING_FILE
        }

        all_exist = True
        logger.info("\\nVerifying paths:")
        for name, path in paths.items():
            if os.path.exists(path):
                if os.path.isfile(path):
                    size = os.path.getsize(path)
                    logger.info(f"{name}: ✓ ({path}) - Size: {size/1024:.2f} KB")
                else:
                    logger.info(f"{name}: ✓ ({path}) - Directory")
            else:
                logger.warning(f"{name}: ✗ ({path}) - Not found")
                all_exist = False
        return all_exist''',
    'ingredient_processor.py': '''from config import Config
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
import pickle

class IngredientProcessor:
    def __init__(self, config):
        self.config = config
        self._load_data()

    def _load_data(self):
        """Load all necessary data files"""
        # Load ingredient nodes
        self.nodes_df = pd.read_csv(self.config.NODES_FILE)

        # Load categories
        self.categories_df = pd.read_csv(self.config.CATEGORIES_FILE)

        # Load embeddings
        with open(self.config.EMBEDDING_FILE, 'rb') as f:
            self.embeddings = pickle.load(f)
''',
    'recipe_generator.py': '''from transformers import AutoTokenizer, T5ForConditionalGeneration
from config import Config
import torch

class RecipeGenerator:
    def __init__(self, config):
        self.config = config
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        self._load_model()

    def _load_model(self):
        """Load the T5 model and tokenizer"""
        self.tokenizer = AutoTokenizer.from_pretrained(self.config.MODEL_NAME)
        self.model = T5ForConditionalGeneration.from_pretrained(self.config.MODEL_NAME)
        self.model.to(self.device)
''',
    'test_config.py': '''import unittest
from config import Config
import os

class TestConfig(unittest.TestCase):
    def setUp(self):
        self.config = Config()

    def test_paths_exist(self):
        """Test that all required paths exist"""
        self.assertTrue(self.config.verify_paths())
''',
    'test_hybrid.py': '''import unittest
from config import Config
from ingredient_processor import IngredientProcessor
from recipe_generator import RecipeGenerator

class TestHybridSystem(unittest.TestCase):
    def setUp(self):
        self.config = Config()
        self.ingredient_processor = IngredientProcessor(self.config)
        self.recipe_generator = RecipeGenerator(self.config)
'''
}

# Write files to hybrid_system directory
for filename, content in hybrid_files.items():
    filepath = f'/content/FlavorGraph/hybrid_system/{filename}'
    with open(filepath, 'w') as f:
        f.write(content)

print("Created hybrid system files:")
!ls -l /content/FlavorGraph/hybrid_system/

Created hybrid system files:
total 20
-rw-r--r-- 1 root root 3024 Nov 30 14:31 config.py
-rw-r--r-- 1 root root  663 Nov 30 14:31 ingredient_processor.py
-rw-r--r-- 1 root root  585 Nov 30 14:31 recipe_generator.py
-rw-r--r-- 1 root root  284 Nov 30 14:31 test_config.py
-rw-r--r-- 1 root root  366 Nov 30 14:31 test_hybrid.py


In [81]:
# CELL 6: Comprehensive System Test
import importlib
import sys

# Build the configuration first: its BASE_DIR locates the hybrid_system directory.
config = Config()

# The modules written in CELL 5 live in <BASE_DIR>/hybrid_system, which is not on
# sys.path by default. Without this, `from ingredient_processor import ...` (and
# that module's own `from config import Config`) raises ModuleNotFoundError on a
# freshly restarted kernel.
hybrid_system_dir = os.path.join(config.BASE_DIR, "hybrid_system")
if hybrid_system_dir not in sys.path:
    sys.path.insert(0, hybrid_system_dir)
# The .py files were created after the interpreter started, so clear the import
# system's finder caches before importing them.
importlib.invalidate_caches()

from ingredient_processor import IngredientProcessor
from recipe_generator import RecipeGenerator

# Initialize components
ingredient_proc = IngredientProcessor(config)
recipe_gen = RecipeGenerator(config)

# NOTE(review): the module stubs written in CELL 5 define only __init__/_load_data
# (IngredientProcessor) and __init__/_load_model (RecipeGenerator). The methods
# used below - print_data_stats, suggest_substitutions, generate_recipe_variations
# and generate_recipe - must be provided by a fuller version of those modules;
# confirm that version is the one on disk before relying on this cell.

# Print data statistics
ingredient_proc.print_data_stats()

# Test ingredient substitutions
test_ingredients = [
    "chicken",
    "tomatoes",
    "onion",
    "garlic",
    "bell peppers",
    "carrots"
]

print("\n=== Testing Smart Substitutions ===")
for ingredient in test_ingredients:
    try:
        print(f"\nSubstitutes for {ingredient}:")
        substitutes = ingredient_proc.suggest_substitutions(ingredient)
        for sub in substitutes:
            print(f"- {sub['name']} (Category: {sub['category']}, Similarity: {sub['similarity']:.3f})")
    except ValueError as e:
        # A ValueError here is reported per ingredient instead of aborting the
        # whole loop (the captured output shows it for unknown ingredient names).
        print(f"Cannot find substitutes for {ingredient}: {str(e)}")

# Test recipe generation with variations
print("\n=== Testing Recipe Generation with Variations ===")
base_recipe = ["chicken", "rice", "kale", "onion", "garlic"]
print("\nOriginal ingredients:", base_recipe)

variations = ingredient_proc.generate_recipe_variations(base_recipe, n_variations=2)
for i, variation in enumerate(variations, 1):
    print(f"\nVariation {i} ingredients:", variation)
    recipe = recipe_gen.generate_recipe(variation)
    print("Recipe:", recipe)


Data Statistics:
Total nodes in dataset: 8297
Valid ingredients with embeddings: 2552
Total categories: 17

Sample of valid ingredients:
['seasoning_salt' 'flaked_coconut' 'tapioca_flour' 'curry_leaf'
 'vanilla_vodka']

=== Testing Smart Substitutions ===

Substitutes for chicken:
- beef (Category: Meat/Animal Product, Similarity: 0.861)
- meat (Category: Meat/Animal Product, Similarity: 0.843)
- pork (Category: Meat/Animal Product, Similarity: 0.838)

Substitutes for tomatoes:
Cannot find substitutes for tomatoes: No embedding found for ingredient 'tomatoes'

Substitutes for onion:
- paprika (Category: Plant/Vegetable, Similarity: 0.876)
- basil (Category: Plant/Vegetable, Similarity: 0.865)
- pepper (Category: Spice, Similarity: 0.896)

Substitutes for garlic:
- beef_mince (Category: Unknown, Similarity: 0.499)
- boneless_chicken (Category: Unknown, Similarity: 0.490)
- brinjal (Category: Unknown, Similarity: 0.474)

Substitutes for bell peppers:
Cannot find substitutes for bell pep