In [6]:
import pandas as pd
import networkx as nx
import numpy as np
from collections import defaultdict
import matplotlib.pyplot as plt
import seaborn as sns
from typing import List, Dict, Optional

# ============================================================================
# LOAD ALL DATA
# ============================================================================

# Announce first so the user sees progress before the two network fetches.
print("üì• Loading FlavorGraph data...")

# Edge and node tables are read directly from the FlavorGraph GitHub repo.
edges_url = "https://raw.githubusercontent.com/lamypark/FlavorGraph/master/input/edges_191120.csv"
nodes_url = "https://raw.githubusercontent.com/lamypark/FlavorGraph/master/input/nodes_191120.csv"
edges_df = pd.read_csv(edges_url)
nodes_df = pd.read_csv(nodes_url)

print(f"‚úÖ Loaded {len(nodes_df)} nodes and {len(edges_df)} edges")

# Quick sanity check: how many rows of each edge / node type were loaded.
edge_type_counts = edges_df['edge_type'].value_counts()
print("\nEdge types:")
print(edge_type_counts)

node_type_counts = nodes_df['node_type'].value_counts()
print("\nNode types (sample):")
print(node_type_counts)


# ============================================================================
# UNIVERSAL FLAVOR GRAPH CLASS
# ============================================================================

class UniversalFlavorGraph:
    """
    Food-pairing helper built from an edge table and a node table.

    Three undirected graphs are derived from ``edges_df`` by ``edge_type``:

    * ``ing_graph``      - ``'ingr-ingr'`` edges (recipe co-occurrence),
      weighted by the edge ``score``.
    * ``compound_graph`` - ``'ingr-fcomp'`` edges (ingredient / flavor compound).
    * ``drug_graph``     - ``'ingr-dcomp'`` edges (ingredient / drug compound).

    Public methods take ingredient *names* and resolve them to node ids with
    :meth:`get_ingredient_id`; internally everything is keyed by node id so
    that two nodes sharing a display name are never confused.
    """

    # Blend used for the 'hybrid' final score in recommend_pairings().
    CHEMICAL_WEIGHT = 0.6
    RECIPE_WEIGHT = 0.4

    # Values accepted by recommend_pairings(method=...).
    _METHODS = ('chemical', 'recipe', 'hybrid')

    def __init__(self, edges_df, nodes_df):
        """
        Args:
            edges_df: DataFrame with columns ``id_1``, ``id_2``, ``edge_type``
                and ``score``.
            nodes_df: DataFrame with columns ``node_id``, ``name`` and
                ``node_type``.
        """
        self.edges_df = edges_df
        self.nodes_df = nodes_df

        # Create mappings
        self.node_id_to_name = dict(zip(nodes_df['node_id'], nodes_df['name']))

        # Store node types
        self.node_types = dict(zip(nodes_df['node_id'], nodes_df['node_type']))

        # Node id sets, filled from the edge table by build_graphs()
        self.ingredient_ids = set()
        self.compound_ids = set()
        self.drug_ids = set()

        # Build all graphs
        self.build_graphs()

        # Built after the graphs so that ingredient nodes can win name clashes.
        self.node_name_to_id = self._build_name_index()

    # ------------------------------------------------------------------
    # Construction helpers
    # ------------------------------------------------------------------

    def _build_name_index(self) -> Dict[str, int]:
        """Map lower-cased node name -> node id.

        When several nodes share a name the last one wins, except that a
        non-ingredient node never displaces an ingredient node: name lookups
        in this class are about ingredients.
        """
        index = {}
        for node_id, name in self.node_id_to_name.items():
            key = str(name).lower()
            if (key in index
                    and index[key] in self.ingredient_ids
                    and node_id not in self.ingredient_ids):
                continue
            index[key] = node_id
        return index

    def _load_edges(self, graph, edge_type, default_weight=None):
        """Add every ``edge_type`` row of ``edges_df`` to ``graph``.

        Args:
            graph: networkx graph to populate.
            edge_type: value of the ``edge_type`` column to select.
            default_weight: weight used where the score is missing (column
                absent or cell NaN). ``None`` means the ``score`` column is
                mandatory and is used as-is.

        Returns:
            The selected slice of ``edges_df``.
        """
        subset = self.edges_df[self.edges_df['edge_type'] == edge_type]

        if default_weight is None:
            scores = subset['score']
        elif 'score' in subset.columns:
            # A present-but-empty score would otherwise become a NaN weight,
            # which makes sorting by weight meaningless.
            scores = subset['score'].fillna(default_weight)
        else:
            scores = [default_weight] * len(subset)

        # Column-wise iteration instead of DataFrame.iterrows(), which builds
        # a Series per row.
        graph.add_weighted_edges_from(zip(subset['id_1'], subset['id_2'], scores))
        return subset

    def build_graphs(self):
        """Build separate graphs for different relationship types"""

        print("\nüî® Building knowledge graphs...")

        # 1. Ingredient-Ingredient graph (recipe co-occurrence)
        self.ing_graph = nx.Graph()
        ing_edges = self._load_edges(self.ing_graph, 'ingr-ingr')
        self.ingredient_ids.update(ing_edges['id_1'])
        self.ingredient_ids.update(ing_edges['id_2'])

        # 2. Ingredient-Compound bipartite graph (id_1 = ingredient, id_2 = compound)
        self.compound_graph = nx.Graph()
        comp_edges = self._load_edges(self.compound_graph, 'ingr-fcomp', default_weight=1.0)
        self.ingredient_ids.update(comp_edges['id_1'])
        self.compound_ids.update(comp_edges['id_2'])

        # 3. Ingredient-Drug graph (optional for health benefits)
        self.drug_graph = nx.Graph()
        drug_edges = self._load_edges(self.drug_graph, 'ingr-dcomp', default_weight=1.0)
        self.ingredient_ids.update(drug_edges['id_1'])
        self.drug_ids.update(drug_edges['id_2'])

        print(f"‚úÖ Built graphs:")
        print(f"   ‚Ä¢ {len(self.ingredient_ids)} food ingredients")
        print(f"   ‚Ä¢ {len(self.compound_ids)} flavor compounds")
        print(f"   ‚Ä¢ {len(self.drug_ids)} drug compounds")
        print(f"   ‚Ä¢ {self.ing_graph.number_of_edges()} ingredient-ingredient edges")
        print(f"   ‚Ä¢ {self.compound_graph.number_of_edges()} ingredient-compound edges")
        print(f"   ‚Ä¢ {self.drug_graph.number_of_edges()} ingredient-drug edges")

    # ------------------------------------------------------------------
    # Id-based helpers (no name resolution involved)
    # ------------------------------------------------------------------

    def _compound_ids_of(self, ingredient_id) -> set:
        """Return the set of flavor-compound ids linked to ``ingredient_id``."""
        if ingredient_id is None or ingredient_id not in self.compound_graph:
            return set()
        return {
            neighbor
            for neighbor in self.compound_graph.neighbors(ingredient_id)
            if neighbor in self.compound_ids
        }

    def _compound_records(self, ingredient_id) -> List[Dict]:
        """Return ``{'id', 'name', 'weight'}`` dicts for an ingredient's
        flavor compounds, highest weight first."""
        if ingredient_id is None or ingredient_id not in self.compound_graph:
            return []
        records = [
            {
                'id': compound_id,
                'name': self.node_id_to_name.get(compound_id, f"Compound_{compound_id}"),
                'weight': attrs['weight'],
            }
            for compound_id, attrs in self.compound_graph[ingredient_id].items()
            if compound_id in self.compound_ids
        ]
        return sorted(records, key=lambda rec: rec['weight'], reverse=True)

    @staticmethod
    def _jaccard(first: set, second: set) -> float:
        """Jaccard index of two sets; 0.0 when either set is empty."""
        if not first or not second:
            return 0.0
        return len(first & second) / len(first | second)

    # ------------------------------------------------------------------
    # Lookup
    # ------------------------------------------------------------------

    def search_ingredient(self, query: str) -> List[Dict]:
        """Search for ingredients whose name contains ``query``.

        Returns:
            One dict per match with keys ``id``, ``name``, ``num_compounds``
            (number of linked flavor compounds) and ``num_recipes`` (degree of
            the node in ``ing_graph``), sorted by ``num_compounds`` descending
            and then by name so the order is deterministic.
        """
        query = str(query).strip().lower()
        matches = []

        for node_id in self.ingredient_ids:
            name = self.node_id_to_name.get(node_id, '')
            if query in str(name).lower():
                matches.append({
                    'id': node_id,
                    'name': name,
                    # Counted by id: resolving the name again could land on a
                    # different node that happens to share it.
                    'num_compounds': len(self._compound_ids_of(node_id)),
                    'num_recipes': self.ing_graph.degree(node_id) if node_id in self.ing_graph else 0
                })

        return sorted(matches, key=lambda m: (-m['num_compounds'], str(m['name'])))

    def get_ingredient_id(self, ingredient_name: str) -> Optional[int]:
        """Get ingredient ID from name (fuzzy match).

        Matching is case-insensitive and ignores surrounding whitespace.
        An exact name match is tried first, then the first ingredient whose
        name contains the query. If neither succeeds and the query contains
        whitespace, the same two steps are retried with underscores in place
        of the whitespace.

        Returns:
            The node id (which may legitimately be ``0``), or ``None`` when
            nothing matches or the query is empty.
        """
        if ingredient_name is None:
            return None

        query = str(ingredient_name).strip().lower()
        if not query:
            # '' is a substring of every name; without this guard an empty
            # query would "match" an arbitrary ingredient.
            return None

        variants = [query]
        # NOTE(review): node names are presumably stored with underscores
        # (e.g. "olive_oil") - confirm against the nodes file. This fallback
        # only runs when the literal query matched nothing.
        underscored = '_'.join(query.split())
        if underscored != query:
            variants.append(underscored)

        for variant in variants:
            # Exact match
            if variant in self.node_name_to_id:
                return self.node_name_to_id[variant]

            # Partial match (ingredients only)
            for name, nid in self.node_name_to_id.items():
                if variant in name and nid in self.ingredient_ids:
                    return nid

        return None

    def get_compounds_for_ingredient(self, ingredient_name: str) -> List[Dict]:
        """Get all flavor compounds for an ingredient.

        Returns:
            List of ``{'id', 'name', 'weight'}`` dicts sorted by weight
            descending; empty when the ingredient is unknown or has no
            compound edges.
        """
        # `is None` inside the helper, not truthiness: node id 0 is valid.
        return self._compound_records(self.get_ingredient_id(ingredient_name))

    def get_ingredients_with_compound(self, compound_id: int) -> List[Dict]:
        """Get all ingredients that contain a specific compound.

        Returns:
            List of ``{'id', 'name', 'weight'}`` dicts, one per ingredient
            adjacent to ``compound_id`` in ``compound_graph``.
        """
        if compound_id not in self.compound_graph:
            return []

        return [
            {
                'id': neighbor,
                'name': self.node_id_to_name.get(neighbor, f"Ingredient_{neighbor}"),
                'weight': attrs['weight'],
            }
            for neighbor, attrs in self.compound_graph[compound_id].items()
            if neighbor in self.ingredient_ids
        ]

    # ------------------------------------------------------------------
    # Pairwise measures
    # ------------------------------------------------------------------

    def find_shared_compounds(self, ingredient1: str, ingredient2: str) -> List[Dict]:
        """Find compounds shared between two ingredients.

        Returns:
            List of ``{'id', 'name', 'in_ing1', 'in_ing2'}`` dicts where the
            last two are the edge weights towards each ingredient. Ordered by
            combined weight (descending), then name, so callers that show a
            "top N" get a stable, meaningful prefix.
        """
        compounds1 = {c['id']: c for c in self.get_compounds_for_ingredient(ingredient1)}
        compounds2 = {c['id']: c for c in self.get_compounds_for_ingredient(ingredient2)}

        shared = [
            {
                'id': cid,
                'name': self.node_id_to_name.get(cid, f"Compound_{cid}"),
                'in_ing1': compounds1[cid]['weight'],
                'in_ing2': compounds2[cid]['weight']
            }
            for cid in compounds1.keys() & compounds2.keys()
        ]
        shared.sort(key=lambda c: (-(c['in_ing1'] + c['in_ing2']), str(c['name'])))
        return shared

    def compound_similarity(self, ingredient1: str, ingredient2: str) -> float:
        """Calculate Jaccard similarity based on shared compounds.

        Returns 0.0 when either ingredient is unknown or has no compounds.
        """
        compounds1 = self._compound_ids_of(self.get_ingredient_id(ingredient1))
        compounds2 = self._compound_ids_of(self.get_ingredient_id(ingredient2))
        return self._jaccard(compounds1, compounds2)

    def recipe_cooccurrence_score(self, ingredient1: str, ingredient2: str) -> float:
        """Get recipe co-occurrence score.

        Returns the ``ing_graph`` edge weight between the two ingredients, or
        0.0 when either is unknown or they are not connected.
        """
        id1 = self.get_ingredient_id(ingredient1)
        id2 = self.get_ingredient_id(ingredient2)

        # Explicit None checks: `not id1` would reject the valid node id 0.
        if id1 is None or id2 is None:
            return 0.0

        if self.ing_graph.has_edge(id1, id2):
            return self.ing_graph[id1][id2]['weight']

        return 0.0

    # ------------------------------------------------------------------
    # Recommendation
    # ------------------------------------------------------------------

    def recommend_pairings(self,
                          base_ingredient: str,
                          method: str = 'hybrid',
                          top_n: int = 10,
                          min_shared_compounds: int = 1,
                          category_filter: Optional[List[str]] = None) -> List[Dict]:
        """
        Recommend ingredient pairings

        Methods:
        - 'chemical': Based purely on chemical compound similarity
        - 'recipe': Based purely on recipe co-occurrence
        - 'hybrid': Combines both (default)

        Args:
            base_ingredient: name of the ingredient to pair with.
            method: one of the methods above (case-insensitive). If the base
                ingredient has no compound data, 'chemical' and 'hybrid' fall
                back to 'recipe'. An unrecognised value prints a warning and
                yields an empty list.
            top_n: maximum number of recommendations returned.
            min_shared_compounds: minimum number of shared compounds a
                candidate needs. Applied only when the method uses chemistry;
                in 'recipe' mode it is ignored (otherwise every recipe-only
                candidate would be discarded).
            category_filter: accepted for interface compatibility; not applied
                by this implementation.

        Returns:
            List of dicts with keys ``ingredient``, ``compound_similarity``,
            ``recipe_score``, ``final_score``, ``shared_compounds`` (at most
            five names) and ``num_shared_compounds``, sorted by
            ``final_score`` descending. ``compound_similarity`` and
            ``recipe_score`` are filled in for every method; only
            ``final_score`` depends on the method.
        """
        method = str(method).strip().lower()
        if method not in self._METHODS:
            print(f"‚ö†Ô∏è  Unknown method '{method}', expected one of: {', '.join(self._METHODS)}")
            return []

        base_id = self.get_ingredient_id(base_ingredient)
        if base_id is None:
            return []

        base_compounds = self._compound_records(base_id)

        if not base_compounds and method in ('chemical', 'hybrid'):
            print(f"‚ö†Ô∏è  No compound data for '{base_ingredient}', falling back to recipe method")
            method = 'recipe'

        use_chemistry = method != 'recipe'
        use_recipes = method != 'chemical'

        # Chemical evidence: candidate id -> names of compounds shared with
        # the base, in order of the base's compound weights.
        shared_by_id = defaultdict(list)
        for compound in base_compounds:
            for other_id in self.compound_graph.neighbors(compound['id']):
                if other_id != base_id and other_id in self.ingredient_ids:
                    shared_by_id[other_id].append(compound['name'])

        # Recipe evidence: candidate id -> co-occurrence score.
        recipe_by_id = {}
        if base_id in self.ing_graph:
            recipe_by_id = {
                neighbor: attrs['weight']
                for neighbor, attrs in self.ing_graph[base_id].items()
                if neighbor != base_id
            }

        # Which evidence *nominates* candidates depends on the method; both
        # kinds are still reported for every candidate that makes the cut.
        pool = []
        if use_chemistry:
            pool.extend(shared_by_id)
        if use_recipes:
            pool.extend(recipe_by_id)

        base_compound_ids = {compound['id'] for compound in base_compounds}
        recommendations = []

        for candidate_id in dict.fromkeys(pool):  # de-duplicate, keep order
            shared_names = shared_by_id.get(candidate_id, [])

            # Filter by minimum shared compounds (chemistry-based methods only)
            if use_chemistry and len(shared_names) < min_shared_compounds:
                continue

            compound_sim = self._jaccard(base_compound_ids,
                                         self._compound_ids_of(candidate_id))
            recipe_score = recipe_by_id.get(candidate_id, 0.0)

            # Combined score
            if method == 'chemical':
                final_score = compound_sim
            elif method == 'recipe':
                final_score = recipe_score
            else:  # hybrid
                final_score = (self.CHEMICAL_WEIGHT * compound_sim) + (self.RECIPE_WEIGHT * recipe_score)

            recommendations.append({
                'ingredient': self.node_id_to_name.get(candidate_id, f"Ingredient_{candidate_id}"),
                'compound_similarity': compound_sim,
                'recipe_score': recipe_score,
                'final_score': final_score,
                'shared_compounds': shared_names[:5],
                'num_shared_compounds': len(shared_names)
            })

        # Sort by final score
        recommendations.sort(key=lambda rec: rec['final_score'], reverse=True)

        # A negative top_n would otherwise silently slice from the end.
        return recommendations[:max(top_n, 0)]

    # ------------------------------------------------------------------
    # Reporting
    # ------------------------------------------------------------------

    def explain_pairing(self, ingredient1: str, ingredient2: str):
        """Print the chemical and recipe evidence for pairing two ingredients.

        If either name cannot be resolved, that is reported and no verdict is
        printed (a "weak pairing" verdict for an unknown name would be
        misleading).
        """
        print(f"\n{'='*70}")
        print(f"üîç Analyzing pairing: {ingredient1.upper()} + {ingredient2.upper()}")
        print(f"{'='*70}")

        unknown = [name for name in (ingredient1, ingredient2)
                   if self.get_ingredient_id(name) is None]
        if unknown:
            for name in unknown:
                print(f"‚ùå Ingredient '{name}' not found")
            print(f"{'='*70}\n")
            return

        # Compound similarity
        shared = self.find_shared_compounds(ingredient1, ingredient2)
        comp_sim = self.compound_similarity(ingredient1, ingredient2)

        print(f"\nüìä Chemical Analysis:")
        print(f"   Compound similarity: {comp_sim:.3f}")
        print(f"   Shared compounds: {len(shared)}")

        if shared:
            print(f"\nüß™ Top shared flavor compounds:")
            for i, comp in enumerate(shared[:10], 1):
                print(f"   {i}. {comp['name']}")

        # Recipe co-occurrence
        recipe_score = self.recipe_cooccurrence_score(ingredient1, ingredient2)
        print(f"\nüìñ Recipe Analysis:")
        print(f"   Co-occurrence score: {recipe_score:.3f}")

        if recipe_score > 0:
            print(f"   ‚úÖ These ingredients ARE used together in recipes")
        else:
            print(f"   ‚ö†Ô∏è  These ingredients are NOT commonly used together")
            print(f"   üí° This could be a NOVEL pairing opportunity!")

        # Overall verdict
        print(f"\nüéØ Verdict:")
        if comp_sim > 0.3 or recipe_score > 0.5:
            print(f"   ‚úÖ STRONG PAIRING - Good chemical and/or recipe support")
        elif comp_sim > 0.1 or recipe_score > 0.2:
            print(f"   ‚ö†Ô∏è  MODERATE PAIRING - Some support, worth experimenting")
        else:
            print(f"   ‚ùå WEAK PAIRING - Little support, likely incompatible")

        print(f"{'='*70}\n")

    def get_ingredient_profile(self, ingredient_name: str):
        """Print id, compound count, recipe-graph degree and the ten
        highest-weighted flavor compounds of an ingredient."""
        ing_id = self.get_ingredient_id(ingredient_name)
        # `is None`, not truthiness: node id 0 is a valid ingredient.
        if ing_id is None:
            print(f"‚ùå Ingredient '{ingredient_name}' not found")
            return

        compounds = self._compound_records(ing_id)

        # Recipe connections
        recipe_connections = 0
        if ing_id in self.ing_graph:
            recipe_connections = self.ing_graph.degree(ing_id)

        print(f"\n{'='*70}")
        print(f"üìã Profile: {ingredient_name.upper()}")
        print(f"{'='*70}")
        print(f"   ID: {ing_id}")
        print(f"   Flavor compounds: {len(compounds)}")
        print(f"   Recipe connections: {recipe_connections}")

        if compounds:
            print(f"\nüß™ Top 10 flavor compounds:")
            for i, comp in enumerate(compounds[:10], 1):
                print(f"   {i}. {comp['name']} (weight: {comp['weight']:.3f})")

        print(f"{'='*70}\n")


# ============================================================================
# INITIALIZE THE GRAPH
# ============================================================================

# Build the graph helper once at module level; the example code below and
# interactive_demo() both use this shared `ufg` instance.
print("\nüöÄ Initializing Universal Flavor Graph...")
ufg = UniversalFlavorGraph(edges_df, nodes_df)
print("‚úÖ Ready!\n")


# ============================================================================
# HELPER FUNCTIONS
# ============================================================================

def display_recommendations(recommendations: List[Dict], show_compounds: bool = True):
    """Pretty print recommendations as a ranked table.

    Args:
        recommendations: dicts with keys ``ingredient``, ``final_score``,
            ``compound_similarity``, ``recipe_score``, ``shared_compounds``
            and ``num_shared_compounds``.
        show_compounds: when True, print up to three shared compound names
            under each row that has any.
    """

    if not recommendations:
        print("‚ùå No recommendations found.")
        return

    print(f"\n{'Rank':<6} {'Ingredient':<35} {'Score':<8} {'Chem':<8} {'Recipe':<8} {'Shared':<8}")
    print("-" * 110)

    for rank, rec in enumerate(recommendations, 1):
        # str(): a name that is not a string (e.g. a missing value from the
        # node table) must not break the row formatting.
        print(f"{rank:<6} {str(rec['ingredient']):<35} {rec['final_score']:.3f}    "
              f"{rec['compound_similarity']:.3f}    {rec['recipe_score']:.3f}    "
              f"{rec['num_shared_compounds']:<8}")

        shared_names = rec['shared_compounds']
        if show_compounds and shared_names:
            preview = shared_names[:3]
            compounds_str = ', '.join(str(name) for name in preview)

            # 'shared_compounds' may itself be a truncated sample, so the
            # "more" count is taken from the real total rather than from the
            # length of the sample.
            total = max(rec['num_shared_compounds'], len(shared_names))
            hidden = total - len(preview)
            if hidden > 0:
                compounds_str += f" ... (+{hidden} more)"

            print(f"       üß™ {compounds_str}")
            print()


# ============================================================================
# EXAMPLE USAGE
# ============================================================================

# Each example prints a banner and then exercises one feature of `ufg`.

print("\n" + "üéØ"*35)
print("EXAMPLE 1: Find pairings for CHOCOLATE")
print("üéØ"*35)

results = ufg.recommend_pairings("chocolate", method='hybrid', top_n=15)
display_recommendations(results)


print("\n" + "üéØ"*35)
print("EXAMPLE 2: Find pairings for TOMATO (chemical only)")
print("üéØ"*35)

results = ufg.recommend_pairings("tomato", method='chemical', top_n=15)
display_recommendations(results)


print("\n" + "üéØ"*35)
print("EXAMPLE 3: Why do STRAWBERRY and BASIL work?")
print("üéØ"*35)

ufg.explain_pairing("strawberry", "basil")


print("\n" + "üéØ"*35)
print("EXAMPLE 4: Search for ingredients")
print("üéØ"*35)

print("\nüîç Searching for 'beef'...")
results = ufg.search_ingredient("beef")
for r in results[:5]:
    print(f"   ‚Ä¢ {r['name']} (compounds: {r['num_compounds']}, recipes: {r['num_recipes']})")


print("\n" + "üéØ"*35)
print("EXAMPLE 5: Get ingredient profile")
print("üéØ"*35)

ufg.get_ingredient_profile("vanilla")


print("\n" + "üéØ"*35)
print("EXAMPLE 6: Novel pairing discovery")
print("üéØ"*35)

print("\nüí° Finding ingredients with similar compounds to COFFEE but NOT used in recipes:")
results = ufg.recommend_pairings("coffee", method='chemical', top_n=20)

# The ranking above is purely chemical, so do not rely on it to carry a
# recipe score: look the co-occurrence up explicitly for each candidate.
# Without this the "novel" filter below would accept every row.
for r in results:
    r['recipe_score'] = ufg.recipe_cooccurrence_score("coffee", r['ingredient'])

# Filter for novel (low recipe score)
novel = [r for r in results if r['recipe_score'] < 0.1]
print(f"\nFound {len(novel)} potential NOVEL pairings:")
display_recommendations(novel[:10])

üì• Loading FlavorGraph data...
‚úÖ Loaded 8298 nodes and 147179 edges

Edge types:
edge_type
ingr-ingr     111355
ingr-fcomp     35440
ingr-dcomp       384
Name: count, dtype: int64

Node types (sample):
node_type
ingredient    6653
compound      1645
Name: count, dtype: int64

üöÄ Initializing Universal Flavor Graph...

üî® Building knowledge graphs...
‚úÖ Built graphs:
   ‚Ä¢ 6653 food ingredients
   ‚Ä¢ 1561 flavor compounds
   ‚Ä¢ 84 drug compounds
   ‚Ä¢ 111355 ingredient-ingredient edges
   ‚Ä¢ 35440 ingredient-compound edges
   ‚Ä¢ 384 ingredient-drug edges
‚úÖ Ready!


üéØüéØüéØüéØüéØüéØüéØüéØüéØüéØüéØüéØüéØüéØüéØüéØüéØüéØüéØüéØüéØüéØüéØüéØüéØüéØüéØüéØüéØüéØüéØüéØüéØüéØüéØ
EXAMPLE 1: Find pairings for CHOCOLATE
üéØüéØüéØüéØüéØüéØüéØüéØüéØüéØüéØüéØüéØüéØüéØüéØüéØüéØüéØüéØüéØüéØüéØüéØüéØüéØüéØüéØüéØüéØüéØüéØüéØüéØüéØ

Rank   Ingredient                          Score    Chem     Recipe   Shared  
-----------

In [None]:
def interactive_demo():
    """Interactive command-line demo.

    Shows a six-item menu in a loop and dispatches to the module-level `ufg`
    instance until the user chooses option 6. End-of-input or Ctrl-C at any
    prompt also ends the demo instead of raising.
    """
    valid_methods = ('chemical', 'recipe', 'hybrid')
    default_top_n = 10

    print("\n" + "="*70)
    print("üç≥ UNIVERSAL FLAVOR PAIRING AI")
    print("="*70)

    try:
        while True:
            print("\nüéØ What would you like to do?")
            print("   1. Find pairings for an ingredient")
            print("   2. Explain why two ingredients pair")
            print("   3. Get ingredient profile")
            print("   4. Search for ingredients")
            print("   5. Discover novel pairings")
            print("   6. Exit")

            choice = input("\nEnter choice (1-6): ").strip()

            if choice == '1':
                ingredient = input("Enter ingredient: ").strip()

                method = input("Method (chemical/recipe/hybrid) [hybrid]: ").strip().lower() or 'hybrid'
                if method not in valid_methods:
                    # A typo here would otherwise just produce an empty table.
                    print(f"‚ö†Ô∏è  Unknown method '{method}', using hybrid")
                    method = 'hybrid'

                raw_top_n = input("Number of results [10]: ").strip()
                try:
                    top_n = int(raw_top_n) if raw_top_n else default_top_n
                except ValueError:
                    # Keep the session alive on non-numeric input.
                    print(f"‚ö†Ô∏è  '{raw_top_n}' is not a number, using {default_top_n}")
                    top_n = default_top_n
                if top_n <= 0:
                    top_n = default_top_n

                print(f"\nüîç Finding pairings for {ingredient}...")
                results = ufg.recommend_pairings(ingredient, method=method, top_n=top_n)
                display_recommendations(results)

            elif choice == '2':
                ing1 = input("First ingredient: ").strip()
                ing2 = input("Second ingredient: ").strip()
                ufg.explain_pairing(ing1, ing2)

            elif choice == '3':
                ingredient = input("Enter ingredient: ").strip()
                ufg.get_ingredient_profile(ingredient)

            elif choice == '4':
                query = input("Search query: ").strip()
                results = ufg.search_ingredient(query)
                print(f"\nüìã Found {len(results)} matches:")
                for r in results[:15]:
                    print(f"   ‚Ä¢ {r['name']}")

            elif choice == '5':
                ingredient = input("Base ingredient: ").strip()
                print(f"\nüí° Finding novel pairings for {ingredient}...")
                results = ufg.recommend_pairings(ingredient, method='chemical', top_n=30)

                # The ranking is purely chemical, so do not rely on it to
                # carry a recipe score: look the co-occurrence up explicitly,
                # otherwise the "low recipe usage" test accepts every row.
                for r in results:
                    r['recipe_score'] = ufg.recipe_cooccurrence_score(ingredient, r['ingredient'])

                novel = [r for r in results if r['recipe_score'] < 0.1 and r['compound_similarity'] > 0.15]
                print(f"\nFound {len(novel)} novel pairings (high chemistry, low recipe usage):")
                display_recommendations(novel[:10])

            elif choice == '6':
                print("\nüëã Goodbye!")
                break

            else:
                print("‚ùå Invalid choice")

    except (EOFError, KeyboardInterrupt):
        # Closed stdin or an interrupted prompt: leave quietly.
        print("\nüëã Goodbye!")

# Run interactive demo. This call blocks on input() prompts; choosing
# option 6 in the menu ends the loop.
interactive_demo()


üç≥ UNIVERSAL FLAVOR PAIRING AI

üéØ What would you like to do?
   1. Find pairings for an ingredient
   2. Explain why two ingredients pair
   3. Get ingredient profile
   4. Search for ingredients
   5. Discover novel pairings
   6. Exit
