In [1]:
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt
from itertools import combinations
import seaborn as sns

In [6]:
# Read the complete drinks table from disk
df = pd.read_csv("../data/all_drinks.csv")

# Keep only the rows whose strAlcoholic flag is exactly 'Alcoholic';
# .copy() detaches the subset so it is independent of df
is_alcoholic = df['strAlcoholic'] == 'Alcoholic'
df_alcoholic = df.loc[is_alcoholic].copy()

# Column names strIngredient1 ... strIngredient15
ingredient_cols = ['strIngredient' + str(slot) for slot in range(1, 16)]

# 2-D array: one row per filtered drink, one column per ingredient slot
ingredients = df_alcoholic[ingredient_cols].values

# Canonical ingredient name -> every spelling / brand / variant that should be
# collapsed into it. All entries are written in lower case.
ingredient_mapping = {
    'creme de cacao': ['creme de cacao', 'dark creme de cacao', 'white creme de cacao'],
    'lemon juice': ['lemon juice', 'fresh lemon juice'],
    'cranberry juice': ['cranberry juice', 'cranberries'],
    'orange juice': ['orange juice', 'tropicana'],
    'sweet vermouth': ['sweet vermouth', 'vermouth'],
    'dry vermouth': ['dry vermouth'],
    'vodka': [
        'vodka', 'absolut vodka', 'absolut citron', 'absolut kurant',
        'vanilla vodka', 'lemon vodka', 'cranberry vodka', 'peach vodka',
    ],
    'rum': [
        'rum', 'light rum', 'dark rum', 'spiced rum',
        'malibu rum', 'coconut rum', 'añejo rum', 'white rum',
    ],
    'gin': ['gin'],
    'tequila': ['tequila', 'gold tequila'],
    'triple sec': ['triple sec', 'cointreau', 'orange curacao'],
    'amaretto': ['amaretto'],
    'kahlua': ['kahlua', 'coffee liqueur'],
    "bailey's irish cream": ["bailey's irish cream", 'irish cream'],
    'sour mix': ['sour mix', 'sweet and sour'],
    'soda water': ['soda water', 'club soda', 'carbonated water'],
    'pineapple juice': ['pineapple juice'],
    'grenadine': ['grenadine'],
    'lime juice': ['lime juice', 'fresh lime juice'],
    'grand marnier': ['grand marnier'],
    'peach schnapps': ['peach schnapps', 'peachtree schnapps'],
    'blue curacao': ['blue curacao'],
    'midori melon liqueur': ['midori melon liqueur', 'melon liqueur'],
    'ginger ale': ['ginger ale'],
    'coca-cola': ['coca-cola', 'pepsi cola'],
    'lemon': ['lemon', 'lemon peel'],
    'orange': ['orange', 'orange peel'],
    'cherry': ['cherry', 'maraschino cherry'],
    'coffee': ['coffee'],
    'brandy': ['brandy', 'apricot brandy', 'apple brandy', 'cherry brandy', 'coffee brandy'],
    'whiskey': [
        'whiskey', 'blended whiskey', 'bourbon', 'scotch', 'irish whiskey',
        'jack daniels', 'jim beam', 'wild turkey', 'tennessee whiskey',
        'johnnie walker', 'crown royal',
    ],
}

# Flatten into variant -> canonical so each ingredient resolves with a single
# dict lookup. If a variant were listed under two canonicals, the later
# entry in ingredient_mapping would win.
lookup = {
    alias.lower(): canonical
    for canonical, aliases in ingredient_mapping.items()
    for alias in aliases
}

# Ingredients dropped from every drink. Matching is done on the stripped,
# lower-cased raw name, i.e. before the synonym lookup is applied.
EXCLUDED_INGREDIENTS = {'ice', 'sugar', 'water', 'salt', 'food coloring'}

# Build one list of standardized ingredient names per drink.
drink_ingredients = []
for row in ingredients:
    standardized = []
    for raw in row:
        # Unused ingredient slots come through as NaN/None
        if pd.isnull(raw):
            continue
        # Normalize once and reuse the result for every check below
        name = raw.strip().lower()
        if not name or name in EXCLUDED_INGREDIENTS:
            continue
        # Fall back to the normalized name when no synonym is registered
        standardized.append(lookup.get(name, name))
    # Several variants can collapse to the same canonical name; dict.fromkeys
    # keeps the first occurrence of each and preserves insertion order.
    drink_ingredients.append(list(dict.fromkeys(standardized)))

In [7]:
# Undirected ingredient co-occurrence graph. An edge's 'weight' counts the
# drinks in which both endpoints appear together. Nodes are only created via
# add_edge, so an ingredient that never shares a drink with another one does
# not appear in G.
G = nx.Graph()

# The loop variable is deliberately NOT called `ingredients`: that name holds
# the raw ingredient array built when the data is loaded, and rebinding it
# here would silently replace the array with the last drink's list.
for drink in drink_ingredients:
    # Every unordered pair of ingredients within the same drink
    for ing1, ing2 in combinations(drink, 2):
        if G.has_edge(ing1, ing2):
            G[ing1][ing2]['weight'] += 1
        else:
            G.add_edge(ing1, ing2, weight=1)

In [8]:
output_file = "../data/nodes.csv"

# Node table: one row per graph node, with the node name used both as its
# identifier and as its display label.
node_names = list(G.nodes())
nodes_data = {'Id': node_names, 'Label': list(node_names)}
nodes_df = pd.DataFrame(nodes_data)

# Write the table without the DataFrame index column
nodes_df.to_csv(output_file, index=False)
print(f"Nodes exported to {output_file}")

Nodes exported to ../data/nodes.csv


In [9]:
output_file = "../data/edges.csv"

# Edge table: one row per graph edge with its co-occurrence weight.
# The edges are walked exactly once so the three columns are aligned by
# construction rather than by relying on repeated iterations of G.edges()
# returning the same order.
sources, targets, weights = [], [], []
for source, target, attrs in G.edges(data=True):
    sources.append(source)
    targets.append(target)
    weights.append(attrs['weight'])

edges_data = {
    'Source': sources,
    'Target': targets,
    'Weight': weights,
}
edges_df = pd.DataFrame(edges_data)

# Write the table without the DataFrame index column
edges_df.to_csv(output_file, index=False)
print(f"Edges exported to {output_file}")

Edges exported to ../data/edges.csv
