In [None]:
import pandas as pd
# Load the cleaned cocktail table from a CSV expected in the working directory.
# Every later cell reads this frame through the name `df`.
df = pd.read_csv('data_cocktails_cleaned.csv')

In [None]:
# Show the loaded frame (the last expression of a cell is rendered as its output).
df

Unnamed: 0.1,Unnamed: 0,strDrink,strCategory,strGlass,strIngredients,Alc_type,Basic_taste,strInstructions,strMeasures,Value_ml,Value_gr,Garnish_amount,Garnish_type
0,0,'57 Chevy with a White License Plate,Cocktail,Highball Glass,Creme De Cacao White,Creamy Liqueur,,1. Fill a rocks glass with ice 2.add white cre...,1 oz white,30.0,,,
1,1,'57 Chevy with a White License Plate,Cocktail,Highball Glass,Vodka,Vodka,,1. Fill a rocks glass with ice 2.add white cre...,1 oz,30.0,,,
2,2,1-900-FUK-MEUP,Shot,Old-fashioned glass,Grand Marnier,Triple Sec,,Shake ingredients in a mixing tin filled with ...,1/4 oz,7.5,,,
3,3,1-900-FUK-MEUP,Shot,Old-fashioned glass,Midori Melon Liqueur,Sweet Liqueur,,Shake ingredients in a mixing tin filled with ...,1/4 oz,7.5,,,
4,4,1-900-FUK-MEUP,Shot,Old-fashioned glass,Malibu Rum,Rum,,Shake ingredients in a mixing tin filled with ...,1/4 oz,7.5,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1776,1776,Zorbatini,Cocktail,Cocktail Glass,Vodka,Vodka,,Prepare like a Martini. Garnish with a green o...,1 1/4 oz stoli,37.5,,,
1777,1777,Zorbatini,Cocktail,Cocktail Glass,Ouzo,Ouzo,,Prepare like a Martini. Garnish with a green o...,1/4 oz,7.5,,,
1778,1778,Zorro,Coffee / Tea,Coffee Mug,Sambuca,Sambuca,,add all and pour black coffee and add whipped ...,2 cl,20.0,,,
1779,1779,Zorro,Coffee / Tea,Coffee Mug,Bailey'S Irish Cream,Creamy Liqueur,,add all and pour black coffee and add whipped ...,2 cl,20.0,,,


In [None]:
# Map every distinct ingredient to one of three groups:
#   "spirit"  - the ingredient has a string Alc_type
#   "mixer"   - no Alc_type, and either some row is marked "top up"
#               (e.g. coca cola) or Garnish_type is missing altogether
#   "garnish" - no Alc_type and any other Garnish_type value
ingredients_type = {}
for ingredient in df['strIngredients'].unique():
    # All rows that use this ingredient; both lookups below share it.
    rows = df[df['strIngredients'] == ingredient]

    alc_type = rows['Alc_type'].unique()
    # An ingredient must map to exactly one Alc_type (possibly nan).
    assert len(alc_type) == 1

    if type(alc_type[0]) == str:
        category = "spirit"
    else:
        # Alc_type is nan, so the ingredient is not a spirit; decide by Garnish_type.
        garnish_type = rows['Garnish_type'].unique()
        if "top up" in garnish_type:
            category = "mixer"
        elif len(garnish_type) == 1 and type(garnish_type[0]) != str:
            # The only Garnish_type value is nan.
            category = "mixer"
        else:
            category = "garnish"

    ingredients_type[ingredient] = category

In [None]:
# Inspect the ingredient -> group mapping.
ingredients_type

{'Creme De Cacao White': 'spirit',
 'Vodka': 'spirit',
 'Grand Marnier': 'spirit',
 'Midori Melon Liqueur': 'spirit',
 'Malibu Rum': 'spirit',
 'Amaretto': 'spirit',
 'Cranberry Juice': 'mixer',
 'Pineapple Juice': 'mixer',
 'Chambord Raspberry Liqueur': 'spirit',
 'Absolut Kurant': 'spirit',
 'Tequila': 'spirit',
 'Lager': 'spirit',
 'Dark Creme De Cacao': 'spirit',
 'Coconut Liqueur': 'spirit',
 'Light Rum': 'spirit',
 'Milk': 'mixer',
 'Cointreau': 'spirit',
 'Vanilla Ice-Cream': 'mixer',
 '151 Proof Rum': 'spirit',
 'Orange Juice': 'mixer',
 'Dark Rum': 'spirit',
 'Jägermeister': 'spirit',
 'Goldschlager': 'spirit',
 'Rumple Minze': 'spirit',
 'Wild Turkey': 'spirit',
 'Jim Beam': 'spirit',
 'Johnnie Walker': 'spirit',
 'Jack Daniels': 'spirit',
 'Coca-Cola': 'mixer',
 'Sweet And Sour': 'mixer',
 'Triple Sec': 'spirit',
 'Bitters': 'spirit',
 'Lemon': 'garnish',
 'Gin': 'spirit',
 'Peach Vodka': 'spirit',
 'Vanilla Vodka': 'spirit',
 'Sour Mix': 'mixer',
 'Blueberry Schnapps': 'spi

In [None]:
from itertools import combinations
from collections import defaultdict

# A pair of ingredients becomes an edge only when MORE than this many drinks
# contain both of them.
MIN_PAIR_COUNT = 5

# For every unordered ingredient pair: how many drinks contain both, and which.
ingredient_pairs = defaultdict(int)
ingredient_pairs_cocktails = defaultdict(list)

# sort=False yields drinks in order of first appearance, which keeps the pair
# insertion order (and therefore the node ids assigned below) stable.
for drink, drink_ingredients in df.groupby('strDrink', sort=False)['strIngredients']:
    # De-duplicate before pairing: an ingredient listed twice for one drink must
    # not count that drink twice or produce a self-pair. Sorting gives each pair
    # one canonical (alphabetical) key.
    ingredients = sorted(set(drink_ingredients.dropna()))
    for pair in combinations(ingredients, 2):
        ingredient_pairs[pair] += 1
        ingredient_pairs_cocktails[pair].append(drink)

# Keep only the pairs shared by more than MIN_PAIR_COUNT drinks.
filtered_pairs = {
    pair: count
    for pair, count in ingredient_pairs.items()
    if count > MIN_PAIR_COUNT
}

# Per ingredient, the number of retained pairs it takes part in.
filtered_ingredients = defaultdict(int)
for ing1, ing2 in filtered_pairs:
    filtered_ingredients[ing1] += 1
    filtered_ingredients[ing2] += 1

# Node id = position of the ingredient in `filtered_ingredients` (insertion order).
filtered_ingredient_ids = {
    ingredient: idx for idx, ingredient in enumerate(filtered_ingredients)
}

# Edge records (a list of dicts, one per retained pair). Each drink appears at
# most once per pair, so "value" equals len("cocktails"); sorting the names makes
# the exported file identical from run to run.
network_data = []
for pair, count in filtered_pairs.items():
    network_data.append({
        "from": filtered_ingredient_ids[pair[0]],
        "to": filtered_ingredient_ids[pair[1]],
        "value": count,
        "cocktails": sorted(ingredient_pairs_cocktails[pair])
    })


In [None]:
# Node records: one dict per ingredient that survived the pair filter.
# 'id' is the ingredient's position in `filtered_ingredients`, 'value' is its
# count from that mapping, and 'group' is its spirit/mixer/garnish label.
ingredients_data = [
    {
        'id': node_id,
        'value': pair_count,
        'label': name,
        'group': ingredients_type[name],
    }
    for node_id, (name, pair_count) in enumerate(filtered_ingredients.items())
]


In [None]:
# Inspect the node records.
ingredients_data

[{'id': 0, 'value': 3, 'label': 'Orange Juice', 'group': 'mixer'},
 {'id': 1, 'value': 9, 'label': 'Vodka', 'group': 'spirit'},
 {'id': 2, 'value': 1, 'label': 'Bitters', 'group': 'spirit'},
 {'id': 3, 'value': 16, 'label': 'Gin', 'group': 'spirit'},
 {'id': 4, 'value': 11, 'label': 'Lemon', 'group': 'garnish'},
 {'id': 5, 'value': 6, 'label': 'Triple Sec', 'group': 'spirit'},
 {'id': 6, 'value': 5, 'label': 'Light Rum', 'group': 'spirit'},
 {'id': 7, 'value': 2, 'label': 'Tequila', 'group': 'spirit'},
 {'id': 8, 'value': 1, 'label': "Bailey'S Irish Cream", 'group': 'spirit'},
 {'id': 9, 'value': 1, 'label': 'Kahlua', 'group': 'spirit'},
 {'id': 10, 'value': 3, 'label': 'Grenadine', 'group': 'mixer'},
 {'id': 11, 'value': 1, 'label': 'Cranberry Juice', 'group': 'mixer'},
 {'id': 12, 'value': 1, 'label': 'Peach Schnapps', 'group': 'spirit'},
 {'id': 13, 'value': 8, 'label': 'Lemon Juice', 'group': 'mixer'},
 {'id': 14, 'value': 3, 'label': 'Cherry', 'group': 'garnish'},
 {'id': 15, 'val

In [None]:
# Inspect the edge records.
network_data

[{'from': 0,
  'to': 1,
  'value': 17,
  'cocktails': ['Addison Special',
   'Screwdriver',
   'Orange Whip',
   'Bubble Gum',
   'Arctic Fish',
   'Harvey Wallbanger',
   'Irish Curdling Cow',
   'After sex',
   'Blue Mountain',
   'Arizona Twister',
   'Popped cherry',
   'San Francisco',
   'Orange Crush',
   'Pink Penocha',
   'Apricot punch',
   "A Gilligan's Island",
   '155 Belmont']},
 {'from': 2,
  'to': 3,
  'value': 8,
  'cocktails': ['Pink Gin',
   'Turf Cocktail',
   'Boomerang',
   'Artillery',
   'Bluebird',
   'Gin Swizzle',
   '3-Mile Long Island Iced Tea',
   'Dubonnet Cocktail']},
 {'from': 3,
  'to': 4,
  'value': 8,
  'cocktails': ['Clover Club',
   'Long Island Tea',
   'Gin Rickey',
   'Gin Fizz',
   'Lady Love Fizz',
   'Royal Gin Fizz',
   'Gin Sling',
   '3-Mile Long Island Iced Tea']},
 {'from': 3,
  'to': 5,
  'value': 11,
  'cocktails': ['Grass Skirt',
   'White Lady',
   'Boxcar',
   'Cherry Electric Lemonade',
   'Bluebird',
   'Martinez Cocktail',
   'Fl

In [None]:
import json

# Write the node and edge records to the working directory as indented JSON:
# nodes.json first, then edges.json.
for filename, payload in (("nodes.json", ingredients_data), ("edges.json", network_data)):
    with open(filename, "w") as json_file:
        json.dump(payload, json_file, indent=4)

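# Previous version (CSV export)

The cells below are the earlier pipeline, which leaves out `Sugar` and writes CSV files instead of JSON. Running them rebinds `network_data`, `filtered_ingredients` and `ingredients_data`, so re-run the cells above before exporting JSON again.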

In [None]:
from itertools import combinations
from collections import defaultdict

# Earlier pipeline: count ingredient pairs per drink, leaving 'Sugar' out, and
# build a DataFrame of the pairs shared by more than 5 drinks.

# For every unordered ingredient pair: how many drinks contain both, and which.
ingredient_pairs = defaultdict(int)
ingredient_pairs_cocktails = defaultdict(list)

for drink in df['strDrink'].unique():
    ingredients = df[df['strDrink'] == drink]['strIngredients'].tolist()
    # Drop EVERY 'Sugar' entry (list.remove only removed the first one) and
    # collapse repeated ingredients so one drink is never counted twice for a
    # pair and no self-pairs appear. Sorting gives each pair a canonical key.
    ingredients = sorted({name for name in ingredients if name != 'Sugar'})
    for pair in combinations(ingredients, 2):
        ingredient_pairs[pair] += 1
        ingredient_pairs_cocktails[pair].append(drink)

# Filter out pairs with a score less than or equal to 5
filtered_pairs = {pair: count for pair, count in ingredient_pairs.items() if count > 5}

# Per ingredient, the number of retained pairs it takes part in.
filtered_ingredients = defaultdict(int)
for ing1, ing2 in filtered_pairs.keys():
    filtered_ingredients[ing1] += 1
    filtered_ingredients[ing2] += 1

# Id = position of the ingredient in `filtered_ingredients` (insertion order).
filtered_ingredient_ids = {ingredient: idx for idx, ingredient in enumerate(filtered_ingredients)}

# One record per retained pair.
network_data = []
for pair, count in filtered_pairs.items():
    network_data.append({
        "name1": pair[0],
        "name2": pair[1],
        "score": count,
        "source": filtered_ingredient_ids[pair[0]],
        "target": filtered_ingredient_ids[pair[1]],
        "cocktails": ingredient_pairs_cocktails[pair]
    })

# Explicit columns keep the frame well-formed (and sortable by 'score') even
# when no pair passes the filter.
network_df = pd.DataFrame(
    network_data,
    columns=["name1", "name2", "score", "source", "target", "cocktails"],
)

# Show the pairs, strongest first.
network_df.sort_values(by='score', ascending=False)


Unnamed: 0,name1,name2,score,source,target,cocktails
0,Orange Juice,Vodka,17,0,1,"[155 Belmont, A Gilligan's Island, Addison Spe..."
16,Gin,Lemon Juice,15,3,13,"[A1, Adam & Eve, Arthur Tompkins, Aviation, Bo..."
15,Gin,Grenadine,15,3,10,"[A1, Ace, Boxcar, Clover Club, English Rose Co..."
10,Bailey'S Irish Cream,Kahlua,14,8,9,"[747, After Five, Afternoon, B-52, Baby Guinne..."
26,Dry Vermouth,Gin,12,21,3,"[Adios Amigos Cocktail, Allies Cocktail, Bermu..."
12,Cranberry Juice,Vodka,11,11,1,"[A Gilligan's Island, Absolut Stress #2, Absol..."
3,Gin,Triple Sec,11,3,5,"[3-Mile Long Island Iced Tea, Bluebird, Boxcar..."
36,Blended Whiskey,Lemon,11,29,4,"[Boston Sour, Boston Sour, California Lemonade..."
37,Cherry,Lemon,11,14,4,"[Boston Sour, Boston Sour, Brandy Sour, Brandy..."
28,Gin,Lemon Peel,10,3,22,"[Alaska Cocktail, Arthur Tompkins, Bermuda Hig..."


In [None]:
# Write the pair table to CSV; index=False leaves the row index out of the file.
network_df.to_csv('ingredient_relationships.csv', index=False)

In [None]:
# Inspect how many retained pairs each ingredient takes part in.
filtered_ingredients

defaultdict(int,
            {'Orange Juice': 3,
             'Vodka': 9,
             'Bitters': 1,
             'Gin': 15,
             'Lemon': 10,
             'Triple Sec': 6,
             'Light Rum': 4,
             'Tequila': 2,
             "Bailey'S Irish Cream": 1,
             'Kahlua': 1,
             'Grenadine': 3,
             'Cranberry Juice': 1,
             'Peach Schnapps': 1,
             'Lemon Juice': 7,
             'Cherry': 2,
             'Orange': 3,
             'Orange Bitters': 1,
             'Sweet Vermouth': 1,
             'Lime Juice': 2,
             'Rum': 1,
             'Lemonade': 1,
             'Dry Vermouth': 1,
             'Lemon Peel': 2,
             'Light Cream': 1,
             'Nutmeg': 1,
             'Pineapple Juice': 1,
             'Maraschino Liqueur': 1,
             'Carbonated Water': 2,
             'Maraschino Cherry': 3,
             'Blended Whiskey': 1,
             'Egg White': 1,
             'Water': 1,
             

In [None]:
# Column-oriented node table: the ingredient name appears twice (display name
# and key) next to its count from `filtered_ingredients`.
ingredients_data = dict(
    nameDisplay=filtered_ingredients.keys(),
    name=filtered_ingredients.keys(),
    count=filtered_ingredients.values(),
)
ingredients_df = pd.DataFrame(ingredients_data)

In [None]:
# Write the node table to CSV; index=False leaves the row index out of the file.
ingredients_df.to_csv('network_ingredients.csv', index=False)

In [None]:
# Inspect the node table.
ingredients_df

Unnamed: 0,nameDisplay,name,count
0,Orange Juice,Orange Juice,3
1,Vodka,Vodka,9
2,Bitters,Bitters,1
3,Gin,Gin,15
4,Lemon,Lemon,10
5,Triple Sec,Triple Sec,6
6,Light Rum,Light Rum,4
7,Tequila,Tequila,2
8,Bailey'S Irish Cream,Bailey'S Irish Cream,1
9,Kahlua,Kahlua,1


<a style='text-decoration:none;line-height:16px;display:flex;color:#5B5B62;padding:10px;justify-content:end;' href='https://deepnote.com?utm_source=created-in-deepnote-cell&projectId=08e32eae-680f-4766-bfea-eabbd1bdf534' target="_blank">
Created in <span style='font-weight:600;margin-left:4px;'>Deepnote</span></a>