In [None]:
import pandas as pd
# Load the cleaned cocktail table; each row pairs one drink with one of its ingredients.
df = pd.read_csv(filepath_or_buffer='data_cocktails_cleaned.csv')

In [None]:
# Display the loaded frame (the rendered output elides the middle rows with "...").
df

Unnamed: 0.1,Unnamed: 0,strDrink,strCategory,strGlass,strIngredients,Alc_type,Basic_taste,strInstructions,strMeasures,Value_ml,Value_gr,Garnish_amount,Garnish_type
0,0,'57 Chevy with a White License Plate,Cocktail,Highball Glass,Creme De Cacao White,Creamy Liqueur,,1. Fill a rocks glass with ice 2.add white cre...,1 oz white,30.0,,,
1,1,'57 Chevy with a White License Plate,Cocktail,Highball Glass,Vodka,Vodka,,1. Fill a rocks glass with ice 2.add white cre...,1 oz,30.0,,,
2,2,1-900-FUK-MEUP,Shot,Old-fashioned glass,Grand Marnier,Triple Sec,,Shake ingredients in a mixing tin filled with ...,1/4 oz,7.5,,,
3,3,1-900-FUK-MEUP,Shot,Old-fashioned glass,Midori Melon Liqueur,Sweet Liqueur,,Shake ingredients in a mixing tin filled with ...,1/4 oz,7.5,,,
4,4,1-900-FUK-MEUP,Shot,Old-fashioned glass,Malibu Rum,Rum,,Shake ingredients in a mixing tin filled with ...,1/4 oz,7.5,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1776,1776,Zorbatini,Cocktail,Cocktail Glass,Vodka,Vodka,,Prepare like a Martini. Garnish with a green o...,1 1/4 oz stoli,37.5,,,
1777,1777,Zorbatini,Cocktail,Cocktail Glass,Ouzo,Ouzo,,Prepare like a Martini. Garnish with a green o...,1/4 oz,7.5,,,
1778,1778,Zorro,Coffee / Tea,Coffee Mug,Sambuca,Sambuca,,add all and pour black coffee and add whipped ...,2 cl,20.0,,,
1779,1779,Zorro,Coffee / Tea,Coffee Mug,Bailey'S Irish Cream,Creamy Liqueur,,add all and pour black coffee and add whipped ...,2 cl,20.0,,,


In [None]:
# Classify every ingredient as "spirit", "mixer" or "garnish":
#   * spirit  - the ingredient has a (string) Alc_type
#   * mixer   - no Alc_type and either no Garnish_type at all or a "top up" entry (e.g. coca cola)
#   * garnish - no Alc_type and some other Garnish_type
ingredients_type = {}

# A single groupby pass hands over each ingredient's rows, instead of re-filtering
# the whole frame twice per ingredient. sort=False keeps first-appearance order.
# Rows whose ingredient name is missing are skipped by groupby.
for ingredient, ingredient_rows in df.groupby('strIngredients', sort=False):
    # aggregate unique Alc_type (NaN counts as one distinct value)
    alc_types = ingredient_rows['Alc_type'].unique()
    # there should only be 1 Alc_type for each ingredient
    assert len(alc_types) == 1, (
        f"ingredient {ingredient!r} has conflicting Alc_type values: {list(alc_types)}"
    )

    # a valid string Alc_type marks a spirit; NaN means "not alcoholic"
    if isinstance(alc_types[0], str):
        ingredients_type[ingredient] = "spirit"
        continue

    # not a spirit: decide between mixer and garnish from the non-missing Garnish_type labels
    garnish_labels = set(ingredient_rows['Garnish_type'].dropna())
    if "top up" in garnish_labels or not garnish_labels:
        # "top up" ingredients and ingredients never used as a garnish are mixers
        ingredients_type[ingredient] = "mixer"
    else:
        ingredients_type[ingredient] = "garnish"

In [None]:
from collections import Counter

# Group by 'strDrink' and aggregate all ingredients into a list
cocktail_ingredients = df.groupby('strDrink')['strIngredients'].apply(list).reset_index()

# Count, for every ingredient, the number of distinct cocktails that use it.
# A cocktail may list the same ingredient on more than one row, so each recipe is
# de-duplicated first; otherwise the count would be "rows", not "cocktails".
# dict.fromkeys keeps first-appearance order, so the result is deterministic.
ingredient_counts = Counter(
    ingredient
    for recipe in cocktail_ingredients['strIngredients']
    for ingredient in dict.fromkeys(recipe)
)

# Define a threshold for the number of cocktails an ingredient should appear in to be considered 'essential'
threshold = 11  # This is just an example value, adjust based on your dataset size and needs

# Filter ingredients by this threshold
essential_ingredients = [ingredient for ingredient, count in ingredient_counts.items() if count >= threshold]

# Keep only the cocktails whose every ingredient is on the essential list
def can_be_made_with_essentials(ingredients):
    """Return True when each entry of `ingredients` is found in `essential_ingredients`."""
    for ingredient in ingredients:
        if ingredient not in essential_ingredients:
            return False
    return True

essentials_only_mask = cocktail_ingredients['strIngredients'].apply(can_be_made_with_essentials)
filtered_cocktails = cocktail_ingredients[essentials_only_mask]

In [None]:
# Attach the preparation instructions to every selected cocktail.
# Exactly one instruction row is kept per drink, so the left merge can never
# duplicate a cocktail even if its rows carry differing instruction text.
drink_instructions = df[['strDrink', 'strInstructions']].drop_duplicates(subset='strDrink')

# Drop any 'strInstructions' column left by a previous run of this cell first;
# otherwise re-running would produce 'strInstructions_x' / 'strInstructions_y'.
filtered_cocktails = (
    filtered_cocktails
    .drop(columns='strInstructions', errors='ignore')
    .merge(drink_instructions, on='strDrink', how='left')
)

In [None]:
# Show the selected cocktails as a list of per-drink dictionaries.
filtered_cocktails.to_dict(orient='records')

[{'strDrink': '155 Belmont',
  'strIngredients': ['Light Rum', 'Orange Juice', 'Vodka', 'Dark Rum'],
  'strInstructions': 'Blend with ice. Serve in a wine glass. Garnish with carrot.'},
 {'strDrink': '3-Mile Long Island Iced Tea',
  'strIngredients': ['Light Rum',
   'Vodka',
   'Coca-Cola',
   'Sweet And Sour',
   'Triple Sec',
   'Bitters',
   'Lemon',
   'Gin',
   'Tequila'],
  'strInstructions': 'Fill 14oz glass with ice and alcohol. Fill 2/3 glass with cola and remainder with sweet & sour. Top with dash of bitters and lemon wedge.'},
 {'strDrink': "A Gilligan's Island",
  'strIngredients': ['Peach Schnapps',
   'Cranberry Juice',
   'Orange Juice',
   'Vodka'],
  'strInstructions': 'Shaken, not stirred!'},
 {'strDrink': 'A True Amaretto Sour',
  'strIngredients': ['Lemon', 'Maraschino Cherry', 'Amaretto'],
  'strInstructions': 'Rub the rim of an old fashioned glass with lemon, and dip repeatedly into granulated sugar until it has a good "frosted" rim. Shake a jigger of Amaretto wi

In [None]:
import json
# Write the selected cocktails to disk as a JSON array with one object per drink.
with open("mix_drinks.json", mode="w") as json_file:
    drink_records = filtered_cocktails.to_dict(orient='records')
    json.dump(obj=drink_records, fp=json_file, indent=4)

In [None]:
# Tally how often each ingredient is listed across the selected cocktails.
filtered_ingredients_counts = Counter()
for recipe in filtered_cocktails['strIngredients']:
    filtered_ingredients_counts.update(recipe)

In [None]:
# One record per ingredient: a running id, its tally, its name and its
# spirit / mixer / garnish group looked up in `ingredients_type`.
filtered_ingredients = [
    {
        "id": node_id,
        "value": occurrences,
        "label": name,
        "group": ingredients_type[name],
    }
    for node_id, (name, occurrences) in enumerate(filtered_ingredients_counts.items())
]

In [None]:
# Write the ingredient records to disk as a JSON array.
with open("mix_ingredients.json", mode="w") as json_file:
    json.dump(obj=filtered_ingredients, fp=json_file, indent=4)

In [None]:
# Tabular views of the results: the essential ingredients as a one-column frame,
# and the selected cocktails with the ingredient column given a friendlier name.
essential_ingredients_df = pd.DataFrame({'Essential Ingredient': essential_ingredients})
filtered_cocktails_df = filtered_cocktails.rename({'strIngredients': 'Ingredients'}, axis='columns')

In [None]:
# Display the one-column table of essential ingredients.
essential_ingredients_df

Unnamed: 0,Essential Ingredient
0,Vodka
1,Amaretto
2,Cranberry Juice
3,Pineapple Juice
4,Tequila
5,Light Rum
6,Milk
7,151 Proof Rum
8,Orange Juice
9,Dark Rum


In [None]:
# Display the cocktails that can be made from the essential ingredients alone.
filtered_cocktails_df

Unnamed: 0,strDrink,Ingredients
4,155 Belmont,"[Light Rum, Orange Juice, Vodka, Dark Rum]"
8,3-Mile Long Island Iced Tea,"[Light Rum, Vodka, Coca-Cola, Sweet And Sour, ..."
17,A Gilligan's Island,"[Peach Schnapps, Cranberry Juice, Orange Juice..."
21,A True Amaretto Sour,"[Lemon, Maraschino Cherry, Amaretto]"
29,Abbey Cocktail,"[Orange, Orange Bitters, Cherry, Gin]"
...,...,...
435,Tom Collins,"[Club Soda, Gin, Lemon Juice, Maraschino Cherr..."
445,Vesuvio,"[Egg White, Light Rum, Sweet Vermouth, Sugar, ..."
447,Victor,"[Gin, Brandy, Sweet Vermouth]"
453,Waikiki Beachcomber,"[Gin, Pineapple Juice, Triple Sec]"


In [None]:
# Optionally, persist both result tables as CSV files (row index omitted)
for frame, csv_path in (
    (essential_ingredients_df, 'essential_ingredients.csv'),
    (filtered_cocktails_df, 'filtered_cocktails.csv'),
):
    frame.to_csv(csv_path, index=False)

In [None]:
# Hand-picked list of classic drink names, matched against the 'strDrink' column.
classic_drinks = [
    'Alexander',
    'Amaretto Sour',
    'Americano',
    'Aviation',
    'Baby Guinness',
    'Bramble',
    'Caipirinha',
    'Clover Club',
    'Cosmopolitan',
    'Daiquiri',
    'Dirty Martini',
    'Espresso Martini',
    'French 75',
    'French Martini',
    'Godfather',
    'Hemingway Special',
    'Long Island Iced Tea',
    'Mai Tai',
    'Manhattan',
    'Margarita',
    'Mojito',
    'Moscow Mule',
    'Negroni',
    'New York Sour',
    'Classic Old-Fashioned',
    'Rum Sour',
    'San Francisco',
    'Sazerac',
    'Screaming Orgasm',
    'Sidecar',
    'Singapore Sling',
    'Tequila Sunrise',
    "Tommy's Margarita",
    'Vesper',
    'Whiskey Sour',
    'White Lady',
]

In [None]:
from collections import defaultdict

# Count how many times each ingredient is listed across the classic drinks.
classic_ingredients = defaultdict(int)

# Build the drink -> ingredient-list lookup once, instead of filtering the
# whole frame again for every classic drink.
ingredients_by_drink = dict(
    zip(cocktail_ingredients['strDrink'], cocktail_ingredients['strIngredients'])
)

# Fail early and name every unknown drink, rather than stopping at the first
# one with an opaque "list index out of range".
missing_classics = [drink for drink in classic_drinks if drink not in ingredients_by_drink]
if missing_classics:
    raise KeyError(f"classic drinks missing from the dataset: {missing_classics}")

for drink in classic_drinks:
    for ingredient in ingredients_by_drink[drink]:
        classic_ingredients[ingredient] += 1

In [None]:
# Rank the classic-drink ingredients from most to least frequently listed.
Counter(classic_ingredients).most_common()

[('Gin', 10),
 ('Sugar', 9),
 ('Vodka', 8),
 ('Lemon Juice', 6),
 ('Lemon', 6),
 ('Lime Juice', 5),
 ('Light Rum', 5),
 ('Grenadine', 4),
 ('Maraschino Cherry', 4),
 ('Tequila', 4),
 ('Cherry', 4),
 ('Lemon Peel', 3),
 ('Kahlua', 3),
 ('Lime', 3),
 ('Orange', 3),
 ('Triple Sec', 3),
 ('Amaretto', 2),
 ('Campari', 2),
 ('Orange Peel', 2),
 ('Maraschino Liqueur', 2),
 ("Bailey'S Irish Cream", 2),
 ('Sugar Syrup', 2),
 ('Cointreau', 2),
 ('Sweet And Sour', 2),
 ('Sweet Vermouth', 2),
 ('Blended Whiskey', 2),
 ('Water', 2),
 ('Bourbon', 2),
 ('Orange Juice', 2),
 ('Creme De Cacao White', 1),
 ('Nutmeg', 1),
 ('Light Cream', 1),
 ('Sour Mix', 1),
 ('Sweet Vermouth Red', 1),
 ('Creme De Mure', 1),
 ('Cachaca', 1),
 ('Egg White', 1),
 ('Cranberry Juice', 1),
 ('Absolut Citron', 1),
 ('Dry Vermouth', 1),
 ('Olive', 1),
 ('Olive Brine', 1),
 ('Champagne', 1),
 ('Raspberry Liqueur', 1),
 ('Pineapple Juice', 1),
 ('Scotch', 1),
 ('Rum', 1),
 ('Grapefruit Juice', 1),
 ('Coca-Cola', 1),
 ('Orgeat S

In [None]:
def can_be_made_with_classics(ingredients):
    """Return True when each entry of `ingredients` is a key of `classic_ingredients`."""
    for ingredient in ingredients:
        if ingredient not in classic_ingredients:
            return False
    return True

# Cocktails that need nothing beyond the ingredients already used by the classics.
classics_only_mask = cocktail_ingredients['strIngredients'].apply(can_be_made_with_classics)
more_cocktails = cocktail_ingredients[classics_only_mask]

In [None]:
# Display the cocktails whose ingredients are all among those used by the classic drinks.
more_cocktails

Unnamed: 0,strDrink,strIngredients
0,'57 Chevy with a White License Plate,"[Creme De Cacao White, Vodka]"
8,3-Mile Long Island Iced Tea,"[Light Rum, Vodka, Coca-Cola, Sweet And Sour, ..."
21,A True Amaretto Sour,"[Lemon, Maraschino Cherry, Amaretto]"
26,ABC,"[Amaretto, Bailey'S Irish Cream, Cognac]"
30,Abbey Martini,"[Sweet Vermouth, Orange Juice, Angostura Bitte..."
...,...,...
453,Waikiki Beachcomber,"[Gin, Pineapple Juice, Triple Sec]"
454,Whiskey Sour,"[Lemon, Lemon, Blended Whiskey, Cherry, Sugar]"
456,White Lady,"[Lemon Juice, Triple Sec, Gin]"
460,Wine Punch,"[Orange, Lemon, Pineapple Juice, Red Wine, Ora..."


In [None]:
# Count how many times each ingredient is listed across `more_cocktails`.
more_ingredients = defaultdict(int)

# `more_cocktails` is a row subset of `cocktail_ingredients`, so it already
# carries every drink's ingredient list; iterate that column directly instead
# of looking each drink up again with a full scan of `cocktail_ingredients`.
for ingredients in more_cocktails["strIngredients"]:
    for ingredient in ingredients:
        more_ingredients[ingredient] += 1

In [None]:
# NOTE(review): this writes the same two frames to the same two files as the
# earlier "Optionally, save to CSV files" cell — confirm whether a different
# export was intended here.
essential_ingredients_df.to_csv(path_or_buf='essential_ingredients.csv', index=False)
filtered_cocktails_df.to_csv(path_or_buf='filtered_cocktails.csv', index=False)

<a style='text-decoration:none;line-height:16px;display:flex;color:#5B5B62;padding:10px;justify-content:end;' href='https://deepnote.com?utm_source=created-in-deepnote-cell&projectId=08e32eae-680f-4766-bfea-eabbd1bdf534' target="_blank">
 </img>
Created in <span style='font-weight:600;margin-left:4px;'>Deepnote</span></a>