In [1]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
import gmaps
import os
import json

In [2]:
# Tempory data frame to pull in the drinks.csv file
temp_df = pd.read_csv("output_data/drinks.csv")

In [3]:
# Display head of data frame. Feel free to delete
temp_df.head()

Unnamed: 0,Drink ID,Name,Category,Alcoholic,Glass,Ingredients,Measure
0,17222,A1,Cocktail,True,Cocktail glass,"['Gin', 'Grand Marnier', 'Lemon Juice', 'Grena...","['1 3/4 shot ', '1 Shot ', '1/4 Shot', '1/8 Sh..."
1,13501,ABC,Shot,True,Shot glass,"['Amaretto', 'Baileys irish cream', 'Cognac']","['1/3 ', '1/3 ', '1/3 ']"
2,17225,Ace,Cocktail,True,Martini Glass,"['Gin', 'Grenadine', 'Heavy cream', 'Milk', 'E...","['2 shots ', '1/2 shot ', '1/2 shot ', '1/2 sh..."
3,17837,Adam,Ordinary Drink,True,Cocktail glass,"['Dark rum', 'Lemon juice', 'Grenadine']","['2 oz ', '1 oz ', '1 tsp ']"
4,13938,AT&T,Ordinary Drink,True,Highball Glass,"['Absolut Vodka', 'Gin', 'Tonic water']","['1 oz ', '1 oz ', '4 oz ']"


In [4]:
# List conversion function. Let me know if there are any issues
def convert(s):
    lst = []
    temp = s.split(", ")
    
    for x in temp:
        lst.append(x.translate({ord(i): None for i in "[']"}))
    
    return lst

In [5]:
# Necessary copy of data frame. The value count is just a test. Feel free to delete that line
drink_df = temp_df.copy()
drink_df['Glass'].value_counts()

Cocktail glass              92
Highball glass              62
Old-fashioned glass         38
Collins Glass               29
Shot glass                  25
Collins glass               24
Highball Glass              15
Whiskey sour glass          13
Coffee mug                   9
Punch bowl                   7
Hurricane glass              6
Champagne flute              5
Irish coffee cup             5
Coffee Mug                   4
Wine Glass                   4
Pitcher                      4
Martini Glass                4
Old-Fashioned glass          4
Balloon Glass                3
Pint glass                   3
Brandy snifter               3
White wine glass             3
Nick and Nora Glass          3
Beer mug                     3
Margarita/Coupette glass     2
Beer pilsner                 2
Margarita glass              2
Champagne Flute              2
Mason jar                    2
Beer Glass                   2
Shot Glass                   1
Pousse cafe glass            1
Cocktail

In [6]:
# Creation of main data frame. All plots should use this data (i.e. drink_df)
for i in range(0, len(drink_df)):
    drink_df.at[i,'Ingredients'] = convert(temp_df.loc[i,'Ingredients'])

In [7]:
# Creation of sorted list of tuples. Useful for most used ingredient
ing_count = {}
for i in range(0, len(drink_df)):
    for ing in drink_df.loc[i,'Ingredients']:
        if ing in ing_count:
            ing_count[ing] = ing_count[ing] + 1
        else:
            ing_count[ing] = 1
            
#print(ing_count)
a=sorted(ing_count.items(), key=lambda x: x[1], reverse = True)
print(a)
#print(drink2_df['Ingredients'].to_string())

[('Gin', 72), ('Vodka', 54), ('Sugar', 49), ('Lemon juice', 37), ('Orange juice', 37), ('Lemon', 34), ('Grenadine', 29), ('Ice', 29), ('Light rum', 27), ('Powdered sugar', 26), ('Water', 26), ('Coffee', 24), ('Lime', 24), ('Amaretto', 21), ('Milk', 21), ('Dry Vermouth', 20), ('Kahlua', 19), ('Triple sec', 18), ('Lemon peel', 18), ('Sweet Vermouth', 17), ('Baileys irish cream', 15), ('Rum', 15), ('Tequila', 15), ('Pineapple juice', 14), ('', 14), ('151 proof rum', 13), ('Maraschino cherry', 13), ('Lime juice', 13), ('Scotch', 13), ('Bitters', 13), ('Cherry', 13), ('Brandy', 13), ('Orange', 13), ('Egg white', 12), ('Light cream', 12), ('Lime Juice', 12), ('Carbonated water', 12), ('Dark rum', 11), ('Cranberry juice', 11), ('Ginger ale', 11), ('Nutmeg', 10), ('Mint', 9), ('Bourbon', 9), ('Soda water', 9), ('Coca-Cola', 9), ('Red wine', 9), ('Salt', 8), ('Grand Marnier', 7), ('Club soda', 7), ('Blended whiskey', 7), ('Sambuca', 7), ('Champagne', 7), ('Peach schnapps', 7), ('Sugar syrup', 7

In [8]:
# Creation of new series. Feel free to delete if not needed.

drink_df['Ingredients Count'] = 0
for i in range(0, len(drink_df)):
    drink_df.loc[i,'Ingredients Count'] = len(drink_df.loc[i,'Ingredients'])
drink_df.head()

Unnamed: 0,Drink ID,Name,Category,Alcoholic,Glass,Ingredients,Measure,Ingredients Count
0,17222,A1,Cocktail,True,Cocktail glass,"[Gin, Grand Marnier, Lemon Juice, Grenadine]","['1 3/4 shot ', '1 Shot ', '1/4 Shot', '1/8 Sh...",4
1,13501,ABC,Shot,True,Shot glass,"[Amaretto, Baileys irish cream, Cognac]","['1/3 ', '1/3 ', '1/3 ']",3
2,17225,Ace,Cocktail,True,Martini Glass,"[Gin, Grenadine, Heavy cream, Milk, Egg White]","['2 shots ', '1/2 shot ', '1/2 shot ', '1/2 sh...",5
3,17837,Adam,Ordinary Drink,True,Cocktail glass,"[Dark rum, Lemon juice, Grenadine]","['2 oz ', '1 oz ', '1 tsp ']",3
4,13938,AT&T,Ordinary Drink,True,Highball Glass,"[Absolut Vodka, Gin, Tonic water]","['1 oz ', '1 oz ', '4 oz ']",3


In [9]:
# Phase One Below -- Note these phases can be merged.

In [None]:
liquor_count = pd.DataFrame.from_dict(ing_count, orient='index')
liquor_count.reset_index(level=0, inplace=True)
liquor_count.columns = ['Name','Count']
liquor_count['Name'] = liquor_count['Name'].str.lower()
liquor_count = liquor_count.sort_values(by=['Count'], ascending=False)
liquor_count.to_csv('ingredients_list.csv')

In [None]:
# Phase Two -- Note these phases can be merged.

In [None]:
#df = pd.read_csv("ingredients_list.csv")
#df = df.drop('Unnamed: 0', axis=1)
#df.head(1)

In [None]:
#drink_names = df.Name

In [None]:
# alcoholic = []
# non_alcoholic = []

In [None]:
# Do not un-comment and run this please
# for d in drink_names:
#         answer = input(f"Is {d} alcoholic?\r")
#         if answer == "" or answer == "y":
#             alcoholic.append(d)
#         else:
#             non_alcoholic.append(d)

In [None]:
#alcoholic_df = pd.Series(alcoholic).dropna()
#non_alcoholic_df = pd.Series(non_alcoholic).dropna()

In [None]:
#alcoholic_df.to_csv("alcohol_items.csv", header=True)
#non_alcoholic_df.to_csv("non_alcohol_items.csv", header=True)

In [None]:
# Phase Three -- Note these phases can be merged.

In [None]:
alcoholic_df = pd.read_csv('alcohol_items.csv')
non_alcohol_df = pd.read_csv('non_alcohol_items.csv')
alcoholic_df = alcoholic_df.drop('Unnamed: 0', axis=1)
non_alcohol_df = non_alcohol_df.drop('Unnamed: 0', axis=1)

In [None]:
alcohol_groups = [
    'gin','liqueur','vodka','rum',
    'schnapps','scotch','wine','creme',
    'vermouth','sambuca','whiskey','tequila',
    'absolut','brandy'
]

In [None]:
#converter
for g in alcohol_groups:
    converter = alcoholic_df.loc[alcoholic_df['Names'].str.contains(g), 'Names'] = g

In [None]:
alcohol_names = alcoholic_df.groupby(['Names'])
add_values = alcohol_names['Count'].sum()
alcoholic_df_ = pd.DataFrame(add_values)

In [None]:
alcoholic_df_.sort_values(by=['Count'], ascending=False).head(30)