In [1]:
#!pip install matplotlib
#!pip install pandas
#!pip install requests
#!pip install gmaps


# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
import gmaps
import os
import json

# Import API key
from api_keys import cocktail_api

# Output File (CSV)
# Relative path intended for the drinks CSV export.
# NOTE(review): no cell shown in this part of the notebook reads or writes
# this path — confirm it is still needed.
output_data_file = "output_data/drinks.csv"



In [2]:
# Endpoint that lists every drink whose name starts with the given character.
base_url = "https://www.thecocktaildb.com/api/json/v1/1/search.php?f="
# Leading characters to query: a-z, the digits, and the apostrophe.
letter_list = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', "1", "2", "3", "4", "5", "6", "7", "8", "9", "0", "'"]

# Number of strIngredientN / strMeasureN slots read from each drink record.
MAX_INGREDIENT_SLOTS = 15


def _ingredients_with_measures(drink):
    """Return parallel ``(ingredients, measures)`` lists for one drink record.

    Walks slots 1..MAX_INGREDIENT_SLOTS and skips every slot whose ingredient
    is missing or None. Each measure is read from the same slot number as its
    ingredient, so the two lists stay aligned even when an empty slot sits
    between two filled ones.
    """
    ingredients = []
    measures = []
    for slot in range(1, MAX_INGREDIENT_SLOTS + 1):
        ingredient = drink.get(f"strIngredient{slot}")
        if ingredient is None:
            continue
        ingredients.append(ingredient)
        measures.append(drink.get(f"strMeasure{slot}"))
    return ingredients, measures


# Initialize the parallel lists (one entry per drink) that feed the DataFrame.
drink_id = []
drink_name = []
drink_category = []
drink_alcohol = []
drink_glass = []
drink_ingredients = []
drink_measure = []

for letter in letter_list:
    # Request the drinks for this character and decode the JSON payload.
    drink_response = requests.get(base_url + letter)
    # Fail loudly on an HTTP error instead of on a confusing JSON decode error.
    drink_response.raise_for_status()
    drink_data = drink_response.json()["drinks"]

    # A character with no drinks yields an empty/None "drinks" value; report
    # it explicitly rather than relying on a swallowed exception.
    if not drink_data:
        print(f"No drinks staring with {letter} found.")
        continue

    # Visit every record, including the last one in the response.
    for drink in drink_data:
        drink_id.append(drink["idDrink"])
        drink_name.append(drink["strDrink"])
        drink_category.append(drink["strCategory"])

        # Anything not labelled exactly "Non alcoholic" is flagged as alcoholic.
        drink_alcohol.append(drink["strAlcoholic"] != "Non alcoholic")

        drink_glass.append(drink["strGlass"])

        ingredient_list, measure_list = _ingredients_with_measures(drink)
        drink_ingredients.append(ingredient_list)
        drink_measure.append(measure_list)


# Create the data frame from the lists built above (one row per drink).
drinks_df = pd.DataFrame({
    "Drink ID": drink_id,
    "Name": drink_name,
    "Category": drink_category,
    "Alcoholic": drink_alcohol,
    "Glass": drink_glass,
    "Ingredients": drink_ingredients,
    "Measure": drink_measure
})


No drinks staring with u found.
No drinks staring with x found.
No drinks staring with 8 found.
No drinks staring with 0 found.


In [3]:
# Clean glass data: lower-case every glass name, then fold three glass
# variants into the broader glass name they are reported under.
glass_aliases = {
    "margarita/coupette glass": "margarita glass",
    "white wine glass": "wine glass",
    "whiskey sour glass": "old-fashioned glass",
}
drinks_df["Glass"] = drinks_df["Glass"].str.lower().replace(glass_aliases)

## Question Two Initial Cleaning

In [4]:
# Count how many drinks use each ingredient, then rank the ingredients.
# Useful for finding the most used ingredient.
ing_count = {}
# Iterate the column directly so the loop does not depend on the frame
# having a default 0..n-1 index.
for ingredients in drinks_df['Ingredients']:
    for ing in ingredients:
        ing_count[ing] = ing_count.get(ing, 0) + 1

# Sorted list of (ingredient, count) tuples, most used first; ingredients
# with equal counts keep the order in which they were first seen.
a = sorted(ing_count.items(), key=lambda x: x[1], reverse=True)
print(a)

In [5]:
### NOTE: This CSV read is only here so the notebook can be run end to end.
### Running it is not recommended, though!

# Load the saved ingredient list, discard the 'Unnamed: 0' column, and keep
# the Name column for the classification step.
df = pd.read_csv("ingredients_list.csv").drop(columns=['Unnamed: 0'])
drink_names = df["Name"]

In [None]:
# Two empty buckets: names classified as alcoholic, and everything else.

alcoholic, non_alcoholic = [], []

In [None]:
# Manual (and admittedly ugly) classification: prompt once per name and file
# it under alcoholic or non-alcoholic. Pressing Enter or typing "y" means
# alcoholic; any other answer means non-alcoholic.

for n in drink_names:
    answer = input(f"Is {n} alcoholic?\r")
    bucket = alcoholic if answer in ("", "y") else non_alcoholic
    bucket.append(n)

In [None]:
# Turn each list into a pandas Series with missing entries removed.

alcoholic_df, non_alcoholic_df = (
    pd.Series(names).dropna() for names in (alcoholic, non_alcoholic)
)

In [None]:
# Persist both Series to CSV so the manual classification never has to be
# repeated.
# NOTE(review): the cells that later read alcohol_items.csv use 'Names' and
# 'Count' columns, which an unnamed Series written this way would not
# contain — confirm how that file was produced.

for series, csv_path in (
    (alcoholic_df, "alcohol_items.csv"),
    (non_alcoholic_df, "non_alcohol_items.csv"),
):
    series.to_csv(csv_path)

## Question Two Further Cleaning

In [6]:
# Reading alcohol_items.csv back into alcoholic_df so that you can actually
# run this Notebook. I recommend against it though!
# The cells below read the 'Names' and 'Count' columns of this frame.

alcoholic_df = pd.read_csv("alcohol_items.csv")

In [None]:
# Keywords used for bulk categorization: any name containing one of these
# is collapsed to the keyword itself.

alcohol_groups = (
    "gin liqueur vodka rum "
    "schnapps scotch wine creme "
    "vermouth sambuca whiskey tequila "
    "absolut brandy"
).split()

In [None]:
# Collapse every item whose name contains a group keyword into that keyword.
# Example: Bacardi Rum, Dark Rum, Light Rum and Rum are all assigned to rum.
# Shrinks the list from over 150 items down to under 90.
#
# Keywords are applied in list order, so a name matching several keywords
# ends up under the first one that matches (it is renamed at that point).

for g in alcohol_groups:
    # case=False  -> "Dark Rum" matches the lower-case keyword "rum";
    # regex=False -> the keyword is a plain substring, not a pattern;
    # na=False    -> rows with a missing name are left alone instead of
    #                making the boolean mask invalid.
    matches = alcoholic_df['Names'].str.contains(g, case=False, regex=False, na=False)
    alcoholic_df.loc[matches, 'Names'] = g

In [None]:
# Sum the counts per (collapsed) name and store the result in a new
# DataFrame, largest count first. Using a separate frame is not strictly
# necessary; it just avoids tampering with the earlier data.

alcohol_names = alcoholic_df.groupby(['Names'])
add_values = alcohol_names['Count'].sum()
alcoholic_df_ = (
    add_values
    .to_frame()
    .sort_values('Count', ascending=False)
    .reset_index()
)

In [None]:
# With about 90 items left, each named item now has to be assigned to a
# bigger category. More manual work: one prompt per item.
# For example, Absolut to Vodka, Prosecco to Wine, etc.

Categorization = []
total_items = len(alcoholic_df_)
for counter, n in enumerate(alcoholic_df_['Names'], start=1):
    categorization = input(f"({counter}/{total_items}) Categorize {n}: ")
    Categorization.append(categorization)

In [None]:
# Attach the manually entered categories as a Category column and drop the
# Names column, so the category stands in for the name from here on.

alcoholic_df_ = (
    alcoholic_df_
    .assign(Category=Categorization)
    .drop(columns=['Names'])
)

In [None]:
# Sum the counts per Category to get totals for the roughly 15 alcohol
# categories, largest first. The result goes into yet another DataFrame;
# not strictly necessary, but it keeps each step simple to follow.

alcohol_categories = alcoholic_df_.groupby(['Category'])
add_totals = alcohol_categories['Count'].sum()
alcohol_categories_df = (
    add_totals
    .to_frame()
    .sort_values('Count', ascending=False)
    .reset_index()
)

In [None]:
# Write this DataFrame to a CSV just so I don't have to re-categorize every time I run this stuff (Duh!)

# alcohol_categories_df.to_csv("alcohol_categories.csv")