# Cocktail Data Analysis

## 1. Getting Data From https://liquor.com

### Retrieving Recipe Links

In [1]:
from selenium import webdriver
from selenium.webdriver.support.select import Select
import time

# Base URL of the paginated recipe index; the page number is appended to it.
base_url = "https://www.liquor.com/recipes/page/"

# Selenium driver.
# NOTE(review): this is an absolute, machine-specific chromedriver path --
# adjust it (or make it configurable) before running on another machine.
driver = webdriver.Chrome("/Users/nowgeun/Desktop/chromedriver")

# Collected recipe URLs, in the order they appear on the index pages.
recipe_links_2 = []

# Index pages 1..47 are visited.
page_nums = range(1,48)

# try/finally guarantees the browser and the chromedriver process are shut
# down even if a page fails to load or an expected element is missing.
try:
    for i in page_nums:

        url = base_url+str(i)
        driver.get(url)

        # Extract block tiles with cocktail information
        tiles = driver.find_elements_by_class_name("card-type-recipe")

        # Extract links to cocktail recipe
        for tile in tiles:

            # The recipe link is the <a> inside the tile's headline element.
            headline = tile.find_element_by_class_name("archive-item-headline")
            cocktail_link = headline.find_element_by_tag_name("a").get_attribute("href")
            recipe_links_2.append(cocktail_link)
finally:
    driver.quit()


In [2]:
# Report how many recipe links were collected and preview the first five.
link_count = len(recipe_links_2)
print("There are {} recipes".format(link_count))
print("top 5 links ", recipe_links_2[:5])

There are 2303 recipes
top 5 links  ['https://www.liquor.com/recipes/gin-sonic/', 'https://www.liquor.com/recipes/haileys-comet/', 'https://www.liquor.com/recipes/tennessee-summertide/', 'https://www.liquor.com/recipes/sugar-baby/', 'https://www.liquor.com/recipes/miamians-julep/']


In [3]:
# Save the link information, one URL per line.
# The `with` block closes the file on exit, so no explicit close() is needed.
with open("liquor_com_recipes.csv","w") as d:
    for line in recipe_links_2:
        d.write(line+"\n")

### Extracting information from each recipe link

In [1]:
# Load the recipe URLs written by the link-saving cell (one URL per line).
# The `with` block closes the file handle once the lines have been read.
with open("liquor_com_recipes.csv","r") as f:
    urls_2 = [l.strip() for l in f.readlines()]

# This particular link seems broken. Exclude from analysis.
# Filtering (instead of list.remove) does not raise ValueError when the link
# is absent from the file, and drops every occurrence if it is listed twice.
broken_url = "https://www.liquor.com/recipes/godfather-101/"
urls_2 = [u for u in urls_2 if u != broken_url]

In [26]:
from bs4 import BeautifulSoup
import requests

# Placeholder stored when a recipe page lacks a given field.
NO_DATA = "No Data"

# Seconds to wait for a response before giving up on a URL, so that one
# unresponsive request cannot stall the whole crawl indefinitely.
REQUEST_TIMEOUT = 30


def _link_texts(soup, css_class):
    """Return the text of every <a> inside the first <div class=css_class>.

    Not all pages include this information; when the div is absent the
    result is ["No Data"]. A div that contains no links yields [].
    """
    container = soup.find("div", {"class": css_class})
    if container is None:
        return [NO_DATA]
    return [anchor.text for anchor in container.find_all("a")]


def _first_link_text(soup, css_class):
    """Return the text of the first <a> inside <div class=css_class>.

    Returns "No Data" when either the div or the link is missing.
    """
    container = soup.find("div", {"class": css_class})
    anchor = container.find("a") if container is not None else None
    return anchor.text if anchor is not None else NO_DATA


def _parse_recipe(soup):
    """Return the ingredient list as "<quantity>@<ingredient>" strings.

    Units missing either part, or where either part is empty after
    stripping whitespace, are skipped.
    """
    recipe = []
    # The first "x-recipe-unit" div is skipped, as in the original code --
    # presumably it is not an ingredient row; TODO confirm against the page.
    for unit in soup.find_all("div", {"class": "x-recipe-unit"})[1:]:
        quant_div = unit.find("div", {"class": "text-right"})
        ing_div = unit.find("div", {"class": "x-recipe-ingredient"})
        if quant_div is None or ing_div is None:
            continue
        quant = quant_div.text.strip()
        res_ing = ing_div.text.strip()
        if quant and res_ing:
            recipe.append(quant + "@" + res_ing)
    return recipe


# Maps cocktail name -> {"base", "flavor", "type", "strength", "recipe"}.
# Note: two URLs that yield the same name overwrite each other.
cocktail_data = {}

# URLs that could not be fetched or parsed; kept for later inspection
# instead of aborting the whole crawl on the first bad page.
failed_urls = []

for l in urls_2:
    try:
        req = requests.get(l, timeout=REQUEST_TIMEOUT)
        req.raise_for_status()
    except requests.RequestException as err:
        failed_urls.append(l)
        print("Skipping {}: {}".format(l, err))
        continue

    soup = BeautifulSoup(req.text, "html.parser")

    # Cocktail name: without it there is no key to store the entry under.
    name_tag = soup.find("h1", {"itemprop": "name"})
    if name_tag is None:
        failed_urls.append(l)
        print("Skipping {}: no cocktail name found".format(l))
        continue
    name = name_tag.text

    cocktail_data[name] = {"base": _link_texts(soup, "x-recipe-spirit"),
                           "flavor": _link_texts(soup, "x-recipe-flavor"),
                           "type": _first_link_text(soup, "x-recipe-type"),
                           "strength": _first_link_text(soup, "x-recipe-strength"),
                           "recipe": _parse_recipe(soup)
                           }

In [27]:
# Display the keys of cocktail_data, i.e. the cocktail names scraped so far.
cocktail_data.keys()

dict_keys(['Gin Sonic', 'Hailey’s Comet', 'Tennessee Summertide', 'Sugar Baby', 'Miamian’s Julep', 'Cabana Club', 'Holy Water', 'Banana Daiquiri', 'Amanyara Mojito', 'Honeysuckle', 'Rum Punch', 'Forefathers', 'Nu Mai Tai', 'Nightcap', 'Rum & Tonic', 'Summer Tonic', 'Grey Goose Magnifique', 'Bacardí Piña Colada', 'Frozen Margarita', 'Bombay Mule', 'Angostura Mai Tai', 'Oaks Lily', 'Angostura Rum Punch', 'Palo Santo Gimlet', 'Massimo Bond', 'Spicy Margarita', 'Kentucky River', 'Brandy Old Fashioned', 'Tamarind Margarita', 'Sea Blues', 'Blue Hawaii', 'Strangelove', 'Delete That Text', 'Cabana Boy', 'Night Flights', 'Dickel #12 Whisky Sour', 'Mexican Chardonnay', 'Life in Venice', 'Sunday Morning Sour', 'The Suffragette', 'Cazadores Loaded Michelada', 'Bacardí & Cola', 'Bacardí Cuatro Airmail', 'Bacardí Ocho Old Fashioned', 'Cazadores con Cerveza', 'Frontier Old Fashioned', 'Tennessee Tuxedo', 'Peppered Peach Tea Tom Collins', 'Perfect Paloma', 'Alpenglow Aspro', 'Tepiatl Sour', 'Citrónge 

In [28]:
import pandas as pd
# Build a frame from the scraped dict: each top-level key (cocktail name)
# becomes a column and each inner key ("base", "flavor", ...) becomes a row.
df = pd.DataFrame(data=cocktail_data)

In [30]:
# Preview the first five rows (head() defaults to n=5).
df.head()

Unnamed: 0,Gin Sonic,Hailey’s Comet,Tennessee Summertide,Sugar Baby,Miamian’s Julep,Cabana Club,Holy Water,Banana Daiquiri,Amanyara Mojito,Honeysuckle,...,Moon Mountain Greyhound,Citrus Flower Sour,The Irish Car Bomb,Flaming Dr. Pepper Shot,"""Plumdog"" Millionaire",New Year’s Sparkler,Finlandia Vodka Wild Berritini,VeeV Holiday Highball,Tropical Sunset,Hydrate
base,[Gin],"[Bourbon / American Whiskey, Whiskey]","[Bourbon / American Whiskey, Whiskey]","[Bourbon / American Whiskey, Whiskey]","[Bourbon / American Whiskey, Whiskey]",[Sherry],[Rum],[Rum],[Rum],[Rum],...,[Vodka],[Vodka],"[Beer, Irish Whiskey, Liqueurs]","[Beer, Liqueurs, Rum]",[Gin],[Vodka],[Vodka],[Liqueurs],[Vodka],[Vodka]
flavor,[Bubbly],[Sweet],[Spirit-forward],[Herbaceous],"[Creamy, Herbaceous]",[Sweet],"[Fruity/Citrus-forward, Spicy]","[Fruity/Citrus-forward, Sweet]","[Herbaceous, Sweet]","[Fruity/Citrus-forward, Sweet]",...,"[Fruity/Citrus-forward, Sweet]","[Fruity/Citrus-forward, Sour, Sweet]","[Bubbly, Spirit-forward]","[Bubbly, Spirit-forward]","[Salty/Savory, Spirit-forward]","[Fruity/Citrus-forward, Sweet]","[Fruity/Citrus-forward, Sweet]",[Fruity/Citrus-forward],[Fruity/Citrus-forward],[Fruity/Citrus-forward]
type,Modern Classics,Frozen / Blended,Frozen / Blended,Frozen / Blended,Frozen / Blended,Tiki / Tropical,Tiki / Tropical,Tiki / Tropical,Classics,Modern Classics,...,Modern Classics,Modern Classics,Classics,Shots,Modern Classics,Modern Classics,Modern Classics,Modern Classics,Tiki / Tropical,Modern Classics
strength,Medium,Medium,Medium,Medium,Medium,Medium,Medium,Medium,Medium,Medium,...,Medium,Medium,Medium,Medium,Strong,Medium,Medium,Medium,Medium,Medium
recipe,"[1 1⁄2 oz@Roku gin, 2 1⁄4 oz@Club soda, 2 1⁄4 ...","[1 oz@Peach-infused bourbon*, 3⁄4 oz@Pimm’s bl...",[1 1⁄2 oz@Chattanooga 1816 Cask unfiltered bar...,"[1@Large seedless watermelon, 1 cup@Bourbon, 1...","[2 oz@Woodinville bourbon, 1 oz@Coconut cream,...","[1 1⁄2 oz@Amontillado sherry, 1 oz@Fresh cocon...",[3⁄4 oz@Infused Hamilton Jamaican gold rum or ...,"[2 oz@Aged rum, 1⁄2 oz@Tempus Fugit crème de b...","[2 oz@Bacardí Superior light rum, 1 oz@Fresh l...","[2 oz@Angostura seven-year-old rum, 3⁄4 oz@Fre...",...,[1 1⁄4 oz@Moon Mountain Wild Raspberry Flavore...,[1 1⁄4 oz@Moon Mountain Coastal Citrus Flavore...,"[1⁄2 oz@Baileys Irish cream, 1⁄2 oz@Irish whis...","[3⁄4 oz@Amaretto, 1⁄4 oz@Rum]","[2 oz@Bulldog GIn, 1 oz@Japanese plum wine, 1 ...","[1 oz@Berry-flavored vodka, 1 1⁄2 oz@Pomegrana...","[3⁄4 oz@Finlandia Wild Berries Fusion Vodka, 1...","[2 oz@VeeV Açaí Spirit, 1⁄4 oz@Green Chartreus...","[1 1⁄2 oz@Van Gogh Pineapple Vodka, 1 oz@Van G...","[2 oz@Organic cucumber vodka, 2 1⁄2 oz@Puréed ..."


In [31]:
# Persist the scraped data to a CSV file in the working directory.
df.to_csv(path_or_buf="Cocktail_Data.csv")