In [110]:
import requests
import pandas as pd
from dotenv import dotenv_values
import json
import os
import time
import re

In [111]:
# --- Configuration -----------------------------------------------------------
# Root of the API URL; the API key is appended as the next path segment when
# endpoint URLs are built further down.
base_url = "https://www.thecocktaildb.com/api/json/v1/"

# The key is read from the local .env file so it never appears in the notebook.
api_key = dotenv_values().get('cocktail_api_key')
if not api_key:
    # Without this check the URL would silently contain the text "None".
    raise RuntimeError("'cocktail_api_key' is missing or empty in the .env file.")

# Query parameters for filter.php: restrict the result to the "Cocktail" category.
params = {"c":'Cocktail' }

# Directory holding the raw JSON files that the load cell iterates over.
# NOTE(review): value chosen to match the 'raw_data' literal used by the
# clean-up cells at the end of the notebook -- confirm this is the intended path.
RAW_DATA_DIR = "raw_data"

In [112]:
# Ask filter.php for every drink matching `params`. On success the JSON body is
# a dictionary whose "drinks" key holds a list of per-drink dictionaries.
api_cocktails = f"{base_url}{api_key}/filter.php"

# Always bind drink_list so the prints below (and any later cell) never hit a
# NameError when the request fails or returns nothing.
drink_list = []

# A timeout keeps the cell from hanging forever if the server does not answer.
response = requests.get(api_cocktails, params=params, timeout=30)
if response.status_code == 200:
    # Parse the JSON response and collect the id of every cocktail.
    data = response.json()
    cocktails = data.get('drinks')
    # "drinks" may be absent, null, or (presumably, for an empty result) not a
    # list at all -- only a real list is iterated.
    if isinstance(cocktails, list):
        # Keep only the drink ids; they identify the drinks whose full details
        # are needed later.
        drink_list = [
            drink.get("idDrink")
            for drink in cocktails
            if isinstance(drink, dict) and drink.get("idDrink")
        ]
else:
    print(f"Request to {api_cocktails} failed with status code {response.status_code}")

print(drink_list)
print(len(drink_list))


['15346', '14029', '178318', '16108', '16943', '17005', '14560', '17222', '17223', '14107', '17224', '16134', '17225', '17226', '17227', '17228', '14272', '17229', '12560', '12562', '178321', '178325', '178353', '12564', '16311', '178319', '14584', '17074', '17066', '178337', '17180', '17267', '178320', '178317', '17254', '17268', '178336', '17242', '12572', '17251', '178331', '17825', '178311', '178310', '178356', '178329', '17174', '178369', '17830', '17250', '17196', '14133', '14608', '17177', '178334', '17181', '11005', '17182', '178346', '17246', '17212', '178309', '178344', '16485', '17213', '17248', '178352', '178328', '12758', '178340', '17255', '178342', '178314', '178366', '17230', '178365', '17252', '178316', '178345', '17239', '12706', '16987', '16178', '178359', '178335', '14366', '178360', '15224', '178358', '11008', '17256', '11720', '11728', '17188', '178370', '13936', '178343', '14842', '11000', '15841']
100


In [113]:
# Accumulator for the per-drink records; reset on every run of this cell so
# re-executing it does not append duplicates.
drink_details = list()

def clean_string(text):
    """Normalize the whitespace of a free-text field.

    Leading and trailing whitespace is removed and every internal run of
    whitespace (spaces, tabs, line breaks) is collapsed into a single space.

    Parameters
    ----------
    text : str or None
        The raw string. ``None`` (e.g. a JSON ``null`` field) is accepted.

    Returns
    -------
    str
        The cleaned string; an empty string when ``text`` is ``None``.
    """
    if text is None:
        # A null field would otherwise raise AttributeError on .strip().
        return ""
    # A single pass suffices: r"\s+" already matches newlines together with
    # any whitespace around them.
    return re.sub(r"\s+", " ", text.strip())

# Build one flat record per drink from every raw JSON file in RAW_DATA_DIR.

def _parse_drink_id(value):
    """Return ``value`` as an int, or None when it is missing or not numeric."""
    try:
        return int(value)
    except (TypeError, ValueError):
        return None


for file in os.listdir(RAW_DATA_DIR):
    file_path = os.path.join(RAW_DATA_DIR, file)
    if not os.path.isfile(file_path):
        continue  # sub-directories cannot be opened as JSON files

    # Explicit UTF-8: drink names contain non-ASCII characters (e.g. "’"),
    # which the platform default encoding may not decode correctly.
    with open(file_path, "r", encoding="utf-8") as f:
        data = json.load(f)

    # "drinks" may be missing or null; treat both as "no drinks in this file".
    drinks = data.get("drinks") or []
    for drink in drinks:
        # Read the 15 ingredient/measure slots once; unused slots are None or ''.
        slots = [
            (drink.get(f'strIngredient{i}'), drink.get(f'strMeasure{i}'))
            for i in range(1, 16)
        ]
        ingredients = [ingredient for ingredient, _ in slots if ingredient]
        measures = [measure.strip() for _, measure in slots if measure]
        # NOTE: an ingredient without a measure (e.g. a garnish) is left out of
        # the combined column, exactly as before.
        combined = [
            f"{ingredient}: {measure.strip()}"
            for ingredient, measure in slots
            if ingredient and measure
        ]

        # `or 'N/A'` (rather than a .get default) also covers keys that are
        # present but null, which would otherwise break .upper()/clean_string.
        drink_info = {
            'Drink ID': _parse_drink_id(drink.get('idDrink')),  # None when absent or non-numeric
            'Drink Name': drink.get('strDrink') or 'N/A',
            'Category': drink.get('strCategory') or 'N/A',
            'Alcoholic': (drink.get('strAlcoholic') or 'N/A').upper(),  # standardized to upper case
            'Glass': (drink.get('strGlass') or 'N/A').upper(),  # standardized to upper case
            'Instructions': clean_string(drink.get('strInstructions') or 'N/A'),  # collapse stray spaces/line breaks
            'Ingredients': clean_string(', '.join(ingredients)),
            'Ingredients Measure': clean_string(', '.join(measures)),
            'Ingredients_Full': clean_string(', '.join(combined)),  # "ingredient: measure" pairs
        }
        drink_details.append(drink_info)

# Load the data into a DataFrame
df_drinks = pd.DataFrame(drink_details)
display(df_drinks)

Unnamed: 0,Drink ID,Drink Name,Category,Alcoholic,Glass,Instructions,Ingredients,Ingredients Measure,Ingredients_Full
0,11000,Mojito,Cocktail,ALCOHOLIC,HIGHBALL GLASS,Muddle mint leaves with sugar and lime juice. ...,"Light rum, Lime, Sugar, Mint, Soda water","2-3 oz, Juice of 1, 2 tsp, 2-4","Light rum: 2-3 oz, Lime: Juice of 1, Sugar: 2 ..."
1,11005,Dry Martini,Cocktail,ALCOHOLIC,COCKTAIL GLASS,Straight: Pour all ingredients into mixing gla...,"Gin, Dry Vermouth, Olive","1 2/3 oz, 1/3 oz, 1","Gin: 1 2/3 oz, Dry Vermouth: 1/3 oz, Olive: 1"
2,11008,Manhattan,Cocktail,ALCOHOLIC,COCKTAIL GLASS,"Stirred over ice, strained into a chilled glas...","Sweet Vermouth, Bourbon, Angostura bitters, Ic...","3/4 oz, 2 1/2 oz Blended, dash, 2 or 3, 1, 1 t...","Sweet Vermouth: 3/4 oz, Bourbon: 2 1/2 oz Blen..."
3,11720,Martinez Cocktail,Cocktail,ALCOHOLIC,COCKTAIL GLASS,Stir all ingredients (except cherry) with ice ...,"Gin, Dry Vermouth, Triple sec, Orange bitters,...","1 oz, 1 oz, 1/4 tsp, 1 dash, 1","Gin: 1 oz, Dry Vermouth: 1 oz, Triple sec: 1/4..."
4,11728,Martini,Cocktail,ALCOHOLIC,COCKTAIL GLASS,Straight: Pour all ingredients into mixing gla...,"Gin, Dry Vermouth, Olive","1 2/3 oz, 1/3 oz, 1","Gin: 1 2/3 oz, Dry Vermouth: 1/3 oz, Olive: 1"
...,...,...,...,...,...,...,...,...,...
95,178360,Lemon Elderflower Spritzer,Cocktail,ALCOHOLIC,HIGHBALL GLASS,"Pour all ingredients over ice, stir and enjoy!","Elderflower cordial, Vodka, Soda Water, Fresh ...","2 tsp, 1 shot, 1/3 cup, Top","Elderflower cordial: 2 tsp, Vodka: 1 shot, Sod..."
96,178365,Gin Tonic,Cocktail,ALCOHOLIC,HIGHBALL GLASS,"Fill a highball glass with ice, pour the gin, ...","Gin, Tonic Water, Lemon Peel, Ice","4 cl, 10 cl, 1 Slice, cubes","Gin: 4 cl, Tonic Water: 10 cl, Lemon Peel: 1 S..."
97,178366,Gin Lemon,Cocktail,ALCOHOLIC,HIGHBALL GLASS,For the preparation of the gin lemon you will ...,"Gin, Lemon Juice, Lemon Peel, Ice","6 cl, 8 cl, 1 Slice, cubes","Gin: 6 cl, Lemon Juice: 8 cl, Lemon Peel: 1 Sl..."
98,178369,Cocktail Horse’s Neck,Cocktail,ALCOHOLIC,HIGHBALL GLASS,"Wash and brush an organic, untreated lemon, th...","Cognac, Ginger Beer, Angostura Bitters, Lemon ...","4 cl, 100 ml, 3 drops, 1","Cognac: 4 cl, Ginger Beer: 100 ml, Angostura B..."


In [114]:
# Write the drinks table to disk; the DataFrame index becomes a "RowID" column.
df_drinks.to_csv(
    path_or_buf="cocktails.csv",
    index_label="RowID",
)

In [None]:

# Clean-up: delete the raw files only when the number of loaded rows equals the
# number of raw files, i.e. when everything appears to have been loaded.
# The directory is listed once, and only regular files are counted so the
# comparison covers exactly the entries that would be deleted.
raw_file_paths = []
if os.path.isdir('raw_data'):
    raw_file_paths = [
        os.path.join('raw_data', filename)
        for filename in os.listdir('raw_data')
    ]
    raw_file_paths = [path for path in raw_file_paths if os.path.isfile(path)]

if len(df_drinks) == len(raw_file_paths):
    for file_path in raw_file_paths:
        try:
            os.remove(file_path)
            print(f"Deleted file: {file_path}")
        except OSError as e:
            # Report and continue so one locked/missing file does not stop the rest.
            print(f"Error deleting file {file_path}: {e}")
else:
    # Make the skipped clean-up visible instead of silently doing nothing.
    print(
        f"Skipped clean-up: {len(df_drinks)} rows loaded but "
        f"{len(raw_file_paths)} raw files found."
    )



all good


In [None]:
# Unconditional clean-up: remove every regular file in 'raw_data'.
# NOTE: unlike a guarded clean-up, nothing here checks that the data was loaded
# first -- run this cell deliberately.
for filename in os.listdir('raw_data'):
    file_path = os.path.join('raw_data', filename)
    try:
        # Anything that is not a regular file (e.g. a sub-directory) is left alone.
        if not os.path.isfile(file_path):
            continue
        os.remove(file_path)
        print(f"Deleted file: {file_path}")
    except Exception as e:
        print(f"Error deleting file {file_path}: {e}")

