In [2]:
import requests
import pandas as pd
from dotenv import dotenv_values
import json
import os
import time
import re

In [3]:
# Root of TheCocktailDB JSON API; the API key is appended as the next path segment.
base_url = "https://www.thecocktaildb.com/api/json/v1/"
# Prefer the key from the local .env file, then fall back to the process environment
# so the notebook also runs where secrets are injected as environment variables.
api_key = dotenv_values().get('cocktail_api_key') or os.environ.get('cocktail_api_key')
if not api_key:
    # Without this check the literal string "None" would be interpolated into every
    # request URL and the failure would only surface later as an HTTP error.
    raise RuntimeError(
        "cocktail_api_key is not set. Add it to the .env file or export it as an environment variable."
    )
# Query parameters for filter.php: restrict the listing to the 'Cocktail' category.
params = {"c": 'Cocktail'}

In [4]:
# filter.php returns a dictionary with "drinks" as key and a list of matching drinks as value.
api_cocktails = f"{base_url}{api_key}/filter.php"
# Defined before the request so later cells never hit a NameError when the call
# fails or returns no drinks.
drink_list = []
# A timeout keeps the cell from hanging forever if the server stops responding.
response = requests.get(api_cocktails, params=params, timeout=30)
if response.status_code == 200:
    # Parse JSON response and get the ids of all cocktails.
    data = response.json()
    # Each entry is a dictionary describing one cocktail; `or []` also covers a null value.
    cocktails = data.get('drinks') or []
    # NOTE(review): guard against a non-list payload (e.g. an error string) - confirm
    # against the API's behaviour for unknown categories.
    if isinstance(cocktails, list):
        # Keep only the drink ids; this list is iterated next to look up the full
        # details of every drink from the lookup endpoint.
        drink_list = [drink.get("idDrink") for drink in cocktails if drink.get("idDrink")]
    print(drink_list)
    print(len(drink_list))
else:
    print(f"Failed to fetch cocktail list. HTTP Status Code: {response.status_code}")


['15346', '14029', '178318', '16108', '16943', '17005', '14560', '17222', '17223', '14107', '17224', '16134', '17225', '17226', '17227', '17228', '14272', '17229', '12560', '12562', '178321', '178325', '178353', '12564', '16311', '178319', '14584', '17074', '17066', '178337', '17180', '17267', '178320', '178317', '17254', '17268', '178336', '17242', '12572', '17251', '178331', '17825', '178311', '178310', '178356', '178329', '17174', '178369', '17830', '17250', '17196', '14133', '14608', '17177', '178334', '17181', '11005', '17182', '178346', '17246', '17212', '178309', '178344', '16485', '17213', '17248', '178352', '178328', '12758', '178340', '17255', '178342', '178314', '178366', '17230', '178365', '17252', '178316', '178345', '17239', '12706', '16987', '16178', '178359', '178335', '14366', '178360', '15224', '178358', '11008', '17256', '11720', '11728', '17188', '178370', '13936', '178343', '14842', '11000', '15841']
100


In [5]:

# Function to fetch drink details by id
def fetch_drink_details(drink_id):
    """Look up a single drink by its id and return the API's list of matches.

    Args:
        drink_id: Identifier of the drink, sent as the ``i`` query parameter
            of ``lookup.php``.

    Returns:
        The value stored under ``"drinks"`` in the JSON response, or an empty
        list when the request does not return HTTP 200 or when ``"drinks"`` is
        missing or null.
    """
    api_search = f"{base_url}{api_key}/lookup.php"
    # Let requests build (and URL-encode) the query string; the timeout prevents
    # an unresponsive server from blocking the notebook indefinitely.
    response = requests.get(api_search, params={"i": drink_id}, timeout=30)
    if response.status_code == 200:
        # `or []` covers a "drinks" key that is present with a null value, so
        # callers can always iterate over the result.
        return response.json().get("drinks") or []
    else:
        print(f"Failed to fetch details for {drink_id}. HTTP Status Code: {response.status_code}")
        return []

In [None]:
# Batch-processing approach: every raw API response is first written to disk and
# processed afterwards (the rationale is explained in a separate file).

# Upper bound on retries per drink when the API answers with an error such as 429.
max_retries = 3

# Folder that receives one raw JSON file per drink; created on first run and
# reused on later runs.
RAW_DATA_DIR = "raw_data"
os.makedirs(RAW_DATA_DIR, exist_ok=True)
# Function to fetch raw data and save to file
def fetch_and_store_raw_data(drinks):
    """Download the raw lookup response for every drink id and save it as JSON.

    For each id, ``lookup.php`` is requested and the parsed body is written to
    ``<RAW_DATA_DIR>/<drink_id>.json``. An HTTP 429 response is retried up to
    ``max_retries`` times; any other failure is reported and the drink is
    skipped so the rest of the batch still runs.

    Args:
        drinks: Iterable of drink ids to fetch.

    Returns:
        None. Progress and failures are reported with ``print``.
    """
    default_wait_seconds = 10  # used when the server sends no usable Retry-After header
    api_search = f"{base_url}{api_key}/lookup.php"
    for drink_id in drinks:
        raw_data_path = os.path.join(RAW_DATA_DIR, f"{drink_id}.json")
        # One initial attempt plus up to `max_retries` retries.
        for attempt in range(max_retries + 1):
            try:
                response = requests.get(api_search, params={"i": drink_id}, timeout=30)
            except requests.exceptions.RequestException as exc:
                # A network problem for one drink must not abort the whole batch.
                print(f"Failed to fetch data for {drink_id}. Request error: {exc}")
                break
            if response.status_code == 200:
                # Parse before opening the file so an unparsable body never leaves
                # an empty .json file behind that would later be counted as success.
                try:
                    payload = response.json()
                except ValueError:
                    print(f"Failed to fetch data for {drink_id}. Response was not valid JSON.")
                    break
                with open(raw_data_path, "w") as f:
                    json.dump(payload, f)
                print(f"Raw data for {drink_id} saved to {raw_data_path}")
                break  # done with this drink
            if response.status_code != 429:
                print(f"Failed to fetch data for {drink_id}. HTTP Status Code: {response.status_code}")
                break
            # HTTP 429: too many requests.
            if attempt == max_retries:
                # Retry budget exhausted: report it instead of sleeping again and
                # silently moving on.
                print(f"Rate limit reached for {drink_id}. Giving up after {max_retries} retries.")
                break
            # Honour the server's Retry-After hint when it is a plain number of seconds.
            retry_after = response.headers.get("Retry-After", "")
            wait_seconds = int(retry_after) if retry_after.isdigit() else default_wait_seconds
            print(f"Rate limit reached for {drink_id}. Retrying in {wait_seconds} seconds...")
            time.sleep(wait_seconds)

In [None]:
# Download and store the raw lookup response for every cocktail id collected above
# (one HTTP request and one JSON file per id).
fetch_and_store_raw_data(drink_list)


Raw data for 15346 saved to raw_data\15346.json
Raw data for 14029 saved to raw_data\14029.json
Raw data for 178318 saved to raw_data\178318.json
Raw data for 16108 saved to raw_data\16108.json
Raw data for 16943 saved to raw_data\16943.json
Raw data for 17005 saved to raw_data\17005.json
Raw data for 14560 saved to raw_data\14560.json
Raw data for 17222 saved to raw_data\17222.json
Raw data for 17223 saved to raw_data\17223.json
Raw data for 14107 saved to raw_data\14107.json
Raw data for 17224 saved to raw_data\17224.json
Raw data for 16134 saved to raw_data\16134.json
Raw data for 17225 saved to raw_data\17225.json
Raw data for 17226 saved to raw_data\17226.json
Raw data for 17227 saved to raw_data\17227.json
Raw data for 17228 saved to raw_data\17228.json
Raw data for 14272 saved to raw_data\14272.json
Raw data for 17229 saved to raw_data\17229.json
Raw data for 12560 saved to raw_data\12560.json
Raw data for 12562 saved to raw_data\12562.json
Raw data for 178321 saved to raw_data\

In [None]:
# Verify per id rather than by counting directory entries: a plain count is thrown
# off by leftover files from earlier runs, unrelated files in the folder, or
# duplicate ids in drink_list.
missing_ids = sorted(
    {
        str(drink_id)
        for drink_id in drink_list
        if not os.path.isfile(os.path.join(RAW_DATA_DIR, f"{drink_id}.json"))
    }
)
if not missing_ids:
    print("All cocktails raw data extracted.")
else:
    # Name the ids that are missing so the failure is actionable.
    raise Exception(f"Missing cocktails. Please, review code. Missing ids: {missing_ids}")

All cocktails raw data extracted.
