In [None]:
#Importing the libraries
import json
import requests
import logging
import time
from datetime import datetime
from fractions import Fraction

In [None]:
# Configure the root logger once for the whole workflow so that activity and
# errors are written to a log file instead of the console.
# NOTE: basicConfig() is a no-op if the root logger already has handlers
# (e.g. when this cell is re-run in the same interpreter session).
logging.basicConfig(
    filename="cocktail_data_ingestion.log",  # Log file, relative to the current working directory
    level=logging.INFO,  # Record INFO and anything more severe; DEBUG messages are dropped
    format="%(asctime)s - %(levelname)s - %(message)s"  # "<timestamp> - <severity> - <message>"
)

In [None]:
# Helper class to handle the variations in measurement units (e.g., "1 oz" to ml)
class IngredientUnitNormalizer:
    """
    Convert free-text ingredient measures (e.g. "1 1/2 oz") into millilitres.

    All methods are static; the class only groups the conversion tables with
    the parsing logic that uses them.
    """

    # Conversion rates for various units to ml
    UNIT_CONVERSIONS = {
        "oz": 29.5735,
        "ml": 1,  # base unit for all other conversions
        "cl": 10,
        "cup": 240,
        "tbsp": 14.7868,
        "tsp": 4.92892,
        "shot": 44.36,
        "jigger": 44.36,
        "pint": 473.176
    }

    # Approximate ml equivalents for descriptive measurements like "splash" or "pinch"
    DESCRIPTIVE_UNITS = {
        "pinch": 0.36,
        "splash": 5,
        "dash": 0.92,
        "sprig": 0.5,
        "drop": 0.05,
        "chunk": 15,
        "piece": 10,
        "slice": 10,
        "wedge": 10,
        "handful": 100,
        "scoop": 60,  # singular form added so "1 scoop" resolves like "2 scoops"
        "scoops": 60,
        "part": None  # "Part" cannot be defined without context
    }

    @staticmethod
    def _to_number(token):
        """
        Parse a numeric token such as "2", "1.5" or "1/2" into a float.

        Raises ValueError when the token is not numeric or divides by zero.
        """
        numerator, slash, denominator = token.partition("/")
        if not slash:
            return float(token)
        divisor = float(denominator)
        if divisor == 0:
            raise ValueError(f"zero denominator in fraction: {token}")
        return float(numerator) / divisor

    @staticmethod
    def _factor_for(unit, tables):
        """
        Look up the ml factor for `unit` in `tables`, tolerating simple plurals.

        The exact spelling is tried first, then the unit without a trailing
        "s" ("shots" -> "shot"), then without a trailing "es"
        ("dashes" -> "dash"). Returns None when the unit is unknown or when
        its table entry is None (e.g. "part").
        """
        candidates = [unit]
        if unit.endswith("s"):
            candidates.append(unit[:-1])
        if unit.endswith("es"):
            candidates.append(unit[:-2])
        for candidate in candidates:
            for table in tables:
                if candidate in table:
                    return table[candidate]
        return None

    @staticmethod
    def normalize(measure):
        """
        Convert ingredient measurements into a standard format (ml)

        Supported shapes: "<number> <unit>", "<fraction> <unit>",
        "<whole> <fraction> <unit>" and a bare descriptive unit ("pinch").
        Any words after the unit are ignored.

        Returns a (value, unit) tuple:
          * (None, None)       when no measurement is provided
          * (<float>, "ml")    when the measure could be converted
          * (None, "unknown")  when the measure could not be interpreted
        """
        if not measure:
            return None, None  # Return None if no measurement is provided

        normalizer = IngredientUnitNormalizer
        measure = measure.strip().lower()  # Clean up the input
        tokens = measure.split()  # Split measurement into value and unit

        if len(tokens) == 1:
            # Handle single-word measures like "pinch"; entries without a
            # defined volume ("part") are reported as unknown, never as ml.
            factor = normalizer._factor_for(tokens[0], (normalizer.DESCRIPTIVE_UNITS,))
            if factor is not None:
                return factor, "ml"
        elif len(tokens) >= 2:
            try:
                quantity = normalizer._to_number(tokens[0])
            except ValueError:
                print(f"Could not process measurement: {measure}")
                return None, "unknown"

            rest = tokens[1:]
            # Mixed numbers such as "1 1/2 oz": a whole number directly
            # followed by a fraction token is summed into one quantity.
            if quantity.is_integer() and "/" in rest[0]:
                try:
                    fraction = normalizer._to_number(rest[0])
                except ValueError:
                    pass  # Not a fraction after all; treat the token as the unit
                else:
                    quantity += fraction
                    rest = rest[1:]

            if rest:
                factor = normalizer._factor_for(
                    rest[0],
                    (normalizer.UNIT_CONVERSIONS, normalizer.DESCRIPTIVE_UNITS),
                )
                if factor is not None:
                    return quantity * factor, "ml"

        return None, "unknown"  # If all else fails, return as unknown

In [None]:
# The class to fetch the cocktail data from the API
class CocktailAPIHandler:
    """
    Fetch cocktails from TheCocktailDB and map them to a common document model.

    After fetch_all_cocktails() has run, the processed documents are available
    in the `cocktails` attribute.
    """

    BASE_URL = "https://www.thecocktaildb.com/api/json/v1/1/search.php?f={}"  #TheCocktailDB API to fetch cocktail data
    REQUEST_TIMEOUT = 10  # Seconds to wait per request, so a stalled connection cannot hang the run

    def __init__(self):
        self.cocktails = []  # Placeholder for all fetched cocktails

    def fetch_all_cocktails(self):
        """
        Retrieve a dataset of cocktails that can be used for transformation

        One request is issued per letter a-z. A failing letter is reported
        (logged and printed) and skipped, so the remaining letters are still
        fetched. The collected documents are stored in `self.cocktails`.
        """
        all_cocktails = []

        for letter in "abcdefghijklmnopqrstuvwxyz":  # Loop over A-Z
            try:
                logging.info(f"Fetching cocktails starting with: {letter}")
                response = requests.get(
                    self.BASE_URL.format(letter),
                    timeout=self.REQUEST_TIMEOUT,
                )
                response.raise_for_status()  # Raise an error if the request fails
                payload = response.json()
            except (requests.exceptions.RequestException, ValueError) as e:
                # ValueError covers non-JSON bodies on requests versions whose
                # decode error is not a RequestException subclass.
                logging.error(f"Error fetching data for letter {letter}: {e}")
                print(f"Error fetching data for letter {letter}: {e}")
                continue

            drinks = payload.get("drinks") if isinstance(payload, dict) else None
            if isinstance(drinks, list):
                all_cocktails.extend(self._process_cocktail_data(drinks))  # Process the data
            elif drinks is not None:
                # Anything that is neither a list nor null cannot be processed
                # as drink records; skip it instead of crashing the workflow.
                logging.warning(f"Unexpected 'drinks' payload for letter {letter}: {drinks!r}")

        self.cocktails = all_cocktails

    def _process_cocktail_data(self, data):
        """
        Clean and transform raw cocktail data into a standard format

        `data` is an iterable of raw drink dicts. Each one must contain
        "idDrink" and "strDrink" (KeyError otherwise); all other fields are
        optional and default to None. Returns a list of documents, each with
        an "ingredients" list of
        {"name", "measure", "normalized_value", "normalized_unit"} dicts.
        """
        processed_data = []

        for cocktail in data:
            # Map the raw cocktail data from the API to a common information model
            document = {
                "id": cocktail["idDrink"],
                "name": cocktail["strDrink"],
                "category": cocktail.get("strCategory"),
                "alcoholic": cocktail.get("strAlcoholic"),
                "glass": cocktail.get("strGlass"),
                "instructions": cocktail.get("strInstructions"),
                "thumbnail": cocktail.get("strDrinkThumb"),
                "source": "TheCocktailDB",
                "ingredients": []
            }

            # Identify and address edge cases in the data (normalize units)
            ingredients = []
            for i in range(1, 16):  # The API provides up to 15 ingredients
                ingredient = cocktail.get(f"strIngredient{i}")
                name = ingredient.strip() if isinstance(ingredient, str) else ""
                if not name:
                    continue  # Skip missing, non-text and whitespace-only slots

                measure = cocktail.get(f"strMeasure{i}")
                normalized_value, normalized_unit = IngredientUnitNormalizer.normalize(measure)
                ingredients.append({
                    "name": name,
                    "measure": measure.strip() if measure else None,
                    "normalized_value": normalized_value,
                    "normalized_unit": normalized_unit
                })
            document["ingredients"] = ingredients  # Add ingredients to the document
            processed_data.append(document)

        return processed_data

In [None]:
# The Class to save the processed cocktail data into a file
class CocktailDataSaver:
    """Writes processed cocktail documents to a timestamped JSON file."""

    @staticmethod
    def save_to_file(data):
        """
        Dump `data` as indented JSON into "cocktails_<YYYYmmdd_HHMMSS>.json".

        Nothing is written when `data` is empty or None. Success and I/O
        failures are both reported on standard output; the method always
        returns None.
        """
        if data:
            timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
            file_name = f"cocktails_{timestamp}.json"
            try:
                with open(file_name, "w") as handle:
                    # indent=4 keeps the output human-readable
                    json.dump(data, handle, indent=4)
                print(f"Data saved to '{file_name}'.")
            except IOError as err:
                print(f"Error saving file '{file_name}': {err}")
        else:
            print("No data present to be saved.")

In [None]:
# Main Class to handle the entire process
class CocktailInsertionWorkflow:
    """Ties the fetch/process step and the save step together."""

    def __init__(self):
        # Collaborators: one fetches and processes the data, the other persists it
        self.api_handler = CocktailAPIHandler()
        self.data_saver = CocktailDataSaver()

    def execute(self):
        """
        Run the pipeline end to end: fetch and process, then save.

        Any exception raised by either step is caught and printed, so this
        method does not propagate errors; the closing banner is printed
        whether or not the run succeeded.
        """
        print("***Starting the cocktail insertion workflow***")
        try:
            fetcher = self.api_handler
            fetcher.fetch_all_cocktails()  # Populates fetcher.cocktails
            self.data_saver.save_to_file(fetcher.cocktails)
        except Exception as error:
            print(f"Workflow failed: {error}")
        print("***Cocktail insertion workflow completed***")

# Entry point for the script: build the workflow and run it once. The guard
# keeps this from running when the file is imported as a module.
if __name__ == "__main__":
    workflow = CocktailInsertionWorkflow()
    workflow.execute()