# Intelligent Document Processing with BAML

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
from baml_py import Image
from baml_client import b
import pandas as pd

## Appointment Card

In [None]:
def extract_appointment_from_url(url: str):
    """
    Extracts structured appointment data from an appointment card image stored at a URL.

    Args:
        url (str): The URL of the appointment card image.

    Returns:
        The appointment object returned by the BAML client function
        ExtractAppointmentFromImage. It exposes the extracted fields as
        attributes rather than being a plain dict. See the baml_src/idp.baml
        file for the structure of the appointment data.

    Raises:
        BamlValidationError: If the llm read of the image could not be parsed into the expected data model.
    """
    # Wrap the URL in a BAML Image so it can be passed to the extraction function.
    image = Image.from_url(url)
    return b.ExtractAppointmentFromImage(image)

In [None]:
# Sample appointment card image hosted in the idp-baml-tutorial S3 bucket.
url = "https://idp-baml-tutorial.s3.us-east-1.amazonaws.com/appointment.jpg"
# Run the extraction helper on the sample image.
appointment = extract_appointment_from_url(url)
# Print the concrete class of the result, then leave the object as the last
# expression so the notebook renders it as the cell output.
print(type(appointment))
appointment

In [None]:
# Take the extracted object's attribute mapping (field name -> value).
appointment_dict = vars(appointment)

# Wrap the mapping in a list so the DataFrame has exactly one row,
# with one column per extracted field.
appointment_df = pd.DataFrame([appointment_dict])
appointment_df

## Nutritional Value Label

In [3]:
def extract_nutrition_from_url(url: str):
    """
    Extracts structured nutrition data from a nutritional value label image stored at a URL.

    Args:
        url (str): The URL of a nutritional value label image.

    Returns:
        The object returned by the BAML client function
        ExtractNutritionLabelFromImage (a baml_client.types.NutritionLabel,
        not a plain dict). See the baml_src/idp.baml file for the structure
        of the nutritional value data.

    Raises:
        BamlValidationError: If the llm read of the image could not be parsed into the expected data model.
    """
    # Wrap the URL in a BAML Image so it can be passed to the extraction function.
    image = Image.from_url(url)
    return b.ExtractNutritionLabelFromImage(image)

In [4]:
# Sample nutritional value label image hosted in the idp-baml-tutorial S3 bucket.
url = "https://idp-baml-tutorial.s3.us-east-1.amazonaws.com/nutrition.jpg"
# Run the extraction helper on the sample image.
nutrition = extract_nutrition_from_url(url)
# Print the concrete class of the result, then leave the object as the last
# expression so the notebook renders it as the cell output.
print(type(nutrition))
nutrition

<class 'baml_client.types.NutritionLabel'>


NutritionLabel(product='Maruchan Instant Lunch', description='Fast and tasty as a hot snack or delicious meal, anytime.', calories=290, fat=12, fat_ui='g', fat_dv=15.0, sodium=1150, sodium_ui='mg', sodium_dv=50.0, carb=39, carb_ui='g', carb_dv=14.0, protein=6, protein_ui='g', protein_dv=None)

## Drop Off Package Receipt

In [5]:
def extract_package_from_url(url: str):
    """
    Extracts structured data from a drop off package receipt image stored at a URL.

    Args:
        url (str): The URL of a drop off package receipt image.

    Returns:
        The object returned by the BAML client function
        ExtractDropOffPackageReceiptFromImage (a
        baml_client.types.DropOffPackageReceipt, not a plain dict). See the
        baml_src/idp.baml file for the structure of the package receipt data.

    Raises:
        BamlValidationError: If the llm read of the image could not be parsed into the expected data model.
    """
    # Wrap the URL in a BAML Image so it can be passed to the extraction function.
    image = Image.from_url(url)
    return b.ExtractDropOffPackageReceiptFromImage(image)

In [6]:
# Sample drop off package receipt image hosted in the idp-baml-tutorial S3 bucket.
url = "https://idp-baml-tutorial.s3.us-east-1.amazonaws.com/package.jpg"
# Run the extraction helper on the sample image.
package = extract_package_from_url(url)
# Print the concrete class of the result, then leave the object as the last
# expression so the notebook renders it as the cell output.
print(type(package))
package

<class 'baml_client.types.DropOffPackageReceipt'>


DropOffPackageReceipt(line_item=[ReceiptItem(tracking_number='DkXCPbL3RRMA', weight=1.13, weight_ui='lb')], location='The UPS Store #4458', address='10650 CULEBRA RD STE 104', day_of_week='Tue', day=31, month='Dec', year=2024, hour=11, minute=52, ampm='AM', total_packages=1, total_packages_ui='pkg')