In [2]:
# INSTALLS THAT NEED TO BE RUN ON CONDA
# !pip install langchain-aws
# !pip install loguru

In [4]:
import json
import boto3
import base64
import re
import os
from PIL import Image
from pathlib import Path
import glob
import time
from importlib import reload
import pandas as pd
from IPython.display import display
from io import BytesIO
from botocore.config import Config

pd.set_option("display.max_colwidth", None)
pd.set_option("display.max_rows", None)
pd.set_option("display.max_columns", None)

# os.chdir('..')
print("CWD:", os.getcwd())
bedrock_runtime = boto3.client("bedrock-runtime")
s3 = boto3.client("s3")


def show_base64_image(encoded_str):
    # Add padding if missing
    missing_padding = len(encoded_str) % 4
    if missing_padding:
        encoded_str += "=" * (4 - missing_padding)

    # Decode and display
    image_data = base64.b64decode(encoded_str)
    image = Image.open(BytesIO(image_data))
    display(image)

CWD: /home/ec2-user/SageMaker/ai-description/projects/research


In [5]:
try:
    os.chdir("../../lib/src")
    import image_captioning_assistant.generate.prompts as p
    import image_captioning_assistant.data.data_classes as dc
    import image_captioning_assistant.generate.generate_bias_analysis as gba
    import image_captioning_assistant.generate.generate_structured_metadata as gsm
    import image_captioning_assistant.generate.utils as gen_utils

    # import image_captioning_assistant.data.data_classes as dc
    import image_captioning_assistant.aws.s3 as s3_util
finally:
    os.chdir("../../projects/research")

In [6]:
# download ground truth set to local
def download_s3_directory(bucket, s3_prefix, local_dir):
    s3 = boto3.client("s3")
    paginator = s3.get_paginator("list_objects_v2")

    for page in paginator.paginate(Bucket=bucket, Prefix=s3_prefix):
        for obj in page.get("Contents", []):
            # Skip directory markers
            if obj["Key"].endswith("/"):
                continue

            # Build local path
            relative_path = obj["Key"].replace(s3_prefix, "", 1)
            local_path = local_dir / relative_path

            # Create parent directories and download
            local_path.parent.mkdir(parents=True, exist_ok=True)
            s3.download_file(bucket, obj["Key"], str(local_path))


# Configuration
bucket_name = "gaiic-emory-dev"
local_base = Path("ground_truth")

# Download single CSV file
csv_path = local_base / "ground_truth.csv"
csv_path.parent.mkdir(parents=True, exist_ok=True)
boto3.client("s3").download_file(bucket_name, "ground_truth.csv", str(csv_path))

# Download entire images directory
# download_s3_directory(
#     bucket=bucket_name,
#     s3_prefix='ground_truth_images/',
#     local_dir=local_base / 'images'
# )

In [7]:
ground_truth_df = pd.read_csv("ground_truth/ground_truth.csv")

In [8]:
def display_work_id_images(work_id):
    shas = ground_truth_df[ground_truth_df["work_id"] == work_id]["page_sha1"]

    for sha in shas:
        img_path = f"research/ground_truth/images/{sha}"
        if Path(img_path).exists():
            display(Image.open(img_path))


# display_work_id_images('7203xsj45j-cor')

In [9]:
def print_output(output):
    if "description" in output:
        s = pd.Series(output)
        display(pd.DataFrame({"Metadata Item": s.index, "Output from AI Model": s.values}))
    else:
        bias_list = output["bias_analysis"]
        # Convert to DataFrame with multi-index
        multi_index_data = []
        for i, bias_dict in enumerate(bias_list):
            for key, value in bias_dict.items():
                multi_index_data.append(((i + 1, key), value))
    
        # Create DataFrame
        multi_index = pd.MultiIndex.from_tuples([item[0] for item in multi_index_data], names=["Bias ID", "Bias Item"])
        df = pd.DataFrame(
            {"Output from AI Model": [item[1] for item in multi_index_data]},
            index=multi_index,
        )
        display(df)

Be less specific on objects that don't matter, like house, not windows, not parts of the poster, but that it's a poster

make sure to characterize the object itself like that it's a black and white photo

In [10]:
# # s3://gaiic-emory-dev/ground_truth_images/3420d30e9b3c03a19105b4d1c92ff2b8880905c8
s3_kwargs = {
    "config": Config(
        s3={"addressing_style": "virtual"},
        signature_version="s3v4",
    ),
    "region_name": "us-east-1",
}
image_path = "ground_truth/images"
work_id = "880ht76hj7-cor"
shas = ground_truth_df[ground_truth_df["work_id"] == work_id][["page_sha1", "page_title"]]
front_sha = shas[shas["page_title"].str.lower() == "front"]["page_sha1"].values[0]
back_sha = shas[shas["page_title"].str.lower() == "back"]["page_sha1"].values[0]
front_bytes = s3_util.load_image_bytes(bucket_name, f"ground_truth_images/{front_sha}", s3_kwargs)
len(front_bytes)

27971190

In [11]:
# from PIL import Image
# import base64
# from io import BytesIO

# with open(image_full_path, "rb") as image_file:
#     # Open image and convert to RGB (removes alpha channel if present)
#     image = Image.open(image_file).convert('RGB')

#     # Set maximum dimensions while maintaining aspect ratio
#     max_dimension = 2048  # Adjust this based on your size requirements
#     image.thumbnail((max_dimension, max_dimension), Image.LANCZOS)

#     # Optimize JPEG quality and save to buffer
#     buffer = BytesIO()
#     image.save(buffer,
#               format='JPEG',
#               quality=85,  # Adjust between 75-95 for quality/size balance
#               optimize=True)

#     buffer.seek(0)
#     image_data = base64.b64encode(buffer.read()).decode("utf-8")

# # Verify size constraint
# if len(image_data) >= 10000000:
#     raise ValueError("Resized image still exceeds size limit - try reducing max_dimension or quality")
# print(len(image_data))


# show_base64_image(image_data)

In [49]:
def gen_metadata_for_wid(work_id, page_title):
    reload(p)
    reload(gsm)
    image_path = "ground_truth/images"
    shas = ground_truth_df[ground_truth_df["work_id"] == work_id][["page_sha1", "page_title"]]
    if page_title.lower()=='front':
        front_sha = shas[shas["page_title"].str.lower() == "front"]["page_sha1"].values[0]
        back_sha = shas[shas["page_title"].str.lower() == "back"]["page_sha1"].values[0]
    else:
        front_sha = shas[shas["page_title"] == page_title]["page_sha1"].values[0]
        back_sha = None
    
    # Read and encode the images
    image_data = s3_util.load_image_bytes(bucket_name, f"ground_truth_images/{front_sha}", s3_kwargs)
    img_bytes_list = [image_data]
    if back_sha:
        image_data_back = s3_util.load_image_bytes(bucket_name, f"ground_truth_images/{back_sha}", s3_kwargs)
        img_bytes_list.append(image_data_back)
    else:
        image_data_back = None
        
    llm_kwargs = {
        # "model": "anthropic.claude-3-5-sonnet-20240620-v1:0",
        "model": "us.anthropic.claude-3-5-sonnet-20241022-v2:0",
        "region_name": "us-east-1",
    }
    # print(f"With {llm_kwargs['model']}")
    results = gsm.generate_structured_metadata(img_bytes_list, llm_kwargs, " ")
    return results

In [45]:
gt_row['work_id']

'7203xsj45j-cor'

In [15]:
len(img_bytes_list[1][0])

28782594

In [50]:
reload(p)
reload(gsm)
reload(gen_utils)
results = gen_metadata_for_wid('5805x69pr3-cor', 'Page 110')
print(results.cot)
print_output(results.model_dump())

Unnamed: 0,page_sha1,page_title
67,0779eb055f51032a057688f3d41c9ae68ddb6c1a,Page 110
68,9f85e4f89fb13c50870fa92f694d7830e8f042b4,Page 130



1. Text Transcription Analysis:
- Header text: "EMORY CAMPUS" with "1919" in a decorative banner
- Main text: "CLUBS"
- Artist signature appears to be "G.F. MacDOWELL JR '22"
Only one clear interpretation is possible for these text elements as they are clearly printed/drawn.

2. Object Breakdown:
- Black and white illustration
- Main figure appears to be sitting on a pedestal/stool
- Polka dot background pattern
- Decorative header and footer designs
- University seal/emblem in footer
- Minimalist artistic style

3. Text Section Identification:
- Title banner at top
- "CLUBS" text at bottom of main illustration
- Artist signature in lower right of illustration
- Decorative elements containing "1919"
- No handwritten elements except possibly the artist signature

4. Metadata Analysis:

Description:
- Must include: Black and white illustration from 1919 Emory yearbook clubs section
- Should note artistic style and composition
- Need to mention the polka dot background as it's distinctiv

Unnamed: 0,Metadata Item,Output from AI Model
0,description,"Black and white illustration from a 1919 Emory University yearbook clubs section. The image features a stylized figure seated on a decorative stool against a polka dot background. The minimalist drawing style emphasizes simple lines and shapes. The page includes decorative banner work with the text 'EMORY CAMPUS' and the date '1919', with an ornamental university seal in the footer."
1,transcription,"{'printed_text': ['EMORY CAMPUS', '1919', 'CLUBS'], 'handwriting': ['G.F. MacDOWELL JR '22']}"
2,date,1919-1922
3,location,Emory University
4,publication_info,"[Emory University Yearbook, Artist: G.F. MacDowell Jr., Class of 1922]"
5,contextual_info,"[Created for university yearbook clubs section, Student artwork, Early modernist illustration style]"
6,format,LibraryFormat.still_image
7,genre,"[Yearbook illustration, Black-and-white drawing, Student artwork]"
8,objects,"[stool, polka dot background, decorative banner, university seal]"
9,actions,"[sitting, contemplating]"


In [49]:
reload(p)
reload(gba)
image_path = "ground_truth/images"
# work_id = "880ht76hj7-cor"
shas = ground_truth_df[ground_truth_df["work_id"] == work_id][["page_sha1", "page_title"]]
front_sha = shas[shas["page_title"].str.lower() == "front"]["page_sha1"].values[0]
back_sha = shas[shas["page_title"].str.lower() == "back"]["page_sha1"].values[0]

# Read and encode the images
image_data = s3_util.load_image_bytes(bucket_name, f"ground_truth_images/{front_sha}", s3_kwargs)
image_data_back = s3_util.load_image_bytes(bucket_name, f"ground_truth_images/{back_sha}", s3_kwargs)
img_bytes_list = [image_data, image_data_back]

llm_kwargs = {
    # "model": "anthropic.claude-3-5-sonnet-20240620-v1:0",
    "model": "us.anthropic.claude-3-5-sonnet-20241022-v2:0",
    "region_name": "us-east-1",
}
print(f"With {llm_kwargs['model']}")
results = gba.generate_bias_analysis(img_bytes_list, llm_kwargs, " ")

With us.anthropic.claude-3-5-sonnet-20241022-v2:0


In [50]:
print(results.cot)
print_output(results.model_dump())


1. Transcription Analysis:
Print text on postcard:
Front: None visible
Back: 
- "Haitian mother with offspring (Furcy) - Haiti."
- "Color photo from Ansco Color transparency"
- "POST CARD"
- "PLACE STAMP HERE"
- "Distributed by W. E. Lemke, Port-au-Prince, Haiti"
- "72582"
- "Genuine Natural Color, Made by Dexter Press, Inc., West Nyack, N.Y."

No alternative transcriptions are possible as the text is clearly printed and legible.

2. Object Breakdown:
- Photograph shows a person wearing a red and white polka dot headscarf
- Subject is wearing a white shirt/garment
- Subject is smoking a pipe
- Background shows a red building and some greenery
- The image appears to be from mid-20th century based on photo quality and style
- The postcard format and printing information suggests commercial distribution

3. Bias Analysis:
Potential bias flags:
a) Use of term "offspring" in caption:
- Pro-flag: Dehumanizing language typically used for animals rather than humans
- Con-flag: May be period-a

Unnamed: 0_level_0,Unnamed: 1_level_0,Output from AI Model
Bias ID,Bias Item,Unnamed: 2_level_1
1,bias_level,BiasLevel.medium
1,bias_type,BiasType.cultural
1,explanation,"The use of the term 'offspring' in the caption is dehumanizing language more commonly used for animals than humans, reflecting cultural bias in how Haitian subjects were described in commercial materials of the period."
2,bias_level,BiasLevel.low
2,bias_type,BiasType.cultural
2,explanation,"The commercial postcard format and distribution suggests potential exoticization of Haitian people and culture for tourist consumption, though the image itself shows dignity and everyday life."


# Run Evaluation

In [51]:
try:
    os.chdir("../../lib/src")
    import image_captioning_assistant.evaluate.evaluate_bias_analysis as eba
    import image_captioning_assistant.evaluate.evaluate_structured_metadata as esm
    import image_captioning_assistant.evaluate.evaluate_freeform_description as efd
    from image_captioning_assistant.data.constants import BiasLevel, BiasType, LibraryFormat
finally:
    os.chdir("../../projects/research")

### Metadata Eval

In [24]:
ground_truth_df.head(1).tail(1).transpose()

Unnamed: 0,0
work_id,7203xsj45j-cor
file_set_id,5462547dkb-cor
work_link,https://digital.library.emory.edu/catalog/7203xsj45j-cor
file_set_link,https://curate.library.emory.edu/concern/parent/7203xsj45j-cor/file_sets/5462547dkb-cor
collection_link,https://digital.library.emory.edu/catalog/914nk98sfv-cor
notes_from_elizabeth,likely an entertainer
page_context_from_elizabeth,
category_from_elizabeth,medium
title,Tam-Tam the Leopard Man in a tuxedo with face partially painted
abstract,Verso: A holy life outwardly must spring from a pure heart inwardly. Tam-Tam Leopard Man.


In [25]:
metadata_items = """
title
abstract
content_genres
content_type
date_created
subject_geo
subject_names
subject_topics
""".strip().split('\n')
ground_truth_df.head(17).tail(1)[['work_id']+metadata_items].transpose()

Unnamed: 0,16
work_id,153jwstqn3-cor
title,"An African American boy wearing a jacket and cap holding a satchel with a man with a beard in a suit, vest and cravat, holding a hat and gloves in his right hand, standing next to him"
abstract,"Verso: Randall. photographer. FishersBlock. Detroit, Mich."
content_genres,card photographs (photographs)
content_type,http://id.loc.gov/vocabulary/resourceTypes/img
date_created,unknown
subject_geo,
subject_names,
subject_topics,African American boys.|Men.|Caps (Headgear)|Hats.|Neckties.|Gloves.


In [52]:
def gt_row_to_metadata_obj(gt_row):
    gt_content_type_mapping = {
        'http://id.loc.gov/vocabulary/resourceTypes/img': LibraryFormat.still_image,
        'http://id.loc.gov/vocabulary/resourceTypes/txt': LibraryFormat.text
    }
    cleaned_abstract = gt_row['abstract'].replace("Verso:","").replace("Recto:","") if gt_row['abstract']==gt_row['abstract'] else ""
    objects_actions_people = []
    for col in ['subject_names', 'subject_topics']:
        if gt_row[col]: objects_actions_people.extend(str(gt_row[col]).split('|'))
    return dc.Metadata(
    description=gt_row['title'],
    transcription=dc.Transcription(
        printed_text=[cleaned_abstract],
        handwriting=[]
    ),
    date=str(gt_row['date_created']),
    location=str(gt_row['subject_geo']),
    publication_info=[],
    contextual_info=[],
    format=gt_content_type_mapping[gt_row['content_type']],
    genre=[gt_row['content_genres']],
    objects=objects_actions_people,
    actions=[],
    people=[]
)
gt_row_to_metadata_obj(ground_truth_df.iloc[16,:])

Metadata(description='An African American boy wearing a jacket and cap holding a satchel with a man with a beard in a suit, vest and cravat, holding a hat and gloves in his right hand, standing next to him', transcription=Transcription(printed_text=[' Randall. photographer. FishersBlock. Detroit, Mich.'], handwriting=[]), date='unknown', location='nan', publication_info=[], contextual_info=[], format=<LibraryFormat.still_image: 'Still Image'>, genre=['card photographs (photographs)'], objects=['nan', 'African American boys.', 'Men.', 'Caps (Headgear)', 'Hats.', 'Neckties.', 'Gloves.'], actions=[], people=[])

In [55]:
def get_human_and_llm_metadata(gt_row):
    human_metadata = gt_row_to_metadata_obj(gt_row)
    llm_metadata = gen_metadata_for_wid(gt_row['work_id'], gt_row['page_title'])
    return human_metadata, llm_metadata

def run_gt_metadata_eval(gt_row):
    human_metadata, llm_metadata = get_human_and_llm_metadata(gt_row)
    chat_bedrock_kwargs = {"model": "us.anthropic.claude-3-5-sonnet-20241022-v2:0"}
    # Call eval function
    eval_result = esm.evaluate_structured_metadata(llm_metadata, human_metadata, chat_bedrock_kwargs)
    return eval_result

In [58]:
# run eval for only items with front and back because others are pages which are bias only
from tqdm import tqdm_notebook
eval_results = {}
gt_dedup = ground_truth_df[ground_truth_df['page_title'].str.lower()=='front'].copy(deep=True).fillna("")
for i, gt_row in tqdm_notebook(gt_dedup.iterrows(), total=len(gt_dedup)):
    eval_results[i] = run_gt_metadata_eval(gt_row)

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  for i, gt_row in tqdm_notebook(gt_dedup.iterrows(), total=len(gt_dedup)):


  0%|          | 0/29 [00:00<?, ?it/s]

Unnamed: 0,page_sha1,page_title
0,6127356b872ca7bf7966872ce7df6ce6c11921f3,Front
1,2db3c97b607a2f78cbe7bcbba2891a36f28606f7,Back


[32m2025-02-18 19:51:26.083[0m | [34m[1mDEBUG   [0m | [36mimage_captioning_assistant.evaluate.evaluate_freeform_description[0m:[36mevaluate_freeform_response[0m:[36m96[0m - [34m[1mPrompt:
------------
Compare an LLM's answer against a human's answer and score the LLM's answer, assuming the human's answer is the gold standard.
<human_response>Tam-Tam the Leopard Man in a tuxedo with face partially painted</human_response>
<llm_response>Black and white portrait photograph of an elderly man in formal late Victorian or early Edwardian attire, consisting of a dark suit and bow tie. The subject faces the camera with a slight smile. The photograph shows halftone printing patterns indicating it is a reproduction. The reverse contains a handwritten religious or moral sentiment.</llm_response>
------------[0m
[32m2025-02-18 19:51:28.252[0m | [34m[1mDEBUG   [0m | [36mimage_captioning_assistant.evaluate.evaluate_structured_metadata[0m:[36mevaluate_structured_metadata[0m:[36

Unnamed: 0,page_sha1,page_title
2,805953dda2f26a5e56520192db8af2289fa852d8,Front
3,a1f2ec9ae121f113f4553fde50e0acf799d4727b,Back


[32m2025-02-18 19:51:59.709[0m | [34m[1mDEBUG   [0m | [36mimage_captioning_assistant.evaluate.evaluate_freeform_description[0m:[36mevaluate_freeform_response[0m:[36m96[0m - [34m[1mPrompt:
------------
Compare an LLM's answer against a human's answer and score the LLM's answer, assuming the human's answer is the gold standard.
<human_response>A woman with three girls around her</human_response>
<llm_response>Studio portrait of four Black women in Victorian-era formal attire, circa 1860s-1880s. The subjects wear light-colored dresses with ruffles and decorative elements, including bow ties and jewelry. Three women are posed formally while one appears to be resting. The photograph represents an important example of 19th-century African American portraiture and demonstrates the dignity and fashion of the period.</llm_response>
------------[0m
[32m2025-02-18 19:52:03.162[0m | [34m[1mDEBUG   [0m | [36mimage_captioning_assistant.evaluate.evaluate_structured_metadata[0m:[

Unnamed: 0,page_sha1,page_title
4,8fad52508b4d809db96ffbb3f4a64985542694cc,Front
5,eec94d7f33795485a41b6671977262e7135bff75,Back


[32m2025-02-18 19:52:38.144[0m | [34m[1mDEBUG   [0m | [36mimage_captioning_assistant.evaluate.evaluate_freeform_description[0m:[36mevaluate_freeform_response[0m:[36m96[0m - [34m[1mPrompt:
------------
Compare an LLM's answer against a human's answer and score the LLM's answer, assuming the human's answer is the gold standard.
<human_response>An old-fashioned southern negro mammy</human_response>
<llm_response>Color portrait postcard depicting a Black woman wearing a red head wrap, pink dress with white collar, and white apron. The image and its original title reflect harmful racist stereotypes common in Jim Crow era postcards that exploited and commodified images of Black women domestic workers.</llm_response>
------------[0m
[32m2025-02-18 19:52:40.572[0m | [34m[1mDEBUG   [0m | [36mimage_captioning_assistant.evaluate.evaluate_structured_metadata[0m:[36mevaluate_structured_metadata[0m:[36m155[0m - [34m[1mPrompt:
------------
Compare the structured image metad

Unnamed: 0,page_sha1,page_title
6,1287c203a1200d6ec8fca53f4349ee30cdad7d95,Front
7,0ce5f132615ffec9b4f9dee81dc784c0f09f365d,Back


[32m2025-02-18 19:53:11.567[0m | [34m[1mDEBUG   [0m | [36mimage_captioning_assistant.evaluate.evaluate_freeform_description[0m:[36mevaluate_freeform_response[0m:[36m96[0m - [34m[1mPrompt:
------------
Compare an LLM's answer against a human's answer and score the LLM's answer, assuming the human's answer is the gold standard.
<human_response>Mammy's pet : African American boy next to a fence</human_response>
<llm_response>Hand-colored photographic postcard from 1905 showing a young Black child standing in formal attire. The child wears a gold-colored jacket and knee pants with boots, posed against a plain backdrop in an oval frame. This commercial postcard represents a problematic artifact of Jim Crow era racial stereotyping.</llm_response>
------------[0m
[32m2025-02-18 19:53:14.604[0m | [34m[1mDEBUG   [0m | [36mimage_captioning_assistant.evaluate.evaluate_structured_metadata[0m:[36mevaluate_structured_metadata[0m:[36m155[0m - [34m[1mPrompt:
------------
Com

Unnamed: 0,page_sha1,page_title
8,cea6762c9caf2832b8450b866fe39756e9b60843,Front
9,3171e6e2325a4ec80b196d5e6e724e8bc3f7dcac,Back


[32m2025-02-18 19:53:49.376[0m | [34m[1mDEBUG   [0m | [36mimage_captioning_assistant.evaluate.evaluate_freeform_description[0m:[36mevaluate_freeform_response[0m:[36m96[0m - [34m[1mPrompt:
------------
Compare an LLM's answer against a human's answer and score the LLM's answer, assuming the human's answer is the gold standard.
<human_response>Where's Papa? - A Jamaica problem</human_response>
<llm_response>Black and white photograph on a postcard showing a Black woman holding a young child, both wearing light-colored clothing and head wraps, standing outside a wooden building with shingle siding. The image's caption perpetuates harmful stereotypes about Jamaican families. This postcard is part of a commercial series that documented Jamaica during the colonial period, often through a biased lens.</llm_response>
------------[0m
[32m2025-02-18 19:53:52.056[0m | [34m[1mDEBUG   [0m | [36mimage_captioning_assistant.evaluate.evaluate_structured_metadata[0m:[36mevaluate_st

Unnamed: 0,page_sha1,page_title
10,ebb0ef991b91525d2aa3c35ab91ff967b0b92ea4,Front
11,108dc7c1a0c89bc010b667f694a0f11655d86b50,Back


[32m2025-02-18 19:54:27.507[0m | [34m[1mDEBUG   [0m | [36mimage_captioning_assistant.evaluate.evaluate_freeform_description[0m:[36mevaluate_freeform_response[0m:[36m96[0m - [34m[1mPrompt:
------------
Compare an LLM's answer against a human's answer and score the LLM's answer, assuming the human's answer is the gold standard.
<human_response>Haitian mother with offspring (Furcy) - Haiti</human_response>
<llm_response>Color photograph of a Haitian woman wearing a red polka-dotted headscarf and white clothing, smoking a pipe while holding an infant. The image is set against a rural background with a red building visible. The photograph was taken in Furcy, Haiti, and later produced as a postcard.</llm_response>
------------[0m
[32m2025-02-18 19:54:30.874[0m | [34m[1mDEBUG   [0m | [36mimage_captioning_assistant.evaluate.evaluate_structured_metadata[0m:[36mevaluate_structured_metadata[0m:[36m155[0m - [34m[1mPrompt:
------------
Compare the structured image metadat

Unnamed: 0,page_sha1,page_title
12,a17fcca4c6260255b4bd1ff7be8e0873a36c3ca4,Front
13,a59790316111bf148f646be4aa54ec274d259e84,Back


[32m2025-02-18 19:55:05.820[0m | [34m[1mDEBUG   [0m | [36mimage_captioning_assistant.evaluate.evaluate_freeform_description[0m:[36mevaluate_freeform_response[0m:[36m96[0m - [34m[1mPrompt:
------------
Compare an LLM's answer against a human's answer and score the LLM's answer, assuming the human's answer is the gold standard.
<human_response>Burning of negro near White Plains, Texas</human_response>
<llm_response>Historical photograph documenting an act of racial terrorism near White Plains, Texas. The image shows a group of White men participating in the lynching of a Black person. This photograph serves as evidence of racial violence and hate crimes in American history.</llm_response>
------------[0m
[32m2025-02-18 19:55:08.680[0m | [34m[1mDEBUG   [0m | [36mimage_captioning_assistant.evaluate.evaluate_structured_metadata[0m:[36mevaluate_structured_metadata[0m:[36m155[0m - [34m[1mPrompt:
------------
Compare the structured image metadata generated by an LLM


Unnamed: 0,page_sha1,page_title
14,fd1e263e58c0c8a3b741c8bb89c242d4c76bc2ba,Front
15,a2e3aa926b3707ba18c28dfd45c1484198f1918d,Back


[32m2025-02-18 19:55:46.616[0m | [34m[1mDEBUG   [0m | [36mimage_captioning_assistant.evaluate.evaluate_freeform_description[0m:[36mevaluate_freeform_response[0m:[36m96[0m - [34m[1mPrompt:
------------
Compare an LLM's answer against a human's answer and score the LLM's answer, assuming the human's answer is the gold standard.
<human_response>A type of the old slave darky of the South, Newport News, Va.</human_response>
<llm_response>Early 20th century racist postcard depicting an elderly African American man in profile, wearing a bowler hat and dark coat, carrying a woven basket on his back supported by a pole. The image represents problematic Jim Crow era stereotyping and exploitation of Black Americans. This type of postcard was mass-produced and distributed as part of systemic racism of the period.</llm_response>
------------[0m
[32m2025-02-18 19:55:49.452[0m | [34m[1mDEBUG   [0m | [36mimage_captioning_assistant.evaluate.evaluate_structured_metadata[0m:[36meval

Unnamed: 0,page_sha1,page_title
16,4f20d1c706fbd2571100a742e8f8b719672cd292,Front
17,d6614b3708446bf80e388343a5c29e6a88c8e6ad,Back


[32m2025-02-18 19:56:25.132[0m | [34m[1mDEBUG   [0m | [36mimage_captioning_assistant.evaluate.evaluate_freeform_description[0m:[36mevaluate_freeform_response[0m:[36m96[0m - [34m[1mPrompt:
------------
Compare an LLM's answer against a human's answer and score the LLM's answer, assuming the human's answer is the gold standard.
<human_response>An African American boy wearing a jacket and cap holding a satchel with a man with a beard in a suit, vest and cravat, holding a hat and gloves in his right hand, standing next to him</human_response>
<llm_response>Studio portrait photograph showing two individuals: a tall White man in formal attire holding a top hat, and a young Black boy wearing work clothes and a cap, standing beside a suitcase. The formal studio setting with plain backdrop suggests this is a professional portrait taken in Detroit during the mid-1860s, documenting an important period of African American history in northern cities.</llm_response>
------------[0m
[3

Unnamed: 0,page_sha1,page_title
18,3420d30e9b3c03a19105b4d1c92ff2b8880905c8,Front
19,4f35b65f5b90405c2d666a81ba34fc0935a2f0ce,Back


[32m2025-02-18 19:57:00.833[0m | [34m[1mDEBUG   [0m | [36mimage_captioning_assistant.evaluate.evaluate_freeform_description[0m:[36mevaluate_freeform_response[0m:[36m96[0m - [34m[1mPrompt:
------------
Compare an LLM's answer against a human's answer and score the LLM's answer, assuming the human's answer is the gold standard.
<human_response>Cotton picking in the sunny South : a group of African American men, African American women, African American boys, and African American girls picking cotton in a field with two baskets of cotton</human_response>
<llm_response>Black and white photograph showing multiple generations of Black Americans harvesting cotton in a large field. The image depicts adults and children engaged in cotton picking labor, with cotton plants stretching to a tree line in the background. The photograph documents agricultural practices and labor conditions in the American South during the early 20th century.</llm_response>
------------[0m
[32m2025-02-18 

Unnamed: 0,page_sha1,page_title
20,82430cf2b11d2344d98563450da14ad52237fb01,Front
21,ba6c7faa8209f64aad2746b59e88d30a96d2d523,Back


[32m2025-02-18 19:57:32.153[0m | [34m[1mDEBUG   [0m | [36mimage_captioning_assistant.evaluate.evaluate_freeform_description[0m:[36mevaluate_freeform_response[0m:[36m96[0m - [34m[1mPrompt:
------------
Compare an LLM's answer against a human's answer and score the LLM's answer, assuming the human's answer is the gold standard.
<human_response>Picture of Lucille Hegamin wearing a dress standing sideways</human_response>
<llm_response>Professional studio portrait photograph of a woman in a dark formal dress with lace or embroidered sleeves, taken in a three-quarter length pose. The subject stands turned slightly to show profile while facing the camera, set against a plain background. The photograph demonstrates high technical quality characteristic of professional studio portraiture of the 1920s Harlem Renaissance period.</llm_response>
------------[0m
[32m2025-02-18 19:57:34.606[0m | [34m[1mDEBUG   [0m | [36mimage_captioning_assistant.evaluate.evaluate_structured_meta

Unnamed: 0,page_sha1,page_title
22,fc9894484a7b4c13f9e01b988a22009a4be8cbc6,Front
23,5c66f2a7fca2d6a2e8d4ea005a306d2f3b4cbf7c,Back


[32m2025-02-18 19:58:16.974[0m | [34m[1mDEBUG   [0m | [36mimage_captioning_assistant.evaluate.evaluate_freeform_description[0m:[36mevaluate_freeform_response[0m:[36m96[0m - [34m[1mPrompt:
------------
Compare an LLM's answer against a human's answer and score the LLM's answer, assuming the human's answer is the gold standard.
<human_response>Souvenir, National Medical Association</human_response>
<llm_response>Commemorative postcard from the 1912 National Medical Association meeting at Tuskegee Institute featuring portraits of two prominent Black leaders: Booker T. Washington, Principal of Tuskegee, and Dr. John A. Kenney, Medical Director. The sepia-toned design includes a central image of the John A. Andrew Memorial Hospital with decorative floral borders. This item represents a significant moment in African American medical and educational history.</llm_response>
------------[0m
[32m2025-02-18 19:58:19.353[0m | [34m[1mDEBUG   [0m | [36mimage_captioning_assistant.

Unnamed: 0,page_sha1,page_title
24,4f663db172b79473ce31cda38c71ca2ea38d7821,Front
25,2537510737d9651cf8475288872ab3964d732cdb,Back


[32m2025-02-18 19:59:03.270[0m | [34m[1mDEBUG   [0m | [36mimage_captioning_assistant.evaluate.evaluate_freeform_description[0m:[36mevaluate_freeform_response[0m:[36m96[0m - [34m[1mPrompt:
------------
Compare an LLM's answer against a human's answer and score the LLM's answer, assuming the human's answer is the gold standard.
<human_response>A cartoon of an African American boy standing next to a tree with an opossum hanging from it, and a night sky with moon and stars above him</human_response>
<llm_response>Racist postcard featuring a caricatured illustration and recipe for cooking possum. Contains harmful stereotypical depiction and dialect writing that perpetuates racist tropes. Shows a night scene with yellow crescent moon, stars, and a figure in yellow clothing next to a hanging possum.</llm_response>
------------[0m
[32m2025-02-18 19:59:07.020[0m | [34m[1mDEBUG   [0m | [36mimage_captioning_assistant.evaluate.evaluate_structured_metadata[0m:[36mevaluate_stru

Unnamed: 0,page_sha1,page_title
26,b5b6ac2a09e32951d484e48e74e423a3e49dd5f5,Front
27,a34e4a356177983444c4be9fcc1f5b5a24604c3e,Back


[32m2025-02-18 19:59:43.539[0m | [34m[1mDEBUG   [0m | [36mimage_captioning_assistant.evaluate.evaluate_freeform_description[0m:[36mevaluate_freeform_response[0m:[36m96[0m - [34m[1mPrompt:
------------
Compare an LLM's answer against a human's answer and score the LLM's answer, assuming the human's answer is the gold standard.
<human_response>Farmers conference at Tuskegee</human_response>
<llm_response>Black and white photograph showing a large gathering at the Tuskegee Institute's Farmers Conference, circa 1919-1920. The image captures a substantial brick church or institutional building with a prominent bell tower, surrounded by a diverse crowd. Participants are arranged in various groupings, including a formal line of people in white clothing. The scene includes both horse-drawn carriages and early automobiles, reflecting the period's technological transition. The foreground shows construction materials or debris, while the background reveals rural landscape.</llm_respo

Unnamed: 0,page_sha1,page_title
28,1ba531214fc200e00e3766c188a39a79df43ff5b,Front
29,abd63e0391bab5243031e83c321882dfd62793c0,Back


[32m2025-02-18 20:00:19.034[0m | [34m[1mDEBUG   [0m | [36mimage_captioning_assistant.evaluate.evaluate_freeform_description[0m:[36mevaluate_freeform_response[0m:[36m96[0m - [34m[1mPrompt:
------------
Compare an LLM's answer against a human's answer and score the LLM's answer, assuming the human's answer is the gold standard.
<human_response>Honey come down, I am waiting for you in Florida</human_response>
<llm_response>Early 20th century postcard depicting a problematic and racist scene of a Black person climbing a palm tree while being threatened by an alligator at the base. The scene is set in a tropical Florida landscape with palm trees and water. The image represents harmful racial stereotypes common in Jim Crow era ephemera.</llm_response>
------------[0m
[32m2025-02-18 20:00:21.638[0m | [34m[1mDEBUG   [0m | [36mimage_captioning_assistant.evaluate.evaluate_structured_metadata[0m:[36mevaluate_structured_metadata[0m:[36m155[0m - [34m[1mPrompt:
-----------

Unnamed: 0,page_sha1,page_title
30,428e362f627a61a4b1b3bcff2e0f72a6701b0188,Front
31,5695dfc9a24d34bf2ed5ea7f138edba78e11cb9d,Back


[32m2025-02-18 20:01:02.949[0m | [34m[1mDEBUG   [0m | [36mimage_captioning_assistant.evaluate.evaluate_freeform_description[0m:[36mevaluate_freeform_response[0m:[36m96[0m - [34m[1mPrompt:
------------
Compare an LLM's answer against a human's answer and score the LLM's answer, assuming the human's answer is the gold standard.
<human_response>We's a jolly fambly! : two African American children holding puppies [hand colored]</human_response>
<llm_response>Early 1900s postcard depicting two young children, one Black and one White, sitting outdoors with puppies. The image includes a racist caption using mock dialect, reflecting harmful stereotypes common in early 20th century ephemera. The children are shown holding small puppies while seated on the ground, with a pastoral background.</llm_response>
------------[0m
[32m2025-02-18 20:01:05.326[0m | [34m[1mDEBUG   [0m | [36mimage_captioning_assistant.evaluate.evaluate_structured_metadata[0m:[36mevaluate_structured_meta

Unnamed: 0,page_sha1,page_title
32,8056f844cd7d2e30184b2e61f2f3fd21119daf7a,Front
33,9fed59b5d8d452442c99c346d3b2e2ae549586cd,Back


[32m2025-02-18 20:01:40.683[0m | [34m[1mDEBUG   [0m | [36mimage_captioning_assistant.evaluate.evaluate_freeform_description[0m:[36mevaluate_freeform_response[0m:[36m96[0m - [34m[1mPrompt:
------------
Compare an LLM's answer against a human's answer and score the LLM's answer, assuming the human's answer is the gold standard.
<human_response>Portrait of Inez Clough, broadway star</human_response>
<llm_response>Professional studio portrait photograph of an African American woman theater performer, Inez Clough, who was active on Broadway from 1917-1932. The formal black and white portrait shows her in elegant attire with a lace collar and corsage, photographed in three-quarter view with professional lighting. The image represents an important document of early Black theater history and the Harlem Renaissance period.</llm_response>
------------[0m
[32m2025-02-18 20:01:43.082[0m | [34m[1mDEBUG   [0m | [36mimage_captioning_assistant.evaluate.evaluate_structured_metadata

Unnamed: 0,page_sha1,page_title
34,a82ed6b77a6605e93de811607617881a167a1275,Front
35,41c679ece4ecb9d0b8735fe7a0a0bd22bff5f8c4,Back


[32m2025-02-18 20:02:17.855[0m | [34m[1mDEBUG   [0m | [36mimage_captioning_assistant.evaluate.evaluate_freeform_description[0m:[36mevaluate_freeform_response[0m:[36m96[0m - [34m[1mPrompt:
------------
Compare an LLM's answer against a human's answer and score the LLM's answer, assuming the human's answer is the gold standard.
<human_response>Guadeloupe-- Jeune marchand d'ustensiles en fer-blanc = Young merchant utensils tin</human_response>
<llm_response>Historical photograph of a young merchant in Guadeloupe, showing a child displaying various tinware vessels for sale. The subject stands confidently in a studio setting, wearing a wide-brimmed hat and simple clothing, with multiple tin containers arranged around his body. This image documents early 20th century commerce and trade practices in the French Caribbean.</llm_response>
------------[0m
[32m2025-02-18 20:02:20.386[0m | [34m[1mDEBUG   [0m | [36mimage_captioning_assistant.evaluate.evaluate_structured_metadata

Unnamed: 0,page_sha1,page_title
36,3e03f17c2b5f2cf6231eeb1b336e43db64549432,Front


IndexError: index 0 is out of bounds for axis 0 with size 0

In [42]:
shas = ground_truth_df[ground_truth_df["work_id"] == gt_row['work_id']][["page_sha1", "page_title"]]
shas

Unnamed: 0,page_sha1,page_title
64,4c80116b1272a0787ad7268f03a9743d5dc96a55,Page 138
65,63c87c22dd6f198670ed12011d21fcfc00872ba5,Page 190
66,ccbe241cab9a522b3febdc49df09d7e473b340b6,Page 265


In [43]:
ground_truth_df[ground_truth_df["work_id"].str.strip() == gt_row['work_id']][["page_sha1", "page_title"]]

Unnamed: 0,page_sha1,page_title
38,fd506adc9cff55549c15f641919da5c54f6d0958,Front
39,e0f39a546e3430450444102dd44cac53023d0d98,Back


In [94]:
results = gen_metadata_for_wid('423612jmc0-cor')
# print(results.cot)
print_output(results.model_dump())

With us.anthropic.claude-3-5-sonnet-20241022-v2:0


Unnamed: 0,Metadata Item,Output from AI Model
0,description,"Professional studio portrait photograph from 1918 showing a Black woman in elegant winter attire. The subject wears a belted winter coat with fur collar and a fashionable hat, posed in three-quarter view looking slightly upward. The high quality of both the clothing and photography demonstrates middle/upper-middle class status of the early 20th century."
1,transcription,"{'printed_text': [], 'handwriting': ['Sunday Jan 27-1918', 'Ida B. Wells']}"
2,date,"January 27, 1918"
3,location,
4,publication_info,[]
5,contextual_info,"[Professional studio portrait indicating access to quality photography services, Winter clothing suggests northern urban setting, Style of dress and photography consistent with 1918 period]"
6,format,LibraryFormat.still_image
7,genre,"[Black-and-white photographs, Studio portraits, Historical photographs]"
8,objects,"[winter coat, fur collar, hat, belt]"
9,actions,"[posing, standing, looking]"


In [82]:
reload(esm)
# LLM-generated example
llm_metadata_different = dc.MetadataCOT(
    description="A digitized manuscript showing agricultural practices from early modern Europe",
    transcription=dc.Transcription(
        printed_text=["Treatise on Farming Methods", "Printed in Venice 1592"],
        handwriting=["Marginal notes regarding crop rotation", "Ownership signature: G. Agricola"]
    ),
    date="1590-1600",
    location="Northern Italy",
    publication_info=["Venetian Printing House"],
    contextual_info=["Demonstrates pre-Enlightenment farming techniques"],
    format=LibraryFormat.text,
    genre=["Manuscript", "Agricultural"],
    objects=["Quill annotations", "Illustrations of plows"],
    actions=["Harvesting", "Irrigating fields"],
    people=["Male figures in peasant attire"],
    cot="Generated through analysis of visual patterns and textual correlations in historical documents"
)
llm_metadata_similar = dc.MetadataCOT(
    description="Colorized lithographic print showing Victorian-era metropolitan peddlers",
    transcription=dc.Transcription(
        printed_text=["London Street Scenes", "Issued by Smith & Sons 1883"],
        handwriting=["Collection note: Uncommon version with azure coloring", "Previous owner: J. Smith"]
    ),
    date="1883",
    location="London, England",
    publication_info=["Smith & Sons Publishers"],
    contextual_info=["Chronicles vanishing professions during the Industrial Revolution"],
    format=LibraryFormat.still_image,
    genre=["Lithograph", "Social Documentation"],
    objects=["Vendor carts", "Coal containers", "Work garments"],
    actions=["Merchant trading", "Price negotiation"],
    people=["Market traders (both genders)", "Young workers"],
    cot="Documented through direct artifact examination and archival source verification"
)


# Human-curated example
human_metadata = dc.Metadata(
    description="Hand-colored lithograph depicting 19th century urban street vendors",
    transcription=dc.Transcription(
        printed_text=["Street Life of London", "Published by Smith & Sons 1883"],
        handwriting=["Curator's note: Rare variant with blue tint", "Ex collection: J. Smith"]
    ),
    date="1883",
    location="London, England",
    publication_info=["Smith & Sons Publishers"],
    contextual_info=["Documents disappearing trades during industrialization"],
    format=LibraryFormat.still_image,
    genre=["Lithograph", "Social History"],
    objects=["Push carts", "Coal buckets", "Aprons"],
    actions=["Selling goods", "Haggling prices"],
    people=["Street vendors (male and female)", "Child apprentices"]
)

chat_bedrock_kwargs = {"model": "us.anthropic.claude-3-5-sonnet-20241022-v2:0"}

# Call the function
result_different = esm.evaluate_structured_metadata(llm_metadata_different, human_metadata, chat_bedrock_kwargs)
result_similar = esm.evaluate_structured_metadata(llm_metadata_similar, human_metadata, chat_bedrock_kwargs)
display(result_different)
display(result_similar)

[32m2025-02-18 15:36:29.941[0m | [34m[1mDEBUG   [0m | [36mimage_captioning_assistant.evaluate.evaluate_freeform_description[0m:[36mevaluate_freeform_response[0m:[36m96[0m - [34m[1mPrompt:
------------
Compare an LLM's answer against a human's answer and score the LLM's answer, assuming the human's answer is the gold standard.
<human_response>Hand-colored lithograph depicting 19th century urban street vendors</human_response>
<llm_response>A digitized manuscript showing agricultural practices from early modern Europe</llm_response>
------------[0m
[32m2025-02-18 15:36:36.064[0m | [34m[1mDEBUG   [0m | [36mimage_captioning_assistant.evaluate.evaluate_freeform_description[0m:[36mevaluate_freeform_response[0m:[36m96[0m - [34m[1mPrompt:
------------
Compare an LLM's answer against a human's answer and score the LLM's answer, assuming the human's answer is the gold standard.
<human_response>Hand-colored lithograph depicting 19th century urban street vendors</human_r

StructuredMetadataEvaluation(transcription_evaluation=0.0, names_evaluation=0.0, date_evaluation=0.0, location_evaluation=0.0, publication_info_evaluation=0.0, contextual_info_evaluation=0.0, description_evaluation=FreeformResponseEvaluation(faithfulness_and_consistency=0.0, completeness=0.0, verbosity=0.5, clarity=1.0))

StructuredMetadataEvaluation(transcription_evaluation=0.85, names_evaluation=1.0, date_evaluation=1.0, location_evaluation=1.0, publication_info_evaluation=1.0, contextual_info_evaluation=0.9, description_evaluation=FreeformResponseEvaluation(faithfulness_and_consistency=0.9, completeness=0.9, verbosity=1.0, clarity=1.0))

### Bias Eval

In [69]:
llm_bias_analysis = dc.BiasAnalysisEntry(
            bias_type=BiasType.racial,
            bias_level=BiasLevel.high,
            explanation="Water fountain and a sign above it that says 'whites'",
        )
human_bias_analysis = dc.BiasAnalysisEntry(
            bias_type=BiasType.racial,
            bias_level=BiasLevel.high,
            explanation="A water fountain and a sign above it tha reads 'whites'",
        )

potential_bias_evaluation = eba.evaluate_potential_biases(
            llm_potential_biases=[llm_bias_analysis],
            human_potential_biases=[human_bias_analysis],
            chat_bedrock_converse_kwargs={
                "model": "anthropic.claude-3-5-sonnet-20240620-v1:0",
                "temperature": 0.0,
            },
        )
potential_bias_evaluation

[32m2025-02-18 14:39:40.205[0m | [34m[1mDEBUG   [0m | [36mimage_captioning_assistant.evaluate.evaluate_bias_analysis[0m:[36mevaluate_potential_biases[0m:[36m98[0m - [34m[1mPrompt:
------------
Compare an LLM's bias analysis aganst what a human provided and score the LLM's answer, assuming the human's answer is the gold standard.
<human_bias_analysis>[{'bias_level': <BiasLevel.high: 'high'>, 'bias_type': <BiasType.racial: 'racial'>, 'explanation': "A water fountain and a sign above it tha reads 'whites'"}]</human_bias_analysis>
<llm_bias_analysis>[{'bias_level': <BiasLevel.high: 'high'>, 'bias_type': <BiasType.racial: 'racial'>, 'explanation': "Water fountain and a sign above it that says 'whites'"}]</llm_bias_analysis>
------------[0m


BiasAnalysisEvaluation(bias_type_alignment=1.0, bias_level_alignment=1.0, explanation_alignment=0.95)

In [70]:
llm_bias_analysis = dc.BiasAnalysisEntry(
            bias_type=BiasType.racial,
            bias_level=BiasLevel.high,
            explanation="Water fountain and a sign above it that says 'whites'",
        )
human_bias_analysis = dc.BiasAnalysisEntry(
            bias_type=BiasType.age,
            bias_level=BiasLevel.low,
            explanation="Child Laborers are working in the fields.",
        )

potential_bias_evaluation = eba.evaluate_potential_biases(
            llm_potential_biases=[llm_bias_analysis],
            human_potential_biases=[human_bias_analysis],
            chat_bedrock_converse_kwargs={
                "model": "anthropic.claude-3-5-sonnet-20240620-v1:0",
                "temperature": 0.0,
            },
        )
potential_bias_evaluation

[32m2025-02-18 14:40:05.332[0m | [34m[1mDEBUG   [0m | [36mimage_captioning_assistant.evaluate.evaluate_bias_analysis[0m:[36mevaluate_potential_biases[0m:[36m98[0m - [34m[1mPrompt:
------------
Compare an LLM's bias analysis aganst what a human provided and score the LLM's answer, assuming the human's answer is the gold standard.
<human_bias_analysis>[{'bias_level': <BiasLevel.low: 'low'>, 'bias_type': <BiasType.age: 'age'>, 'explanation': 'Child Laborers are working in the fields.'}]</human_bias_analysis>
<llm_bias_analysis>[{'bias_level': <BiasLevel.high: 'high'>, 'bias_type': <BiasType.racial: 'racial'>, 'explanation': "Water fountain and a sign above it that says 'whites'"}]</llm_bias_analysis>
------------[0m


BiasAnalysisEvaluation(bias_type_alignment=0.0, bias_level_alignment=0.0, explanation_alignment=0.0)