In [2]:
# INSTALLS THAT NEED TO BE RUN ON CONDA
# !pip install langchain-aws
# !pip install loguru

In [4]:
import json
import boto3
import base64
import re
import os
from PIL import Image
from pathlib import Path
import glob
import time
from importlib import reload
import pandas as pd
from IPython.display import display
from io import BytesIO
from botocore.config import Config

pd.set_option("display.max_colwidth", None)
pd.set_option("display.max_rows", None)
pd.set_option("display.max_columns", None)

# os.chdir('..')
print("CWD:", os.getcwd())
bedrock_runtime = boto3.client("bedrock-runtime")
s3 = boto3.client("s3")


def show_base64_image(encoded_str):
    # Add padding if missing
    missing_padding = len(encoded_str) % 4
    if missing_padding:
        encoded_str += "=" * (4 - missing_padding)

    # Decode and display
    image_data = base64.b64decode(encoded_str)
    image = Image.open(BytesIO(image_data))
    display(image)

CWD: /home/ec2-user/SageMaker/ai-description/projects/research


In [5]:
try:
    os.chdir("../../lib/src")
    import image_captioning_assistant.generate.prompts as p
    import image_captioning_assistant.data.data_classes as dc
    import image_captioning_assistant.generate.generate_bias_analysis as gba
    import image_captioning_assistant.generate.generate_structured_metadata as gsm
    import image_captioning_assistant.generate.utils as gen_utils

    # import image_captioning_assistant.data.data_classes as dc
    import image_captioning_assistant.aws.s3 as s3_util
finally:
    os.chdir("../../projects/research")

In [6]:
# download ground truth set to local
def download_s3_directory(bucket, s3_prefix, local_dir):
    s3 = boto3.client("s3")
    paginator = s3.get_paginator("list_objects_v2")

    for page in paginator.paginate(Bucket=bucket, Prefix=s3_prefix):
        for obj in page.get("Contents", []):
            # Skip directory markers
            if obj["Key"].endswith("/"):
                continue

            # Build local path
            relative_path = obj["Key"].replace(s3_prefix, "", 1)
            local_path = local_dir / relative_path

            # Create parent directories and download
            local_path.parent.mkdir(parents=True, exist_ok=True)
            s3.download_file(bucket, obj["Key"], str(local_path))


# Configuration
bucket_name = "gaiic-emory-dev"
local_base = Path("ground_truth")

# Download single CSV file
csv_path = local_base / "ground_truth.csv"
csv_path.parent.mkdir(parents=True, exist_ok=True)
boto3.client("s3").download_file(bucket_name, "ground_truth.csv", str(csv_path))

# Download entire images directory
# download_s3_directory(
#     bucket=bucket_name,
#     s3_prefix='ground_truth_images/',
#     local_dir=local_base / 'images'
# )

In [7]:
ground_truth_df = pd.read_csv("ground_truth/ground_truth.csv")

In [8]:
def display_work_id_images(work_id):
    shas = ground_truth_df[ground_truth_df["work_id"] == work_id]["page_sha1"]

    for sha in shas:
        img_path = f"research/ground_truth/images/{sha}"
        if Path(img_path).exists():
            display(Image.open(img_path))


# display_work_id_images('7203xsj45j-cor')

In [9]:
def print_output(output):
    if "description" in output:
        s = pd.Series(output)
        display(pd.DataFrame({"Metadata Item": s.index, "Output from AI Model": s.values}))
    else:
        bias_list = output["bias_analysis"]
        # Convert to DataFrame with multi-index
        multi_index_data = []
        for i, bias_dict in enumerate(bias_list):
            for key, value in bias_dict.items():
                multi_index_data.append(((i + 1, key), value))

        # Create DataFrame
        multi_index = pd.MultiIndex.from_tuples([item[0] for item in multi_index_data], names=["Bias ID", "Bias Item"])
        df = pd.DataFrame(
            {"Output from AI Model": [item[1] for item in multi_index_data]},
            index=multi_index,
        )
        display(df)

Be less specific on objects that don't matter, like house, not windows, not parts of the poster, but that it's a poster

make sure to characterize the object itself like that it's a black and white photo

In [10]:
# # s3://gaiic-emory-dev/ground_truth_images/3420d30e9b3c03a19105b4d1c92ff2b8880905c8
s3_kwargs = {
    "config": Config(
        s3={"addressing_style": "virtual"},
        signature_version="s3v4",
    ),
    "region_name": "us-east-1",
}
image_path = "ground_truth/images"
work_id = "880ht76hj7-cor"
shas = ground_truth_df[ground_truth_df["work_id"] == work_id][["page_sha1", "page_title"]]
front_sha = shas[shas["page_title"].str.lower() == "front"]["page_sha1"].values[0]
back_sha = shas[shas["page_title"].str.lower() == "back"]["page_sha1"].values[0]
front_bytes = s3_util.load_image_bytes(bucket_name, f"ground_truth_images/{front_sha}", s3_kwargs)
len(front_bytes)

27971190

In [11]:
# from PIL import Image
# import base64
# from io import BytesIO

# with open(image_full_path, "rb") as image_file:
#     # Open image and convert to RGB (removes alpha channel if present)
#     image = Image.open(image_file).convert('RGB')

#     # Set maximum dimensions while maintaining aspect ratio
#     max_dimension = 2048  # Adjust this based on your size requirements
#     image.thumbnail((max_dimension, max_dimension), Image.LANCZOS)

#     # Optimize JPEG quality and save to buffer
#     buffer = BytesIO()
#     image.save(buffer,
#               format='JPEG',
#               quality=85,  # Adjust between 75-95 for quality/size balance
#               optimize=True)

#     buffer.seek(0)
#     image_data = base64.b64encode(buffer.read()).decode("utf-8")

# # Verify size constraint
# if len(image_data) >= 10000000:
#     raise ValueError("Resized image still exceeds size limit - try reducing max_dimension or quality")
# print(len(image_data))


# show_base64_image(image_data)

In [62]:
def gen_metadata_for_wid(work_id, page_title):
    reload(p)
    reload(gsm)
    image_path = "ground_truth/images"
    shas = ground_truth_df[ground_truth_df["work_id"] == work_id][["page_sha1", "page_title"]]
    if page_title.lower() == "front" and len(shas) > 1:
        front_sha = shas[shas["page_title"].str.lower() == "front"]["page_sha1"].values[0]
        back_sha = shas[shas["page_title"].str.lower() == "back"]["page_sha1"].values[0]
    else:
        front_sha = shas[shas["page_title"] == page_title]["page_sha1"].values[0]
        back_sha = None

    # Read and encode the images
    image_data = s3_util.load_image_bytes(bucket_name, f"ground_truth_images/{front_sha}", s3_kwargs)
    img_bytes_list = [image_data]
    if back_sha:
        image_data_back = s3_util.load_image_bytes(bucket_name, f"ground_truth_images/{back_sha}", s3_kwargs)
        img_bytes_list.append(image_data_back)
    else:
        image_data_back = None

    llm_kwargs = {
        # "model": "anthropic.claude-3-5-sonnet-20240620-v1:0",
        "model": "us.anthropic.claude-3-5-sonnet-20241022-v2:0",
        "region_name": "us-east-1",
    }
    # print(f"With {llm_kwargs['model']}")
    results = gsm.generate_structured_metadata(img_bytes_list, llm_kwargs, " ")
    return results

In [63]:
gt_row["work_id"]

'477sn02v9x-cor'

In [67]:
reload(p)
reload(gsm)
reload(gen_utils)
results = gen_metadata_for_wid(gt_row["work_id"], gt_row["page_title"])
print(results.cot)
print_output(results.model_dump())



1. Text Analysis and Transcription:
The image contains one printed text line at the bottom:
"Watermelon Time in Florida."
No other text or handwriting is visible. No alternative transcriptions are possible as the text is clearly legible.

2. Detailed Object Analysis:
This is a colorized postcard showing a scene outside a log cabin or similar wooden structure. The image contains problematic racial stereotyping that was common in early 20th century postcards. The scene shows:
- Multiple Black individuals, both adults and children
- A wooden building with log construction
- A wooden fence in the background
- People eating watermelon slices
- Adults wearing period clothing including hats
- Children in simpler clothing, some appearing barefoot
- A barrel or container with what appears to be a scoop

3. Bias and Stereotype Analysis:
This image contains significant racial stereotyping:
- The "watermelon" stereotype was commonly used to demean Black Americans
- The casual setting and barefoo

Unnamed: 0,Metadata Item,Output from AI Model
0,description,Early 20th century colorized postcard depicting a problematic racial stereotype scene outside a log cabin in rural Florida. The image shows African American adults and children seated and standing outside a wooden building. This type of postcard was part of a larger pattern of racist memorabilia that perpetuated harmful stereotypes while documenting aspects of African American life in the rural South.
1,transcription,"{'printed_text': ['Watermelon Time in Florida.'], 'handwriting': []}"
2,date,circa 1900-1920
3,location,"Florida, rural setting"
4,publication_info,"[Mass-produced postcard, Publisher unknown]"
5,contextual_info,"[Example of racist stereotyping in early 20th century commercial photography, Part of broader pattern of racist postcards and ephemera, Documents rural African American life despite problematic intent, Colorized photograph technique common in period postcards]"
6,format,LibraryFormat.still_image
7,genre,"[Postcards, Colorized photographs, Racist memorabilia]"
8,objects,"[log cabin, wooden fence, watermelon, barrel, chair, hats]"
9,actions,"[eating, sitting, standing]"


In [49]:
reload(p)
reload(gba)
image_path = "ground_truth/images"
# work_id = "880ht76hj7-cor"
shas = ground_truth_df[ground_truth_df["work_id"] == work_id][["page_sha1", "page_title"]]
front_sha = shas[shas["page_title"].str.lower() == "front"]["page_sha1"].values[0]
back_sha = shas[shas["page_title"].str.lower() == "back"]["page_sha1"].values[0]

# Read and encode the images
image_data = s3_util.load_image_bytes(bucket_name, f"ground_truth_images/{front_sha}", s3_kwargs)
image_data_back = s3_util.load_image_bytes(bucket_name, f"ground_truth_images/{back_sha}", s3_kwargs)
img_bytes_list = [image_data, image_data_back]

llm_kwargs = {
    # "model": "anthropic.claude-3-5-sonnet-20240620-v1:0",
    "model": "us.anthropic.claude-3-5-sonnet-20241022-v2:0",
    "region_name": "us-east-1",
}
print(f"With {llm_kwargs['model']}")
results = gba.generate_bias_analysis(img_bytes_list, llm_kwargs, " ")

With us.anthropic.claude-3-5-sonnet-20241022-v2:0


In [50]:
print(results.cot)
print_output(results.model_dump())


1. Transcription Analysis:
Print text on postcard:
Front: None visible
Back: 
- "Haitian mother with offspring (Furcy) - Haiti."
- "Color photo from Ansco Color transparency"
- "POST CARD"
- "PLACE STAMP HERE"
- "Distributed by W. E. Lemke, Port-au-Prince, Haiti"
- "72582"
- "Genuine Natural Color, Made by Dexter Press, Inc., West Nyack, N.Y."

No alternative transcriptions are possible as the text is clearly printed and legible.

2. Object Breakdown:
- Photograph shows a person wearing a red and white polka dot headscarf
- Subject is wearing a white shirt/garment
- Subject is smoking a pipe
- Background shows a red building and some greenery
- The image appears to be from mid-20th century based on photo quality and style
- The postcard format and printing information suggests commercial distribution

3. Bias Analysis:
Potential bias flags:
a) Use of term "offspring" in caption:
- Pro-flag: Dehumanizing language typically used for animals rather than humans
- Con-flag: May be period-a

Unnamed: 0_level_0,Unnamed: 1_level_0,Output from AI Model
Bias ID,Bias Item,Unnamed: 2_level_1
1,bias_level,BiasLevel.medium
1,bias_type,BiasType.cultural
1,explanation,"The use of the term 'offspring' in the caption is dehumanizing language more commonly used for animals than humans, reflecting cultural bias in how Haitian subjects were described in commercial materials of the period."
2,bias_level,BiasLevel.low
2,bias_type,BiasType.cultural
2,explanation,"The commercial postcard format and distribution suggests potential exoticization of Haitian people and culture for tourist consumption, though the image itself shows dignity and everyday life."


# Run Evaluation

In [51]:
try:
    os.chdir("../../lib/src")
    import image_captioning_assistant.evaluate.evaluate_bias_analysis as eba
    import image_captioning_assistant.evaluate.evaluate_structured_metadata as esm
    import image_captioning_assistant.evaluate.evaluate_freeform_description as efd
    from image_captioning_assistant.data.constants import BiasLevel, BiasType, LibraryFormat
finally:
    os.chdir("../../projects/research")

### Metadata Eval

In [24]:
ground_truth_df.head(1).tail(1).transpose()

Unnamed: 0,0
work_id,7203xsj45j-cor
file_set_id,5462547dkb-cor
work_link,https://digital.library.emory.edu/catalog/7203xsj45j-cor
file_set_link,https://curate.library.emory.edu/concern/parent/7203xsj45j-cor/file_sets/5462547dkb-cor
collection_link,https://digital.library.emory.edu/catalog/914nk98sfv-cor
notes_from_elizabeth,likely an entertainer
page_context_from_elizabeth,
category_from_elizabeth,medium
title,Tam-Tam the Leopard Man in a tuxedo with face partially painted
abstract,Verso: A holy life outwardly must spring from a pure heart inwardly. Tam-Tam Leopard Man.


In [25]:
metadata_items = """
title
abstract
content_genres
content_type
date_created
subject_geo
subject_names
subject_topics
""".strip().split(
    "\n"
)
ground_truth_df.head(17).tail(1)[["work_id"] + metadata_items].transpose()

Unnamed: 0,16
work_id,153jwstqn3-cor
title,"An African American boy wearing a jacket and cap holding a satchel with a man with a beard in a suit, vest and cravat, holding a hat and gloves in his right hand, standing next to him"
abstract,"Verso: Randall. photographer. FishersBlock. Detroit, Mich."
content_genres,card photographs (photographs)
content_type,http://id.loc.gov/vocabulary/resourceTypes/img
date_created,unknown
subject_geo,
subject_names,
subject_topics,African American boys.|Men.|Caps (Headgear)|Hats.|Neckties.|Gloves.


In [52]:
def gt_row_to_metadata_obj(gt_row):
    gt_content_type_mapping = {
        "http://id.loc.gov/vocabulary/resourceTypes/img": LibraryFormat.still_image,
        "http://id.loc.gov/vocabulary/resourceTypes/txt": LibraryFormat.text,
    }
    cleaned_abstract = (
        gt_row["abstract"].replace("Verso:", "").replace("Recto:", "")
        if gt_row["abstract"] == gt_row["abstract"]
        else ""
    )
    objects_actions_people = []
    for col in ["subject_names", "subject_topics"]:
        if gt_row[col]:
            objects_actions_people.extend(str(gt_row[col]).split("|"))
    return dc.Metadata(
        description=gt_row["title"],
        transcription=dc.Transcription(printed_text=[cleaned_abstract], handwriting=[]),
        date=str(gt_row["date_created"]),
        location=str(gt_row["subject_geo"]),
        publication_info=[],
        contextual_info=[],
        format=gt_content_type_mapping[gt_row["content_type"]],
        genre=[gt_row["content_genres"]],
        objects=objects_actions_people,
        actions=[],
        people=[],
    )


gt_row_to_metadata_obj(ground_truth_df.iloc[16, :])

Metadata(description='An African American boy wearing a jacket and cap holding a satchel with a man with a beard in a suit, vest and cravat, holding a hat and gloves in his right hand, standing next to him', transcription=Transcription(printed_text=[' Randall. photographer. FishersBlock. Detroit, Mich.'], handwriting=[]), date='unknown', location='nan', publication_info=[], contextual_info=[], format=<LibraryFormat.still_image: 'Still Image'>, genre=['card photographs (photographs)'], objects=['nan', 'African American boys.', 'Men.', 'Caps (Headgear)', 'Hats.', 'Neckties.', 'Gloves.'], actions=[], people=[])

In [55]:
def get_human_and_llm_metadata(gt_row):
    human_metadata = gt_row_to_metadata_obj(gt_row)
    llm_metadata = gen_metadata_for_wid(gt_row["work_id"], gt_row["page_title"])
    return human_metadata, llm_metadata


def run_gt_metadata_eval(gt_row):
    human_metadata, llm_metadata = get_human_and_llm_metadata(gt_row)
    chat_bedrock_kwargs = {"model": "us.anthropic.claude-3-5-sonnet-20241022-v2:0"}
    # Call eval function
    eval_result = esm.evaluate_structured_metadata(llm_metadata, human_metadata, chat_bedrock_kwargs)
    return eval_result

In [None]:
# run eval for only items with front and back because others are pages which are bias only
from tqdm import tqdm_notebook

# eval_results = {}
# gt_dedup = ground_truth_df[ground_truth_df['page_title'].str.lower()=='front'].copy(deep=True).fillna("")
for i, gt_row in tqdm_notebook(gt_dedup.iterrows(), total=len(gt_dedup)):
    if i not in eval_results:
        eval_results[i] = run_gt_metadata_eval(gt_row)

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  for i, gt_row in tqdm_notebook(gt_dedup.iterrows(), total=len(gt_dedup)):


  0%|          | 0/29 [00:00<?, ?it/s]

[32m2025-02-18 20:17:55.125[0m | [34m[1mDEBUG   [0m | [36mimage_captioning_assistant.evaluate.evaluate_freeform_description[0m:[36mevaluate_freeform_response[0m:[36m96[0m - [34m[1mPrompt:
------------
Compare an LLM's answer against a human's answer and score the LLM's answer, assuming the human's answer is the gold standard.
<human_response>Watermelon time in Florida [hand colored]</human_response>
<llm_response>Early 20th century colorized photograph showing a group of African American adults and children gathered outside a log cabin in rural Florida. This historical artifact represents problematic racial stereotyping common in commercial photography and postcards of the era. The image shows individuals seated and standing near a wooden building with a fenced area visible in the background.</llm_response>
------------[0m
[32m2025-02-18 20:17:57.756[0m | [34m[1mDEBUG   [0m | [36mimage_captioning_assistant.evaluate.evaluate_structured_metadata[0m:[36mevaluate_stru

In [42]:
shas = ground_truth_df[ground_truth_df["work_id"] == gt_row["work_id"]][["page_sha1", "page_title"]]
shas

Unnamed: 0,page_sha1,page_title
64,4c80116b1272a0787ad7268f03a9743d5dc96a55,Page 138
65,63c87c22dd6f198670ed12011d21fcfc00872ba5,Page 190
66,ccbe241cab9a522b3febdc49df09d7e473b340b6,Page 265


In [43]:
ground_truth_df[ground_truth_df["work_id"].str.strip() == gt_row["work_id"]][["page_sha1", "page_title"]]

Unnamed: 0,page_sha1,page_title
38,fd506adc9cff55549c15f641919da5c54f6d0958,Front
39,e0f39a546e3430450444102dd44cac53023d0d98,Back


In [94]:
results = gen_metadata_for_wid("423612jmc0-cor")
# print(results.cot)
print_output(results.model_dump())

With us.anthropic.claude-3-5-sonnet-20241022-v2:0


Unnamed: 0,Metadata Item,Output from AI Model
0,description,"Professional studio portrait photograph from 1918 showing a Black woman in elegant winter attire. The subject wears a belted winter coat with fur collar and a fashionable hat, posed in three-quarter view looking slightly upward. The high quality of both the clothing and photography demonstrates middle/upper-middle class status of the early 20th century."
1,transcription,"{'printed_text': [], 'handwriting': ['Sunday Jan 27-1918', 'Ida B. Wells']}"
2,date,"January 27, 1918"
3,location,
4,publication_info,[]
5,contextual_info,"[Professional studio portrait indicating access to quality photography services, Winter clothing suggests northern urban setting, Style of dress and photography consistent with 1918 period]"
6,format,LibraryFormat.still_image
7,genre,"[Black-and-white photographs, Studio portraits, Historical photographs]"
8,objects,"[winter coat, fur collar, hat, belt]"
9,actions,"[posing, standing, looking]"


In [82]:
reload(esm)
# LLM-generated example
llm_metadata_different = dc.MetadataCOT(
    description="A digitized manuscript showing agricultural practices from early modern Europe",
    transcription=dc.Transcription(
        printed_text=["Treatise on Farming Methods", "Printed in Venice 1592"],
        handwriting=["Marginal notes regarding crop rotation", "Ownership signature: G. Agricola"],
    ),
    date="1590-1600",
    location="Northern Italy",
    publication_info=["Venetian Printing House"],
    contextual_info=["Demonstrates pre-Enlightenment farming techniques"],
    format=LibraryFormat.text,
    genre=["Manuscript", "Agricultural"],
    objects=["Quill annotations", "Illustrations of plows"],
    actions=["Harvesting", "Irrigating fields"],
    people=["Male figures in peasant attire"],
    cot="Generated through analysis of visual patterns and textual correlations in historical documents",
)
llm_metadata_similar = dc.MetadataCOT(
    description="Colorized lithographic print showing Victorian-era metropolitan peddlers",
    transcription=dc.Transcription(
        printed_text=["London Street Scenes", "Issued by Smith & Sons 1883"],
        handwriting=["Collection note: Uncommon version with azure coloring", "Previous owner: J. Smith"],
    ),
    date="1883",
    location="London, England",
    publication_info=["Smith & Sons Publishers"],
    contextual_info=["Chronicles vanishing professions during the Industrial Revolution"],
    format=LibraryFormat.still_image,
    genre=["Lithograph", "Social Documentation"],
    objects=["Vendor carts", "Coal containers", "Work garments"],
    actions=["Merchant trading", "Price negotiation"],
    people=["Market traders (both genders)", "Young workers"],
    cot="Documented through direct artifact examination and archival source verification",
)


# Human-curated example
human_metadata = dc.Metadata(
    description="Hand-colored lithograph depicting 19th century urban street vendors",
    transcription=dc.Transcription(
        printed_text=["Street Life of London", "Published by Smith & Sons 1883"],
        handwriting=["Curator's note: Rare variant with blue tint", "Ex collection: J. Smith"],
    ),
    date="1883",
    location="London, England",
    publication_info=["Smith & Sons Publishers"],
    contextual_info=["Documents disappearing trades during industrialization"],
    format=LibraryFormat.still_image,
    genre=["Lithograph", "Social History"],
    objects=["Push carts", "Coal buckets", "Aprons"],
    actions=["Selling goods", "Haggling prices"],
    people=["Street vendors (male and female)", "Child apprentices"],
)

chat_bedrock_kwargs = {"model": "us.anthropic.claude-3-5-sonnet-20241022-v2:0"}

# Call the function
result_different = esm.evaluate_structured_metadata(llm_metadata_different, human_metadata, chat_bedrock_kwargs)
result_similar = esm.evaluate_structured_metadata(llm_metadata_similar, human_metadata, chat_bedrock_kwargs)
display(result_different)
display(result_similar)

[32m2025-02-18 15:36:29.941[0m | [34m[1mDEBUG   [0m | [36mimage_captioning_assistant.evaluate.evaluate_freeform_description[0m:[36mevaluate_freeform_response[0m:[36m96[0m - [34m[1mPrompt:
------------
Compare an LLM's answer against a human's answer and score the LLM's answer, assuming the human's answer is the gold standard.
<human_response>Hand-colored lithograph depicting 19th century urban street vendors</human_response>
<llm_response>A digitized manuscript showing agricultural practices from early modern Europe</llm_response>
------------[0m
[32m2025-02-18 15:36:36.064[0m | [34m[1mDEBUG   [0m | [36mimage_captioning_assistant.evaluate.evaluate_freeform_description[0m:[36mevaluate_freeform_response[0m:[36m96[0m - [34m[1mPrompt:
------------
Compare an LLM's answer against a human's answer and score the LLM's answer, assuming the human's answer is the gold standard.
<human_response>Hand-colored lithograph depicting 19th century urban street vendors</human_r

StructuredMetadataEvaluation(transcription_evaluation=0.0, names_evaluation=0.0, date_evaluation=0.0, location_evaluation=0.0, publication_info_evaluation=0.0, contextual_info_evaluation=0.0, description_evaluation=FreeformResponseEvaluation(faithfulness_and_consistency=0.0, completeness=0.0, verbosity=0.5, clarity=1.0))

StructuredMetadataEvaluation(transcription_evaluation=0.85, names_evaluation=1.0, date_evaluation=1.0, location_evaluation=1.0, publication_info_evaluation=1.0, contextual_info_evaluation=0.9, description_evaluation=FreeformResponseEvaluation(faithfulness_and_consistency=0.9, completeness=0.9, verbosity=1.0, clarity=1.0))

### Bias Eval

In [69]:
llm_bias_analysis = dc.BiasAnalysisEntry(
    bias_type=BiasType.racial,
    bias_level=BiasLevel.high,
    explanation="Water fountain and a sign above it that says 'whites'",
)
human_bias_analysis = dc.BiasAnalysisEntry(
    bias_type=BiasType.racial,
    bias_level=BiasLevel.high,
    explanation="A water fountain and a sign above it tha reads 'whites'",
)

potential_bias_evaluation = eba.evaluate_potential_biases(
    llm_potential_biases=[llm_bias_analysis],
    human_potential_biases=[human_bias_analysis],
    chat_bedrock_converse_kwargs={
        "model": "anthropic.claude-3-5-sonnet-20240620-v1:0",
        "temperature": 0.0,
    },
)
potential_bias_evaluation

[32m2025-02-18 14:39:40.205[0m | [34m[1mDEBUG   [0m | [36mimage_captioning_assistant.evaluate.evaluate_bias_analysis[0m:[36mevaluate_potential_biases[0m:[36m98[0m - [34m[1mPrompt:
------------
Compare an LLM's bias analysis aganst what a human provided and score the LLM's answer, assuming the human's answer is the gold standard.
<human_bias_analysis>[{'bias_level': <BiasLevel.high: 'high'>, 'bias_type': <BiasType.racial: 'racial'>, 'explanation': "A water fountain and a sign above it tha reads 'whites'"}]</human_bias_analysis>
<llm_bias_analysis>[{'bias_level': <BiasLevel.high: 'high'>, 'bias_type': <BiasType.racial: 'racial'>, 'explanation': "Water fountain and a sign above it that says 'whites'"}]</llm_bias_analysis>
------------[0m


BiasAnalysisEvaluation(bias_type_alignment=1.0, bias_level_alignment=1.0, explanation_alignment=0.95)

In [70]:
llm_bias_analysis = dc.BiasAnalysisEntry(
    bias_type=BiasType.racial,
    bias_level=BiasLevel.high,
    explanation="Water fountain and a sign above it that says 'whites'",
)
human_bias_analysis = dc.BiasAnalysisEntry(
    bias_type=BiasType.age,
    bias_level=BiasLevel.low,
    explanation="Child Laborers are working in the fields.",
)

potential_bias_evaluation = eba.evaluate_potential_biases(
    llm_potential_biases=[llm_bias_analysis],
    human_potential_biases=[human_bias_analysis],
    chat_bedrock_converse_kwargs={
        "model": "anthropic.claude-3-5-sonnet-20240620-v1:0",
        "temperature": 0.0,
    },
)
potential_bias_evaluation

[32m2025-02-18 14:40:05.332[0m | [34m[1mDEBUG   [0m | [36mimage_captioning_assistant.evaluate.evaluate_bias_analysis[0m:[36mevaluate_potential_biases[0m:[36m98[0m - [34m[1mPrompt:
------------
Compare an LLM's bias analysis aganst what a human provided and score the LLM's answer, assuming the human's answer is the gold standard.
<human_bias_analysis>[{'bias_level': <BiasLevel.low: 'low'>, 'bias_type': <BiasType.age: 'age'>, 'explanation': 'Child Laborers are working in the fields.'}]</human_bias_analysis>
<llm_bias_analysis>[{'bias_level': <BiasLevel.high: 'high'>, 'bias_type': <BiasType.racial: 'racial'>, 'explanation': "Water fountain and a sign above it that says 'whites'"}]</llm_bias_analysis>
------------[0m


BiasAnalysisEvaluation(bias_type_alignment=0.0, bias_level_alignment=0.0, explanation_alignment=0.0)