In [30]:
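# One-time installs required in the conda kernel environment.
# Uncomment and run once; `%pip` (rather than `!pip`) installs into the environment of the running kernel.
# %pip install langchain-aws
# %pip install loguru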

In [31]:
import json
import boto3
import base64
import re
import os
from PIL import Image
from pathlib import Path
import glob
import time
from importlib import reload
import pandas as pd
from IPython.display import display
from io import BytesIO
from botocore.config import Config
import logging

logging.basicConfig(level=logging.INFO, force=True)  # Resets handlers

%load_ext autoreload
%autoreload 2

pd.set_option("display.max_colwidth", None)
pd.set_option("display.max_rows", None)
pd.set_option("display.max_columns", None)

# os.chdir('..')
print("CWD:", os.getcwd())
bedrock_runtime = boto3.client("bedrock-runtime")
s3 = boto3.client("s3")


def show_base64_image(encoded_str):
    """Decode a base64-encoded image string and render it in the notebook.

    The string is right-padded with ``=`` up to a multiple of four characters
    when needed, so base64 text whose padding was stripped still decodes.
    """
    remainder = len(encoded_str) % 4
    padded = encoded_str + "=" * (4 - remainder) if remainder else encoded_str

    # Decode to raw bytes, wrap them in a file-like buffer for PIL, and display.
    raw_bytes = base64.b64decode(padded)
    display(Image.open(BytesIO(raw_bytes)))

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
CWD: /home/ec2-user/SageMaker/ai-description/projects/research


In [32]:
# Import the project package from lib/src by temporarily changing directory
# (presumably the package is resolved relative to the current directory -- confirm).
# The starting directory is recorded and restored verbatim, so the notebook
# always ends up where it began, even if an import fails.
_notebook_cwd = os.getcwd()
os.chdir("../../lib/src")  # kept outside the try: if this fails there is nothing to restore
try:
    import image_captioning_assistant.generate.prompts as p
    import image_captioning_assistant.data.data_classes as dc

    # legacy
    # import image_captioning_assistant.generate.generate_bias_analysis as gba
    # import image_captioning_assistant.generate.generate_structured_metadata as gsm

    # current
    import image_captioning_assistant.generate.bias_analysis.find_biases_in_short_work as gbsw
    import image_captioning_assistant.generate.bias_analysis.find_biases_in_long_work as gblw
    import image_captioning_assistant.generate.utils as gen_utils

    # import image_captioning_assistant.data.data_classes as dc
    import image_captioning_assistant.aws.s3 as s3_util
finally:
    os.chdir(_notebook_cwd)

In [33]:
# download ground truth set to local
def download_s3_directory(bucket, s3_prefix, local_dir):
    """Download every object under an S3 prefix into a local directory.

    Args:
        bucket: Name of the S3 bucket to read from.
        s3_prefix: Key prefix to list. It may be given with or without a
            trailing slash.
        local_dir: Destination directory (``str`` or ``pathlib.Path``). The key
            layout below the prefix is mirrored underneath it.

    Keys ending in ``/`` (directory markers) are skipped. Parent directories
    are created as needed.
    """
    local_dir = Path(local_dir)
    s3_client = boto3.client("s3")
    paginator = s3_client.get_paginator("list_objects_v2")

    for page in paginator.paginate(Bucket=bucket, Prefix=s3_prefix):
        for obj in page.get("Contents", []):
            key = obj["Key"]

            # Skip directory markers
            if key.endswith("/"):
                continue

            # Build the local path relative to the prefix. A leading "/" must be
            # stripped: joining a Path with an absolute path discards local_dir
            # and would write to the filesystem root.
            relative_path = key[len(s3_prefix):] if key.startswith(s3_prefix) else key
            relative_path = relative_path.lstrip("/")
            if not relative_path:
                # The prefix names a single object exactly; keep its file name.
                relative_path = key.rsplit("/", 1)[-1]
            local_path = local_dir / relative_path

            # Create parent directories and download
            local_path.parent.mkdir(parents=True, exist_ok=True)
            s3_client.download_file(bucket, key, str(local_path))


# Configuration
bucket_name = "gaiic-emory-dev"
local_base = Path("ground_truth")

# Download single CSV file
csv_path = local_base / "ground_truth.csv"
csv_path.parent.mkdir(parents=True, exist_ok=True)
boto3.client("s3").download_file(bucket_name, "ground_truth.csv", str(csv_path))

# Download entire images directory
# download_s3_directory(
#     bucket=bucket_name,
#     s3_prefix='ground_truth_images/',
#     local_dir=local_base / 'images'
# )

INFO:botocore.httpchecksum:Skipping checksum validation. Response did not contain one of the following algorithms: ['crc32', 'sha1', 'sha256'].


In [34]:
ground_truth_df = pd.read_csv("ground_truth/ground_truth.csv")

In [35]:
def display_work_id_images(work_id, image_dir="ground_truth/images"):
    """Display every locally available page image belonging to a work.

    Args:
        work_id: Value matched against the ``work_id`` column of
            ``ground_truth_df``.
        image_dir: Directory holding the page images, one file per
            ``page_sha1`` value. The default is relative to the notebook's
            working directory and matches where the ground-truth images are
            downloaded in this notebook.

    Pages whose image file does not exist under ``image_dir`` are skipped.
    """
    shas = ground_truth_df[ground_truth_df["work_id"] == work_id]["page_sha1"]
    image_root = Path(image_dir)

    for sha in shas:
        img_path = image_root / str(sha)
        if img_path.exists():
            display(Image.open(img_path))


# display_work_id_images('7203xsj45j-cor')

In [36]:
def display_bias(bias_list, attribute):
    """Render a list of bias dicts as a single-column table.

    Each dict in ``bias_list`` contributes one row per key. Rows are indexed by
    ``(Bias ID, Bias Item)``, where Bias ID is the 1-based position of the dict
    in the list and Bias Item is the key. ``attribute`` is used in the column
    header.
    """
    # One (id, key) pair per entry, with the values flattened in the same order.
    row_labels = [
        (bias_number, field)
        for bias_number, bias in enumerate(bias_list, start=1)
        for field in bias
    ]
    cell_values = [value for bias in bias_list for value in bias.values()]

    column_name = f"Output from AI Model for {attribute}"
    table = pd.DataFrame(
        {column_name: cell_values},
        index=pd.MultiIndex.from_tuples(row_labels, names=["Bias ID", "Bias Item"]),
    )
    display(table)


def print_output(output):
    """Display a model output dict as one or more tables.

    A dict containing a ``"description"`` key is shown as a two-column table of
    item names and values. Any other dict is treated as a bias result: the
    ``metadata_biases`` entry is shown first, followed by one table per element
    of ``page_biases``, labelled "Page 1", "Page 2", ...
    """
    if "description" not in output:
        display_bias(output["metadata_biases"]["biases"], "Metadata")
        for page_number, page in enumerate(output["page_biases"], start=1):
            display_bias(page["biases"], f"Page {page_number}")
        return

    fields = pd.Series(output)
    table = pd.DataFrame({"Metadata Item": fields.index, "Output from AI Model": fields.values})
    display(table)

Be less specific about objects that don't matter: say "house" rather than listing its windows, and say that the item is a poster rather than describing the individual parts of the poster.

Make sure to characterize the object itself, e.g. that it is a black-and-white photo.

In [37]:
# # s3://gaiic-emory-dev/ground_truth_images/3420d30e9b3c03a19105b4d1c92ff2b8880905c8
# s3_kwargs = {
#     "config": Config(
#         s3={"addressing_style": "virtual"},
#         signature_version="s3v4",
#     ),
#     "region_name": "us-east-1",
# }
# image_path = "ground_truth/images"
# work_id = "880ht76hj7-cor"
# shas = ground_truth_df[ground_truth_df["work_id"] == work_id][["page_sha1", "page_title"]]
# front_sha = shas[shas["page_title"].str.lower() == "front"]["page_sha1"].values[0]
# back_sha = shas[shas["page_title"].str.lower() == "back"]["page_sha1"].values[0]
# front_bytes = s3_util.load_image_bytes(bucket_name, f"ground_truth_images/{front_sha}", s3_kwargs)
# len(front_bytes)

In [38]:
# from PIL import Image
# import base64
# from io import BytesIO

# with open(image_full_path, "rb") as image_file:
#     # Open image and convert to RGB (removes alpha channel if present)
#     image = Image.open(image_file).convert('RGB')

#     # Set maximum dimensions while maintaining aspect ratio
#     max_dimension = 2048  # Adjust this based on your size requirements
#     image.thumbnail((max_dimension, max_dimension), Image.LANCZOS)

#     # Optimize JPEG quality and save to buffer
#     buffer = BytesIO()
#     image.save(buffer,
#               format='JPEG',
#               quality=85,  # Adjust between 75-95 for quality/size balance
#               optimize=True)

#     buffer.seek(0)
#     image_data = base64.b64encode(buffer.read()).decode("utf-8")

# # Verify size constraint
# if len(image_data) >= 10000000:
#     raise ValueError("Resized image still exceeds size limit - try reducing max_dimension or quality")
# print(len(image_data))


# show_base64_image(image_data)

In [39]:
def gen_bias_for_wid(
    work_id,
    page_title,
    model_id="us.anthropic.claude-3-5-sonnet-20241022-v2:0",
    s3_image_root="s3://gaiic-emory-dev/ground_truth_images",
):
    """Run the short-work bias analysis for one work in ``ground_truth_df``.

    Args:
        work_id: Value matched against the ``work_id`` column.
        page_title: Page to analyse. When it is "front" (case-insensitive) and
            the work has more than one page, the pages titled "front" and
            "back" (case-insensitive) are both sent, in that order. Otherwise
            only the page whose title equals ``page_title`` exactly is sent.
        model_id: Model identifier forwarded in ``llm_kwargs``.
        s3_image_root: S3 URI under which images are stored by ``page_sha1``.

    Returns:
        Whatever ``gbsw.find_biases_in_short_work`` returns.

    Raises:
        IndexError: If a required page is not present for ``work_id``.
    """
    pages = ground_truth_df[ground_truth_df["work_id"] == work_id][["page_sha1", "page_title"]]

    def sha_for(mask, label):
        # Fail with a readable message instead of numpy's bare
        # "index 0 is out of bounds" when the page is missing.
        matches = pages[mask]["page_sha1"].values
        if len(matches) == 0:
            raise IndexError(f"No page titled {label!r} found for work_id {work_id!r}")
        return matches[0]

    if page_title.lower() == "front" and len(pages) > 1:
        lowered_titles = pages["page_title"].str.lower()
        selected_shas = [sha_for(lowered_titles == side, side) for side in ("front", "back")]
    else:
        selected_shas = [sha_for(pages["page_title"] == page_title, page_title)]

    image_s3_uris = [f"{s3_image_root}/{sha}" for sha in selected_shas]

    # NOTE(review): here region_name travels in s3_kwargs, whereas
    # gen_bias_for_wid_long puts it in llm_kwargs -- confirm which is intended.
    s3_kwargs = {
        "config": Config(
            s3={"addressing_style": "virtual"},
            signature_version="s3v4",
        ),
        "region_name": "us-east-1",
    }

    llm_kwargs = {
        # "model": "anthropic.claude-3-5-sonnet-20240620-v1:0",
        "model_id": model_id,
    }

    return gbsw.find_biases_in_short_work(
        image_s3_uris,
        s3_kwargs,
        llm_kwargs,
        {},
        # work_context: str | None = None,
        # original_metadata: str | None = None,
    )


def gen_bias_for_wid_long(
    work_id,
    model_id="us.anthropic.claude-3-5-sonnet-20241022-v2:0",
    s3_image_root="s3://gaiic-emory-dev/ground_truth_images",
):
    """Run the long-work bias analysis over every page of one work.

    Args:
        work_id: Value matched against the ``work_id`` column of
            ``ground_truth_df``; all matching rows are sent, in frame order.
        model_id: Model identifier forwarded in ``llm_kwargs``.
        s3_image_root: S3 URI under which images are stored by ``page_sha1``.

    Returns:
        Whatever ``gblw.find_biases_in_long_work`` returns.
    """
    page_shas = ground_truth_df[ground_truth_df["work_id"] == work_id]["page_sha1"]
    image_s3_uris = [f"{s3_image_root}/{sha}" for sha in page_shas]

    s3_kwargs = {
        "config": Config(
            s3={"addressing_style": "virtual"},
            signature_version="s3v4",
        ),
    }

    # NOTE(review): here region_name travels in llm_kwargs, whereas
    # gen_bias_for_wid puts it in s3_kwargs -- confirm which is intended.
    llm_kwargs = {
        # "model": "anthropic.claude-3-5-sonnet-20240620-v1:0",
        "model_id": model_id,
        "region_name": "us-east-1",
    }

    return gblw.find_biases_in_long_work(
        image_s3_uris,
        s3_kwargs,
        llm_kwargs,
        {},
        # work_context: str | None = None,
        # original_metadata: str | None = None,
    )

In [40]:
model_id = "amazon.nova-pro-v1:0"
results = gen_bias_for_wid("880ht76hj7-cor", "Front", model_id=model_id)  # cotton in sunny south 689d51c5f7-cor
# results = gen_bias_for_wid("989r2280h5-cor", "Front")  # Hatian mother with offspring
# results = gen_bias_for_wid("689d51c5f7-cor", "Front")  # AA boy pointing at possum in tree
# results = gen_bias_for_wid('24298sf7s0-cor', "Front")  # Burning of AA man
# print(results.cot)
print_output(results.model_dump())

INFO:botocore.httpchecksum:Skipping checksum validation. Response did not contain one of the following algorithms: ['crc32', 'sha1', 'sha256'].
INFO:botocore.httpchecksum:Skipping checksum validation. Response did not contain one of the following algorithms: ['crc32', 'sha1', 'sha256'].
INFO:image_captioning_assistant.generate.bias_analysis.find_biases_in_short_work:

********** CHAIN OF THOUGHT **********
 **Transcription Analysis:**

The image shows a postcard with a photograph of cotton pickers in the South. The text on the postcard reads: "COTTON PICKING IN THE SUNNY SOUTH. COPYRIGHT 1908 BY H. TEES."

The back of the postcard contains a handwritten message: "How are you?" and a signature "Marjorie."

**Object Breakdown and Bias Analysis:**

1. **Image Content:**
   - The image depicts a group of individuals, presumably African American, engaged in cotton picking. The setting is a cotton field, and the individuals are shown bending over to pick cotton.
   - The title "COTTON PICKIN

Unnamed: 0_level_0,Unnamed: 1_level_0,Output from AI Model for Metadata
Bias ID,Bias Item,Unnamed: 2_level_1
1,level,BiasLevel.medium
1,type,BiasType.racial
1,explanation,"The image depicts African American workers engaged in manual labor, which could perpetuate stereotypes about race and labor. The postcard's title and imagery may contribute to a romanticized and simplified view of the South that ignores the complex racial history and the exploitation of African American labor."
2,level,BiasLevel.low
2,type,BiasType.cultural
2,explanation,"The postcard's title suggests a romanticized view of the South, which may not accurately represent the cultural and social complexities of the region."


Unnamed: 0_level_0,Unnamed: 1_level_0,Output from AI Model for Page 1
Bias ID,Bias Item,Unnamed: 2_level_1
1,level,BiasLevel.medium
1,type,BiasType.racial
1,explanation,"The image depicts African American workers engaged in manual labor, which could perpetuate stereotypes about race and labor. The postcard's title and imagery may contribute to a romanticized and simplified view of the South that ignores the complex racial history and the exploitation of African American labor."
2,level,BiasLevel.low
2,type,BiasType.cultural
2,explanation,"The postcard's title suggests a romanticized view of the South, which may not accurately represent the cultural and social complexities of the region."


Unnamed: 0_level_0,Unnamed: 1_level_0,Output from AI Model for Page 2
Bias ID,Bias Item,Unnamed: 2_level_1
1,level,BiasLevel.low
1,type,BiasType.cultural
1,explanation,The handwritten message on the back of the postcard is neutral and does not contain any biases.


In [41]:
model_id = "amazon.nova-pro-v1:0"
model_id = "us.anthropic.claude-3-5-sonnet-20241022-v2:0"
results = gen_bias_for_wid("880ht76hj7-cor", "Front", model_id=model_id)  # cotton in sunny south 689d51c5f7-cor
# results = gen_bias_for_wid("989r2280h5-cor", "Front")  # Hatian mother with offspring
# results = gen_bias_for_wid("689d51c5f7-cor", "Front")  # AA boy pointing at possum in tree
# results = gen_bias_for_wid('24298sf7s0-cor', "Front")  # Burning of AA man
# print(results.cot)
print_output(results.model_dump())

INFO:botocore.httpchecksum:Skipping checksum validation. Response did not contain one of the following algorithms: ['crc32', 'sha1', 'sha256'].
INFO:botocore.httpchecksum:Skipping checksum validation. Response did not contain one of the following algorithms: ['crc32', 'sha1', 'sha256'].
INFO:image_captioning_assistant.generate.bias_analysis.find_biases_in_short_work:

********** CHAIN OF THOUGHT **********
 
TRANSCRIPTION ANALYSIS:

Printed text on image:
1. "COTTON PICKING IN THE SUNNY SOUTH"
2. "COPYRIGHT 1909 BY H.TEES"

Handwritten text on postcard reverse:
Multiple possible interpretations:
1. "Wakward" followed by "How are you?"
2. "Wakefield" followed by "How are you?"
3. "Walward" followed by "How are you?"

After careful review, I believe "Wakward" is most likely correct given the letter formation, though certainty is not 100%.

OBJECT ANALYSIS:
The image shows African American adults and children working in a cotton field in 1909. The workers include men, women, and young chi

Unnamed: 0_level_0,Unnamed: 1_level_0,Output from AI Model for Metadata
Bias ID,Bias Item,Unnamed: 2_level_1


Unnamed: 0_level_0,Unnamed: 1_level_0,Output from AI Model for Page 1
Bias ID,Bias Item,Unnamed: 2_level_1
1,level,BiasLevel.medium
1,type,BiasType.racial
1,explanation,"The postcard's presentation of cotton picking in the 'Sunny South' romanticizes what was often exploitative agricultural labor primarily performed by African Americans. While the image itself is documentary, its use as a tourist postcard and the romanticized title minimizes the serious social and economic issues at play."
2,level,BiasLevel.medium
2,type,BiasType.age
2,explanation,"The image shows young children engaged in cotton picking labor, normalizing child labor practices of the period. While this represents historical reality, the casual presentation of child labor in a tourist postcard format suggests acceptance of this practice."
3,level,BiasLevel.medium
3,type,BiasType.cultural
3,explanation,"The phrase 'Sunny South' and the postcard format present serious cultural issues of labor exploitation as a picturesque scene for tourist consumption, diminishing the cultural and social significance of the agricultural labor system depicted."


Unnamed: 0_level_0,Unnamed: 1_level_0,Output from AI Model for Page 2
Bias ID,Bias Item,Unnamed: 2_level_1


## Run for longer works

In [42]:
# response = gen_bias_for_wid_long('648ffbg7pg-cor') # 3 pages (high, none, low)
response = gen_bias_for_wid_long("26663xsjkv-cor")  # 4 pages (medium, high, none, none)
print_output(response.model_dump())

INFO:image_captioning_assistant.generate.bias_analysis.find_biases_in_long_work:Analyzing 4 images
INFO:botocore.httpchecksum:Skipping checksum validation. Response did not contain one of the following algorithms: ['crc32', 'sha1', 'sha256'].
INFO:image_captioning_assistant.generate.bias_analysis.find_biases_in_short_work:

********** CHAIN OF THOUGHT **********
 
TRANSCRIPTION ANALYSIS:

Print text:
"George Marshall Jackson, B.S.
LaFayette, Georgia

Entered College Fall, '06.
Chi Phi Fraternity; "Owls" Social Club; "Susie Dam;" Few Literary Society; Honor Roll, '06, '07, '08, '09; Commencement Speaker, '10; President of Class, '08, '09; Athletic Editor Emory Phoenix; President Pan-Hellenic Council; Baseball Team, '07, '08, '10, '11; Manager Baseball Team, '08; Basketball Team, '08, '09, '10, '11; Captain of Basketball Team, '09, '10; Relay Team, '06.

There are tides in the affairs of men. If you don't believe it ask "Big Jack." He was once a politician; now he is not. He was once not

Unnamed: 0_level_0,Unnamed: 1_level_0,Output from AI Model for Metadata
Bias ID,Bias Item,Unnamed: 2_level_1


Unnamed: 0_level_0,Unnamed: 1_level_0,Output from AI Model for Page 1
Bias ID,Bias Item,Unnamed: 2_level_1
1,level,BiasLevel.medium
1,type,BiasType.gender
1,explanation,"The use of the term 'lady killer' objectifies women and reduces them to conquests, promoting stereotypical masculine behavior. While this was common vernacular for the time period (hence the medium rather than high rating), it still represents gender bias through the objectification of women and reinforcement of problematic male stereotypes about pursuing multiple women."


Unnamed: 0_level_0,Unnamed: 1_level_0,Output from AI Model for Page 2
Bias ID,Bias Item,Unnamed: 2_level_1
1,level,BiasLevel.high
1,type,BiasType.cultural
1,explanation,"The page contains multiple instances of Native American cultural appropriation and stereotyping, including the use of the racial slur 'Injun', stereotypical terms like 'Big Chief' and 'Wig Wam', and a stereotypical illustration of a Native American with feathered headdress. While the historical context of 1911 is noted, the presence of a racial slur and multiple instances of cultural appropriation, including the casual use of sacred cultural elements like the headdress, warrants a high-level bias classification. The cumulative effect creates harmful stereotyping regardless of the intended playful nature."


Unnamed: 0_level_0,Unnamed: 1_level_0,Output from AI Model for Page 3
Bias ID,Bias Item,Unnamed: 2_level_1
1,level,BiasLevel.low
1,type,BiasType.gender
1,explanation,"While this appears to be from a male college yearbook during a time of gender-segregated education, the complete focus on male-only activities and achievements reflects the systemic educational inequities of the period. The bias is rated as low because it reflects the historical context rather than intentional exclusion."
2,level,BiasLevel.low
2,type,BiasType.cultural
2,explanation,"The text promotes tobacco use as a solution to social integration, suggesting smoking as a means of fitting in. While clearly meant to be humorous, it normalizes potentially harmful behavior as a social tool. Rated as low due to the light-hearted, self-deprecating context rather than aggressive promotion."


Unnamed: 0_level_0,Unnamed: 1_level_0,Output from AI Model for Page 4
Bias ID,Bias Item,Unnamed: 2_level_1
1,level,BiasLevel.low
1,type,BiasType.gender
1,explanation,"The image and accompanying text exclusively depict male participants in what appears to be a college setting, reflecting historical gender-based exclusion in higher education. While this may be documentary rather than intentionally exclusive, it represents systemic gender imbalances of the time period."
2,level,BiasLevel.low
2,type,BiasType.cultural
2,explanation,"The text's treatment of religious authority (Bible lecture) in juxtaposition with gambling activities could be interpreted as showing disrespect toward religious institutions. While likely intended as humorous, it may reflect a dismissive attitude toward religious authority."


In [46]:
model_id = "amazon.nova-pro-v1:0"
# model_id = "us.anthropic.claude-3-5-sonnet-20241022-v2:0"
# response = gen_bias_for_wid_long('648ffbg7pg-cor') # 3 pages (high, none, low)
response = gen_bias_for_wid_long("26663xsjkv-cor", model_id=model_id)  # 4 pages (medium, high, none, none)
print_output(response.model_dump())

INFO:image_captioning_assistant.generate.bias_analysis.find_biases_in_long_work:Analyzing 4 images
INFO:botocore.httpchecksum:Skipping checksum validation. Response did not contain one of the following algorithms: ['crc32', 'sha1', 'sha256'].
INFO:image_captioning_assistant.generate.bias_analysis.find_biases_in_short_work:

********** CHAIN OF THOUGHT **********
 **Transcription Analysis:**

The image contains a portrait of George Marshall Jackson, B.S., along with a biographical sketch and a signature. The text is clearly legible and can be transcribed as follows:

- Portrait caption: "George Marshall Jackson, B.S. LA FAYETTE, GEORGIA"
- Biographical sketch:
  - "Entered College Fall, '06."
  - "Chi Phi Fraternity; 'Owls' Social Club; 'Susie Dam;' Few Literary Society; 'Honor Roll,' '06, '08, '09; Commencement Speaker, '10; President of Class, '08, '09; Athletic Editor Emory Phoenix; President Pan-Hellenic Council; Baseball Team, '07, '08, '10, '11; Manager Baseball Team, '08; Basketb

Unnamed: 0_level_0,Unnamed: 1_level_0,Output from AI Model for Metadata
Bias ID,Bias Item,Unnamed: 2_level_1


Unnamed: 0_level_0,Unnamed: 1_level_0,Output from AI Model for Page 1
Bias ID,Bias Item,Unnamed: 2_level_1
1,level,BiasLevel.medium
1,type,BiasType.gender
1,explanation,"The text refers to Jackson as a 'lady killer,' which is a stereotypical phrase that objectifies women and reduces them to objects of male conquest. This perpetuates harmful gender stereotypes and reinforces the idea that men's worth is measured by their ability to attract women."
2,level,BiasLevel.low
2,type,BiasType.cultural
2,explanation,"The text includes a reference to 'Hotch Armor,' which could be interpreted as a cultural bias if 'Armor' is meant to imply a connection to military or aristocratic heritage, though this is speculative without more context."


Unnamed: 0_level_0,Unnamed: 1_level_0,Output from AI Model for Page 2
Bias ID,Bias Item,Unnamed: 2_level_1
1,level,BiasLevel.medium
1,type,BiasType.cultural
1,explanation,The use of the term 'BIG INUJN' and the accompanying illustration resembling a Native American headdress suggests cultural appropriation and stereotyping of Native American imagery. This is problematic as it reduces a rich and diverse culture to a simplistic and potentially offensive stereotype.


Unnamed: 0_level_0,Unnamed: 1_level_0,Output from AI Model for Page 3
Bias ID,Bias Item,Unnamed: 2_level_1
1,level,BiasLevel.medium
1,type,BiasType.cultural
1,explanation,"The narrative about Henry Jackson Peavy includes a reference to purchasing a 'quarter pipe' and 'a sack of Duke's' to cultivate 'the gentle art of smoking.' This reflects a cultural norm of the time period where smoking was more socially acceptable and even considered a sophisticated activity. However, this could be seen as promoting a harmful behavior (smoking) which is now widely recognized as detrimental to health. The level is medium because while it reflects historical norms, it also promotes a behavior that is now known to be harmful."
2,level,BiasLevel.low
2,type,BiasType.gender
2,explanation,"The page exclusively features a male student and uses masculine pronouns ('he,' 'him'). While this is reflective of the time period when such publications often featured predominantly male students, it does represent a gap in the representation of female students. The level is low because it is more of an unintentional exclusion rather than an overt bias."


Unnamed: 0_level_0,Unnamed: 1_level_0,Output from AI Model for Page 4
Bias ID,Bias Item,Unnamed: 2_level_1
1,level,BiasLevel.low
1,type,BiasType.gender
1,explanation,"The image exclusively depicts men, which may indicate a gender bias due to the exclusion of women and non-binary individuals. However, the context of the scene is specific (a card game among staff members), and the language used is formal and neutral, suggesting that the exclusion may not be intended to represent a broader societal norm."


In [45]:
model_id = "amazon.nova-pro-v1:0"
# model_id = "us.anthropic.claude-3-5-sonnet-20241022-v2:0"
model_id = "us.meta.llama3-2-90b-instruct-v1:0"
# response = gen_bias_for_wid_long('648ffbg7pg-cor') # 3 pages (high, none, low)
response = gen_bias_for_wid_long("26663xsjkv-cor", model_id=model_id)  # 4 pages (medium, high, none, none)
print_output(response.model_dump())

INFO:image_captioning_assistant.generate.bias_analysis.find_biases_in_long_work:Analyzing 4 images
INFO:botocore.httpchecksum:Skipping checksum validation. Response did not contain one of the following algorithms: ['crc32', 'sha1', 'sha256'].
INFO:image_captioning_assistant.generate.bias_analysis.find_biases_in_short_work:

********** CHAIN OF THOUGHT **********
 

The image is a page from a yearbook, featuring a photo of George Marshall Jackson, B.S. The page includes his name, degree, and college information, as well as a list of his extracurricular activities and achievements. The page also features a handwritten signature and a cartoon drawing of a man in a top hat.

Upon analyzing the object, I noticed that the page does not contain any explicit bias or stereotyping. However, I did notice that the page is written in a formal and traditional style, which may be perceived as exclusive or elitist. Additionally, the use of the term "B.S." to refer to George Marshall Jackson's degree m

Unnamed: 0_level_0,Unnamed: 1_level_0,Output from AI Model for Metadata
Bias ID,Bias Item,Unnamed: 2_level_1


Unnamed: 0_level_0,Unnamed: 1_level_0,Output from AI Model for Page 1
Bias ID,Bias Item,Unnamed: 2_level_1
1,level,BiasLevel.low
1,type,BiasType.cultural
1,explanation,"The page reflects a traditional and formal style that may be perceived as exclusive or elitist. Additionally, the lack of diversity and inclusion on the page may be seen as problematic."


Unnamed: 0_level_0,Unnamed: 1_level_0,Output from AI Model for Page 2
Bias ID,Bias Item,Unnamed: 2_level_1
1,level,BiasLevel.high
1,type,BiasType.racial
1,explanation,"The use of the term 'Big Injun' to refer to a Native American man is a stereotype and a form of cultural appropriation. This term is often used in a derogatory manner to refer to Native Americans, and it perpetuates negative attitudes towards indigenous peoples."
2,level,BiasLevel.high
2,type,BiasType.racial
2,explanation,"The drawing of the Native American man wearing a headdress is also a stereotype. Headdresses are sacred objects in many Native American cultures, and they are not typically worn as a fashion statement. The drawing perpetuates the idea that Native Americans are 'exotic' and 'primitive,' and it reinforces negative attitudes towards indigenous peoples."
3,level,BiasLevel.medium
3,type,BiasType.cultural
3,explanation,"The page also features a list of Robert C. Mizell's affiliations, including the Delta Tau Delta Fraternity and the Phi Gamma Literary Society. These organizations have historically been associated with white, male, and affluent individuals, and they may perpetuate exclusivity and elitism."


Unnamed: 0_level_0,Unnamed: 1_level_0,Output from AI Model for Page 3
Bias ID,Bias Item,Unnamed: 2_level_1
1,level,BiasLevel.low
1,type,BiasType.cultural
1,explanation,"The illustration of the boy playing tennis reinforces a stereotype about the types of activities that are suitable for certain groups of people. However, the language used on the page is polite and respectful, with no apparent bias or stereotyping."


Unnamed: 0_level_0,Unnamed: 1_level_0,Output from AI Model for Page 4
Bias ID,Bias Item,Unnamed: 2_level_1
1,level,BiasLevel.high
1,type,BiasType.racial
1,explanation,"The image contains several elements that could be considered racist or culturally insensitive, such as the depiction of a Native American headdress on one of the men's heads."
2,level,BiasLevel.medium
2,type,BiasType.gender
2,explanation,"The image reinforces traditional gender roles, with the men being portrayed as the dominant figures in the scene."


# Run Evaluation

In [None]:
# Import the evaluation modules from lib/src by temporarily changing directory
# (presumably the package is resolved relative to the current directory -- confirm).
# The starting directory is recorded and restored verbatim, so the notebook
# always ends up where it began, even if an import fails.
_notebook_cwd = os.getcwd()
os.chdir("../../lib/src")  # kept outside the try: if this fails there is nothing to restore
try:
    import image_captioning_assistant.evaluate.evaluate_bias_analysis as eba
    import image_captioning_assistant.evaluate.evaluate_structured_metadata as esm
    import image_captioning_assistant.evaluate.evaluate_freeform_description as efd
    from image_captioning_assistant.data.constants import BiasLevel, BiasType, LibraryFormat
finally:
    os.chdir(_notebook_cwd)

### Metadata Eval

In [None]:
ground_truth_df.head(1).tail(1).transpose()

In [None]:
metadata_items = """
title
abstract
content_genres
content_type
date_created
subject_geo
subject_names
subject_topics
""".strip().split(
    "\n"
)
ground_truth_df.head(17).tail(1)[["work_id"] + metadata_items].transpose()

In [None]:
def gt_row_to_metadata_obj(gt_row):
    """Build a ``dc.Metadata`` object from one row of the ground-truth table.

    Missing cells are handled the same way whether they arrive as NaN (raw
    ``read_csv`` output) or as ``""`` (after ``fillna("")``): the abstract, date
    and location become empty strings and no topics are produced.

    Args:
        gt_row: Mapping-like row providing ``title``, ``abstract``,
            ``date_created``, ``subject_geo``, ``content_type``,
            ``content_genres`` and ``subject_topics``.

    Returns:
        A ``dc.Metadata`` whose transcription holds the abstract (with the
        "Verso:" / "Recto:" markers removed) as its only printed text, and whose
        topics are the ``|``-separated parts of ``subject_topics``.

    Raises:
        KeyError: If ``content_type`` is not one of the two mapped URIs.
    """
    gt_content_type_mapping = {
        "http://id.loc.gov/vocabulary/resourceTypes/img": LibraryFormat.still_image,
        "http://id.loc.gov/vocabulary/resourceTypes/txt": LibraryFormat.text,
    }

    def is_missing(value):
        # NaN is truthy and str(NaN) == "nan", so plain truthiness checks let
        # missing cells leak through as the literal text "nan".
        return pd.isna(value) or value == ""

    def as_text(value):
        return "" if is_missing(value) else str(value)

    abstract = gt_row["abstract"]
    cleaned_abstract = "" if is_missing(abstract) else abstract.replace("Verso:", "").replace("Recto:", "")

    topics = []
    for col in ["subject_topics"]:
        if not is_missing(gt_row[col]):
            topics.extend(str(gt_row[col]).split("|"))

    return dc.Metadata(
        description=gt_row["title"],
        transcription=dc.Transcription(printed_text=[cleaned_abstract], handwriting=[]),
        date=as_text(gt_row["date_created"]),
        location=as_text(gt_row["subject_geo"]),
        publication_info=[],
        contextual_info=[],
        format=gt_content_type_mapping[gt_row["content_type"]],
        genre=[gt_row["content_genres"]],
        topics=topics,
        objects=[],
        actions=[],
        people=[],
    )


gt_row_to_metadata_obj(ground_truth_df.iloc[16, :])

In [None]:
import aiohttp
import asyncio
import nest_asyncio

nest_asyncio.apply()


def get_human_and_llm_metadata(gt_row):
    """Return the ``(human, llm)`` metadata pair for one ground-truth row.

    The human side is built from the row itself; the LLM side is generated for
    the row's ``work_id`` and ``page_title``.
    """
    # NOTE(review): gen_metadata_for_wid is not defined in the visible part of
    # this notebook -- confirm it is defined or imported before this is called.
    return (
        gt_row_to_metadata_obj(gt_row),
        gen_metadata_for_wid(gt_row["work_id"], gt_row["page_title"]),
    )


def run_gt_metadata_eval(gt_row):
    """Evaluate LLM-generated metadata against the human metadata for one row.

    Returns whatever ``esm.evaluate_structured_metadata`` returns when called
    with the LLM metadata, the human metadata and the evaluator model settings.
    """
    reference_metadata, generated_metadata = get_human_and_llm_metadata(gt_row)
    return esm.evaluate_structured_metadata(
        generated_metadata,
        reference_metadata,
        {"model": "us.anthropic.claude-3-5-sonnet-20241022-v2:0"},
    )

In [None]:
# Run the metadata eval only on rows whose page title is "front"; the other
# rows are pages that are used for bias analysis only.
# tqdm.auto picks the notebook or console progress bar as appropriate and
# replaces the deprecated `tqdm_notebook` alias.
from tqdm.auto import tqdm

# NOTE(review): eval_results is re-created every time this cell runs, so the
# `not in` guard below never skips anything; move this initialisation to its
# own cell if resuming a partially completed run is intended.
eval_results = {}
gt_dedup = ground_truth_df[ground_truth_df["page_title"].str.lower() == "front"].copy(deep=True).fillna("")
for i, gt_row in tqdm(gt_dedup.iterrows(), total=len(gt_dedup)):
    if i not in eval_results:
        eval_results[i] = run_gt_metadata_eval(gt_row)

In [None]:
eval_results[i]

In [None]:
eval_results[i]

In [None]:
reload(esm)
esm.combine_structured_metadata_evaluations(list(eval_results.values())).model_dump()

In [None]:
shas = ground_truth_df[ground_truth_df["work_id"] == gt_row["work_id"]][["page_sha1", "page_title"]]
shas

In [None]:
ground_truth_df[ground_truth_df["work_id"].str.strip() == gt_row["work_id"]][["page_sha1", "page_title"]]

In [None]:
results = gen_metadata_for_wid("423612jmc0-cor")
# print(results.cot)
print_output(results.model_dump())

In [None]:
reload(esm)
# LLM-generated example
llm_metadata_different = dc.MetadataCOT(
    description="A digitized manuscript showing agricultural practices from early modern Europe",
    transcription=dc.Transcription(
        printed_text=["Treatise on Farming Methods", "Printed in Venice 1592"],
        handwriting=["Marginal notes regarding crop rotation", "Ownership signature: G. Agricola"],
    ),
    date="1590-1600",
    location="Northern Italy",
    publication_info=["Venetian Printing House"],
    contextual_info=["Demonstrates pre-Enlightenment farming techniques"],
    format=LibraryFormat.text,
    genre=["Manuscript", "Agricultural"],
    objects=["Quill annotations", "Illustrations of plows"],
    actions=["Harvesting", "Irrigating fields"],
    people=["Male figures in peasant attire"],
    cot="Generated through analysis of visual patterns and textual correlations in historical documents",
)
llm_metadata_similar = dc.MetadataCOT(
    description="Colorized lithographic print showing Victorian-era metropolitan peddlers",
    transcription=dc.Transcription(
        printed_text=["London Street Scenes", "Issued by Smith & Sons 1883"],
        handwriting=["Collection note: Uncommon version with azure coloring", "Previous owner: J. Smith"],
    ),
    date="1883",
    location="London, England",
    publication_info=["Smith & Sons Publishers"],
    contextual_info=["Chronicles vanishing professions during the Industrial Revolution"],
    format=LibraryFormat.still_image,
    genre=["Lithograph", "Social Documentation"],
    objects=["Vendor carts", "Coal containers", "Work garments"],
    actions=["Merchant trading", "Price negotiation"],
    people=["Market traders (both genders)", "Young workers"],
    cot="Documented through direct artifact examination and archival source verification",
)


# Human-curated example
human_metadata = dc.Metadata(
    description="Hand-colored lithograph depicting 19th century urban street vendors",
    transcription=dc.Transcription(
        printed_text=["Street Life of London", "Published by Smith & Sons 1883"],
        handwriting=["Curator's note: Rare variant with blue tint", "Ex collection: J. Smith"],
    ),
    date="1883",
    location="London, England",
    publication_info=["Smith & Sons Publishers"],
    contextual_info=["Documents disappearing trades during industrialization"],
    format=LibraryFormat.still_image,
    genre=["Lithograph", "Social History"],
    objects=["Push carts", "Coal buckets", "Aprons"],
    actions=["Selling goods", "Haggling prices"],
    people=["Street vendors (male and female)", "Child apprentices"],
)

chat_bedrock_kwargs = {"model": "us.anthropic.claude-3-5-sonnet-20241022-v2:0"}

# Call the function
result_different = esm.evaluate_structured_metadata(llm_metadata_different, human_metadata, chat_bedrock_kwargs)
result_similar = esm.evaluate_structured_metadata(llm_metadata_similar, human_metadata, chat_bedrock_kwargs)
display(result_different)
display(result_similar)

### Bias Eval

In [None]:
llm_bias_analysis = dc.BiasAnalysisEntry(
    bias_type=BiasType.racial,
    bias_level=BiasLevel.high,
    explanation="Water fountain and a sign above it that says 'whites'",
)
human_bias_analysis = dc.BiasAnalysisEntry(
    bias_type=BiasType.racial,
    bias_level=BiasLevel.high,
    explanation="A water fountain and a sign above it tha reads 'whites'",
)

potential_bias_evaluation = eba.evaluate_potential_biases(
    llm_potential_biases=[llm_bias_analysis],
    human_potential_biases=[human_bias_analysis],
    chat_bedrock_converse_kwargs={
        "model": "anthropic.claude-3-5-sonnet-20240620-v1:0",
        "temperature": 0.0,
    },
)
potential_bias_evaluation

In [None]:
llm_bias_analysis = dc.BiasAnalysisEntry(
    bias_type=BiasType.racial,
    bias_level=BiasLevel.high,
    explanation="Water fountain and a sign above it that says 'whites'",
)
human_bias_analysis = dc.BiasAnalysisEntry(
    bias_type=BiasType.age,
    bias_level=BiasLevel.low,
    explanation="Child Laborers are working in the fields.",
)

potential_bias_evaluation = eba.evaluate_potential_biases(
    llm_potential_biases=[llm_bias_analysis],
    human_potential_biases=[human_bias_analysis],
    chat_bedrock_converse_kwargs={
        "model": "anthropic.claude-3-5-sonnet-20240620-v1:0",
        "temperature": 0.0,
    },
)
potential_bias_evaluation