## Image -> Text Task

Goal: Set up a pipeline to Claude to identify non-text parts of the image (shape, icongraphy, etc)

In [None]:
# Having errors? Want to see the code? Look at llm_helper functions! 
from llm_helper_functions import *
from ocr_helper_functions import * 

#TODO
#
# Run / Test / Find Errors
    # Edit the transcription prompt to add ? for unknown characters? 

# Better Error Handleing in the final functions

# Look into OCR
# Edit last function to take the OCR Transcription 

# Go take more pictures
# Make the API Set up thing better / include constants for those paths as well 

### Folder and API Set Up

In [2]:
# Note there is a 5MB limit on images
# It took 5 minutes to run 38 images
INPUT_FOLDER = "../data/input/" # TODO change to ..data/input/
OUTPUT_FOLDER = "../data/output/"
OUTPUT_FILENAME = "results.csv"

API_KEY = get_api_key("credentials.txt")
HEADERS = {
    "Content-Type": "application/json",
    "x-api-key": API_KEY,
    "anthropic-version": "2023-06-01"
}
MODEL = ""

### Prompts:
Feel free to change or add more!

In [3]:
# All of these prompts will be accompanied by an image
ICON_PROMPT = "Hi! Can you identify the iconography of this gravestone? Most of the icongraphy should be towards the top of the stone. " \
"If there is no icongoraphy, just say None. Please only return exactly what the iconography is. Do not say anything else in your answer."

SHAPE_PROMPT = "Hi! Can you identify the shape of this gravestone? Please only return exactly what the shape is. Do not say anything else in your answer."

MATERIAL_PROMPT = "Hi! Can you tell me which material this gravestone is made of? It should be one of granite, marble, or slate. " \
"Please only return exactly what the material is. Do not say anything else in your answer." 

TRANSCRIPTION_PROMPT = "Hi! Can you transcribe the text on this gravestone? Please deliminate each line of the transcription with a hyphen. " \
"Please only return the transcription. Do not say anything else in your answer."

YOUR_PROMPT_HERE = ""

# You can add your prompt variable and corresponding column here
PROMPTS = [ICON_PROMPT, SHAPE_PROMPT, MATERIAL_PROMPT, TRANSCRIPTION_PROMPT] # Dont put the info prompt in here
COLUMNS = ["Image Name", "Iconography Description", "Shape Description", "Material", "Claude Transcription"] # Don't change first/last column order

# Separate Task to translate the transcription
INFO_PROMPT = "Hi! The following is a transcription from a gravestone. Each line is separated by a newline character." \
"Can you tell me the first name, middle name, last name, date of birth, date of death, age at death." \
"The information will not be labeled. You might have to calculate age on death. If there is information missing for a field, put None. Please only return exactly " \
"the information requested, in order separated by a comma. Do not say anything else in your answer. Here is the Transcription: "

INFO_COLUMNS = ["First Name", "Middle Name", "Last Name", "Date of Birth", "Date of Death", "Age at Death", "Claude Transcription"]



In [None]:
def gravestone_desc(input_folder, prompts, columns, headers, debug=False):
    """
    Uses the helper function to get all the names of the images, then calls claude with each prompt for each image.
    Puts all the information for each image in a row of a dataframe.
    
    Args:
        input_folder str: Folder path with the images
        prompts list(str): List of User-Specified Prompts for Claude
        columns list(str): Corresponding list of columns to store the results of the above prompts
        debug boolean: Debug mode. Turn on if you encounter errors and want to see the full debug message from anthropic. 
    Returns:
        df(DataFrame): Dataframe with the columns specified in columns
    """

    files = list_files_in_folder(input_folder)
    all_results = []

    for image in files:

        image_result = [image]
        for prompt in prompts:
        # Call Claude
                
            result = call_claude(prompt, headers=headers, image_path=input_folder + image, debug=debug)
            image_result.append(result['content'][0]['text'])
        # Extract Text
        all_results.append(image_result)

    # Put in a dataframe and return 
    df = pd.DataFrame(all_results, columns=columns)
    return df



def transcription_info(transcriptions, prompt, columns, headers, debug=False):
    """
    Uses the helper function to get all the names of the images, then calls claude with each prompt for each image.
    Puts all the information for each image in a row of a dataframe.
    
    Args:
        input_folder str: Folder path with the images
        prompts list(str): List of User-Specified Prompts for Claude
        columns list(str): Corresponding list of columns to store the results of the above prompts
        debug boolean: Debug mode. Turn on if you encounter errors and want to see the full debug message from anthropic. 
    Returns:
        df(DataFrame): Dataframe with the columns specified in columns. 
    """
    
    all_results = []
    
    for trans in transcriptions:

        # Call Claude
        result = call_claude(prompt + trans, headers=headers, debug=debug)

        print(prompt + trans)

        # Split on commas: (#TODO Error Handleing)
        result = str.split((result['content'][0]['text']), ",")


        print(result)

        result.append(trans) # Include the transcription for joining purpose later

        all_results.append(result)

    # Put in a dataframe and return 
    df = pd.DataFrame(all_results, columns=columns)
    return df


### Run the code here

In [35]:
df_desc = gravestone_desc(INPUT_FOLDER, PROMPTS, COLUMNS, HEADERS, debug=False)
df_desc.to_csv(OUTPUT_FOLDER + OUTPUT_FILENAME)

df_desc.head()

Unnamed: 0,Image Name,Iconography Description,Shape Description,Material,Claude Transcription
0,_DSC0470.jpeg,,Rectangle,Granite,ELLEN H. CUNLIFF\n-\nDAUGHTER OF\n-\nJOSEPH & ...
1,_DSC0469.jpeg,,Rectangle,Granite,MARY E. CUNLIFF\n-\nWIFE OF\n-\nSYLVANUS G. BU...


In [36]:
df_desc.head()

Unnamed: 0,Image Name,Iconography Description,Shape Description,Material,Claude Transcription
0,_DSC0470.jpeg,,Rectangle,Granite,ELLEN H. CUNLIFF\n-\nDAUGHTER OF\n-\nJOSEPH & ...
1,_DSC0469.jpeg,,Rectangle,Granite,MARY E. CUNLIFF\n-\nWIFE OF\n-\nSYLVANUS G. BU...


In [39]:
df_info = transcription_info(df_desc["Claude Transcription"], INFO_PROMPT, INFO_COLUMNS, HEADERS, debug=False)
df_all = pd.concat([df_desc, df_info])
df_all.to_csv(OUTPUT_FOLDER + OUTPUT_FILENAME)

Hi! The following is a transcription from a gravestone. Each line is separated by a newline character.Can you tell me the first name, middle name, last name, date of birth, date of death, age at death.The information will not be labeled. You might have to calculate age on death. If there is information missing for a field, put None. Please only return exactly the information requested, in order separated by a comma. Do not say anything else in your answer. Here is the Transcription: ELLEN H. CUNLIFF
-
DAUGHTER OF
-
JOSEPH & MARY M.
-
CUNLIFF
-
1815 - 1907
['ELLEN', ' H.', ' CUNLIFF', ' 1815', ' 1907', ' 92']
Hi! The following is a transcription from a gravestone. Each line is separated by a newline character.Can you tell me the first name, middle name, last name, date of birth, date of death, age at death.The information will not be labeled. You might have to calculate age on death. If there is information missing for a field, put None. Please only return exactly the information reques

## OCR

In [None]:



df = tesseract_ocr(INPUT_FOLDER)



Collecting easyocr
  Downloading easyocr-1.7.2-py3-none-any.whl.metadata (10 kB)
Collecting opencv-python-headless (from easyocr)
  Downloading opencv_python_headless-4.12.0.88-cp37-abi3-macosx_13_0_x86_64.whl.metadata (19 kB)
Collecting scikit-image (from easyocr)
  Downloading scikit_image-0.25.2-cp312-cp312-macosx_10_13_x86_64.whl.metadata (14 kB)
Collecting python-bidi (from easyocr)
  Downloading python_bidi-0.6.6-cp312-cp312-macosx_10_12_x86_64.whl.metadata (4.9 kB)
Collecting Shapely (from easyocr)
  Downloading shapely-2.1.1-cp312-cp312-macosx_10_13_x86_64.whl.metadata (6.8 kB)
Collecting pyclipper (from easyocr)
  Downloading pyclipper-1.3.0.post6-cp312-cp312-macosx_10_13_x86_64.whl.metadata (9.0 kB)
Collecting ninja (from easyocr)
  Downloading ninja-1.11.1.4-py3-none-macosx_10_9_universal2.whl.metadata (5.0 kB)
Collecting imageio!=2.35.0,>=2.33 (from scikit-image->easyocr)
  Downloading imageio-2.37.0-py3-none-any.whl.metadata (5.2 kB)
Collecting tifffile>=2022.8.12 (from sc

In [11]:
# Trying out EasyOCR

import cv2
import pytesseract
import easyocr
import os
from PIL import Image
import numpy as np


def preprocess_for_easyocr(image_path):
    # Grayscale
    img = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)

    # Enhance Contrast
    img_eq = cv2.equalizeHist(img)

    # Gaussian Blur to reduce noise
    img_blur = cv2.GaussianBlur(img_eq, (3, 3), 0)

    # Otsu’s Thresholding (better than adaptive here)
    _, thresh = cv2.threshold(img_blur, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

    # Morphological closing to fix broken letters
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (2, 2))
    morph = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel)

    return img


def easy_ocr(image_path, confidence_threshold):
    # Preprocess the image
    preprocessed = preprocess_for_easyocr(image_path)

    # Save or display preprocessed result for sanity check
    debug_path = OUTPUT_FOLDER + "preprocessed_easyocr.png"
    cv2.imwrite(debug_path, preprocessed)

    # Run EasyOCR on the preprocessed image
    reader = easyocr.Reader(['en'], gpu=True)
    result = reader.readtext(preprocessed)

    # Extract and print detected text
    extracted_text = []
    print("\n🔎 OCR Output:")
    for detection in result:
        bbox, text, conf = detection
        
        if conf >= confidence_threshold:
            print(f"✅ {text} (Confidence: {conf:.2f})")
            extracted_text.append(text)
        else:
            print(f"❌ {text} (Confidence: {conf:.2f}) - REJECTED")
    
    return " ".join(extracted_text)
    


def process_gravestone_images(folder_path):
    results = []

    for filename in os.listdir(folder_path):
        if filename.lower().endswith(('.png', '.jpg', '.jpeg', '.tiff', '.bmp')):
            full_path = os.path.join(folder_path, filename)
            print(f"Processing {filename}...")
            text = easy_ocr(full_path, 0.1)
            results.append({"Image Name": filename, "OCR Transcription": text})

    df = pd.DataFrame(results)
    return df 

df = process_gravestone_images(INPUT_FOLDER)

Processing _DSC0470.jpeg...

🔎 OCR Output:
❌ EzLea y CuLIfe (Confidence: 0.06) - REJECTED
✅ DAUCTE? (Confidence: 0.14)
✅ 2 = (Confidence: 0.19)
✅ JosE?x & MARYV (Confidence: 0.34)
✅ Cun_iez (Confidence: 0.19)
✅ 1815 (Confidence: 0.29)
✅ 90 (Confidence: 0.54)
Processing _DSC0469.jpeg...

🔎 OCR Output:
✅ 3~ (Confidence: 0.29)
✅ E CIULIFF (Confidence: 0.14)
❌ WFE (Confidence: 0.07) - REJECTED
✅ 0F (Confidence: 0.20)
✅ VILVANUS €. Bulloci (Confidence: 0.15)
✅ SEPT 284,839 (Confidence: 0.22)
✅ JLE (Confidence: 0.27)


In [9]:
df.to_csv(OUTPUT_FOLDER + "ocr_results.csv")
df.head()

Unnamed: 0,Image Name,OCR Transcription
0,_DSC0470.jpeg,LLEN X Cunlif DAvGHTER JoSEPH & MARY M
1,_DSC0469.jpeg,0 2 Ec WRE OF NUus G BULLoCHF 1839
