In [1]:
import base64
import os
from anthropic import Anthropic

In [2]:
def encode_image(image_path):
    """
    Read an image from disk and return its contents as base64 text.

    The file is opened in binary mode and read in full; the resulting
    bytes are base64-encoded and decoded to a ``str`` so the value can be
    embedded in a JSON request body.

    :param image_path: Path to the image file
    :return: Base64 encoded string of the image
    """
    with open(image_path, "rb") as handle:
        raw_bytes = handle.read()

    encoded_bytes = base64.b64encode(raw_bytes)
    return str(encoded_bytes, "utf-8")

In [24]:
# Suffix -> media type pairs for the image formats the request may carry.
# Anything not listed here falls back to JPEG, as the original code did.
_IMAGE_MEDIA_TYPES = (
    (".png", "image/png"),
    (".gif", "image/gif"),
    (".webp", "image/webp"),
    (".jpg", "image/jpeg"),
    (".jpeg", "image/jpeg"),
)


def _media_type_for(image_path):
    """Pick the media type for an image path from its (case-insensitive) suffix."""
    lowered = os.fspath(image_path).lower()
    for suffix, media_type in _IMAGE_MEDIA_TYPES:
        if lowered.endswith(suffix):
            return media_type
    return "image/jpeg"  # Default for unknown extensions


def extract_text_from_image(
    api_key,
    image_paths,
    prompt_text="Extract all text from the image(s).",
    model="claude-3-haiku-20240307",
    max_tokens=1000,
):
    """
    Use Claude to extract text from one or more images.

    All images are sent in a single user message, followed by the text
    prompt, and the text of the first content block of the reply is returned.

    :param api_key: Your Anthropic API key
    :param image_paths: A list of paths to image files. A single path
        (``str`` or ``os.PathLike``) is also accepted and treated as a
        one-element list, instead of being iterated character by character.
    :param prompt_text: Text prompt for Claude (placed after the images)
    :param model: Model identifier passed to the Messages API; it must be a
        model that accepts image input
    :param max_tokens: Upper bound on the number of tokens in the reply
    :return: Extracted text from the images, or ``None`` if reading an image
        or calling the API raised an exception (the error is printed)
    """
    # Initialize the Anthropic client
    client = Anthropic(api_key=api_key)

    # A lone path is itself iterable (a str yields characters), so wrap it
    # before looping; otherwise every "path" would be a single character.
    if isinstance(image_paths, (str, os.PathLike)):
        image_paths = [image_paths]

    try:
        # One image block per file, in the order given
        content = [
            {
                "type": "image",
                "source": {
                    "type": "base64",
                    "media_type": _media_type_for(image_path),
                    "data": encode_image(image_path),
                },
            }
            for image_path in image_paths
        ]

        # Add text prompt at the end
        content.append({
            "type": "text",
            "text": prompt_text
        })

        # Send request to Claude
        response = client.messages.create(
            model=model,
            max_tokens=max_tokens,
            messages=[
                {
                    "role": "user",
                    "content": content
                }
            ]
        )

        # Return the extracted text
        return response.content[0].text

    except Exception as e:
        # Best-effort helper: report the failure and signal it with None so
        # callers can branch on the result instead of handling exceptions.
        print(f"An error occurred: {e}")
        return None

In [19]:
def main():
    """
    Extract menu items and prices from a sample image and print them.

    The API key is read from the ``ANTHROPIC_API_KEY`` environment variable;
    if it is missing or empty, a hint is printed and nothing else happens.
    """
    # Never hardcode credentials in a notebook: they end up in version
    # control and in shared copies. Any key that was ever pasted into this
    # cell should be treated as compromised and rotated.
    API_KEY = os.environ.get("ANTHROPIC_API_KEY")

    if not API_KEY:
        print("Please set the ANTHROPIC_API_KEY environment variable.")
        return

    # Paths to the image files (the extractor takes a list of paths)
    IMAGE_PATHS = ['./test_images/Menu_10.jpg']

    # Instruction sent to the model after the images
    PROMPT_TEXT = (
        "Extract every menu item and its price from the image and "
        "list them as item - price pairs."
    )

    # Extract text from the image
    extracted_text = extract_text_from_image(API_KEY, IMAGE_PATHS, PROMPT_TEXT)

    if extracted_text:
        print("Extracted Text:")
        print(extracted_text)
    else:
        print("Failed to extract text from the image.")

In [22]:
# Entry point: run the demo only when this code is executed directly
# (not when it is imported as a module).
if __name__ == '__main__':
    main()

Extracted Text:
Based on the image, here are the menu item and price pairs:

1. Tebasaki Namban with Wafu Tar Tar Sauce - 190 THB
2. Shiromi Sakana Tempura - 690 THB
3. Menchi Katsu (X-Wagyu Japanese Beef) - 490 THB
4. Sukiyaki Cheese Tamagoyaki (X-Wagyu) - 290 THB
