In [1]:
import base64
import os
from anthropic import Anthropic

In [2]:
def encode_image(image_path):
    """
    Encode an image file to base64 for API transmission.
    
    :param image_path: Path to the image file
    :return: Base64 encoded string of the image
    """
    with open(image_path, "rb") as image_file:
        return base64.b64encode(image_file.read()).decode('utf-8')

In [24]:
def extract_text_from_image(api_key, image_paths, prompt_text):
    """
    Use Claude to extract text from multiple images.
    
    :param api_key: Your Anthropic API key
    :param image_paths: List of paths to image files
    :param prompt_text: Text prompt for Claude
    :return: Extracted text from the images
    """
    # Initialize the Anthropic client
    client = Anthropic(api_key=api_key)
    
    try:
        # Create content array
        content = []
        
        # Add each image to the content array
        for image_path in image_paths:
            # Determine media type based on file extension
            media_type = "image/jpeg"  # Default
            if image_path.lower().endswith(".png"):
                media_type = "image/png"
            elif image_path.lower().endswith(".gif"):
                media_type = "image/gif"
            
            # Encode the image
            base64_image = encode_image(image_path)
            
            # Add image to content array
            content.append({
                "type": "image",
                "source": {
                    "type": "base64",
                    "media_type": media_type,
                    "data": base64_image
                }
            })
        
        # Add text prompt at the end
        content.append({
            "type": "text",
            "text": prompt_text
        })
        
        # Send request to Claude
        response = client.messages.create(
            model="claude-3-haiku-20240307",  # or any other model that supports multiple images
            max_tokens=1000,
            messages=[
                {
                    "role": "user",
                    "content": content
                }
            ]
        )
        
        # Return the extracted text
        return response.content[0].text
    
    except Exception as e:
        print(f"An error occurred: {e}")
        return None

In [19]:
def main():
    # Replace with your actual Anthropic API key
    API_KEY ='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'
    
    if not API_KEY:
        print("Please set the ANTHROPIC_API_KEY environment variable.")
        return
    
    # Path to your image file
    IMAGE_PATH = './test_images/Menu_10.jpg'
    
    # Extract text from the image
    extracted_text = extract_text_from_image(API_KEY, IMAGE_PATH)
    
    if extracted_text:
        print("Extracted Text:")
        print(extracted_text)
    else:
        print("Failed to extract text from the image.")

In [29]:
import os
import base64
import re
from anthropic import Anthropic
from collections import defaultdict

def encode_image(image_path):
    """
    Encode an image file to base64.
    
    :param image_path: Path to the image file
    :return: Base64 encoded string of the image
    """
    with open(image_path, "rb") as image_file:
        return base64.b64encode(image_file.read()).decode('utf-8')

def extract_text_from_image(api_key, image_paths, prompt_text):
    """
    Use Claude to extract text from multiple images.
    
    :param api_key: Your Anthropic API key
    :param image_paths: List of paths to image files
    :param prompt_text: Text prompt for Claude
    :return: Extracted text from the images
    """
    # Initialize the Anthropic client
    client = Anthropic(api_key=api_key)
    
    try:
        # Create content array
        content = []
        
        # Add each image to the content array
        for image_path in image_paths:
            # Determine media type based on file extension
            media_type = "image/jpeg"  # Default
            if image_path.lower().endswith(".png"):
                media_type = "image/png"
            elif image_path.lower().endswith(".gif"):
                media_type = "image/gif"
            
            # Encode the image
            base64_image = encode_image(image_path)
            
            # Add image to content array
            content.append({
                "type": "image",
                "source": {
                    "type": "base64",
                    "media_type": media_type,
                    "data": base64_image
                }
            })
        
        # Add text prompt at the end
        content.append({
            "type": "text",
            "text": prompt_text
        })
        
        # Send request to Claude
        response = client.messages.create(
            model="claude-3-haiku-20240307",  # or any other model that supports multiple images
            max_tokens=2500,  # Increased to handle larger menus
            messages=[
                {
                    "role": "user",
                    "content": content
                }
            ]
        )
        
        # Return the extracted text
        return response.content[0].text
    
    except Exception as e:
        print(f"An error occurred: {e}")
        return None

def parse_menu_items(extracted_text):
    """
    Parse menu items and their prices from extracted text.
    
    :param extracted_text: Text extracted from the image
    :return: Dictionary of menu items and their prices
    """
    menu_items = {}
    
    # Split the text into lines
    lines = extracted_text.split('\n')
    
    for line in lines:
        # Skip empty lines
        if not line.strip():
            continue
            
        # Look for price patterns with multiple formats
        # This covers $10.99, $10, 10.99, €10.99, £10.99, etc.
        price_match = re.search(r'(?:[$€£¥]\s*)?(\d+(?:,\d{3})*(?:\.\d{1,2})?)', line)
        
        if price_match:
            # Extract the price and convert to float
            price_str = price_match.group(1).replace(',', '')
            price = float(price_str)
            
            # Extract the item name (everything before the price)
            item_name = line[:price_match.start()].strip()
            
            # Clean up the item name (remove any dots, dashes or other separators)
            item_name = re.sub(r'[.…\-_]+\s*$', '', item_name).strip()
            
            # If we have a valid item name and price, add to the dictionary
            # Using a unique key if there are duplicate item names
            if item_name and price > 0:
                if item_name in menu_items:
                    # If there's a duplicate, append a number to make it unique
                    count = 1
                    new_name = f"{item_name} (variant {count})"
                    while new_name in menu_items:
                        count += 1
                        new_name = f"{item_name} (variant {count})"
                    menu_items[new_name] = price
                else:
                    menu_items[item_name] = price
        
        # If no price was found but the line contains text, it could be part of a multi-line item description
        # For simplicity, we're skipping these for now, but could be enhanced to handle them
    
    # Log the number of items found
    print(f"Found {len(menu_items)} menu items with prices")
    
    return menu_items

def categorize_menu_items(menu_items, num_categories=3, price_threshold=None):
    """
    Categorize menu items based on their price.
    
    :param menu_items: Dictionary of menu items and their prices
    :param num_categories: Number of price categories
    :param price_threshold: Optional threshold to use for determining category ranges
                           If None, will be calculated based on data
    :return: Dictionary of categories and their items
    """
    if not menu_items:
        return {}
    
    # Sort items by price
    sorted_items = sorted(menu_items.items(), key=lambda x: x[1], reverse=True)
    
    # Determine price ranges for categories
    prices = [price for _, price in sorted_items]
    max_price = max(prices)
    min_price = min(prices)
    price_range = max_price - min_price
    
    # Log price range information
    print(f"\nPrice range: ${min_price:.2f} to ${max_price:.2f} (spread: ${price_range:.2f})")
    
    # Create categories based on price ranges
    categories = defaultdict(list)
    
    # If there's only one category or all prices are the same
    if num_categories <= 1 or price_range == 0:
        for item, price in sorted_items:
            categories['A'].append((item, price))
        return categories
    
    # For large price ranges, use adaptive categorization
    # Check if the price range is very large compared to the minimum price
    large_range = price_range > min_price * 3  # If range is more than 3x the minimum price
    
    if large_range:
        print("Large price range detected - using adaptive categorization")
        
        # Use percentile-based categorization instead of equal divisions
        # This handles outliers better
        
        # Sort prices
        sorted_prices = sorted(prices, reverse=True)
        num_items = len(sorted_prices)
        
        # Calculate boundary indices for each category
        boundaries = []
        for i in range(1, num_categories):
            idx = int((i * num_items) / num_categories)
            if idx < len(sorted_prices):
                boundaries.append(sorted_prices[idx])
        
        # Add the minimum price as the last boundary
        boundaries.append(min_price - 0.01)  # Slightly below min to include all items
        
        # Print the category boundaries
        boundary_strs = [f"${b:.2f}" for b in boundaries]
        print(f"Category boundaries: {boundary_strs}")
        
        # Assign items to categories
        for item, price in sorted_items:
            # Find which category this price belongs to
            for i, boundary in enumerate(boundaries):
                if price > boundary:
                    category_letter = chr(65 + i)  # A, B, C, etc.
                    categories[category_letter].append((item, price))
                    break
    else:
        # Use standard equal division
        category_range = price_range / num_categories
        
        for item, price in sorted_items:
            # Determine which category this item belongs to
            category_index = min(num_categories - 1, int((max_price - price) / category_range))
            category_letter = chr(65 + category_index)  # A, B, C, etc.
            categories[category_letter].append((item, price))
    
    # Log the distribution of items across categories
    for category, items in sorted(categories.items()):
        category_min = min([price for _, price in items]) if items else 0
        category_max = max([price for _, price in items]) if items else 0
        print(f"Category {category}: {len(items)} items, price range ${category_min:.2f} - ${category_max:.2f}")
    
    return categories

def process_menu_images(api_key, image_folder, num_categories=3):
    """
    Process all menu images in a folder and categorize items by price.
    
    :param api_key: Your Anthropic API key
    :param image_folder: Path to folder containing menu images
    :param num_categories: Number of price categories
    :return: Categorized menu items
    """
    # List all image files in the folder
    image_extensions = ['.jpg', '.jpeg', '.png', '.gif']
    image_paths = []
    
    for file in os.listdir(image_folder):
        if any(file.lower().endswith(ext) for ext in image_extensions):
            image_paths.append(os.path.join(image_folder, file))
    
    if not image_paths:
        print(f"No image files found in {image_folder}")
        return {}
    
    print(f"Found {len(image_paths)} images to process")
    
    # Process images one by one to avoid hitting API limits
    all_menu_items = {}
    
    for idx, image_path in enumerate(image_paths):
        print(f"\nProcessing image {idx+1}/{len(image_paths)}: {os.path.basename(image_path)}")
        prompt_text = """
        Please extract all menu items and their corresponding prices from this image. 
        Format each item on a new line with the item name followed by the price.
        Include EVERY menu item and price visible in the image.
        For each item, include:
        1. The complete item name
        2. The exact price (with currency symbol if present)
        
        Example format:
        Chicken Caesar Salad $12.99
        Margherita Pizza $14.50
        """
        
        extracted_text = extract_text_from_image(api_key, [image_path], prompt_text)
        
        if extracted_text:
            print(f"Text extraction successful - analyzing content...")
            
            # Save the extracted text for debugging (optional)
            with open(f"extracted_{os.path.basename(image_path)}.txt", "w") as f:
                f.write(extracted_text)
            
            # Parse menu items and prices
            menu_items = parse_menu_items(extracted_text)
            
            # Add to overall menu items, avoid overwriting duplicates
            for item, price in menu_items.items():
                if item in all_menu_items and all_menu_items[item] != price:
                    # If same item has different price, make it unique
                    all_menu_items[f"{item} ({os.path.basename(image_path)})"] = price
                else:
                    all_menu_items[item] = price
            
            print(f"Running total: {len(all_menu_items)} unique menu items")
        else:
            print(f"Failed to extract text from {os.path.basename(image_path)}")
    
    # Print summary before categorization
    print(f"\nExtraction complete. Total unique menu items found: {len(all_menu_items)}")
    
    if not all_menu_items:
        print("No menu items found in any images.")
        return {}
    
    # Calculate a reasonable number of categories based on the number of items
    if len(all_menu_items) < num_categories * 2:
        adjusted_categories = max(1, len(all_menu_items) // 2)
        if adjusted_categories != num_categories:
            print(f"Adjusting number of categories from {num_categories} to {adjusted_categories} based on item count")
            num_categories = adjusted_categories
    
    # Categorize menu items
    categories = categorize_menu_items(all_menu_items, num_categories)
    
    return categories

def main():
    # Replace with your actual Anthropic API key
    API_KEY = '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'
    
    if not API_KEY:
        print("Please set the ANTHROPIC_API_KEY environment variable.")
        return
    
    # Path to your folder containing menu images
    IMAGE_FOLDER = './test_menu/'
    
    # Number of price categories (A, B, C, etc.)
    NUM_CATEGORIES = 4
    
    print("===== MENU ITEM EXTRACTION AND CATEGORIZATION =====")
    print(f"Processing menu images from: {IMAGE_FOLDER}")
    print(f"Target number of price categories: {NUM_CATEGORIES}")
    
    # Process menu images
    categories = process_menu_images(API_KEY, IMAGE_FOLDER, NUM_CATEGORIES)
    
    if categories:
        # Count total items across all categories to verify all were categorized
        total_categorized = sum(len(items) for items in categories.values())
        
        print("\n========== MENU ITEMS BY PRICE CATEGORY ==========")
        for category, items in sorted(categories.items()):
            if items:
                category_min = min([price for _, price in items])
                category_max = max([price for _, price in items])
                print(f"\n----- Category {category} (${category_min:.2f} - ${category_max:.2f}) -----")
                print(f"{len(items)} items:")
                
                for item, price in sorted(items, key=lambda x: x[1], reverse=True):
                    print(f"  ${price:.2f} - {item}")
        
        # Save categorized items to a file
        try:
            with open("categorized_menu_items.txt", "w") as f:
                f.write("===== MENU ITEMS BY PRICE CATEGORY =====\n")
                for category, items in sorted(categories.items()):
                    if items:
                        category_min = min([price for _, price in items])
                        category_max = max([price for _, price in items])
                        f.write(f"\n----- Category {category} (${category_min:.2f} - ${category_max:.2f}) -----\n")
                        
                        for item, price in sorted(items, key=lambda x: x[1], reverse=True):
                            f.write(f"  ${price:.2f} - {item}\n")
            print(f"\nResults also saved to categorized_menu_items.txt")
        except Exception as e:
            print(f"Error saving results to file: {e}")
    else:
        print("No menu items found in the images.")

if __name__ == "__main__":
    main()

===== MENU ITEM EXTRACTION AND CATEGORIZATION =====
Processing menu images from: ./test_menu/
Target number of price categories: 4
Found 11 images to process

Processing image 1/11: Menu_6.png
Text extraction successful - analyzing content...
Found 4 menu items with prices
Running total: 4 unique menu items

Processing image 2/11: Menu_7.png
Text extraction successful - analyzing content...
Found 6 menu items with prices
Running total: 10 unique menu items

Processing image 3/11: Menu_5.png
Text extraction successful - analyzing content...
Found 18 menu items with prices
Running total: 28 unique menu items

Processing image 4/11: Menu_4.png
Text extraction successful - analyzing content...
Found 9 menu items with prices
Running total: 37 unique menu items

Processing image 5/11: Menu_1.png
Text extraction successful - analyzing content...
Found 27 menu items with prices
Running total: 64 unique menu items

Processing image 6/11: Menu_3.png
Text extraction successful - analyzing content

In [22]:
if __name__ == '__main__':
    main()

Extracted Text:
Based on the image, here are the menu item and price pairs:

1. Tebasaki Namban with Wafu Tar Tar Sauce - 190 THB
2. Shiromi Sakana Tempura - 690 THB
3. Menchi Katsu (X-Wagyu Japanese Beef) - 490 THB
4. Sukiyaki Cheese Tamagoyaki (X-Wagyu) - 290 THB
