In [1]:
import os

# SECURITY: never hard-code an API key in a notebook or source file -- it leaks
# through version control, exports and shared copies. The key is read from the
# OPENAI_API_KEY environment variable instead; an empty string means "not set".
# Any key that was previously committed here must be revoked and rotated.
openaiKey = os.environ.get("OPENAI_API_KEY", "")


In [None]:
import base64
import json
import os
from openai import OpenAI
from typing import Dict, Any

def analyze_receipt_with_openai(image_path: str, api_key: str) -> Dict[str, Any] | None:
    """
    Analyzes a UK supermarket receipt image using the OpenAI GPT-4o model.

    Args:
        image_path: The file path to the receipt image.
        api_key: Your OpenAI API key.

    Returns:
        A dictionary containing the extracted receipt data in a structured format,
        or None if an error occurs.
    """
    # --- 1. Input Validation ---
    if not os.path.exists(image_path):
        print(f"Error: Image file not found at {image_path}")
        return None
    if not api_key:
        print("Error: OpenAI API key is required.")
        return None

    # --- 2. Encode Image to Base64 ---
    # This is the required format for sending images to the OpenAI API.
    try:
        with open(image_path, "rb") as image_file:
            base64_image = base64.b64encode(image_file.read()).decode('utf-8')
    except Exception as e:
        print(f"Error encoding image: {e}")
        return None

    # --- 3. Initialize OpenAI Client ---
    try:
        client = OpenAI(api_key=api_key)
    except Exception as e:
        print(f"Error initializing OpenAI client: {e}")
        return None

    # --- 4. Construct the Detailed Prompt ---
    # This is the most critical part. We are instructing the AI on its role,
    # the exact output format (JSON schema), and what to include/exclude.
    prompt_text = """
    You are an expert data extraction specialist for UK supermarket receipts.
    Analyze the provided receipt image and extract the following information:
    - The name of the 'store'.
    - The 'date' of the purchase in "dd/mm/yyyy" format.
    - A list of all purchased 'items'.

    For each item in the list, extract:
    - 'quantity': The number of units purchased for that line item. If not specified, assume 1.
    - 'item': The full description of the product.
    - 'price': The price for that line item as a float.
    - 'category': A relevant category (e.g., 'Groceries', 'Groceries - Meat', 'Groceries - Dairy', 'Fruit & Veg', 'Snacks', 'Drinks', 'Household', 'Toiletries', 'Bakery', 'Alcohol').

    Return the output as a single, valid JSON object. Do not include any explanatory text.
    The JSON object should have three top-level keys: "store", "date", and "items".
    Exclude non-product lines like 'Subtotal', 'TOTAL', 'Card Payment', 'VAT', 'Savings', 'Clubcard points', etc.

    Example JSON structure:
    {
      "store": "Tesco",
      "date": "02/03/2024",
      "items": [
        { "quantity": 1, "item": "Tesco Bananas Loose", "price": 1.35, "category": "Fruit & Veg" },
        { "quantity": 2, "item": "Tesco Free Range Eggs 6 Pack", "price": 3.70, "category": "Groceries - Dairy" }
      ]
    }
    """

    # --- 5. Make the API Call ---
    try:
        print("Sending request to OpenAI API... This may take a moment.")
        response = client.chat.completions.create(
            model="gpt-4o", # Using the latest and most capable multimodal model
            messages=[
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": prompt_text},
                        {
                            "type": "image_url",
                            "image_url": {
                                "url": f"data:image/jpeg;base64,{base64_image}"
                            },
                        },
                    ],
                }
            ],
            max_tokens=2048, # Generous token limit for detailed receipts
        )

        # --- 6. Process the Response ---
        response_text = response.choices[0].message.content
        
        # Clean up potential markdown formatting
        if response_text.startswith("```json"):
            response_text = response_text.strip().replace('```json', '').replace('```', '')
            
        # Parse the JSON string into a Python dictionary
        parsed_json = json.loads(response_text)
        return parsed_json

    except Exception as e:
        print(f"An error occurred during the API call or processing: {e}")
        # Optionally, print the raw response for debugging
        if 'response' in locals() and hasattr(response, 'text'):
            print("\n--- Raw API Response for Debugging ---")
            print(response.text)
            print("------------------------------------")
        return None


# --- Example Usage ---
if __name__ == '__main__':
    # Example usage: analyze one receipt image and print the extracted fields.

    # IMPORTANT: Replace with the actual path to your receipt image.
    # For example: './receipts/IMG_3200.jpg'
    receipt_image_path = 'path_to_your_receipt.jpg'

    # The API key is read from the OPENAI_API_KEY environment variable so that
    # no secret is stored in the source. The placeholder is only a sentinel
    # that triggers the configuration reminder below.
    openai_api_key = os.environ.get("OPENAI_API_KEY", "sk-YOUR_OPENAI_API_KEY_HERE")

    if receipt_image_path == 'path_to_your_receipt.jpg' or openai_api_key == "sk-YOUR_OPENAI_API_KEY_HERE":
        print("="*60)
        print("!!! PLEASE CONFIGURE YOUR `receipt_image_path` AND `openai_api_key` !!!")
        print("="*60)
    else:
        extracted_data = analyze_receipt_with_openai(receipt_image_path, openai_api_key)

        if extracted_data:
            print("\n--- ✅ Receipt Analysis Complete ---")
            print(f"Store: {extracted_data.get('store', 'N/A')}")
            print(f"Date: {extracted_data.get('date', 'N/A')}")
            print("\n--- Items ---")
            items = extracted_data.get('items', [])
            if items:
                for item in items:
                    # The data comes from a model, so tolerate malformed entries.
                    if not isinstance(item, dict):
                        continue
                    # Price may come back as null or a string; format defensively
                    # instead of letting ':.2f' raise.
                    try:
                        price_text = f"£{float(item.get('price', 0.0)):.2f}"
                    except (TypeError, ValueError):
                        price_text = "N/A"
                    print(
                        f"- Qty: {item.get('quantity', 'N/A')}, "
                        f"Item: {item.get('item', 'N/A')}, "
                        f"Price: {price_text}, "
                        f"Category: {item.get('category', 'N/A')}"
                    )
            else:
                print("No items were extracted.")
            print("------------------------------------")
            # You can now save this 'extracted_data' dictionary to your SQLite database.

