In [1]:
import mimetypes
import os

from openai import OpenAI

In [2]:
# Create the OpenAI client.
# The API key is read from the OPENAI_API_KEY environment variable so the secret is never
# stored in the notebook itself; a missing variable fails immediately with a KeyError.
client = OpenAI(api_key=os.environ["OPENAI_API_KEY"])

In [None]:
import base64

# Path of the receipt image to encode; a relative path, so it is resolved against
# the directory the notebook kernel is running in
image_path = "receipt1.jpeg"

def convert_image_to_base64(image_path):
    """Return the contents of the file at ``image_path`` as a base64 string.

    The file is opened in binary mode, its bytes are base64-encoded, and the
    encoded bytes are decoded as UTF-8 so the result is a ``str``.
    """
    with open(image_path, "rb") as source:
        raw_bytes = source.read()
    return base64.b64encode(raw_bytes).decode("utf-8")

# Convert the image to a base64 string and store it in a variable
base64_image_string = convert_image_to_base64(image_path)

# Display only the encoded length and a short prefix: echoing the whole string
# would put the entire image payload into the notebook output.
len(base64_image_string), base64_image_string[:40]

### Create JSON Schema
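- `type` - the kind of tool being declared (`"function"` here)
- `name` - the name of the function the model can call
- `description` - tells the model what the function does, which it uses to decide when to call it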

In [4]:
# Tool definition that lets the model return an itemized receipt as structured arguments.
# The schema is assembled from smaller named pieces so each level can be read on its own.

# Allowed values for an item's category and for the receipt's payment method
item_categories = ["take-out", "meal", "groceries", "clothing", "electronics", "supplies", "other"]
payment_methods = ["cash", "credit", "debit", "mobile", "other"]

# Schema of a single purchased item
line_item_schema = {
  "type": "object",
  "properties": {
    "name": {"type": "string", "description": "Name of item"},
    "price": {"type": "number", "description": "Price of item"},
    "quantity": {"type": "integer", "description": "Quantity of item"},
    "category": {
      "type": "string",
      "description": "Category of item",
      "enum": item_categories,
    },
  },
}

# Schema of the whole receipt: vendor, purchase date, list of items and payment method
receipt_parameters = {
  "type": "object",
  "properties": {
    "vendor": {"type": "string", "description": "Name of vendor"},
    "date": {"type": "string", "format": "date", "description": "Date of purchase"},
    "items": {
      "type": "array",
      "items": line_item_schema,
      "description": "List of items purchased",
    },
    "payment_method": {
      "type": "string",
      "description": "Payment method",
      "enum": payment_methods,
    },
  },
  # All four receipt-level fields are mandatory
  "required": ["vendor", "date", "items", "payment_method"],
}

# The list passed as `tools`: a single function tool named itemize_receipt
function_call = [
  {
    "type": "function",
    "function": {
      "name": "itemize_receipt",
      "description": "Itemize a receipt from an image",
      "parameters": receipt_parameters,
    },
  }
]

In [6]:
# Import necessary libraries for displaying JSON data and working with JSON objects
from IPython.display import JSON
import json

# Derive the data URL's MIME type from the image file name so it matches the bytes that are
# actually sent (receipt1.jpeg is a JPEG, so a hardcoded image/png would mislabel it).
# Falls back to image/png when the file extension is not recognized.
image_mime_type = mimetypes.guess_type(image_path)[0] or "image/png"

# Generate a completion by making a chat API call
response = client.chat.completions.create(
  model="gpt-4o-mini",  # Specify the model to use
  messages=[
    # System prompt: process receipts, and ask for a receipt when the image is something else
    {"role": "system", "content": "If the image is a receipt, process the data. If it's not a receipt, ask for a receipt."},
    # User turn: the receipt image, embedded as a base64 data URL
    {"role": "user", "content": [
      {"type": "image_url", "image_url": {
        "url": f"data:{image_mime_type};base64,{base64_image_string}"}
      }
    ]},
  ],
  tools=function_call,  # Pass the function call structure to the model
  tool_choice="auto",  # Let the model decide whether to call the function or reply with text
  temperature=0.0,  # Lowest temperature, to keep the output as repeatable as possible
)

# Print the response received from the API
print(response)

# With tool_choice="auto" the model may answer in plain text (for example, asking for a
# receipt) instead of calling the function; in that case tool_calls is empty or None.
# Fail with a clear message rather than an opaque TypeError on tool_calls[0].
message = response.choices[0].message
if not message.tool_calls:
  raise ValueError(
    f"The model did not call itemize_receipt; it replied with: {message.content!r}"
  )

# Parse the JSON arguments of the first tool call to get the receipt information
receipt_data = json.loads(message.tool_calls[0].function.arguments)

# Display the JSON data in an expanded view for better readability
JSON(receipt_data, expanded=True)

ChatCompletion(id='chatcmpl-9uptfzf1jJel9ahg8eRJPWEzL5KzX', choices=[Choice(finish_reason='tool_calls', index=0, logprobs=None, message=ChatCompletionMessage(content=None, role='assistant', function_call=None, tool_calls=[ChatCompletionMessageToolCall(id='call_FZgJkXdNAzJDy3mAS8VyoDGh', function=Function(arguments='{"vendor":"Countdown","date":"2023-10-01","items":[{"name":"Apple Simply Red Md","price":1.8,"quantity":1,"category":"groceries"},{"name":"Mini Square Cheese& Chive 8pk","price":3.84,"quantity":1,"category":"groceries"},{"name":"Brioche Sliders 8pk","price":3.0,"quantity":1,"category":"groceries"},{"name":"Paseo 3 Ply T/tissue 24pk Value","price":15.0,"quantity":1,"category":"supplies"},{"name":"Free From Lactose Lite Milk 1 L","price":3.5,"quantity":1,"category":"groceries"},{"name":"Countdown Eggs Barn Size 7 12pk","price":9.5,"quantity":1,"category":"groceries"},{"name":"Countdown Ice Cream Vanilla 2L","price":6.3,"quantity":1,"category":"groceries"},{"name":"Equal Fresh 

<IPython.core.display.JSON object>

In [8]:
# Import pandas for working with tabular data
import pandas as pd

# Load existing expenses from the CSV file into a DataFrame.
# On a first run the file does not exist yet, so start from an empty table with the
# columns used for receipt rows instead of stopping with FileNotFoundError.
try:
  expenses_df = pd.read_csv("expenses.csv")
except FileNotFoundError:
  expenses_df = pd.DataFrame(
    columns=["Date", "Vendor", "Name", "Quantity", "Price", "Category", "Payment method"]
  )
expenses_df

Unnamed: 0,Date,Vendor,Name,Quantity,Price,Category,Payment method


In [10]:
# Import date functionality to work with dates
from datetime import date

# Receipt-level fields are the same for every item, so look them up once before the loop
purchase_date = receipt_data.get("date", date.today().isoformat())  # today's date if the receipt has none
vendor = receipt_data.get("vendor", "")
payment_method = receipt_data.get("payment_method", "Unknown")

# Build one expense row per item on the receipt
new_rows = []
for item in receipt_data['items']:
  # Use the same default for the name in the log line and in the row, so an item
  # without a name is recorded with an empty name instead of raising KeyError
  item_name = item.get("name", "")
  print(f"Adding item: {item_name}")

  new_rows.append({
    "Date": purchase_date,
    "Vendor": vendor,
    "Name": item_name,
    "Quantity": item.get("quantity", 1),  # default to 1 if not specified
    "Price": item.get("price", 0),  # default to 0 if not specified
    "Category": item.get("category", "Uncategorized"),  # default to 'Uncategorized' if not specified
    "Payment method": payment_method,
  })

# Convert the list of new rows to a DataFrame
new_rows_df = pd.DataFrame(new_rows)

# Combine the stored expenses with the rows from this receipt: when nothing is stored
# yet the new rows become the table, otherwise they are appended with a fresh index
expenses_df = (
  new_rows_df
  if expenses_df.empty
  else pd.concat([expenses_df, new_rows_df], ignore_index=True)
)

# Show the combined table
expenses_df

Adding item: Apple Simply Red Md
Adding item: Mini Square Cheese& Chive 8pk
Adding item: Brioche Sliders 8pk
Adding item: Paseo 3 Ply T/tissue 24pk Value
Adding item: Free From Lactose Lite Milk 1 L
Adding item: Countdown Eggs Barn Size 7 12pk
Adding item: Countdown Ice Cream Vanilla 2L
Adding item: Equal Fresh Lean Meat 500g
Adding item: WW Frozen Chicken Nuggets Battered 1kg
Adding item: Asparagus Bunch NZ
Adding item: Westgold Butter Unsalted 400G
Adding item: Dove Bar Regular 4x90g
Adding item: Mars Bar 47g
Adding item: Hrtland Potato Chpssthrn Salt 150g


Unnamed: 0,Date,Vendor,Name,Quantity,Price,Category,Payment method
0,2023-10-01,Countdown,Apple Simply Red Md,1,1.8,groceries,other
1,2023-10-01,Countdown,Mini Square Cheese& Chive 8pk,1,3.84,groceries,other
2,2023-10-01,Countdown,Brioche Sliders 8pk,1,3.0,groceries,other
3,2023-10-01,Countdown,Paseo 3 Ply T/tissue 24pk Value,1,15.0,supplies,other
4,2023-10-01,Countdown,Free From Lactose Lite Milk 1 L,1,3.5,groceries,other
5,2023-10-01,Countdown,Countdown Eggs Barn Size 7 12pk,1,9.5,groceries,other
6,2023-10-01,Countdown,Countdown Ice Cream Vanilla 2L,1,6.3,groceries,other
7,2023-10-01,Countdown,Equal Fresh Lean Meat 500g,1,6.55,groceries,other
8,2023-10-01,Countdown,WW Frozen Chicken Nuggets Battered 1kg,1,11.0,groceries,other
9,2023-10-01,Countdown,Asparagus Bunch NZ,1,1.99,groceries,other


In [11]:
# Write the combined expenses table to expenses.csv, leaving out the DataFrame index
expenses_df.to_csv(path_or_buf="expenses.csv", index=False)