##### Copyright 2025 Google LLC.

In [39]:
# @title Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Extract Information From INVOICE

This notebook demonstrates how to use Google Gemini's multimodal (vision) capabilities to automatically extract structured data (like invoice numbers, dates, line items, and totals) from invoice images. By sending the image bytes to the model together with a prompt that asks for JSON, it outputs machine-readable data for easy integration into accounting systems or spreadsheets. The solution addresses common pitfalls like local file restrictions and API errors, offering a scalable workflow for accounts payable automation, expense tracking, and document digitization using multimodal AI.



<a target="_blank" href="https://colab.research.google.com/github/google-gemini/cookbook/blob/main/examples/Extract_Information_From_INVOICE.ipynb"><img src="https://colab.research.google.com/assets/colab-badge.svg" height=30/></a>

In [40]:
%pip install -U -q "google-genai>=1.0.0"

In [41]:
from google import genai

from IPython.display import Markdown

## Configure your API key

To run the following cell, your API key must be stored in a Colab Secret named `GOOGLE_API_KEY`. If you don't already have an API key, or you're not sure how to create a Colab Secret, see [Authentication](https://github.com/google-gemini/cookbook/blob/main/quickstarts/Authentication.ipynb) for an example.

In [42]:
from google.colab import userdata
# Read the key from the Colab Secret named GOOGLE_API_KEY so it is never
# hard-coded in the notebook.
GOOGLE_API_KEY = userdata.get('GOOGLE_API_KEY')
# Client for the `google-genai` SDK (the `genai` imported via `from google import genai`).
client = genai.Client(api_key=GOOGLE_API_KEY)

In [43]:
# Gemini model identifier. The `@param` annotation renders this as a Colab form
# field: pick one of the listed models or type another model id.
MODEL_ID="gemini-2.0-flash" # @param ["gemini-2.0-flash-lite","gemini-2.0-flash","gemini-2.5-pro-exp-03-25"] {"allow-input":true, isTemplate: true}

In [44]:
!pip install -q -U google-generativeai

In [45]:
import google.generativeai as genai

# LIST OF MODELS

In [47]:
# `genai` now refers to `google.generativeai` (imported in the previous cell).
# That SDK is configured separately from the `client` created earlier, so it
# must be given the API key before any call is made; without this the cell
# fails on a fresh kernel.
genai.configure(api_key=GOOGLE_API_KEY)

# Print the name of every model that supports the `generateContent` method.
for m in genai.list_models():
  if 'generateContent' in m.supported_generation_methods:
    print(m.name)

models/gemini-1.0-pro-vision-latest
models/gemini-pro-vision
models/gemini-1.5-pro-latest
models/gemini-1.5-pro-001
models/gemini-1.5-pro-002
models/gemini-1.5-pro
models/gemini-1.5-flash-latest
models/gemini-1.5-flash-001
models/gemini-1.5-flash-001-tuning
models/gemini-1.5-flash
models/gemini-1.5-flash-002
models/gemini-1.5-flash-8b
models/gemini-1.5-flash-8b-001
models/gemini-1.5-flash-8b-latest
models/gemini-1.5-flash-8b-exp-0827
models/gemini-1.5-flash-8b-exp-0924
models/gemini-2.5-pro-exp-03-25
models/gemini-2.5-pro-preview-03-25
models/gemini-2.0-flash-exp
models/gemini-2.0-flash
models/gemini-2.0-flash-001
models/gemini-2.0-flash-exp-image-generation
models/gemini-2.0-flash-lite-001
models/gemini-2.0-flash-lite
models/gemini-2.0-flash-lite-preview-02-05
models/gemini-2.0-flash-lite-preview
models/gemini-2.0-pro-exp
models/gemini-2.0-pro-exp-02-05
models/gemini-exp-1206
models/gemini-2.0-flash-thinking-exp-01-21
models/gemini-2.0-flash-thinking-exp
models/gemini-2.0-flash-thinki

In [48]:
# Generation parameters handed to the model as its `generation_config`.
MODEL_CONFIG = dict(
    temperature=0.2,
    top_p=1,
    top_k=32,
    max_output_tokens=4096,
)

# Safety settings: every harm category listed here uses the same blocking threshold.
safety_settings = [
    {"category": harm_category, "threshold": "BLOCK_MEDIUM_AND_ABOVE"}
    for harm_category in (
        "HARM_CATEGORY_HARASSMENT",
        "HARM_CATEGORY_HATE_SPEECH",
        "HARM_CATEGORY_SEXUALLY_EXPLICIT",
        "HARM_CATEGORY_DANGEROUS_CONTENT",
    )
]

# LOAD GEMINI MODEL WITH MODEL CONFIGURATIONS

In [49]:
# Build the model from MODEL_ID (the form parameter defined earlier) instead of a
# hard-coded name, so changing the selection actually changes the model used.
# MODEL_CONFIG and safety_settings come from the configuration cell above.
model = genai.GenerativeModel(model_name = MODEL_ID,
                              generation_config = MODEL_CONFIG,
                              safety_settings = safety_settings)

# DEFINE IMAGE FORMAT TO INPUT IN GEMINI

In [50]:
import mimetypes
from pathlib import Path

def image_format(image_path, mime_type=None):
    """Load an image file and wrap it as the inline-data part sent to Gemini.

    Args:
        image_path: Path (str or Path) of the image file to read.
        mime_type: Optional explicit MIME type (e.g. "image/png", "image/jpeg",
            "image/webp"). When omitted it is inferred from the file extension,
            so a ``.jpg`` file is no longer mislabelled as PNG.

    Returns:
        A one-element list holding a dict with the keys ``"mime_type"`` and
        ``"data"`` (the raw file bytes).

    Raises:
        FileNotFoundError: If ``image_path`` does not exist.
    """
    img = Path(image_path)

    if not img.exists():
        raise FileNotFoundError(f"Could not find image: {img}")

    if mime_type is None:
        guessed, _ = mimetypes.guess_type(img.name)
        # Older Python versions do not know the .webp extension.
        if guessed is None and img.suffix.lower() == ".webp":
            guessed = "image/webp"
        # Fall back to the previous hard-coded default when the extension is unknown.
        mime_type = guessed or "image/png"

    image_parts = [
        {
            "mime_type": mime_type,
            "data": img.read_bytes()
        }
    ]
    return image_parts


# GEMINI MODEL OUTPUT


In [51]:
def gemini_output(image_path, system_prompt, user_prompt):
    """Ask the module-level `model` a question about one image and return its text reply.

    The request is a three-part list: `system_prompt`, then the first image part
    produced by `image_format(image_path)`, then `user_prompt`.
    """
    image_part = image_format(image_path)[0]
    request_parts = [system_prompt, image_part, user_prompt]
    return model.generate_content(request_parts).text

# EXTRACTING PART OF THE INFORMATION FROM INVOICE

In [52]:
# Invoice image to query, and a single natural-language question about it.
image_path = "/content/sample_invoice.jpg"
user_prompt = "What is the balance amount in the image?"

# Role-setting instructions that are placed ahead of the image in the request.
system_prompt = """
               You are a specialist in comprehending receipts.
               Input images in the form of receipts will be provided to you,
               and your task is to respond to questions based on the content of the input image.
               """

gemini_output(image_path=image_path, system_prompt=system_prompt, user_prompt=user_prompt)

'The balance amount in the image is $116.00.'

# EXTRACTING WHOLE DATA IN JSON FROM INVOICE


In [56]:
# Same invoice image; this time the user prompt asks for the whole document as JSON.
image_path = "/content/sample_invoice.jpg"
user_prompt = "Convert Invoice data into json format with appropriate json tags as required for the data in image "

# Role-setting instructions that are placed ahead of the image in the request.
system_prompt = """
               You are a specialist in comprehending receipts.
               Input images in the form of receipts will be provided to you,
               and your task is to respond to questions based on the content of the input image.
               """

In [57]:
# Run the JSON-extraction request and keep the reply text for display.
output = gemini_output(
    image_path=image_path,
    system_prompt=system_prompt,
    user_prompt=user_prompt,
)

In [58]:
# Render the model's reply as Markdown. `Markdown` is already imported in the
# notebook's import cell, so it is not re-imported here.
Markdown(output)

```json
{
  "invoice": {
    "invoice_to": {
      "name": "NAME SURENAME",
      "address": "1700 Biscayne Blvd, Miami, FL 33132, United Satates"
    },
    "invoice_number": "1234",
    "date": "10/10/2024",
    "items": [
      {
        "no": "1",
        "service_description": "Lorem ipsum dolor sit",
        "price": "$25.00",
        "quantity": "1",
        "total": "$25.00"
      },
      {
        "no": "2",
        "service_description": "Lorem ipsum dolor sit",
        "price": "$25.00",
        "quantity": "1",
        "total": "$25.00"
      },
      {
        "no": "3",
        "service_description": "Lorem ipsum dolor sit",
        "price": "$50.00",
        "quantity": "1",
        "total": "$50.00"
      }
    ],
    "subtotal": "$100.00",
    "shipping": "$15.00",
    "tax_rate": "$1.00",
    "total": "$116.00",
    "terms_and_conditions": "Lorem ipsum dolor sit amet, consectetuer adipiscing elit, sed diam nonummy nibh euismod tincidunt ut laoreet dolore magna aliquam erat.",
    "questions": {
      "email": "mail@yourcompany.com",
      "call": "+12 345 6789 0"
    },
    "payment_info": {
      "account_number": "1234 5677 5432",
      "account_name": "Lorem ipsum",
      "bank_details": "Add your bank detais"
    },
    "social_media": {
      "facebook": "@Username",
      "youtube": "@Username",
      "instagram": "@Username"
    }
  }
}
```