In [2]:
!pip install pytesseract
!pip install numpy
!pip install google-generativeai
!pip install python-dotenv



In [3]:
import numpy as np
import google.generativeai as genai
import os
from dotenv import load_dotenv

  from .autonotebook import tqdm as notebook_tqdm


In [5]:
# Load variables from a local .env file (if any) into the process environment.
load_dotenv()

# Read the API key and fail fast with a clear message when it is missing,
# rather than letting the first model call fail later on.
api_key = os.getenv("GOOGLE_API_KEY")
if not api_key:
    raise RuntimeError(
        "GOOGLE_API_KEY is not set; define it in the environment or in a .env file."
    )

# Hand the key to the SDK explicitly; previously `api_key` was read but never used.
genai.configure(api_key=api_key)
model = genai.GenerativeModel(model_name="gemini-1.5-pro")

# Fetching data from images

In [12]:
# glob is meant for collecting every image path in a folder, and PIL's Image
# for opening those images. The folder loop itself is currently commented out
# in the next cell, which opens a single image instead.

import glob
from PIL import Image

In [23]:
# Disabled batch variant: would open every PNG under ./test_data/images/
# and collect the opened images in img_array.
# img_array = []
# for img_path in glob.glob("./test_data/images/*.png"):
#     temp = Image.open(img_path)
#     img_array.append(temp)

# Open a single image (inv1.png) to use in the cells below.
img1 = Image.open("test_data/images/inv1.png")

In [33]:
# JSON skeleton the model is asked to fill in. Every value is an empty string,
# and "items_list" holds one example entry showing the per-item fields.
template = "\n".join((
    '{',
    '    "organisation_name" : "",',
    '    "customer_name" : "",',
    '    "invoice_date" : "",',
    '    "invoice_number" : "",',
    '    "items_list" : [{',
    '        "item_name" : "",',
    '        "item_cost" : "",',
    '        "item_quantity" : ""',
    '    }],',
    '    "total_bill" : ""',
    '}',
))

# Full instruction text: a lead-in sentence, the skeleton on its own lines,
# then a closing line telling the model to return nothing else.
prompt = f"Given an image of a invoice. Return the appropriate details in the form of the template : \n{template}\n. Do not return anything else."

In [38]:
# Send the opened image and the text prompt together in a single request.
response = model.generate_content([img1, prompt])

In [39]:
# Display the raw text of the model's reply to the request above.
response.text

'{\n    "organisation_name" : "Company Name",\n    "customer_name" : "John Smith",\n    "invoice_date" : "06/10/2021",\n    "invoice_number" : "INVO-005",\n    "items_list" : [\n        {\n            "item_name" : "Sink",\n            "item_cost" : "200",\n            "item_quantity" : "2"\n        },\n        {\n            "item_name" : "Nest Smart Filter",\n            "item_cost" : "150",\n            "item_quantity" : "1"\n        },\n        {\n            "item_name" : "Labor Fee",\n            "item_cost" : "50",\n            "item_quantity" : "1"\n        },\n        {\n            "item_name" : "Service Fee",\n            "item_cost" : "25",\n            "item_quantity" : "1"\n        }\n    ],\n    "total_bill" : "$425"\n}'

In [40]:
import json
import re


def parse_json_response(raw_text):
    """Parse a model reply as JSON, tolerating a surrounding Markdown code fence.

    The prompt asks for bare JSON, but a reply may still arrive wrapped as
    ```json ... ``` (or a plain ``` fence). Such a wrapper is removed before
    parsing; a reply that is already bare JSON is parsed unchanged.

    Args:
        raw_text: The text of the model reply.

    Returns:
        The decoded JSON value.

    Raises:
        json.JSONDecodeError: If the (unwrapped) text is not valid JSON.
    """
    cleaned = raw_text.strip()
    # fullmatch ensures only a fence that wraps the *entire* reply is removed.
    fenced = re.fullmatch(
        r"```(?:json)?\s*(.*?)\s*```", cleaned, flags=re.DOTALL | re.IGNORECASE
    )
    if fenced:
        cleaned = fenced.group(1)
    return json.loads(cleaned)


parse_json_response(response.text)

{'organisation_name': 'Company Name',
 'customer_name': 'John Smith',
 'invoice_date': '06/10/2021',
 'invoice_number': 'INVO-005',
 'items_list': [{'item_name': 'Sink',
   'item_cost': '200',
   'item_quantity': '2'},
  {'item_name': 'Nest Smart Filter', 'item_cost': '150', 'item_quantity': '1'},
  {'item_name': 'Labor Fee', 'item_cost': '50', 'item_quantity': '1'},
  {'item_name': 'Service Fee', 'item_cost': '25', 'item_quantity': '1'}],
 'total_bill': '$425'}

# Fetching data from PDFs

In [41]:
#since we need an image, we'll have to capture the pdf page
!pip install pdf2image

I0000 00:00:1721372147.060126  574571 work_stealing_thread_pool.cc:320] WorkStealingThreadPoolImpl::PrepareFork


Collecting pdf2image
  Downloading pdf2image-1.17.0-py3-none-any.whl.metadata (6.2 kB)
Downloading pdf2image-1.17.0-py3-none-any.whl (11 kB)
Installing collected packages: pdf2image
Successfully installed pdf2image-1.17.0


In [47]:
from pdf2image import convert_from_path

# Render the PDF at this path into `images`, which the next cell indexes with
# images[0] — presumably one image per page (TODO confirm).
images = convert_from_path("test_data/pdfs/GOLDEN FALCON QUOTE Q2686 (1).pdf")

In [50]:
# Reuse the same prompt, this time with the first entry of `images`.
# Note: this rebinds `response`, replacing the reply from the PNG request.
response = model.generate_content([images[0], prompt])

In [51]:
import re

# The reply may arrive wrapped in a Markdown code fence (```json ... ```).
# Unwrap it when the fence spans the whole reply so json.loads only sees the
# JSON document; a bare-JSON reply is parsed exactly as before.
pdf_reply_text = response.text.strip()
pdf_reply_fence = re.fullmatch(
    r"```(?:json)?\s*(.*?)\s*```", pdf_reply_text, flags=re.DOTALL | re.IGNORECASE
)
json.loads(pdf_reply_fence.group(1) if pdf_reply_fence else pdf_reply_text)

{'organisation_name': 'Fenix Aerospace LLC',
 'customer_name': 'GOLDEN FALCON AVIATION',
 'invoice_date': '10/17/2022',
 'invoice_number': '2686',
 'items_list': [{'item_name': 'BRAKE - MULTIPLE DISK',
   'item_cost': '26,000.00',
   'item_quantity': '1'},
  {'item_name': 'BRAKE - MULTIPLE DISK',
   'item_cost': '20,000.00',
   'item_quantity': '1'}],
 'total_bill': '46,000.00'}