<a href="https://colab.research.google.com/github/Vajja-Premsai/.net_Journey/blob/main/Extract_Information_From_Image_using_GEMINI.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [8]:
!pip install -q -U google-generativeai

In [9]:
import google.generativeai as genai

## SET API KEY

In [11]:
# The API key is fetched from Colab's `userdata` store under the name
# "GOOGLE_API_KEY", so it never has to be written into the notebook itself.
from google.colab import userdata

GOOGLE_API_KEY = userdata.get("GOOGLE_API_KEY")

# Register the key with the SDK; later `genai` calls rely on this.
genai.configure(api_key=GOOGLE_API_KEY)

In [13]:
# Generation settings; passed to the model as its `generation_config`.
MODEL_CONFIG = dict(
    temperature=0.2,
    top_p=1,
    top_k=32,
    max_output_tokens=4096,
)

# Safety settings: every listed harm category uses the same
# "BLOCK_MEDIUM_AND_ABOVE" threshold, so the entries are generated from the
# category names instead of being spelled out one by one.
safety_settings = [
    {"category": harm_category, "threshold": "BLOCK_MEDIUM_AND_ABOVE"}
    for harm_category in (
        "HARM_CATEGORY_HARASSMENT",
        "HARM_CATEGORY_HATE_SPEECH",
        "HARM_CATEGORY_SEXUALLY_EXPLICIT",
        "HARM_CATEGORY_DANGEROUS_CONTENT",
    )
]

## LOAD GEMINI MODEL WITH MODEL CONFIGURATIONS

In [14]:
# Create the Gemini model handle used by the rest of the notebook, wiring in
# the generation settings and safety settings defined above.
model = genai.GenerativeModel(
    model_name="gemini-1.5-flash",
    generation_config=MODEL_CONFIG,
    safety_settings=safety_settings,
)

## DEFINE THE IMAGE INPUT FORMAT FOR GEMINI

In [15]:
from pathlib import Path

def image_format(image_path):
    """Read an image file and wrap it as an inline image part for the model.

    The MIME type is derived from the file extension (case-insensitive):
    ``.png`` -> ``image/png``, ``.jpg``/``.jpeg`` -> ``image/jpeg``,
    ``.webp`` -> ``image/webp``. Any other extension falls back to
    ``image/png``, so PNG inputs behave exactly as before while JPEG and WEBP
    files are no longer mislabelled as PNG.

    Args:
        image_path: Location of the image file (``str`` or ``Path``).

    Returns:
        A one-element list containing a dict with two keys: ``"mime_type"``
        (the MIME string) and ``"data"`` (the raw file bytes).

    Raises:
        FileNotFoundError: If ``image_path`` does not exist.
    """
    # Extension -> MIME type for the formats this notebook expects to send.
    mime_by_suffix = {
        ".png": "image/png",
        ".jpg": "image/jpeg",
        ".jpeg": "image/jpeg",
        ".webp": "image/webp",
    }

    img = Path(image_path)

    if not img.exists():
        raise FileNotFoundError(f"Could not find image: {img}")

    # Lower-case the suffix so "photo.JPG" and "photo.jpg" are treated alike.
    mime_type = mime_by_suffix.get(img.suffix.lower(), "image/png")

    image_parts = [
        {
            "mime_type": mime_type,
            "data": img.read_bytes()
        }
    ]
    return image_parts


## GEMINI MODEL OUTPUT

In [16]:
def gemini_output(image_path, system_prompt, user_prompt):
    """Send one image plus two prompts to the model and return the reply text.

    Args:
        image_path: Path of the image; loaded through ``image_format``.
        system_prompt: Text placed before the image in the request.
        user_prompt: Text placed after the image in the request.

    Returns:
        The ``text`` attribute of the response from ``model.generate_content``.
    """
    # image_format returns a one-element list; only that single part is sent.
    image_part = image_format(image_path)[0]
    reply = model.generate_content([system_prompt, image_part, user_prompt])
    return reply.text

## EXTRACTING PART OF THE INFORMATION


In [17]:
# Instruction text sent ahead of the image.
system_prompt = """
               You are a specialist in comprehending receipts.
               Input images in the form of receipts will be provided to you,
               and your task is to respond to questions based on the content of the input image.
               """

# Image to analyse (a file under Colab's /content directory).
image_path = "/content/p1.png"

# Ask for the text only, with no extra commentary around it.
user_prompt = "Extract the complete text from the image and do not add any additional text"

# Last expression of the cell, so the returned string is shown as the output.
gemini_output(image_path, system_prompt, user_prompt)

"Here's a summary of the information from the provided Jamaican Driver's License image:\n\n**Government of Jamaica Driver's Licence**\n\n* **Class:** GENERAL\n* **TRN:** 121068820\n* **Date Issued:** 2022-05-05\n* **Collectorate:** 011 KINGSTON\n* **Expiry Date:** 2026-07-20\n* **Birth Date:** 1990-07-20\n* **Sex:** F\n* **Name:** BROWN JELICA ALICIA\n* **Address:** 30 BROTHERTON AVENUE KINGSTON 13"

## EXTRACTING ALL DATA FROM THE IMAGE AS JSON

In [19]:
# Inputs for the JSON extraction request: image, instruction text, and question.
image_path = "/content/p1.png"

system_prompt = """
               You are a specialist in comprehending receipts.
               Input images in the form of receipts will be provided to you,
               and your task is to respond to questions based on the content of the input image.
               """
# Alternative system prompt, currently not in use:
#   "Convert Invoice data into json format with appropriate json tags as required for the data in image "

# Ask the model to choose suitable JSON keys for whatever the image contains.
user_prompt = "Convert data into json format with appropriate json tags as required for the data in image "

In [20]:
# Run the request and keep the reply text so a later cell can render it.
output = gemini_output(
    image_path=image_path,
    system_prompt=system_prompt,
    user_prompt=user_prompt,
)

In [21]:
# Render the reply as Markdown so any Markdown in it (for example a fenced
# code block) is displayed formatted instead of as a raw string.
from IPython.display import Markdown
Markdown(output)

```json
{
  "driver_license": {
    "government": "GOVERNMENT OF JAMAICA",
    "license_type": "DRIVER'S LICENCE",
    "class": "GENERAL",
    "trn": "121068820",
    "date_issued": "2022-05-05",
    "collectorate": "011 KINGSTON",
    "expiry_date": "2026-07-20",
    "birth_date": "1990-07-20",
    "sex": "F",
    "name": {
      "last_name": "BROWN",
      "first_name": "JELICA ALICIA"
    },
    "address": "30 BROTHERTON AVENUE\nKINGSTON 13",
    "signature": "Brown"
  }
}
```