In [1]:
from dotenv import load_dotenv
import os
import mimetypes
import json
import base64
from PIL import Image
import google.generativeai as genai
import time

In [2]:
## Configuring API Key
load_dotenv()  # Ensure this line is executed to load the environment variables
genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))

In [3]:
## Function to load Gemini Pro vision model and get response
from google.generativeai.types import HarmCategory, HarmBlockThreshold
def get_gemini_response(input, image, prompt):
    model = genai.GenerativeModel('gemini-1.5-flash')
    # model = genai.GenerativeModel('gemini-1.5-pro')
    try:
        # response = model.generate_content([input, image[0], prompt])
        response = model.generate_content(
            [input, image[0], prompt],
            safety_settings={
                HarmCategory.HARM_CATEGORY_HATE_SPEECH: HarmBlockThreshold.BLOCK_NONE,
                HarmCategory.HARM_CATEGORY_HARASSMENT: HarmBlockThreshold.BLOCK_NONE,
                # HarmCategory.HARM_CATEGORY_DANGEROUS: HarmBlockThreshold.BLOCK_NONE,
                HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: HarmBlockThreshold.BLOCK_NONE,
            }
        )
        
        # Check if the response was blocked due to safety ratings
        if response.candidates and response.candidates[0].content:
            return response.candidates[0].content.parts[0].text
        else:
            # Check safety ratings
            if response.candidates and response.candidates[0].safety_ratings:
                safety_issues = [f"{rating.category}: {rating.probability}"
                                 for rating in response.candidates[0].safety_ratings
                                 if rating.probability != "NEGLIGIBLE"]
                return f"Response blocked due to safety concerns: {', '.join(safety_issues)}"
            else:
                return "No valid response generated. Please try again with a different prompt or image."
    except Exception as e:
        return f"An error occurred: {str(e)}"

In [4]:
def input_image_setup(file_path):
    if file_path is not None and os.path.exists(file_path):
        # Determine the MIME type based on the file extension
        mime_type, _ = mimetypes.guess_type(file_path)
        
        if mime_type and mime_type.startswith('image/'):
            with open(file_path, "rb") as file:
                bytes_data = file.read()

            image_parts = [
                {
                    "mime_type": mime_type,
                    "data": bytes_data,
                }
            ]
            return image_parts
        else:
            raise ValueError("Unsupported file type. Please provide a valid image file.")
    else:
        raise FileNotFoundError("No file provided or file does not exist.")

In [5]:
def encode_image_to_base64(file_path):
    with open(file_path, "rb") as image_file:
        return base64.b64encode(image_file.read()).decode('utf-8')

In [6]:
def classify_document(image_data):
    # Simple classification prompt
    classification_prompt = """
    You are an expert in understanding Indian Documents like Aadhaar card, Pancard, Gate scorecard and birth certificates.
    Identify whether the given document is an Aadhaar card or PAN card or Gate scorecard or birth certificate.
    Give 'aadhar' as output if it is Aadhaar card, give 'pan' if it is Pancard,
    Give 'gatescore' as output if it is Gate Scorecard, give 'birth_cartificate' if it is birth certificate.
    """
    # Get the classification result
    classification_result = get_gemini_response(classification_prompt, image_data, "")
    return classification_result.strip().lower()

ZERO SHOT PROMPTING FOR DATA EXTARCTION

In [71]:
def ExtractData(image_file_path):
    start_time = time.time()

    # Display the uploaded image
    # image = Image.open(image_file_path)
    # image.show()

    # Convert the uploaded file to bytes and process
    image_data = input_image_setup(image_file_path)

    # Classify the document type
    document_type = classify_document(image_data)
    print(f"Document type detected: {document_type}")


    end_time = time.time()  # End the timer
    print(f"Time taken for doc detection: {end_time - start_time:.2f} seconds")

    # Set the appropriate few-shot examples and input prompt based on the document type
    if "aadhar" in document_type:
        # few_shot_examples = aadhaar_few_shot_examples
        input_text = "Extract the name, dob, gender, Aadhar Number from the provided Aadharcard Document."
    elif "pan" in document_type:
        # few_shot_examples = pan_few_shot_examples
        input_text = "Extract the name, father's name, dob, PAN Number from the provided Pancard Document."
    elif "gatescore" in document_type:
        # few_shot_examples = pan_few_shot_examples
        input_text = "Extract the name, Registration number, Examination paper, Gate score, Marks out of 100 from the provided gate scorecard document."
    elif "birth_certificate" in document_type:
        # few_shot_examples = pan_few_shot_examples
        input_text = "Extract the name, father's name, dob from the provided Birthcertificate document."
    else:
        print("Unknown document type detected. Please provide a proper image.")
        return

    input_prompt = """
    You are an expert in understanding Indian Documents like Aadhaar card and Pancard and birth certificates and Gate scorecard in any Indian Language.
    You will recieve input document as image and input text for question on input image document.
    Questions will based on the extraction of data from the image like name, dob, father's name, aadharnumber, pannumber or address or score, marks etc.
    You have to give the json reponse of this details. please don't include any other text expect the json.
    Don't write json and ``` in the output. Only return the json.
    """
    response = get_gemini_response(input_prompt, image_data, input_text)
    
    if response is None:
        print("No valid response generated. Please give a proper prompt.")
    elif response.startswith("Response blocked due to safety concerns:"):
        print("Response blocked due to safety concerns. Please use a different prompt.")
    elif response.startswith("An error occurred:"):
        print("An error occurred. Please use a different prompt.")
    else:
        try:
            response_json = json.loads(response)
            # print("Extracted Information:")
            # print(json.dumps(response_json, indent=4))
            end_time = time.time()  # End the timer
            print(f"Total Time taken for data extraction with document detection: {end_time - start_time:.2f} seconds")
            return response_json
        except json.JSONDecodeError:
            print("The response is not a valid JSON. Here is the response text:")
            print(response)
    return response


In [72]:
# Manually provide the path to the image file
# image_file_path = "Aadharcard-Data/sample_image3.jpeg"
# image_file_path = "Pancard-Data/pan3.jpeg"
# image_file_path = "Gatescorecard-Data/gate1.jpeg"
image_file_path = "Birthcertificate-Data/meet.jpeg"

extracted_data = ExtractData(image_file_path)    

Document type detected: birth_certificate
Time taken for doc detection: 2.04 seconds
Total Time taken for data extraction with document detection: 4.85 seconds


In [73]:
extracted_data
# now if the data is in another language then need to traslate it to english
# and if the detected language is other than english then try with one extra prompt for reprompting with the form data

{'name': 'મિત કુમાર', "father's name": 'વ્યાસ ભન', 'dob': '૧૦/૧૨/૦૧'}

From here to


In [59]:
#Detect the languge

from langdetect import detect
first_key, first_value = next(iter(extracted_data.items()))
detected_lang = detect(first_value)
detected_lang


'gu'

In [60]:

#now make a dictionary with all the languages full form with shortform

detected_lang = "gujarati"

In [67]:
# Cheking again with actual name from form data (Improve response)
# if required make dictionary of numebrs for 0 to 9 


def CheckAnotherLanguageData(image_file_path, detected_lang):
    name = "Mit kumar"
    father_name = "Niranjanbhai jesangbhai patel"
    dob = "10/11/01"
    
    image_data = input_image_setup(image_file_path)
    #make if else cases for document
    input_text = "Extract the name, father's name, dob from the provided Birthcertificate document, When extracting the details please see if the name if name is {name}, father's name is {father_name} and dob is {dob}"
    input_prompt="""
    In the document all the details are in {detected_lang} not in english so we to be very carefull when extracting the data.
    Yor are a expert in the detection of text of {detected_lang} from the image document.
    Now extract the related information in {detected_lang} language it self.
    In the input you will get some details also you have to only related those details so you can easily eaxtract the data from the image if that language is not properly visible
    Don't write json and ``` in the output. Only return the json.
    """

    # Now translate all the details from {detected_lang} to english and make a json.
    response = get_gemini_response(input_prompt, image_data, input_text)
    return response
imporoved_json = CheckAnotherLanguageData(image_file_path, detected_lang=detected_lang)

In [68]:
imporoved_json

'{\n"name": "મિતુકુમાર",\n"father_name": "જયેન્દ્રભાઈ",\n"dob": "૧૦ / ૧૨ / ૦૧"\n}'

HERE


In [74]:
#Traslate the json in to the english
from deep_translator import GoogleTranslator

def translate_text(text, dest_language='en'):
    translator = GoogleTranslator(source='auto', target=dest_language)
    translated_json = {}

    for key, value in text.items():
        translated_key = translator.translate(key)
        translated_value = translator.translate(value)
        translated_json[translated_key] = translated_value
    
    return translated_json
traslated_json = translate_text(extracted_data)

In [75]:
traslated_json

{'name': 'Mit Kumar', "father's name": 'Vyasa Bhan', 'dob': '૧૦/૧૨/૦૧'}