In [None]:
import os

# Fireworks API key. Prefer the FIREWORKS_API_KEY environment variable so the
# real secret never has to be typed into (and saved with) the notebook; the
# original placeholder string is kept as the fallback when it is not set.
FIREWORKS_API_KEY = os.environ.get("FIREWORKS_API_KEY", "use your api key")

### Code starts

In [None]:
import os
import base64
from datetime import date
from pydantic import BaseModel, Field
import fireworks.client
from typing import List


def encode_image(image_path):
    """Return the contents of the file at ``image_path`` as a base64 string.

    The file is read in binary mode; the base64 bytes are decoded as UTF-8 so
    the result is a ``str`` that can be embedded in text.
    """
    with open(image_path, mode="rb") as source:
        raw_bytes = source.read()
    encoded = base64.b64encode(raw_bytes)
    return encoded.decode("utf-8")

  
def extract_document_type_from_filename(file_path):
    """Infer the document type from keywords in the file's base name.

    Only the final path component is inspected, case-insensitively.

    Returns:
        "passport" if the name contains "passport", otherwise "license" if it
        contains "license", otherwise None.
    """
    lowered_name = os.path.basename(file_path).lower()
    # Order matters: "passport" takes precedence over "license".
    for keyword in ("passport", "license"):
        if keyword in lowered_name:
            return keyword
    return None

# Fireworks model identifier; extract_identity_info passes it as `model=` to
# fireworks.client.ChatCompletion.create.
model = "accounts/fireworks/models/deepseek-v3"
# Alternative model identifier, currently disabled:
# model = "accounts/fireworks/models/phi-3-vision-128k-instruct"

def _guess_image_mime_type(image_path, default="image/jpeg"):
    """Return the image MIME type implied by ``image_path``'s extension, else ``default``."""
    import mimetypes

    guessed_type, _ = mimetypes.guess_type(image_path)
    if guessed_type and guessed_type.startswith("image/"):
        return guessed_type
    return default


def extract_identity_info(image_path, document_type):
    """Ask the Fireworks chat-completion API to extract identity fields from an image.

    The image is base64-encoded and sent as a data URL together with a prompt
    and a JSON schema that both depend on ``document_type``.

    Args:
        image_path: Path to the image file to analyse.
        document_type: "license" or "passport" selects the matching schema and
            prompt; any other value (including None) selects a generic schema
            with a ``Document_identification_number`` field.

    Returns:
        ``response.choices[0].message.content`` exactly as returned by the API
        (the request asks for a JSON object matching the selected schema).

    Raises:
        Whatever ``open`` raises if the image cannot be read, and whatever the
        Fireworks client raises for a failed request; nothing is caught here.
    """
    image_base64 = encode_image(image_path)
    # Label the data URL with the file's real image type (e.g. image/png for
    # .png inputs) instead of always claiming JPEG; unknown extensions fall
    # back to image/jpeg, which is what was sent before.
    mime_type = _guess_image_mime_type(image_path)
    fireworks.client.api_key = FIREWORKS_API_KEY

    # Create a different Pydantic model and prompt based on the document type.
    # NOTE(review): the prompt/description texts are sent to the model verbatim,
    # so their wording is intentionally left untouched here.
    if document_type == "license":
        class IdentityData(BaseModel):
            Name: str
            Date_of_birth: date
            Address: str
            Date_of_expiry: date
            Date_of_issuance: date
            Class: str = Field(
                ...,
                description="Class is usually a single letter like A or C"
            )
            Sex: str = Field(
                ...,
                description="Sex is usally a single letter like M or F against Sex")

            Height: str = Field(
                ...,
                description="Height can be written againts HGT or height"
            )
            Eyes: str
            Weight: str = Field(
                ...,
                description="Weight is written againts WGT. If not present return None"
            )
            License_number: str = Field(
                ...,
                description="License number appears after DL or DLN"
            )

        user_prompt = (
            """Extract the data: name, date of birth, address, date of expiry (EXP), date of issuance (ISS), and license number. Class is usally a single letter like A or C For license documents, the identification number appears after DL or DLN. If you are unable to extract any information, return None againts the field."""
        )

    elif document_type == "passport":
        class IdentityData(BaseModel):
            Name: str
            Date_of_birth: date
            Address: str
            Date_of_expiry: date
            Date_of_issuance: date
            Passport_number: str = Field(
                ...,
                description="Passport number is shown under PASSPORT NO."
            )

        user_prompt = (
            """Extract the data: name, date of birth, address, date of expiry (EXP), date of issuance (ISS), and passport number. For passports, the number is displayed under PASSPORT NO. If you are unable to extract any information, return None."""
        )
    else:
        # Unknown or missing document type: use a generic identification field.
        class IdentityData(BaseModel):
            Name: str
            Date_of_birth: date
            Address: str
            Date_of_expiry: date
            Date_of_issuance: date
            Document_identification_number: str = Field(
                ...,
                description="General identification number"
            )

        user_prompt = (
            "Extract the data: name, date of birth, address, date of expiry (EXP), "
            "date of issuance (ISS), and document identification number. If you are "
            "unable to extract any information, return None."
        )

    system_message = (
        "You are a vision AI model for image analysis. You can view images. The user can include "
        "images with their messages as input. Expect to receive and analyze attached images as your primary function."
    )

    response = fireworks.client.ChatCompletion.create(
        model=model,
        # Request a JSON object that follows the schema selected above.
        response_format={"type": "json_object", "schema": IdentityData.model_json_schema()},
        messages=[
            {"role": "system", "content": system_message},
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": user_prompt},
                    {
                        "type": "image_url",
                        "image_url": {"url": f"data:{mime_type};base64,{image_base64}#transform=inline"}
                    }
                ]
            }
        ]
    )

    return response.choices[0].message.content



In [14]:
def process_identity_documents(folder_path):
    """Run identity extraction on every supported image file in a folder.

    Files are matched case-insensitively on the .jpg, .jpeg and .png
    extensions. For each match the document type is inferred from the file
    name and the image is passed to ``extract_identity_info``.

    Args:
        folder_path: Directory to scan (not recursive).

    Returns:
        Dict mapping each processed file name to the value returned by
        ``extract_identity_info`` for it, in sorted file-name order.
    """
    supported_extensions = ('.jpg', '.jpeg', '.png')
    results = {}

    # os.listdir() returns entries in arbitrary order; sort them so the
    # processing log and the result order are reproducible across runs.
    for file_name in sorted(os.listdir(folder_path)):
        if not file_name.lower().endswith(supported_extensions):
            continue
        file_path = os.path.join(folder_path, file_name)
        # A directory whose name looks like an image cannot be opened as one.
        if not os.path.isfile(file_path):
            continue
        print(f"Processing file: {file_path}")
        document_type = extract_document_type_from_filename(file_name)
        results[file_name] = extract_identity_info(file_path, document_type)
    return results


if __name__ == "__main__":
    # Process every supported image in the sample folder, then print each
    # file's raw extraction result under its own heading.
    folder = "Identity Documents"
    analysis_results = process_identity_documents(folder)

    for file_name, info in analysis_results.items():
        print(f"\nResults for {file_name}:", info, sep="\n")


Processing file: Identity Documents\License 1.png
Processing file: Identity Documents\License-2.jpg
Processing file: Identity Documents\License-3.jpeg
Processing file: Identity Documents\passport-1.jpeg
Processing file: Identity Documents\passport-2.jpg

Results for License 1.png:
{
"Name": "IMA CARDHOLDER",
"Date_of_birth": "08/31/1977",
"Address": "2570 24TH STREET, ANYTOWN, CA 95818",
"Date_of_expiry": "08/31/2014",
"Date_of_issuance": "08/31/2009",
"Class": "C",
"Sex": "F",
"Height": "5'-05\"",
"Eyes": "BRN",
"Weight": "125 lb",
"License_number": "11234568"
}

Results for License-2.jpg:
{
"Name": "Janice Sample",
"Date_of_birth": "01/07/2005",
"Address": "123 MAIN STREET APT. 1, HARRISBURG, PA 17101-0000",
"Date_of_expiry": "01/08/2026",
"Date_of_issuance": "01/07/2022",
"Class": "C",
"Sex": "F",
"Height": "5'-05\"",
"Eyes": "BLU",
"Weight": "None",
"License_number": "99 999 999"
}

Results for License-3.jpeg:
{
"Name": "None",
"Date_of_birth": "None",
"Address": "None",
"Date_of_e

### Sandbox

In [8]:
# Pydantic schema (not a function) describing the identity fields to extract
# from a document image. In the visible sandbox code it is only referenced by
# a commented-out `response_format` argument.
class identity_data(BaseModel):
    # All fields, including the dates, are declared as plain strings.
    Name: str
    Date_of_birth: str
    Address: str
    Date_of_expiry: str
    Date_of_issuance: str
    # The pattern only admits four literal identification strings.
    # NOTE(review): the second alternative starts with a space (" A56789458") — confirm that is intended.
    Document_identification_number: str = Field(pattern="^(99 999 9999| A56789458|127890875|000678156729)$")
    # Disabled per-document-type fields:
    # DLN: str
    # Passport_number: str

In [9]:
# System prompt; sent as the "system" message in the sandbox ChatCompletion request.
system_message= "You are a vision AI model for image analysis. You can view images. The user can include images with their messages as input. Expect to receive and analyze attached images as your primary function."

# Detailed extraction prompt.
# NOTE(review): the visible sandbox request sends a hard-coded text prompt rather than this variable — confirm whether that is intended.
user_prompt= "Extract the data name, date of birth, address, date of expiry, date of issuance (if possible), document type in a JSON format. ISS or Iss stand for date of issuance and EXP or Exp stand for date of expiry. REMEMBER thast date of issuance would always be before date of expiry. Document identification number would be alphanumeric. For license,  Document identification number appears after DL or DLN and for PASSPORT IT SHOWS UNDER PASSPORT NO. The number at the bottom of the id is NOT AN IDENTIFICATION NUMBER. If you are unable to extract any information return None."

In [10]:
import fireworks.client
import base64

def encode_image(image_path):
    """Read the file at ``image_path`` in binary mode and return it base64-encoded as a UTF-8 string."""
    with open(image_path, mode="rb") as image_file:
        payload = image_file.read()
    return str(base64.b64encode(payload), "utf-8")

# Path to the sample image. A forward slash is used because "\L" inside a
# normal string literal is an invalid escape sequence (Python warns about it)
# and a backslash separator only works on Windows.
image_path = "Identity Documents/License-3.jpeg"

# The base64 string of the image
image_base64 = encode_image(image_path)

fireworks.client.api_key = FIREWORKS_API_KEY

# Free-form request: no JSON schema is enforced here, so the model's reply is
# printed as plain text.
response = fireworks.client.ChatCompletion.create(
    # model="accounts/fireworks/models/phi-3-vision-128k-instruct",
    model="accounts/fireworks/models/deepseek-v3",
    # response_format={"type": "json_object", "schema": identity_data.model_json_schema()},
    messages=[
        {"role": "system", "content": system_message},
        {
            "role": "user",
            "content": [
                {"type": "text", "text": "extract key information in this image?"},
                {
                    "type": "image_url",
                    "image_url": {
                        "url": f"data:image/jpeg;base64,{image_base64}#transform=inline"
                    },
                },
            ],
        },
    ],
)
print(response.choices[0].message.content)

  image_path = "Identity Documents\License-3.jpeg"


The provided text appears to be a repetitive sequence of numbers and some contextual information. Here’s the key information extracted:

1. **Location**:  
   - **State**: North Carolina  

2. **Dates**:  
   - **May 5, 2024**  
   - **05/28/2024**  
   - **05/28/2029**  

3. **Identifier**:  
   - **5 DD 12345689**  

4. **Status**:  
   - **VETERAN**  

5. **Repetitive Numbers**:  
   - A long sequence of repeated numbers, primarily **123456789**.  

This seems to be part of a document or record, possibly related to identification or licensing, but the repetitive numbers suggest a placeholder or dummy data.


In [None]:
# identity_grammar = """
# root ::= identity-record

# identity-record ::= "Name: " name " Date of Birth: " dob " Gender: " gender " Address: " address " Date of expiry: " EXP " Date of issuance: " ISS " Identification Number: " id

# name ::= ("John Doe" | "Jane Smith" | "Alice Johnson" | "Bob Brown")
# dob ::= ("02/01/2008")
# gender ::= ("Male" | "Female" | "Non-binary")
# address ::= ("218 Eddy Street, Ithaca, NY, 14850")
# EXP ::= ("05/07/2029")
# ISS ::= ("05/07/2024")
# id ::= ("23 444 889" | "V24568104")
# """