<a target="_blank" href="https://colab.research.google.com/github/jmanuelc87/nmp-autoavanza/blob/main/notebooks/MontePiedad_Extraction.ipynb">
  <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/>
</a>

In [1]:
import base64
import concurrent.futures
import csv
import glob
import json
import math
import os
import pprint
import threading

import matplotlib.pyplot as plt
import numpy as np
import pytesseract as pyt
import torch
from langchain.chains import TransformChain
from langchain.output_parsers import PydanticOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI
from pydantic import BaseModel, Field
from tqdm import tqdm

## Extract Information using ChatOpenAI

In [2]:
# Fallback for kernels where no API key is exported: fill in the three OpenAI
# settings together. The '***' values are redacted placeholders.
# NOTE(review): prefer loading real values from the environment or a secrets
# manager instead of editing them into this cell.
if os.environ.get("OPENAI_API_KEY") is None:
    os.environ.update(
        {
            "OPENAI_ORG_ID": "***",
            "OPENAI_PROJECT_ID": "***",
            "OPENAI_API_KEY": "***",
        }
    )

In [3]:
# Chat client shared by every extraction chain in this notebook. It targets a
# custom base_url instead of the library's default endpoint and uses
# temperature 0.
llm = ChatOpenAI(
    base_url="https://8000-01jv65ke6030mp4ecew7n9dppy.cloudspaces.litng.ai/v1",
    model="*",
    temperature=0,
)

In [4]:
# Message templates for the chat prompt: a system instruction (in Spanish) plus
# a user turn whose only content is the image, sent as a base64 data URL.
# The "{image}" placeholder is filled from the "image" value that load_image returns.
system_message = dict(
    role="system",
    content="Eres un asistente lector de documentos servicial, tus respuestas son concisas y usando OCR extraes los campos de la imagen.",
)
image_part = dict(
    type="image_url",
    image_url=dict(url="data:image/jpeg;base64,{image}"),
)
user_message = dict(role="user", content=[image_part])

messages = [system_message, user_message]

In [5]:
# Schema of the fields extracted from a vehicle invoice ("factura").
# It is bound to the LLM as a tool (llm.bind_tools([Factura])) and is also the
# target of the PydanticOutputParser defined further down.
# Every field is typed as str; the Field descriptions are written in Spanish.
# Documented with comments rather than a class docstring so the model
# definition itself stays exactly as it was.
class Factura(BaseModel):
    cliente: str = Field(description="El nombre del cliente en la factura")
    fecha: str = Field(description="La fecha de la factura")
    rfc: str = Field(description="registro federal de contribuyentes RFC")
    marca: str = Field(description="La marca del vehiculo")
    modelo: str = Field(description="El modelo del vehiculo")
    clase: str = Field(description="La clase del vehiculo")
    tipo: str = Field(description="El tipo del vehiculo")
    no_puertas: str = Field(description="La cantidad de puertas")
    combustible: str = Field(description="El tipo de combustible")
    motor: str = Field(description="El tipo del motor")
    descripcion: str = Field(description="La descripcion del vehiculo")
    color: str = Field(description="El color exterior del vehiculo")
    total: str = Field(description="El total de la factura")

In [6]:
# Schema of the fields extracted from a personal ID card ("credencial"):
# the holder's name and address, both typed as str.
# It is bound to the LLM as a tool via llm.bind_tools([PersonalID]).
class PersonalID(BaseModel):
    nombre: str = Field(description="El nombre de la persona en la credencial")
    domicilio: str = Field(description="El domicilio de la persona en la credencial")

In [7]:
# Schema of the fields extracted from a vehicle registration card
# ("tarjeta de circulacion"). It is bound to the LLM as a tool via
# llm.bind_tools([VehicleID]); every field is typed as str.
class VehicleID(BaseModel):
    propietario: str = Field(description="El nombre de la persona en la tarjeta de circulacion")
    vehiculo: str = Field(description="El tipo de vehiculo en la en la tarjeta de circulacion")
    marca: str = Field(description="El nombre de la marca en la tarjeta de circulacion")
    modelo: str = Field(description="El nombre del modelo en la tarjeta de circulacion")
    placa: str = Field(description="La placa del vehiculo como aparece en la tarjeta de circulacion")
    # Fixed keyword: this was spelled "desciption", so the text was never
    # registered as the field's description.
    clase_tipo: str = Field(description="El nombre de la clase y tipo en la tarjeta de circulacion")
    fecha_expedicion: str = Field(description="La fecha de expedicion de la tarjeta de circulacion")
    vigencia: str = Field(description="La fecha de vigencia de la tarjeta de circulacion")
    origen: str = Field(description="El origen del vehiculo en la tarjeta de circulacion")

In [8]:
# Build the chat prompt from the system/user message templates in `messages`.
prompt = ChatPromptTemplate(messages=messages)

In [9]:
def load_image(inputs):
    """Read the file at ``inputs["image_path"]`` and return it base64-encoded.

    Args:
        inputs: Mapping that contains the key ``"image_path"``.

    Returns:
        A dict ``{"image": <base64 text>}`` where the text is the file's raw
        bytes encoded as base64 and decoded to a UTF-8 string.
    """
    with open(inputs["image_path"], "rb") as handle:
        raw_bytes = handle.read()

    encoded = base64.b64encode(raw_bytes).decode('utf-8')
    return {"image": encoded}

In [10]:
# Chain step that wraps load_image: takes "image_path" and produces "image".
load_image_chain = TransformChain(
    transform=load_image,
    input_variables=["image_path"],
    output_variables=["image"],
)

In [11]:
# Output parser targeting the Factura schema.
# NOTE(review): `parser` is not referenced by any other cell visible in this
# notebook (the chains use bind_tools instead) — confirm whether it is still needed.
parser = PydanticOutputParser(pydantic_object=Factura)

In [12]:
# Invoice pipeline: encode the image, fill the prompt, then call the LLM with
# the Factura schema bound as a tool.
vision_chain = load_image_chain | prompt | llm.bind_tools([Factura])

In [13]:
# Sample invoice image used for the single-document trial run below.
image_path = "./data/bronze/BASE_AUTOAVANZA/documentos_clean/FAC_FRENTE/Caso 1_TK 62853-1 FAC_FRENTE_otsu.jpg"

In [14]:
# Run the invoice chain once on the sample image.
response = vision_chain.invoke(input={"image_path": image_path})

In [15]:
# Show the fields extracted from the sample invoice. The model may answer
# without any tool call, so guard the index instead of raising IndexError.
# `pprint` is imported in the notebook's top import cell.
if response.tool_calls:
    pprint.pprint(response.tool_calls[0]['args'])
else:
    print("The model returned no tool call for", image_path)

{'clase': 'CAMIONETA',
 'cliente': 'RODRIGUEZ FLIZONDO FRANCISCO',
 'color': 'NEGRO',
 'combustible': 'GASOLINA',
 'descripcion': 'UN VEHICULO NUEVO 7895 TOYOTA HILUX DOB CAB SR MODELO 2019 '
                'MOTOR 2.7 LTS. TRANSMISION MANUAL ORIGEN IMPORTADA COLOR EXT. '
                '1E7 COLOR INT. NEGRO',
 'fecha': '2019-10-01T17:39:51',
 'marca': 'TOYOTA',
 'modelo': 'HILUX',
 'motor': '2TR-AG 88.6',
 'no_puertas': '4',
 'rfc': 'ROEF-690608-6G3',
 'tipo': 'HILUX DOB CAB SR',
 'total': '$325,217.24'}


In [16]:
# Output CSV and input images for the invoice extraction run.
csv_file = "./data/bronze/BASE_AUTOAVANZA/documents/vehicles.csv"
# glob returns matches in arbitrary order; sort them so the documents are
# handled in a reproducible order from one run to the next.
invoices = sorted(glob.glob("./data/bronze/BASE_AUTOAVANZA/documentos_clean/FAC_FRENTE/*_otsu.jpg"))

In [17]:
def extract_documents(documents, out_file, chain, max_workers=3):
    """Run ``chain`` over every document image and append the results to a CSV.

    Each path is sent to ``chain.invoke`` as ``{"image_path": path}`` on a
    thread pool. The ``args`` of the first tool call of each response become
    one CSV row. Responses without tool calls are reported and skipped instead
    of being written as blank lines.

    Args:
        documents: Iterable of image paths. It is copied and never modified, so
            the same list can be passed again. Rows are written in this order.
        out_file: CSV path to append to. Missing parent directories are
            created. A header is written only when the file is new or empty;
            otherwise the existing header decides the columns.
        chain: Object whose ``invoke(input=...)`` returns a response exposing
            ``tool_calls`` (a list of dicts that carry an ``"args"`` mapping).
        max_workers: Number of worker threads issuing requests concurrently.

    Raises:
        Exception: Whatever ``chain.invoke`` raises is propagated, after the
            rows extracted before the failure have been written.
    """
    # Work on a snapshot. Draining the caller's list with pop() made a second
    # call on the same list process nothing and reversed the row order.
    pending = list(documents)

    def process_document(doc):
        """Invoke the chain for one image and return its extracted fields."""
        response = chain.invoke(input={"image_path": doc})
        calls = response.tool_calls
        return calls[0]['args'] if calls else {}

    def resolve_columns(rows):
        """Return ``(columns, header_needed)`` for the rows about to be appended."""
        if os.path.exists(out_file) and os.path.getsize(out_file) > 0:
            with open(out_file, newline="", encoding="utf-8") as f:
                header = next(csv.reader(f), [])
            if header:
                return header, False
        # New or empty file: use every key seen, in first-seen order, so a row
        # that lacks a field still lines up with the header.
        columns = []
        for row in rows:
            for key in row:
                if key not in columns:
                    columns.append(key)
        return columns, True

    def write_rows(rows):
        """Append all rows with one shared set of columns (best effort)."""
        if not rows:
            return
        try:
            parent = os.path.dirname(out_file)
            if parent:
                os.makedirs(parent, exist_ok=True)
            columns, header_needed = resolve_columns(rows)
            extras = sorted({key for row in rows for key in row} - set(columns))
            if extras:
                print(f"Ignoring fields missing from the header of {out_file}: {extras}")
            # utf-8 keeps accented names intact regardless of the platform default.
            with open(out_file, "a", newline="", encoding="utf-8") as f:
                writer = csv.DictWriter(
                    f, fieldnames=columns, restval="", extrasaction="ignore"
                )
                if header_needed:
                    writer.writeheader()
                writer.writerows(rows)
        except (OSError, ValueError, csv.Error) as e:
            print(f"Error {e} in {out_file}")

    rows = []
    skipped = []
    progress = tqdm(total=len(pending))
    try:
        with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
            # map() yields results in submission order, so rows follow the input order.
            for doc, data in zip(pending, executor.map(process_document, pending)):
                progress.update()
                if data:
                    rows.append(data)
                else:
                    skipped.append(doc)
    finally:
        # Close the bar and persist whatever was extracted, even if a request failed.
        progress.close()
        write_rows(rows)
        for doc in skipped:
            print(f"No tool call returned for {doc}")

In [18]:
# Extract every invoice image with the invoice chain and append the rows to csv_file.
extract_documents(invoices, csv_file, vision_chain)

100%|██████████| 37/37 [03:52<00:00,  6.28s/it]


In [19]:
# ID-card pipeline: same image loader and prompt, with the PersonalID schema
# bound as the tool.
id_vision_chain = load_image_chain | prompt | llm.bind_tools([PersonalID])

In [20]:
# Output CSV and input images for the ID-card extraction run.
csv_file = "./data/bronze/BASE_AUTOAVANZA/documents/credenciales.csv"
# glob returns matches in arbitrary order; sort them so the documents are
# handled in a reproducible order from one run to the next.
personal_ids = sorted(glob.glob("./data/bronze/BASE_AUTOAVANZA/documentos_clean/INE_FRENTE/*_adaptativo.jpg"))

In [21]:
# Extract every ID-card image with the ID chain and append the rows to csv_file.
extract_documents(personal_ids, csv_file, id_vision_chain)

100%|██████████| 36/36 [01:29<00:00,  2.49s/it]


In [22]:
# Registration-card pipeline: same image loader and prompt, with the VehicleID
# schema bound as the tool.
vehicle_id_vision_chain = load_image_chain | prompt | llm.bind_tools([VehicleID])

In [23]:
# Output CSV and input images for the registration-card extraction run.
csv_file = "./data/bronze/BASE_AUTOAVANZA/documents/tarjetas_circulacion.csv"
# glob returns matches in arbitrary order; sort them so the documents are
# handled in a reproducible order from one run to the next.
tcs = sorted(glob.glob("./data/bronze/BASE_AUTOAVANZA/documentos_clean/TC_FRENTE/*_adaptativo.jpg"))

In [24]:
# Extract every registration-card image with the vehicle chain and append the rows to csv_file.
extract_documents(tcs, csv_file, vehicle_id_vision_chain)

100%|██████████| 35/35 [02:28<00:00,  4.24s/it]
