<a href="https://colab.research.google.com/github/fsommers/ICMR24/blob/main/Structured_Data_with_Langchain.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install langchain
!pip install langchain-openai

In [None]:
import base64
import langchain
from langchain_core.pydantic_v1 import BaseModel, Field
from langchain_core.messages import HumanMessage
from langchain_core.output_parsers import JsonOutputParser
from langchain.chains import TransformChain
from langchain_core.runnables import chain

from PIL import Image

from langchain_openai import ChatOpenAI

from google.colab import userdata
# Read the OpenAI API key through google.colab.userdata so it is not
# hard-coded in the notebook; doc_model passes it to ChatOpenAI.
OPENAI_API_KEY=userdata.get('OPENAI_API_KEY')

class DocumentItem(BaseModel):
  """Structured information extracted from an image"""
  # The first positional argument of Field() is the *default value*, not the
  # description. The text therefore has to be passed as `description=` for it
  # to appear in the JSON schema used for the format instructions.
  # The default of "N/A" keeps every field optional (as before) and matches
  # the placeholder the prompt asks the model to use for missing information.
  customer_name: str = Field(default="N/A", description="The name of the customer")
  address: str = Field(default="N/A", description="The address of the customer")
  vin: str = Field(default="N/A", description="The Vehicle Identification Number (VIN)")
  car: str = Field(default="N/A", description="The car make and model")

# Instruction text sent to the model as the first content part of the message
# built in doc_model. It lists the same four items that DocumentItem declares
# and tells the model to answer "N/A" for anything it cannot find.
auto_contract_prompt = """
  You are an expert at information extraction from images of automobile loan contracts.

  Given this page of an automobile loan contract, extract the following information:
    - The name of the customer
    - The address of the customer
    - The Vehicle Identification Number (VIN)
    - The car make and model

    Do not guess. If some information is missing just return "N/A" in the relevant field.
    If you determine that the image is not of an automobile loan contract, just set all the fields in the formatting instructions to "N/A".

    You must obey the output format under all circumstances. Please follow the formatting instructions exactly.
    Do not return any additional comments or explanation.
"""

def load_image(inputs: dict) -> dict:
  """Read the file named by inputs["image_path"] and base64-encode it.

  Args:
    inputs: Mapping that must contain the key "image_path".

  Returns:
    A dict with the single key "image" whose value is the base64 encoding
    of the file's raw bytes, decoded to a UTF-8 string.
  """
  # Binary mode: the bytes are encoded as-is, no text decoding involved.
  with open(inputs["image_path"], "rb") as source:
    raw_bytes = source.read()

  encoded = base64.b64encode(raw_bytes)
  return {"image": encoded.decode('utf-8')}

# JSON output parser configured with the DocumentItem model. doc_model embeds
# its format instructions in the request, and get_document_information uses it
# as the last step of the chain to parse the model's reply.
parser = JsonOutputParser(pydantic_object=DocumentItem)

@chain
def doc_model(inputs: dict) -> str | list[str] | dict:
  """Invoke the model with an image and prompt.

  Args:
    inputs: Mapping with the keys "prompt" (instruction text) and "image"
      (base64-encoded image data, e.g. as produced by load_image).

  Returns:
    The ``content`` of the message returned by the chat model.

  Raises:
    KeyError: If "prompt" or "image" is missing from ``inputs``.
  """
  model = ChatOpenAI(temperature=0.0, model="gpt-4-turbo", max_tokens=1024, api_key=OPENAI_API_KEY)

  # In a data URL the payload must follow "base64," directly; a space after
  # the comma is not part of the base64 alphabet and makes the URL malformed.
  # NOTE(review): the MIME type is fixed to image/jpeg regardless of the
  # actual file type - confirm that only JPEG inputs are expected.
  image_url = f"data:image/jpeg;base64,{inputs['image']}"

  msg = model.invoke(
      [HumanMessage(
          content=[
              {"type": "text", "text": inputs["prompt"]},
              {"type": "text", "text": parser.get_format_instructions()},
              {"type": "image_url", "image_url": {"url": image_url}}
          ]
      )]
  )
  return msg.content

# Chain step that wraps load_image: declares "image_path" as its input and
# "image" as its output.
# NOTE(review): doc_model also reads inputs["prompt"], so the pipeline relies
# on this step passing the remaining input keys through - confirm against the
# TransformChain documentation.
doc_transform = TransformChain(
    input_variables=["image_path"],
    output_variables=["image"],
    transform=load_image
)

def get_document_information(image_path: str) -> dict:
  """Run the extraction pipeline on one image file.

  Composes doc_transform, doc_model and parser into a single chain and
  invokes it with the image path and auto_contract_prompt.

  Args:
    image_path: Path of the image file to process.

  Returns:
    Whatever the final parser step yields for the model's reply.
  """
  pipeline = doc_transform | doc_model | parser
  request = {
      'image_path': f'{image_path}',
      'prompt': auto_contract_prompt,
  }
  return pipeline.invoke(request)


# Example run: extract the fields from one sample image and print the result.
# NOTE(review): the path points under /content/drive, which presumably needs
# Google Drive to be mounted first - no mount call is visible in this notebook.
file = "/content/drive/MyDrive/DOC_EXAMPLES/UNSEEN/CACONTRACT/2-JvUQx2cEWl3Oqu_M9ni.jpg"
info = get_document_information(file)
print(info)