## This is a trivial example notebook for Document Classification
This is a very simple solution using an LLM.

In [1]:
import base64
import mimetypes
from enum import Enum

import instructor
from IPython.display import display, HTML
from openai import OpenAI
from pydantic import BaseModel

### Let's view our example documents

In [2]:
# Paths to the three example pages, relative to the notebook's working directory.
img1, img2, img3 = (
    "example_docs/img1.png",
    "example_docs/img2.png",
    "example_docs/img3.png",
)

# Hand-assigned label for each page, keyed by image path.
# NOTE(review): "Tax Form" is not one of the DocumentType values (the enum uses
# "Tax Document"), so a plain string comparison against a prediction would not
# match for img3 -- confirm which wording is intended.
ground_truth = {
    img1: "Purchase Order",
    img2: "Invoice",
    img3: "Tax Form",
}

# Render the pages side by side in a horizontally scrollable strip.
html_code = f"""
<div style="width: 100%; overflow-x: auto; white-space: nowrap;">
    <img src="{img1}" style="width: 800px">
    <img src="{img2}" style="width: 800px;">
    <img src="{img3}" style="width: 800px;">
</div>
"""

display(HTML(html_code))

## Let's use OpenAI's GPT-4o to classify these pages

### First let's write some code to call ChatGPT

In [3]:
# Wrap the OpenAI client with instructor; the wrapped client is what
# `classifier` calls with a `response_model` argument.
# OpenAI() receives no arguments here, so credentials are presumably read from
# the environment (e.g. OPENAI_API_KEY) -- confirm for your setup.
client = instructor.from_openai(OpenAI())

In [4]:
# Closed set of labels a page can be assigned; used as the default
# `response_model` of `classifier`. Mixing in `str` makes every member a
# string equal to its value.
class DocumentType(str, Enum):
    PURCHASE_ORDER = "Purchase Order"
    INVOICE = "Invoice"
    TAX_DOCUMENT = "Tax Document"

In [5]:
def classifier(img, response_model=DocumentType, model="gpt-4o"):
    """Classify one document image with a vision-capable chat model.

    Args:
        img: Path to the image file to classify.
        response_model: Type handed to the instructor-wrapped client as
            ``response_model``. Defaults to ``DocumentType``.
        model: Name of the chat model to call. Defaults to ``"gpt-4o"``.

    Returns:
        The value the client returns for ``response_model``.

    Raises:
        OSError: If ``img`` cannot be opened or read.
    """
    with open(img, "rb") as f:
        encoded_file = base64.b64encode(f.read()).decode("utf-8")

    # Derive the MIME type from the file extension so non-PNG images get a
    # correct data URL; fall back to PNG when the type is unknown or not an image.
    mime_type = mimetypes.guess_type(img)[0]
    if not mime_type or not mime_type.startswith("image/"):
        mime_type = "image/png"
    data_url = f"data:{mime_type};base64,{encoded_file}"

    completion = client.chat.completions.create(
        model=model,
        response_model=response_model,
        messages=[
            {"role": "system", "content": "Find the document type."},
            {
                "role": "user",
                # Multimodal content must be a list of typed parts; a bare
                # dict is not a valid message content value.
                "content": [
                    {"type": "image_url", "image_url": {"url": data_url}},
                ],
            },
        ],
    )
    return completion

### Let's double check to see if the results are correct

In [6]:
# Classify each example page in turn and print the model's answer next to the
# hand-assigned label from `ground_truth`.
results = []
for page in (img1, img2, img3):
    predicted = classifier(page)
    print(f"The LLM predicted: {predicted}. This document is: {ground_truth[page]}")
    results.append(predicted)

# Keep the per-page result names available to later cells.
prediction, prediction2, prediction3 = results

The LLM predicted: DocumentType.PURCHASE_ORDER. This document is: Purchase Order
The LLM predicted: DocumentType.INVOICE. This document is: Invoice
The LLM predicted: DocumentType.TAX_DOCUMENT. This document is: Tax Form


### We can add another document type by simply modifying the DocumentType class
Let's add an "Other" class, which will hopefully catch any document that does not match one of the defined types.

In [25]:
# A fourth example page; it is classified further down with DocumentType2.
img4 = "example_docs/img4.png"

In [26]:
# Reuses (overwrites) `html_code` to show just the fourth page in the same
# scrollable container used for the first three.
html_code = f"""
<div style="width: 100%; overflow-x: auto; white-space: nowrap;">
    <img src="{img4}" style="width: 800px">
</div>
"""
display(HTML(html_code))

Define `DocumentType2`, a copy of `DocumentType` extended with an "Other" member

In [27]:
# Same three labels as DocumentType plus a catch-all OTHER member, meant for
# pages that fit none of the named types. Defined as a separate class, so
# DocumentType itself is left unchanged.
class DocumentType2(str, Enum):
    PURCHASE_ORDER = "Purchase Order"
    INVOICE = "Invoice"
    TAX_DOCUMENT = "Tax Document"
    OTHER = "Other"

In [28]:
# Classify the fourth page against the label set that includes "Other";
# the bare name on the last line displays the result as the cell output.
prediction = classifier(img4, response_model=DocumentType2)
prediction

<DocumentType2.OTHER: 'Other'>