In [1]:
import os
import requests
from langchain_community.chat_models import ChatOpenAI

# OpenAI API key.
# Prefer exporting OPENAI_API_KEY in the environment before starting the kernel.
# `setdefault` leaves a key that is already set untouched, so running this cell
# no longer replaces a real key with the empty placeholder below.
# Never commit a real key inside the notebook.
os.environ.setdefault("OPENAI_API_KEY", "")  # Your OpenAI API Key (used only if the variable is unset)

In [2]:
# Prompts and request headers shared by the cells below.

# Headers for direct calls to the OpenAI REST API. The bearer token is read from the
# environment once, when this cell runs.
HEADERS = {
    "Content-Type": "application/json",
    "Authorization": f"Bearer {os.getenv('OPENAI_API_KEY')}",
}

# Instruction sent together with the page images to extract the applicant's case summary.
INFO_EXTRACTION_PROMPT = """
    Analyze the provided images to summarize key information about the applicant's case based on their responses. Focus on accurately capturing:

    - Specifics of the crime(s) they were convicted of, including dates and locations.
    - Their account of the events surrounding the crime, emphasizing their description and any claims of innocence.
    - Any alibi or evidence they provide that supports their case.
    - Connections or relationships with the victim(s) or others involved in the case.
    - Clarify the applicant's stance on their conviction and any aspects they are disputing.

    Ensure to maintain the original meaning and intention of the applicant's responses, avoiding any assumptions or modifications beyond what is explicitly stated in their answers.
"""

# Backward-compatible alias: the misspelled name is still referenced elsewhere in the notebook.
INFO_EXTRACTION_RPOMPT = INFO_EXTRACTION_PROMPT

# Template for the missing-information check. `{background}` is the extracted summary and
# `{format_instructions}` is filled with the JSON output parser's instructions.
MISSINFO_CHECK_PROMPT_TEMPLATE = """
    Review the summarized information extracted from the applicant's intake letter. 
    
    Information: '{background}'. 
    
    Determine if all necessary details are provided, including specifics of the conviction, the applicant's account and evidence, connections with involved parties, and their stance on the conviction. If any key information is missing, respond with 'YES' and draft a letter requesting the specific missing information from the applicant. The letter should be polite, concise, and clearly specify what information is needed and why it is important for their case. If the narrative is complete, simply respond with 'NO'.
    
    {format_instructions}.
"""

# Template for the exclusion-criteria check; same placeholders as the template above.
# The label "Narrative" was previously misspelled in the text sent to the model.
CRITERIA_CHECK_PROMPT_TEMPLATE = """
    Assess in details the provided narrative against the Innocence Project's criteria for cases they do not handle, which include consent/transaction cases, self-defense/justification, sustained abuse, illegal substance charges, RICO/Hobbs Act charges, DWI/DUI, fraud/identity theft/forgery, stalking/harassment, and sentencing reduction/overcharge issues.
    
    Narrative: '{background}'.

    1. Step by step, evaluate each criterion, explaining your decision process in details why the case does or does not fit within these excluded categories.
    2. Conclude whether the case should be rejected based on these criteria or if it matches the criteria for further review.
    3. If the narrative matches one of the excluded criteria, draft a polite and concise rejection letter explaining the specific reason(s) why the case does not meet the project's guidelines. If the narrative does not match any excluded criteria, indicate that the case is given to a different team for further handling.
    
    {format_instructions}.
"""

# Chat model shared by the LangChain chains defined further down; the API key is read
# from the environment when this cell runs.
# NOTE(review): the saved output of this cell shows a deprecation warning for this
# import path - consider migrating to the replacement class the warning names.
LLM = ChatOpenAI(openai_api_key=os.getenv("OPENAI_API_KEY"))

  warn_deprecated(


# PDF to Image

In [3]:
import fitz  # Import the PyMuPDF library
import base64
from PIL import Image
from io import BytesIO
from typing import List, Union


def encode_image(image_path):
    """
    Reads the file at ``image_path`` in binary mode and returns its content
    as a base64 string.
    """
    with open(image_path, "rb") as handle:
        raw_bytes = handle.read()
    encoded = base64.b64encode(raw_bytes)
    return encoded.decode("utf-8")


def encode_image_pil(image) -> str:
    """
    Serializes an image object to PNG in memory and returns the PNG bytes as a
    base64 string. The object only needs a PIL-style ``save(fp, format=...)`` method.
    """
    buffer = BytesIO()
    try:
        image.save(buffer, format="PNG")  # write the PNG bytes into the in-memory buffer
        png_bytes = buffer.getvalue()
    finally:
        buffer.close()
    return base64.b64encode(png_bytes).decode("utf-8")


def pdf_to_images(pdf_path: str, zoom: float = 2.0) -> List[str]:
    """
    Renders each page of a PDF file to a PNG image and returns the images base64-encoded.

    Args:
        pdf_path (str): The file path of the PDF.
        zoom (float): Scale factor applied to both axes when rasterizing a page.
            1.0 is PyMuPDF's default resolution (72 dpi), which is usually too coarse
            for handwritten or scanned letters; the default of 2.0 doubles it.
            Pass ``zoom=1.0`` to get the previous rendering size.

    Returns:
        List of base64-encoded strings, one PNG image per PDF page, in page order.

    Raises:
        ValueError: If ``zoom`` is not a positive number.
    """
    if zoom <= 0:
        raise ValueError("zoom must be a positive number.")

    # The same scaling matrix is reused for every page.
    scale = fitz.Matrix(zoom, zoom)

    with fitz.open(pdf_path) as pdf:
        # The pixmap already serializes to PNG bytes, so those are encoded directly
        # instead of being decoded into a PIL image and re-saved as PNG.
        return [
            base64.b64encode(page.get_pixmap(matrix=scale).tobytes("png")).decode("utf-8")
            for page in pdf
        ]

In [4]:
def analyze_applicant_intake_letters(
    file_path_or_images: Union[str, List[str]],
    model: str = "gpt-4-vision-preview",
) -> str:
    """
    Sends intake-letter images to the OpenAI chat completions API and returns the
    model's summary of the applicant's case.

    Args:
        file_path_or_images (Union[str, List[str]]): The file path of a PDF, or a list of
            PNG/JPEG image file paths containing the applicant's intake letters.
            Extensions are matched case-insensitively; list entries with any other
            extension are skipped.
        model (str): Name of the vision-capable chat model to call.
            NOTE(review): the default is a preview model name - confirm it is still
            served, or pass a current vision model.

    Returns:
        The text content of the first choice in the API response.

    Raises:
        ValueError: If the input is neither a PDF path nor a list, or if it yields no
            supported images.
        requests.HTTPError: If the API answers with an error status.
    """
    mime_by_extension = {".png": "image/png", ".jpg": "image/jpeg", ".jpeg": "image/jpeg"}
    request_timeout_seconds = 120  # vision requests are slow, but must not hang forever

    # Collect (mime_type, base64_data) pairs so each data URL declares its real format.
    if isinstance(file_path_or_images, str) and file_path_or_images.lower().endswith(".pdf"):
        # PDF pages are rendered to PNG by pdf_to_images.
        images = [("image/png", page) for page in pdf_to_images(file_path_or_images)]
    elif isinstance(file_path_or_images, list):
        images = []
        for image_path in file_path_or_images:
            extension = os.path.splitext(str(image_path))[1].lower()
            mime_type = mime_by_extension.get(extension)
            if mime_type is not None:
                images.append((mime_type, encode_image(image_path)))
    else:
        raise ValueError(
            "Unsupported file format or type. Please provide a PDF path or a list of PNG/JPEG image paths."
        )

    if not images:
        # Fail early instead of sending a request that contains no image at all.
        raise ValueError(
            "No supported images found. Please provide a PDF path or a list of PNG/JPEG image paths."
        )

    messages_content = [{"type": "text", "text": INFO_EXTRACTION_RPOMPT}] + [
        {"type": "image_url", "image_url": {"url": f"data:{mime_type};base64,{data}"}}
        for mime_type, data in images
    ]

    payload = {
        "model": model,
        "messages": [
            {
                "role": "user",
                "content": messages_content,
            }
        ],
        "max_tokens": 2000,
    }

    # Built at call time so a key set after the notebook's setup cells is still used.
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {os.getenv('OPENAI_API_KEY')}",
    }

    response = requests.post(
        "https://api.openai.com/v1/chat/completions",
        headers=headers,
        json=payload,
        timeout=request_timeout_seconds,
    )
    # Surface API errors (bad key, unknown model, rate limit) as HTTP errors rather
    # than as a KeyError on the missing "choices" field.
    response.raise_for_status()

    return response.json()["choices"][0]["message"]["content"]

In [21]:
# Extract the case summary from one intake letter; replace the placeholder with a real PDF path.
# NOTE(review): the saved output of this cell contains an applicant's name and case
# details - clear or redact outputs before sharing the notebook.
file_path = "path/to/your/file.pdf"
background = analyze_applicant_intake_letters(file_path)
background

'The images provided appear to be pages from a handwritten letter by an individual identifying himself as Mr. Archie C. Williams. From the content of the letter, the following key information about the applicant\'s case can be summarized:\n\n- Personal identification details: The applicant, Archie C. Williams, is 35 years old.\n  \n- Specifics of the crime: Williams is serving a life sentence for the attempted murder of a Baton Rouge woman, which he states he did not commit.\n\n- Account of events surrounding the crime: The letter does not detail Williams\' specific account of the events other than him asserting his innocence.\n\n- Alibi or evidence: Williams claims that a DNA test could prove his innocence. He seems to strongly believe that this evidence would be enough to establish his innocence regarding the crime he was convicted for.\n\n- Connections or relationships: He references the victim as the wife of a "big-time Baton Rouge Attorney," suggesting that this connection may hav

# Missing Information Check

In [5]:
from langchain.prompts import PromptTemplate
from langchain_core.output_parsers import JsonOutputParser
from langchain_core.pydantic_v1 import BaseModel, Field, validator

In [6]:
# Output schema for the missing-information check. Its JSON schema, including the
# example in `Config`, is embedded in the prompt as format instructions.
# NOTE: documented with `#` comments rather than a class docstring on purpose, because
# pydantic copies a model docstring into the schema and that would change the prompt.
class MissInfoCheckOutput(BaseModel):
    response: str = Field(
        ...,
        description="Yes or No reply to the question: 'Is there any missing information?'",
    )
    letter: str = Field(
        ...,
        description="The letter that asks for missing information, provided only if response is 'Yes'.",
    )

    @validator("response")
    def response_must_be_yes_or_no(cls, v):
        """Normalize the answer to lowercase 'yes'/'no', ignoring surrounding whitespace."""
        normalized = v.strip().lower()
        if normalized not in ["yes", "no"]:
            raise ValueError('Response must be either "yes" or "no".')
        return normalized

    class Config:
        # The example must use the model's real field names: it is shown to the LLM, and
        # the previous key "next_steps" contradicted the `letter` field declared above.
        schema_extra = {
            "example": {
                "response": "yes",
                "letter": "Dear [Applicant Name],\n\nWe have reviewed your submission and found that it is missing critical information needed for further evaluation. Specifically, we require [missing information]. Please provide this at your earliest convenience.\n\nSincerely,\n[Your Name]",
            }
        }

In [7]:
# JSON output parser built from the MissInfoCheckOutput schema. The last line displays
# the format instructions it generates, which are inserted into the prompt below.
MISSINFO_CHECK_PARSER = JsonOutputParser(pydantic_object=MissInfoCheckOutput)
MISSINFO_CHECK_PARSER.get_format_instructions()

'The output should be formatted as a JSON instance that conforms to the JSON schema below.\n\nAs an example, for the schema {"properties": {"foo": {"title": "Foo", "description": "a list of strings", "type": "array", "items": {"type": "string"}}}, "required": ["foo"]}\nthe object {"foo": ["bar", "baz"]} is a well-formatted instance of the schema. The object {"properties": {"foo": ["bar", "baz"]}} is not well-formatted.\n\nHere is the output schema:\n```\n{"properties": {"response": {"title": "Response", "description": "Yes or No reply to the question: \'Is there any missing information?\'", "type": "string"}, "letter": {"title": "Letter", "description": "The letter that asks for missing information, provided only if response is \'Yes\'.", "type": "string"}}, "required": ["response", "letter"], "example": {"response": "yes", "letter": "Dear [Applicant Name],\\n\\nWe have reviewed your submission and found that it is missing critical information needed for further evaluation. Specificall

In [8]:
# Prompt for the missing-information check: `background` is supplied per call, while
# `format_instructions` is pre-filled once from the parser.
MISSINFO_CHECK_PROMPT = PromptTemplate(
    input_variables=["background"],
    template=MISSINFO_CHECK_PROMPT_TEMPLATE,
    partial_variables={
        "format_instructions": MISSINFO_CHECK_PARSER.get_format_instructions()
    }
)

# Chain: fill the prompt -> call the chat model -> parse the JSON reply.
MISSINFO_CHECK_CHAIN = MISSINFO_CHECK_PROMPT | LLM | MISSINFO_CHECK_PARSER

In [80]:
# Run the missing-information check on the extracted summary (requires `background` from
# the extraction cell above). The result is a dict with "response" and "letter" keys.
# The "response" value keeps the casing the model produced ('YES' in the saved output).
missinfo_check = MISSINFO_CHECK_CHAIN.invoke({"background": background})
missinfo_check

{'response': 'YES',
 'letter': 'Dear John Smith,\n\nWe have reviewed your intake letter and found that there is missing critical information needed for further evaluation of your case. Specifically, we require the following details:\n1. Specifics of your alibi for the time of the crime on January 15, 2023.\n2. Any evidence or witnesses that can corroborate your alibi.\n3. Any connections or interactions you may have had with the store clerk or anyone involved in the case.\n\nPlease provide the above information at your earliest convenience to assist us in assessing your situation.\n\nSincerely,\n[Your Name]'}

# Criteria Check

In [14]:
# Output schema for the exclusion-criteria check: a step-by-step evaluation, a
# conclusion, and the next steps. The field descriptions and the example in `Config`
# are part of the JSON schema printed below as format instructions.
class CriteriaCheckOutput(BaseModel):
    evaluation: str = Field(
        ...,
        description="The step by step evaluation of the applicant's case against the Innocence Project's criteria, including detailed reasoning for each point of consideration.",
    )
    conclusion: str = Field(
        ...,
        description="The conclusion of the evaluation, stating whether the case matches the excluded criteria or is suitable for further review.",
    )
    next_steps: str = Field(
        ...,
        description="The next steps to be taken based on the evaluation. This could be drafting a rejection letter if the case meets the excluded criteria or indicating the case is forwarded to a different team for cases that do not match the excluded criteria.",
    )

    class Config:
        # Example instance included in the generated JSON schema.
        schema_extra = {
            "example": {
                "evaluation": "The applicant's case was reviewed step by step against the project's criteria. No evidence of self-defense, illegal substance charges, or fraud was found because [detailed reasoning].",
                "conclusion": "The case does not match any of the excluded criteria and is suitable for further review.",
                "next_steps": "The case is forwarded to the review team for detailed evaluation.",
            }
        }


# JSON output parser built from the CriteriaCheckOutput schema.
CRITERIA_CHECK_PARSER = JsonOutputParser(pydantic_object=CriteriaCheckOutput)

# Show the format instructions that will be inserted into the criteria-check prompt.
print(CRITERIA_CHECK_PARSER.get_format_instructions())

The output should be formatted as a JSON instance that conforms to the JSON schema below.

As an example, for the schema {"properties": {"foo": {"title": "Foo", "description": "a list of strings", "type": "array", "items": {"type": "string"}}}, "required": ["foo"]}
the object {"foo": ["bar", "baz"]} is a well-formatted instance of the schema. The object {"properties": {"foo": ["bar", "baz"]}} is not well-formatted.

Here is the output schema:
```
{"properties": {"evaluation": {"title": "Evaluation", "description": "The step by step evaluation of the applicant's case against the Innocence Project's criteria, including detailed reasoning for each point of consideration.", "type": "string"}, "conclusion": {"title": "Conclusion", "description": "The conclusion of the evaluation, stating whether the case matches the excluded criteria or is suitable for further review.", "type": "string"}, "next_steps": {"title": "Next Steps", "description": "The next steps to be taken based on the evaluatio

In [10]:
# Prompt for the criteria check: `background` is supplied per call, while
# `format_instructions` is pre-filled once from the parser.
CRITERIA_CHECK_PROMPT = PromptTemplate(
    input_variables=["background"],
    template=CRITERIA_CHECK_PROMPT_TEMPLATE,
    partial_variables={"format_instructions": CRITERIA_CHECK_PARSER.get_format_instructions()}
)

# Chain: fill the prompt -> call the chat model -> parse the JSON reply.
CRITERIA_CHECK_CHAIN = CRITERIA_CHECK_PROMPT | LLM | CRITERIA_CHECK_PARSER

In [17]:
# Run the criteria check on the extracted summary (requires `background` from the
# extraction cell above). The result is a dict with "evaluation", "conclusion" and
# "next_steps" keys.
critiria_check_response = CRITERIA_CHECK_CHAIN.invoke({"background": background})
critiria_check_response

{'evaluation': "The applicant's case was reviewed step by step against the Innocence Project's criteria. The case does not fall into the excluded categories of consent/transaction cases, self-defense/justification, sustained abuse, illegal substance charges, RICO/Hobbs Act charges, DWI/DUI, fraud/identity theft/forgery, stalking/harassment, or sentencing reduction/overcharge issues.",
 'conclusion': 'The case does not match any of the excluded criteria and is suitable for further review.',
 'next_steps': 'The case is forwarded to the review team for detailed evaluation.'}

# Pipeline

In [11]:
from termcolor import colored


def eval_pipeline(file_path_or_images: Union[str, List[str]]) -> None:
    """
    Runs the full intake evaluation and prints each stage's result.

    Stages:
        1. Extract the case background from the intake letter (PDF or images).
        2. Ask whether key information is missing. If so, print the drafted letter that
           requests it and stop.
        3. Otherwise evaluate the case against the exclusion criteria and print the
           evaluation, conclusion and next steps.

    Args:
        file_path_or_images (Union[str, List[str]]): The file path of the PDF or a list of image file paths containing the applicant's intake letters.

    Returns:
        None
    """
    print(colored("Analyzing applicant's intake letter from the input files. Please wait...", "blue"))

    background = analyze_applicant_intake_letters(file_path_or_images)

    print(
        colored(
            "What background information have we extracted from the applicant's intake letter?",
            "blue",
        )
    )
    print(colored(background, "yellow"))

    # Checking for missing information
    print(
        colored(
            "\nIs there any missing information in the application that we need to address?",
            "blue",
        )
    )
    missinfo_check = MISSINFO_CHECK_CHAIN.invoke({"background": background})

    # The chain returns the model's raw JSON, so the answer may come back as 'YES',
    # 'Yes' or 'yes'; normalize it before comparing.
    answer = str(missinfo_check.get("response", "")).strip().lower()

    if answer == "yes":
        print(
            colored(
                "What does the drafted letter requesting the missing information say?",
                "blue",
            )
        )
        # The schema field is "letter". "next_steps" is accepted as a fallback in case
        # the model followed an example that used that key.
        letter = missinfo_check.get("letter") or missinfo_check.get("next_steps", "")
        print(colored(letter, "yellow"))
    else:
        print(
            colored(
                "With no missing information, how does the applicant's case stand against our criteria?",
                "blue",
            )
        )
        criteria_check = CRITERIA_CHECK_CHAIN.invoke({"background": background})
        print(colored("Evaluation:", "green"), criteria_check["evaluation"])
        print(colored("Conclusion:", "green"), criteria_check["conclusion"])
        print(colored("Next Steps:", "green"), criteria_check["next_steps"])

In [15]:
# Run the whole pipeline on one intake letter; replace the placeholder with a real path.
# Observation: passing image files has given better results than passing the PDF.
# NOTE(review): possibly related to the resolution at which PDF pages are rasterized
# before being sent to the model - TODO confirm.
file_path = "path/to/your/file.pdf"
eval_pipeline(file_path)

[34mAnalyzing applicant's intake letter from the input files. Please wait...[0m
[34mWhat background information have we extracted from the applicant's intake letter?[0m
[33mThis letter, dated March 19, 1995, is from an individual serving a life sentence for attempted murder and rape in Baton Rouge. The applicant asserts that they are innocent of the crimes they were convicted of. They express a sense of abandonment and disillusionment with the support system as years pass with no one caring about their situation. They have seemingly exhausted their resources, having reached out for help in numerous places without a response and having their concerns dismissed due to the high-profile nature of the alleged victim's husband, who is described as a significant Baton Rouge attorney.

The applicant is eager to prove their innocence through a DNA test, which they believe would confirm they did not commit the crime. As they have been incarcerated for 12 years at the time of the letter, the