In [None]:
# All important imports
import os
import json

from dotenv import load_dotenv
from pypdf import PdfReader
from langchain_openai import ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate
from pydantic.v1 import BaseModel, Field

In [None]:
# Load the environment
# Pull variables from a local .env file (if one exists) into os.environ.
load_dotenv()

# Fail fast with an actionable message when the key is missing. The previous
# code copied the value into a misspelled "OPEN_API_KEY" variable (which
# nothing in this notebook reads) and crashed with an opaque TypeError when
# the key was unset, because os.environ only accepts string values.
if not os.getenv("OPENAI_API_KEY"):
    raise EnvironmentError(
        "OPENAI_API_KEY is not set. Add it to your .env file or export it "
        "in the shell before running this notebook."
    )

In [None]:
# Read the PDF
reader = PdfReader('sf_data_with_attachments/pitchdeck_0624_pay33.pdf')

# Collect the text of every page into a single string.
# Pages are joined with a newline so the last word of one page does not fuse
# with the first word of the next (plain concatenation did that).
# `or ""` keeps the join safe if a page yields no text at all.
content = "\n".join(page.extract_text() or "" for page in reader.pages)

In [None]:
# Define structure of the extracted claims
# One claim found in the document. The Field descriptions below are part of
# the schema handed to `with_structured_output`, so they should describe each
# field accurately.
class Claim(BaseModel):
    # Sequential identifier of the claim.
    number: int = Field(description="The number of the claim")
    # Text of the claim (the old description wrongly said "name").
    description: str = Field(description="Description of the claim")
    # Whether the claim can be verified; used later to filter the list.
    verifiable: bool = Field(description="Indicator whether the claim can be verified")
    
class ClaimList(BaseModel):
    # Container model: the structured-output target for the extraction step.
    # Holds every Claim produced for the document.
    claims: list[Claim] = Field(
        description="Claims extracted from the file",
    )

In [None]:
# Extract claims

# gpt-4o chat model at temperature 0; its output is parsed into a ClaimList.
model = ChatOpenAI(temperature=0, model="gpt-4o")
structured_llm = model.with_structured_output(ClaimList)

# Fixed system instruction; the document text fills the human turn.
extraction_messages = [
    ("system", "Extract the claims from the provided text"),
    ("human", "{user_request}"),
]
prompt = ChatPromptTemplate.from_messages(extraction_messages)

# Pipe the prompt into the structured model and run it on the PDF text.
chain = prompt | structured_llm
response = chain.invoke({"user_request": content})

# Show the parsed result and its Python type for a quick sanity check.
print("--- Response Object ---", response, sep="\n")
print("\n--- Type of Response ---", type(response), sep="\n")

In [None]:
# Define structure of the claim verification result
# Outcome of checking a single claim. The Field descriptions are part of the
# schema handed to `with_structured_output`, so typos in them reach the model.
class VerificationResult(BaseModel):
    # The claim that was checked.
    claim: Claim = Field(description="Claim")
    # Verdict on the claim.
    valid: bool = Field(description="Indicator whether the claim is valid")
    # Justification for the verdict (fixed typo: "calim" -> "claim").
    reasoning: str = Field(description="Explanation why the claim is true/false")
    # URLs cited in support of the reasoning.
    sources: list[str] = Field(description="List of the URL sources used to support the reasoning")
    
class VerificationList(BaseModel):
    # Container model: the structured-output target for the verification step.
    # Holds one VerificationResult per checked claim.
    verification_results: list[VerificationResult] = Field(
        description="List of the verification results",
    )

In [None]:
# Filter the claims
# Keep only the claims flagged as verifiable. The list is reassigned on the
# existing `response` object because the next cell serialises `response`.
response.claims = list(filter(lambda claim: claim.verifiable, response.claims))

In [None]:
# Convert the claims into JSON
# Serialise the ClaimList held in `response` to a JSON string. When the cells
# are run in order, `response.claims` has already been narrowed to the
# verifiable claims. The string is later passed as the `claims` prompt variable.
claims_json = response.json()

In [None]:
# Verify Claims

# Reuse the same chat model, this time parsing its output into a VerificationList.
structured_verification_llm = model.with_structured_output(VerificationList)

# Fixed system instruction; the serialised claims fill the human turn.
verification_messages = [
    ("system", "Verify each of the provided quotes. Provide your reasoning and the sources for the reasoning"),
    ("human", "{claims}"),
]
verification_prompt = ChatPromptTemplate.from_messages(verification_messages)

# Pipe the prompt into the structured model and run it on the claims JSON.
verification_chain = verification_prompt | structured_verification_llm
verification_response = verification_chain.invoke({"claims": claims_json})

print(verification_response)

In [None]:
# Write the results into a JSON file
# Round-trip through the model's own JSON string to obtain plain dict/list
# data that the json module can pretty-print.
verification_response_json = verification_response.json()
verification_response_dictionary = json.loads(verification_response_json)

# Persist the result with 4-space indentation.
with open("verification_response.json", "w") as output_file:
    output_file.write(json.dumps(verification_response_dictionary, indent=4))