In [1]:
# All important imports
import os
import json

from dotenv import load_dotenv
from pypdf import PdfReader
from langchain_openai import ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate
from pydantic.v1 import BaseModel, Field

In [2]:
# Load the environment
# load_dotenv() copies the variables from a local .env file into os.environ.
load_dotenv()

# The OpenAI client used by ChatOpenAI looks up OPENAI_API_KEY. The previous
# code copied the value into a misspelled "OPEN_API_KEY" variable (which
# nothing reads) and crashed with an opaque TypeError when the key was absent.
# Fail early with an actionable message instead.
if not os.getenv("OPENAI_API_KEY"):
    raise RuntimeError(
        "OPENAI_API_KEY is not set; define it in your .env file or in the environment"
    )

In [4]:
# Read the PDF
reader = PdfReader('sf_data_with_attachments/pitchdeck_0624_pay33.pdf')

# Build the full text in one pass. Pages are joined with a newline so the last
# word of one page is not fused with the first word of the next, and a page
# that yields no text (e.g. an image-only slide) contributes an empty string.
content = "\n".join(page.extract_text() or "" for page in reader.pages)

Ignoring wrong pointing object 9 0 (offset 0)
Ignoring wrong pointing object 26 0 (offset 0)
Ignoring wrong pointing object 53 0 (offset 0)
Ignoring wrong pointing object 55 0 (offset 0)
Ignoring wrong pointing object 58 0 (offset 0)
Ignoring wrong pointing object 60 0 (offset 0)
Ignoring wrong pointing object 63 0 (offset 0)
Ignoring wrong pointing object 65 0 (offset 0)
Ignoring wrong pointing object 86 0 (offset 0)
Ignoring wrong pointing object 88 0 (offset 0)
Ignoring wrong pointing object 90 0 (offset 0)
Ignoring wrong pointing object 112 0 (offset 0)
Ignoring wrong pointing object 114 0 (offset 0)
Ignoring wrong pointing object 116 0 (offset 0)
Ignoring wrong pointing object 127 0 (offset 0)
Ignoring wrong pointing object 131 0 (offset 0)
Ignoring wrong pointing object 135 0 (offset 0)
Ignoring wrong pointing object 160 0 (offset 0)
Ignoring wrong pointing object 162 0 (offset 0)
Ignoring wrong pointing object 164 0 (offset 0)
Ignoring wrong pointing object 166 0 (offset 0)
Igno

In [5]:
# Define structure of the extracted claims
class Claim(BaseModel):
    # Schema of a single claim extracted from the document.
    # The Field descriptions are part of the schema given to the model through
    # `with_structured_output`, so they must describe the fields accurately;
    # `description` previously read "Description of the name".
    number: int = Field(description="The number of the claim")
    description: str = Field(description="Description of the claim")
    verifiable: bool = Field(description="Indicator whether the claim can be verified")
    
# Wrapper schema holding every extracted Claim; this is the type handed to
# `with_structured_output` in the extraction cell, so the model answers with
# one object containing the whole list.
class ClaimList(BaseModel):
    claims: list[Claim] = Field(description="Claims extracted from the file")

In [6]:
# Extract claims

# temperature=0 keeps the extraction as repeatable as the API allows.
model = ChatOpenAI(model="gpt-4o", temperature=0)

prompt = ChatPromptTemplate.from_messages(
    [
        ("system", "Extract the claims from the provided text"),
        ("human", "{user_request}"),
    ]
)

# Bind the ClaimList schema so the reply is parsed into a ClaimList object.
structured_llm = model.with_structured_output(ClaimList)
chain = prompt | structured_llm

# The whole PDF text is sent as the human message.
response = chain.invoke({"user_request": content})

print("--- Response Object ---", response, sep="\n")
print("\n--- Type of Response ---", type(response), sep="\n")



--- Response Object ---
claims=[Claim(number=1, description='90% of public charging points for Germany’s 2030 target remain to be built.', verifiable=True), Claim(number=2, description='40 million electric vehicles are expected by 2030 based on emissions regulation and incentives, which is 30 million more than in 2024.', verifiable=True), Claim(number=3, description='80% of companies with more than 20 parking spaces must install electric chargers.', verifiable=True), Claim(number=4, description='45% of companies see complexity in billing/payments as the main challenge for e-mobility.', verifiable=True), Claim(number=5, description='Europe’s first B2B2C white-label solution unifying mobility, payment, and loyalty is offered by pay33.', verifiable=False), Claim(number=6, description='Pay33 provides access to more than 650,000 EU charging points.', verifiable=True), Claim(number=7, description='Pay33 offers a white-label banking solution that opens local IBAN accounts for users in 19 EU c

In [7]:
# Define structure of the claim verification result
class VerificationResult(BaseModel):
    # Schema of the verdict for a single claim.
    # The Field descriptions are part of the schema given to the model through
    # `with_structured_output`; the `reasoning` description previously
    # contained the typo "calim".
    claim: Claim = Field(description="Claim")
    valid: bool = Field(description="Indicator whether the claim is valid")
    reasoning: str = Field(description="Explanation why the claim is true/false")
    sources: list[str] = Field(description="List of the URL sources used to support the reasoning")
    
# Wrapper schema holding one VerificationResult per checked claim; this is the
# type handed to `with_structured_output` in the verification cell.
class VerificationList(BaseModel):
    verification_results: list[VerificationResult] = Field(description="List of the verification results")

In [8]:
# Filter the claims
# Keep only the claims flagged as verifiable. Note that this overwrites
# `response.claims` in place, so the unfiltered list is no longer available
# afterwards.
response.claims = list(filter(lambda claim: claim.verifiable, response.claims))

In [9]:
# Convert the claims into JSON
# `.json()` is the pydantic v1 serializer; the resulting string is what gets
# substituted into the "{claims}" slot of the verification prompt.
claims_json = response.json()

In [10]:
# Verify Claims

# NOTE(review): the system prompt says "quotes" although the input is the JSON
# list of claims -- consider rewording.
# NOTE(review): no search/retrieval tool is attached to this chain, so the
# returned source URLs presumably come from the model itself and should be
# checked manually before being trusted.
verification_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", "Verify each of the provided quotes. Provide your reasoning and the sources for the reasoning"),
        ("human", "{claims}"),
    ]
)

# Reuse the same chat model, this time bound to the VerificationList schema.
structured_verification_llm = model.with_structured_output(VerificationList)
verification_chain = verification_prompt | structured_verification_llm

verification_response = verification_chain.invoke({"claims": claims_json})

print(verification_response)

verification_results=[VerificationResult(claim=Claim(number=1, description='90% of public charging points for Germany’s 2030 target remain to be built.', verifiable=True), valid=True, reasoning='Germany has set ambitious targets for electric vehicle infrastructure by 2030, which includes a significant increase in the number of public charging points. As of recent reports, a large portion of these charging points still need to be constructed to meet the 2030 goals.', sources=['https://www.reuters.com/business/autos-transportation/germany-needs-14-times-more-charging-points-meet-2030-target-2022-11-15/']), VerificationResult(claim=Claim(number=2, description='40 million electric vehicles are expected by 2030 based on emissions regulation and incentives, which is 30 million more than in 2024.', verifiable=True), valid=True, reasoning='Projections for electric vehicle growth in Europe, including Germany, suggest a significant increase due to regulatory and incentive measures. The expectati

In [11]:
# Write the results into a JSON file
# Serialize with pydantic first, then parse back into plain dicts/lists so the
# standard json module can re-emit the data with indentation.
verification_response_json = verification_response.json()
verification_response_dictionary = json.loads(verification_response_json)

pretty_json = json.dumps(verification_response_dictionary, indent=4)
with open("verification_response.json", "w") as f:
    f.write(pretty_json)