In [None]:
!pip install PyMuPDF Pillow requests

In [17]:
# Import necessary libraries and define a function to convert PDF pages to images.
import base64
import io
import json
import os

import fitz  # PyMuPDF
import requests
from PIL import Image

def pdf_to_images(pdf_path):
    """
    Convert each page of the PDF into an image.

    Every page is rendered with PyMuPDF's default pixmap settings, serialized
    to bytes, and re-opened as a PIL image backed by its own in-memory buffer.

    Args:
        pdf_path (str): Path to the PDF file.

    Returns:
        images (list): A list of PIL Image objects representing each page,
            in page order. Empty if the document has no pages.
    """
    images = []

    # The context manager closes the document (and its underlying file) even
    # if rendering one of the pages raises.
    with fitz.open(pdf_path) as document:
        for page_number in range(len(document)):
            page = document.load_page(page_number)
            pix = page.get_pixmap()
            # The image reads from a private BytesIO copy of the rendered
            # bytes, so it stays usable after the document is closed.
            image = Image.open(io.BytesIO(pix.tobytes()))
            images.append(image)

    return images

In [22]:
# Define a function to encode images to Base64.
def encode_image(image):
    """
    Encode a PIL Image as a Base64 string of its JPEG representation.

    Images whose mode cannot be written as JPEG (for example RGBA, LA or P)
    are converted to RGB first; the caller's image object is not modified.

    Args:
        image (Image): A PIL Image object.

    Returns:
        encoded_image (str): Base64 encoded (ASCII) string of the JPEG bytes.
    """
    # Modes the JPEG writer accepts directly. Anything else would make
    # image.save() raise, so it is converted to RGB before saving.
    jpeg_modes = ("1", "L", "RGB", "RGBX", "CMYK", "YCbCr")
    if image.mode not in jpeg_modes:
        image = image.convert("RGB")

    buffered = io.BytesIO()
    image.save(buffered, format="JPEG")
    encoded_image = base64.b64encode(buffered.getvalue()).decode('ascii')
    return encoded_image

In [23]:
# Prepare the payload for API request including images and questions.
def prepare_payload(images, questions, temperature=0.7, top_p=0.95, max_tokens=3000):
    """
    Build the chat-completion request body from page images and questions.

    The message list consists of one system message, one user message per
    page (a "Page N:" label plus the page as a Base64 JPEG data URL), and a
    final user message asking for JSON answers to the numbered questions.

    Args:
        images (iterable): PIL Image objects, one per page, in page order.
        questions (iterable): Question strings; they are numbered from 1 in
            iteration order.
        temperature (float): Sampling temperature placed in the payload.
        top_p (float): Nucleus-sampling value placed in the payload.
        max_tokens (int): Completion token limit placed in the payload.

    Returns:
        payload (dict): Dictionary with the keys "messages", "temperature",
            "top_p" and "max_tokens".
    """
    messages = [
        {
            "role": "system",
            "content": [
                {
                    "type": "text",
                    "text": "You are an AI assistant that analyzes PDF content for specific information."
                }
            ]
        }
    ]

    # One user message per page so every image carries its page label.
    for i, image in enumerate(images):
        encoded_image = encode_image(image)
        messages.append({
            "role": "user",
            "content": [
                {
                    "type": "text",
                    "text": f"Page {i+1}:"
                },
                {
                    "type": "image_url",
                    "image_url": {
                        "url": f"data:image/jpeg;base64,{encoded_image}"
                    }
                }
            ]
        })

    # Number the questions 1..N, one per line.
    questions_text = "\n".join(f"{i+1}. {q}" for i, q in enumerate(questions))
    messages.append({
        "role": "user",
        "content": [
            {
                "type": "text",
                "text": f"Based on the provided content in the images, answer the following questions in JSON format. Each question should be paired with its answer as a key-value pair. If the information is not available, respond with 'Answer not found in the provided content.'\n{questions_text}"
            }
        ]
    })

    payload = {
        "messages": messages,
        "temperature": temperature,
        "top_p": top_p,
        "max_tokens": max_tokens
    }
    return payload

In [24]:
# Questions to ask about the PDF content. prepare_payload numbers them from 1
# in list order and asks the model to return each question paired with its
# answer in JSON, so edit this list to change what is extracted.
questions = [
    "What is the company's current financial status?",
    "What are the key financial ratios?",
    "What is the projected revenue for the next quarter?",
    "What are the company's major expenses?",
    "What is the debt-to-equity ratio?",
    "What are the company's growth prospects?",
    "What is the company's cash flow situation?",
    "What are the potential risks mentioned?",
    "What is the company's competitive advantage?",
    "What is the outlook for the industry?"
]

In [None]:
# Execute the API request and process the response.
# Credentials and endpoint are read from the environment when set, so real
# secrets never need to be saved inside the notebook; otherwise the
# placeholders below are used and must be replaced by hand.
# NOTE(review): the variable names follow the usual Azure OpenAI convention —
# confirm they match your deployment.
API_KEY = os.environ.get("AZURE_OPENAI_API_KEY", "YOUR_API_KEY") #place the gpt4o key here
ENDPOINT = os.environ.get("AZURE_OPENAI_ENDPOINT", "YourEndpoint") #place the endpoint here
PDF_PATH = "2023_Annual_Report_Testing.pdf"
REQUEST_TIMEOUT = 120  # seconds; without a timeout requests.post can block forever


def _strip_code_fence(text):
    """Return text without a surrounding ``` / ```json Markdown fence or outer whitespace."""
    text = text.strip()
    if text.startswith("```"):
        text = text[3:]
        # Drop the optional language tag that follows the opening fence.
        if text[:4].lower() == "json":
            text = text[4:]
    if text.endswith("```"):
        text = text[:-3]
    return text.strip()


images = pdf_to_images(PDF_PATH)
payload = prepare_payload(images, questions)

headers = {
    "Content-Type": "application/json",
    "api-key": API_KEY,
}

try:
    response = requests.post(ENDPOINT, headers=headers, json=payload, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()

    data = response.json()

    # Guard against a missing/empty 'choices' list and against a message
    # without text content instead of failing with an IndexError/AttributeError.
    choices = data.get('choices') if isinstance(data, dict) else None
    content = None
    if choices:
        content = (choices[0].get('message') or {}).get('content')

    if content:
        # Parse the reply as JSON after removing any Markdown code fence.
        results = json.loads(_strip_code_fence(content))

        # Output the results as a JSON string
        json_output = json.dumps(results, indent=4)
        print("JSON Output:\n", json_output)

        # Optionally write the JSON output to a file
        with open('output.json', 'w', encoding='utf-8') as f:
            json.dump(results, f, indent=4)
    else:
        print("No answer content found in the API response.")

except requests.RequestException as e:
    print(f"Failed to make the request. Error: {e}")
except json.JSONDecodeError as e:
    print(f"Failed to parse JSON content. Error: {e}")