In [None]:
!pip install -q anthropic openai

In [None]:
from pathlib import Path
from pprint import pprint
from pydantic import BaseModel
from typing import List

def get_anthropic_message_lines(message):
    """Collect the text of every 'text' content block of a message.

    Args:
        message: Object whose ``content`` is an iterable of blocks. Each block
            exposes a ``type`` attribute; blocks of type ``'text'`` also
            expose ``text``.

    Returns:
        list: The ``text`` values of the ``'text'`` blocks, in their original
        order. Blocks of any other type are skipped.
    """
    text_lines = []
    for block in message.content:
        if block.type == 'text':
            text_lines.append(block.text)
    return text_lines


def print_anthropic_message(message):
    """Print a readable report of a message: metadata, thinking and text.

    Prints, in order: the message id, input/output token counts, the stop
    reason, a pretty-printed dump of ``message.content``, an optional
    "[Thinking]" section (only when the content holds 'thinking' blocks) and
    a "[Text]" section with the text blocks.

    Args:
        message: Object exposing ``id``, ``usage.input_tokens``,
            ``usage.output_tokens``, ``stop_reason`` and ``content``.

    Returns:
        None. Output goes to stdout only.
    """
    divider = '-' * 20

    # Header: identification and usage.
    print(f"Message id: {message.id}")
    usage = message.usage
    print(f"Input: {usage.input_tokens}; Output: {usage.output_tokens}")
    print(f"Stop reason: {message.stop_reason}")
    pprint(message.content)

    print()
    # The thinking section is skipped entirely when there are no such blocks.
    thoughts = []
    for block in message.content:
        if block.type == 'thinking':
            thoughts.append(block.thinking)
    if thoughts:
        print(f"{divider} [Thinking] {divider}")
        for thought in thoughts:
            print(thought)

    print()
    # The text header is always printed, even if there are no text blocks.
    print(f"{divider} [Text] {divider}")
    for text_line in get_anthropic_message_lines(message):
        print(text_line)

def print_openai_response(response):
    """Print a readable report of a response: id, token usage, output, text.

    Args:
        response: Object exposing ``id``, ``usage`` (with ``input_tokens``,
            ``input_tokens_details.cached_tokens``, ``output_tokens`` and
            ``output_tokens_details.reasoning_tokens``), ``output`` and
            ``output_text``.

    Returns:
        None. Output goes to stdout only.
    """
    divider = '-' * 20

    print(f"Response id: {response.id}")

    # Build the usage line from its input and output halves.
    usage = response.usage
    input_part = f"Input tokens: {usage.input_tokens} ({usage.input_tokens_details.cached_tokens} cached)"
    output_part = f"Output tokens: {usage.output_tokens} ({usage.output_tokens_details.reasoning_tokens} reasoning)"
    print(f"{input_part}; {output_part}")

    pprint(response.output)

    print()
    print(f"{divider} [Text] {divider}")
    print(response.output_text)

In [None]:
from google.colab import userdata
from anthropic import Anthropic

# Fetch the Anthropic key via google.colab's userdata, then build the client with it.
anthropic_api_key = userdata.get("ANTHROPIC_API_KEY")
anthropic_client = Anthropic(
    api_key=anthropic_api_key,
)

In [None]:
from google.colab import userdata
from openai import OpenAI

# Fetch the OpenAI key via google.colab's userdata, then build the client with it.
openai_api_key = userdata.get("OPENAI_API_KEY")
openai_client = OpenAI(
    api_key=openai_api_key,
)

1. Upload a file to Anthropic

In [None]:
# Upload the local PDF via the client's beta files endpoint; the returned
# object's id is what later cells use to reference the document.
path = Path("/content/test_doc_01.pdf")
with open(path, "rb") as test_file_content:
    # The file is passed as a (filename, binary file object, MIME type) tuple.
    file_upload_response = anthropic_client.beta.files.upload(
        file=(path.name, test_file_content, "application/pdf"),
    )

In [None]:
# Inspect the object returned by the upload call above.
pprint(file_upload_response)

2. Summarize the document

In [None]:
# Ask the model for a short summary of the uploaded file. The user turn holds
# two content parts: the instruction text and a reference to the uploaded
# document by its file id.
summarize_message = anthropic_client.beta.messages.create(
    model="claude-haiku-4-5-20251001",
    max_tokens=4096,
    # Thinking is enabled with a token budget below max_tokens.
    thinking={"type": "enabled", "budget_tokens": 2048},
    # Beta flag passed alongside the file-based document source.
    betas=["files-api-2025-04-14"],
    messages=[
        {
            "role": "user",
            "content": [
                {
                    "type": "text",
                    "text": "Provide a concise summary of this document in 3-5 sentences.",
                },
                {
                    "type": "document",
                    "source": {"type": "file", "file_id": file_upload_response.id},
                },
            ],
        },
    ],
)

In [None]:
# Print id, token usage, stop reason, raw content, thinking and text of the summary response.
print_anthropic_message(summarize_message)

In [None]:
# Join the text blocks of the summary response into the single string that the
# keyword-extraction and categorization steps take as input.
document_summary = "\n".join(get_anthropic_message_lines(summarize_message))

# Fail fast: if the reply held no text blocks, the downstream calls would be
# sent an empty input and return meaningless results.
if not document_summary.strip():
    raise ValueError(
        f"Summarization returned no text (stop_reason={summarize_message.stop_reason!r})."
    )

3. Keyword extraction

In [None]:
# Structured-output model for the keyword extraction step; it is passed as
# `text_format` to the keyword extraction call.
# NOTE(review): documented with `#` comments on purpose — presumably a class
# docstring would end up in the schema generated from this model; confirm
# before converting these to a docstring.
class KeywordExtractionResult(BaseModel):
    keywords: List[str]  # extracted keywords (the prompt asks for at most 10)

In [None]:
# Extract keywords from the summary; the result is parsed into
# KeywordExtractionResult via `text_format`.
keyword_extraction_response = openai_client.responses.parse(
    model="gpt-5-nano",
    reasoning={"effort": "medium"},
    text_format=KeywordExtractionResult,
    instructions="You are an expert in keyword extraction. Given a document summary, return a list (maximum 10 items) of the most important keywords. Focus on terms, named entities and domain-specific vocabulary.",
    input=document_summary,
)

In [None]:
# Print id, token usage, raw output and output text of the keyword extraction response.
print_openai_response(keyword_extraction_response)

In [None]:
# Show the parsed result (presumably a KeywordExtractionResult instance — verify).
pprint(keyword_extraction_response.output_parsed)

4. Categorization

In [None]:
# Structured-output model for the categorization step; it is passed as
# `text_format` to the categorization call.
# NOTE(review): documented with `#` comments on purpose — presumably a class
# docstring would end up in the schema generated from this model; confirm
# before converting these to a docstring.
class CategorizationResult(BaseModel):
    category: str  # the single category chosen for the document
    confidence: float  # the prompt asks for a score from 0.0 to 1.0; not enforced here
    short_explanation: str  # the prompt asks for 1-2 sentences

In [None]:
# Classify the document from its summary; the result is parsed into
# CategorizationResult via `text_format`.
# Fix: the instruction text previously misspelled "confidence" as "configence".
categorization_response = openai_client.responses.parse(
    model="gpt-5-mini",
    input=document_summary,
    instructions="You are an expert in document categorization. Based on the provided document summary, classify the document into exactly one category. Return the category, a confidence score (from 0.0 to 1.0), and a short explanatory statement (1-2 sentences).",
    reasoning={ "effort": "high" },
    text_format=CategorizationResult
)

In [None]:
# Print id, token usage, raw output and output text of the categorization response.
print_openai_response(categorization_response)

In [None]:
# Show the parsed result (presumably a CategorizationResult instance — verify).
pprint(categorization_response.output_parsed)