# Claude - Visual PDF
https://docs.anthropic.com/en/docs/build-with-claude/pdf-support#pdf-support-with-prompt-caching\
https://github.com/anthropics/anthropic-cookbook/blob/main/multimodal/reading_charts_graphs_powerpoints.ipynb

In [None]:
import anthropic # pip install anthropic
import base64
#import httpx
import keyring # pip install keyring

In [None]:
# Alternative input (disabled, needs httpx): download Anthropic's example PDF from
# https://docs.anthropic.com/en/docs/build-with-claude/pdf-support
#pdf_url = "https://assets.anthropic.com/m/1cd9d098ac3e6467/original/Claude-3-Model-Card-October-Addendum.pdf"
#pdf_data = base64.standard_b64encode(httpx.get(pdf_url).content).decode("utf-8")

# Active input: a local PDF file.
# Source: https://www.researchgate.net/publication/285000696_Organizational_socialization_The_effective_onboarding_of_new_employees
# Recorded usage for this file: 25'047 input tokens; 382 output tokens; 0.08 USD
pdf_filepath = "BauerErdogan_OrgSocialization_APA_IO_III.pdf"

# Read the raw bytes, then base64-encode them into a UTF-8 string, following
# https://github.com/anthropics/anthropic-cookbook/blob/main/multimodal/reading_charts_graphs_powerpoints.ipynb
with open(pdf_filepath, "rb") as pdf_file:
    raw_pdf_bytes = pdf_file.read()

# The file handle is no longer needed once the bytes are in memory.
pdf_data = base64.b64encode(raw_pdf_bytes).decode("utf-8")

# Create the API client.
# - api_key: fetched from the system keyring. Alternatively, define the environment
#   variable ANTHROPIC_API_KEY (https://docs.anthropic.com/en/docs/initial-setup#set-your-api-key).
# - default_headers: while PDF support is in beta, the matching beta header must be sent.
client = anthropic.Anthropic(
    api_key=keyring.get_password('Claude_API_key', 'Medium_API _test'),
    default_headers={"anthropic-beta": "pdfs-2024-09-25"},
)

In [None]:
# Build the two content blocks of the user turn: the base64-encoded PDF
# followed by the text instruction.
pdf_document_block = {
    "type": "document",
    "source": {
        "type": "base64",
        "media_type": "application/pdf",
        "data": pdf_data,
    },
}
instruction_block = {"type": "text", "text": "Extract text from this document."}

# Send the request as a single user message and keep the response in `message`.
message = client.beta.messages.create(
    model="claude-3-5-sonnet-20241022",
    betas=["pdfs-2024-09-25"],
    max_tokens=1024,
    messages=[{"role": "user", "content": [pdf_document_block, instruction_block]}],
)

In [14]:
# Save the text of the first content block of the response to a UTF-8 text file.
# `write` is used instead of `writelines`: the payload is one string, and
# `writelines` would iterate over it character by character. The file contents
# are the same either way.
with open('Claude_pdf_API_test.txt', mode='w', encoding='utf-8') as f:
    f.write(message.content[0].text)

In [16]:
# Display the text of the first content block of the response.
print(message.content[0].text)

Here's the extracted text summary from the document, which appears to be a comprehensive chapter on organizational socialization and onboarding:

Title: "Organizational socialization: The effective onboarding of new employees" by Talya N. Bauer and Berrin Erdogan

Key Points:

1. Definition: Organizational socialization or onboarding is the process through which new employees transition from being organizational outsiders to insiders.

2. Importance:
- About 25% of U.S. workers undergo organizational socialization
- Workers change jobs approximately 10.2 times over 20 years
- Effective socialization leads to positive work attitudes and longer retention
- Ineffective socialization can lead to premature departure and resource waste

3. Key Components:
- New employee characteristics (proactive personality, extraversion, openness)
- New employee behaviors (information seeking, feedback seeking, relationship building)
- Organizational efforts (socialization tactics, formal orientations, rec