# Google's Gemini 2.0 

## Sample files

In [None]:
# List the contents of the local samples/ directory (the upload cell below reads a PDF from it)
!ls -la samples

## hello, world

In [1]:
import os
from pathlib import Path

from google import genai

In [2]:
# Create a client.
# The API key is read from the environment so that it never lands in the
# notebook file or its version history.
# NOTE(review): the key that used to be hardcoded in this cell must be treated
# as leaked and revoked in the Google AI Studio / Cloud console.
api_key = os.environ.get("GEMINI_API_KEY") or os.environ.get("GOOGLE_API_KEY")
if not api_key:
    raise RuntimeError(
        "Set the GEMINI_API_KEY (or GOOGLE_API_KEY) environment variable "
        "before running this notebook."
    )
client = genai.Client(api_key=api_key)

# Define the model you are going to use
model_id = "gemini-2.0-flash"  # or "gemini-2.0-flash-lite-preview-02-05", "gemini-2.0-pro-exp-02-05"

#### Prepare/upload PDFs

In [3]:
# Upload the sample invoice through the File API; the returned handle is kept
# in `invoice_pdf` for the cells that follow.
invoice_pdf = client.files.upload(
    file="samples/saintmarc-hd_20250313.pdf",
    config=dict(display_name="2025-Mar-borked"),
)

In [7]:
# Ask the API for the token count of the uploaded PDF under the selected model.
# Note: despite its name, `file_size` holds the count_tokens response, not a byte size.
file_size = client.models.count_tokens(model=model_id, contents=invoice_pdf)

print(f'File: {invoice_pdf.display_name} contains {file_size.total_tokens} tokens')

File: 2025-Mar-borked contains 259 tokens


In [8]:
from pydantic import BaseModel, Field
 
# Define a Pydantic model
# Use the Field class to attach a description to each attribute; the description
# gives the model more context about what the attribute should contain.
# NOTE(review): `#` comments are used instead of class docstrings on purpose —
# a docstring would presumably become part of the schema sent to the API; confirm
# before adding one.
class Topic(BaseModel):
    # A single topic, identified only by its name.
    name: str = Field(description="The name of the topic")
 
class Person(BaseModel):
    # Schema for the person-extraction example; passed as `response_schema` in the cell below.
    first_name: str = Field(description="The first name of the person")
    last_name: str = Field(description="The last name of the person")
    # No Python-level default is set: the description asks the model itself to return 0 when the age is unknown.
    age: int = Field(description="The age of the person, if not provided please return 0")
    # Likewise, the empty-list fallback is requested through the description, not through a default value.
    work_topics: list[Topic] = Field(description="The fields of interest of the person, if not provided please return an empty list")
 
 
# Define the prompt
# NOTE(review): this sample sentence is editorial and states neither an age nor
# any work topics, so the demo only exercises the "return 0 / empty list"
# fallbacks. Consider a neutral sample that fills every field.
prompt = "Donald J. Trump is the 47th President of the United States of America. He is also an idiot and a coward.  "

# Generate a response constrained to the Person schema
response = client.models.generate_content(
    model=model_id,
    contents=prompt,
    config={
        'response_mime_type': 'application/json',
        'response_schema': Person,
    },
)

# Print the raw response as a JSON string
print(response.text)

# The SDK converts the response to the pydantic model, but `parsed` is left as
# None when the returned text cannot be validated against the schema. Check
# before dereferencing so a failure is reported clearly rather than as an
# AttributeError on None.
parsed = response.parsed
if parsed is None:
    raise ValueError(f"Gemini response could not be parsed as Person: {response.text!r}")
theDon: Person = parsed

# Access an attribute of the parsed response
print(f"First name is {theDon.first_name}")

{
  "first_name": "Donald",
  "last_name": "Trump",
  "age": 0,
  "work_topics": []
}
First name is Donald


## 4. Extract Structured data from PDFs using Gemini 2.0

Now, let's combine the File API and structured output to extract information from our PDFs. You can create a simple function that accepts a local file path and a pydantic model and returns the structured data for us. The function will:

1. Upload the file to the File API
2. Generate a structured response using the Gemini API
3. Convert the response to the pydantic model and return it

In [9]:
def extract_structured_data(file_path: str, model: type[BaseModel]):
    """Upload a local file and extract its content into ``model`` using Gemini.

    Relies on the notebook-level ``client`` and ``model_id`` defined earlier.

    Args:
        file_path: Path of the local file to upload. The prompt refers to it
            as a PDF.
        model: The pydantic model *class* (not an instance) describing the
            desired output; it is passed to the API as ``response_schema``.

    Returns:
        ``response.parsed`` as produced by the SDK — expected to be an
        instance of ``model``. NOTE(review): the SDK may leave this as None
        when the response cannot be parsed; callers should check.
    """
    # Upload the file to the File API, labelled with the file name minus its
    # final extension. Path.stem handles OS-specific separators, and for dotted
    # names ("report.v2.pdf") keeps everything before the last suffix.
    file = client.files.upload(
        file=file_path,
        config={'display_name': Path(file_path).stem},
    )

    # Generate a structured response using the Gemini API
    prompt = "Extract the structured data from the following PDF file"
    response = client.models.generate_content(
        model=model_id,
        contents=[prompt, file],
        config={
            'response_mime_type': 'application/json',
            'response_schema': model,
        },
    )

    # The SDK has already converted the response to the pydantic model
    return response.parsed