# Using GenAI SDK

In [3]:
from google import genai
from google.genai import types


# Google Cloud project and region handed to genai.Client(vertexai=True, ...) below.
PROJECT_ID = "cloud-ai-police"
LOCATION = "us-central1"

### Basics and Client Setup

In [1]:
# Build a Vertex AI-mode client whose HTTP traffic is sent to a local proxy.
client = genai.Client(
    vertexai=True,
    project=PROJECT_ID,
    location=LOCATION,
    http_options={
        # This replaces the default https://{location}-aiplatform.googleapis.com endpoint
        "base_url": "http://localhost:8080/gemini",
        # Optional but useful. NOTE(review): presumably milliseconds (i.e. 120 s) -- confirm.
        "timeout": 120_000,
    },
)

# Smoke test: one request/response round trip through the proxy.
resp = client.models.generate_content(
    model="gemini-2.5-pro",
    contents="Hello via local proxy!",
)

print(resp.text)

Hello there! Your connection is coming through loud and clear. It's great to hear from you.

How can I help you today?


### Thinking

In [5]:
# Single request with an explicit thinking budget; the alternatives for
# disabling thinking or making it dynamic are kept as commented-out options.
response = client.models.generate_content(
    model="gemini-2.5-flash",  # Thinking supported in this model
    contents="Provide a list of 3 famous physicists and their key contributions",
    config=types.GenerateContentConfig(
        thinking_config=types.ThinkingConfig(thinking_budget=1024)
        # Turn off thinking:
        # thinking_config=types.ThinkingConfig(thinking_budget=0)
        # Turn on dynamic thinking:
        # thinking_config=types.ThinkingConfig(thinking_budget=-1)
    ),
)


In [6]:
# Print the answer, then the token counts reported for thoughts and for the candidate output.
print(response.text)
print("Thoughts tokens:",response.usage_metadata.thoughts_token_count)
print("Output tokens:",response.usage_metadata.candidates_token_count)

Here are three famous physicists and their key contributions:

1.  **Sir Isaac Newton**
    *   **Key Contributions:**
        *   **Laws of Motion:** Formulated the three fundamental laws governing motion (inertia, F=ma, action-reaction) which laid the foundation for classical mechanics.
        *   **Universal Gravitation:** Developed the law of universal gravitation, explaining the force that attracts any two objects with mass, from an apple falling to the Earth to the orbits of planets.
        *   **Calculus:** Independently developed integral and differential calculus, a crucial mathematical tool for understanding physical change.
        *   **Optics:** Made significant contributions to the study of light, demonstrating that white light is composed of a spectrum of colors.

2.  **Albert Einstein**
    *   **Key Contributions:**
        *   **Theories of Relativity:** Developed the Special Theory of Relativity (1905), which redefined space and time, introducing concepts like time

### Adding system instructions


In [7]:
# The system instruction is supplied through the request config, separately
# from the user-facing `contents`.
response = client.models.generate_content(
    model="gemini-2.5-flash",
    config=types.GenerateContentConfig(
        system_instruction="You are a cat. Your name is Neko."),
    contents="Hello there"
)

print(response.text)

Mrow! *blinks slowly, then gives a tiny head-nuzzle against the invisible air* Hello to you too!


### Multi-turn conversations


In [8]:
# User turns to send one after another on the same chat session.
conversation = [
    "I have two dogs in my house.",
    "How many pets do I have?",
]

# A chat object is created once and reused for every turn.
chat = client.chats.create(model="gemini-2.5-flash")

for message in conversation:
    response = chat.send_message(message)
    print("User:", message)
    print("Gemini:", response.text)


print("\nChat History:")

# Dump the accumulated history; only the first part of each message is printed.
for message in chat.get_history():
    print(f'role - {message.role}',end=": ")
    print(message.parts[0].text)

User: I have two dogs in my house.
Gemini: Oh, how wonderful! Having two dogs must bring so much joy (and probably some lively moments!) to your home.

What kind of dogs are they? Do they have names? I'd love to hear more about them if you'd like to share!
User: How many pets do I have?
Gemini: Based on what you told me, you have **two** pets (your two dogs!).

Chat History:
role - user: I have two dogs in my house.
role - model: Oh, how wonderful! Having two dogs must bring so much joy (and probably some lively moments!) to your home.

What kind of dogs are they? Do they have names? I'd love to hear more about them if you'd like to share!
role - user: How many pets do I have?
role - model: Based on what you told me, you have **two** pets (your two dogs!).


### Structured Outputs

In [9]:
from pydantic import BaseModel, Field
from typing import List, Optional

# One ingredient entry; nested inside Recipe.ingredients below.
class Ingredient(BaseModel):
    name: str = Field(description="Name of the ingredient.")
    quantity: str = Field(description="Quantity of the ingredient, including units.")

# Target structure for the extraction; its JSON schema is sent as the response schema
# and the model's JSON reply is validated back into this class.
class Recipe(BaseModel):
    recipe_name: str = Field(description="The name of the recipe.")
    # NOTE(review): Optional[int] with no default -- under pydantic v2 this is presumably
    # "required but nullable" rather than omittable; confirm that is intended.
    prep_time_minutes: Optional[int] = Field(description="Optional time in minutes to prepare the recipe.")
    ingredients: List[Ingredient]
    instructions: List[str]


# Free-form text that the model must convert into a Recipe.
prompt = """
Please extract the recipe from the following text.
The user wants to make delicious chocolate chip cookies.
They need 2 and 1/4 cups of all-purpose flour, 1 teaspoon of baking soda,
1 teaspoon of salt, 1 cup of unsalted butter (softened), 3/4 cup of granulated sugar,
3/4 cup of packed brown sugar, 1 teaspoon of vanilla extract, and 2 large eggs.
For the best part, they'll need 2 cups of semisweet chocolate chips.
First, preheat the oven to 375Â°F (190Â°C). Then, in a small bowl, whisk together the flour,
baking soda, and salt. In a large bowl, cream together the butter, granulated sugar, and brown sugar
until light and fluffy. Beat in the vanilla and eggs, one at a time. Gradually beat in the dry
ingredients until just combined. Finally, stir in the chocolate chips. Drop by rounded tablespoons
onto ungreased baking sheets and bake for 9 to 11 minutes.
"""

# Request JSON output shaped by the schema generated from the Recipe model.
response = client.models.generate_content(
    model="gemini-2.5-flash",
    contents=prompt,
    config={
        "response_mime_type": "application/json",
        "response_json_schema": Recipe.model_json_schema(),
    },
)

# Parse and validate the JSON text into a typed Recipe instance.
recipe = Recipe.model_validate_json(response.text)
print(recipe)

recipe_name='delicious chocolate chip cookies' prep_time_minutes=None ingredients=[Ingredient(name='all-purpose flour', quantity='2 and 1/4 cups'), Ingredient(name='baking soda', quantity='1 teaspoon'), Ingredient(name='salt', quantity='1 teaspoon'), Ingredient(name='unsalted butter', quantity='1 cup'), Ingredient(name='granulated sugar', quantity='3/4 cup'), Ingredient(name='packed brown sugar', quantity='3/4 cup'), Ingredient(name='vanilla extract', quantity='1 teaspoon'), Ingredient(name='large eggs', quantity='2'), Ingredient(name='semisweet chocolate chips', quantity='2 cups')] instructions=['Preheat the oven to 375Â°F (190Â°C).', 'In a small bowl, whisk together the flour, baking soda, and salt.', 'In a large bowl, cream together the butter, granulated sugar, and brown sugar until light and fluffy.', 'Beat in the vanilla and eggs, one at a time.', 'Gradually beat in the dry ingredients until just combined.', 'Stir in the chocolate chips.', 'Drop by rounded tablespoons onto ungrea

### Function calling (the manual way)

In [10]:
# Define a function that the model can call to control smart lights.
# This dict is only the declaration (name, description, parameter schema);
# the Python function that actually runs is set_light_values, defined separately.
set_light_values_declaration = {
    "name": "set_light_values",
    "description": "Sets the brightness and color temperature of a light.",
    "parameters": {
        "type": "object",
        "properties": {
            "brightness": {
                "type": "integer",
                "description": "Light level from 0 to 100. Zero is off and 100 is full brightness",
            },
            "color_temp": {
                "type": "string",
                "enum": ["daylight", "cool", "warm"],
                "description": "Color temperature of the light fixture, which can be `daylight`, `cool` or `warm`.",
            },
        },
        # Both arguments must be supplied by the model.
        "required": ["brightness", "color_temp"],
    },
}

# This is the actual function that would be called based on the model's suggestion
def set_light_values(brightness: int, color_temp: str) -> dict[str, int | str]:
    """Set the brightness and color temperature of a room light. (mock API).

    Args:
        brightness: Light level from 0 to 100. Zero is off and 100 is full brightness
        color_temp: Color temperature of the light fixture, which can be `daylight`, `cool` or `warm`.

    Returns:
        A dictionary containing the set brightness and color temperature.
    """
    return {"brightness": brightness, "colorTemperature": color_temp}




# Wrap the declaration in a Tool and attach it to the request config.
tools = types.Tool(function_declarations=[set_light_values_declaration])
config = types.GenerateContentConfig(tools=[tools])

# Define user prompt
contents = [
    types.Content(
        role="user", parts=[types.Part(text="Turn the lights down to a romantic level")]
    )
]

# Send request with function declarations
response = client.models.generate_content(
    model="gemini-2.5-flash",
    contents=contents,
    config=config,
)

# The function call may not be in the first part, so scan every part of the
# model's reply and take the first one that carries a function call.
model_content = response.candidates[0].content
tool_call = next(
    (part.function_call for part in (model_content.parts or []) if part.function_call),
    None,
)

print("================== Step 1: Model Suggests Function Call ==================")
print(tool_call)

# Fail with a clear message instead of an AttributeError/NameError further down
# when the model answered with text or asked for a function we do not implement.
if tool_call is None:
    raise RuntimeError("Model response did not contain a function call.")
if tool_call.name != "set_light_values":
    raise RuntimeError(f"Model requested an unexpected function: {tool_call.name!r}")

result = set_light_values(**tool_call.args)
print("================== Step 2: Function Execution Result ==================")
print(f"Function execution result: {result}")

# Create a function response part
function_response_part = types.Part.from_function_response(
    name=tool_call.name,
    response={"result": result},
)
print("================== Step 4: Wrap Function Response ==================")
print(function_response_part)

# Append function call and result of the function execution to contents
contents.append(model_content)  # Append the content from the model's response.
contents.append(types.Content(role="user", parts=[function_response_part]))  # Append the function response

print("================== Step 4a: Updated Contents ==================")
for content in contents:
    for part in content.parts:
        print(f'Role: {content.role}, Part: {part}')

# Second round trip: the model sees its own call plus our result and replies in text.
final_response = client.models.generate_content(
    model="gemini-2.5-flash",
    config=config,
    contents=contents,
)

print("================== Step 5: Final Model Response ==================")
print(final_response.text)

id=None args={'brightness': 20, 'color_temp': 'warm'} name='set_light_values' partial_args=None will_continue=None
Function execution result: {'brightness': 20, 'colorTemperature': 'warm'}
media_resolution=None code_execution_result=None executable_code=None file_data=None function_call=None function_response=FunctionResponse(
  name='set_light_values',
  response={
    'result': {
      'brightness': 20,
      'colorTemperature': 'warm'
    }
  }
) inline_data=None text=None thought=None thought_signature=None video_metadata=None
Role: user, Part: media_resolution=None code_execution_result=None executable_code=None file_data=None function_call=None function_response=None inline_data=None text='Turn the lights down to a romantic level' thought=None thought_signature=None video_metadata=None
Role: model, Part: media_resolution=None code_execution_result=None executable_code=None file_data=None function_call=FunctionCall(
  args={
    'brightness': 20,
    'color_temp': 'warm'
  },
  na

### Parallel function calling (manual way)

In addition to single turn function calling, you can also call multiple functions at once. Parallel function calling lets you execute multiple functions at once and is used when the functions are not dependent on each other. This is useful in scenarios like gathering data from multiple independent sources, such as retrieving customer details from different databases or checking inventory levels across various warehouses or performing multiple actions such as converting your apartment into a disco.

In [11]:
# Declaration: switch the disco ball on or off (single required boolean).
power_disco_ball = {
    "name": "power_disco_ball",
    "description": "Powers the spinning disco ball.",
    "parameters": {
        "type": "object",
        "properties": {
            "power": {
                "type": "boolean",
                "description": "Whether to turn the disco ball on or off.",
            }
        },
        "required": ["power"],
    },
}

# Declaration: start music described by two required boolean flags.
start_music = {
    "name": "start_music",
    "description": "Play some music matching the specified parameters.",
    "parameters": {
        "type": "object",
        "properties": {
            "energetic": {
                "type": "boolean",
                "description": "Whether the music is energetic or not.",
            },
            "loud": {
                "type": "boolean",
                "description": "Whether the music is loud or not.",
            },
        },
        "required": ["energetic", "loud"],
    },
}

# Declaration: set light brightness as a number (0.0 off .. 1.0 full per the description).
dim_lights = {
    "name": "dim_lights",
    "description": "Dim the lights.",
    "parameters": {
        "type": "object",
        "properties": {
            "brightness": {
                "type": "number",
                "description": "The brightness of the lights, 0.0 is off, 1.0 is full.",
            }
        },
        "required": ["brightness"],
    },
}

In [12]:
# All three declarations are bundled into a single Tool.
house_tools = [
    types.Tool(function_declarations=[power_disco_ball, start_music, dim_lights])
]
config = types.GenerateContentConfig(
    tools=house_tools,
    # Automatic execution is switched off: this cell only inspects the requested calls.
    automatic_function_calling=types.AutomaticFunctionCallingConfig(
        disable=True
    ),
    # Force the model to call 'any' function, instead of chatting.
    tool_config=types.ToolConfig(
        function_calling_config=types.FunctionCallingConfig(mode='ANY')
    ),
)

chat = client.chats.create(model="gemini-2.5-flash", config=config)
response = chat.send_message("Turn this place into a party!")

# Print out each of the function calls requested from this single call
print("Example 1: Forced function calling")
for fn in response.function_calls:
    # Render each call as name(key=value, ...) for readability.
    args = ", ".join(f"{key}={val}" for key, val in fn.args.items())
    print(f"{fn.name}({args})")

Example 1: Forced function calling
start_music(energetic=True, loud=True)
power_disco_ball(power=True)
dim_lights(brightness=0.3)


### Function Call (Automatic)

The Python SDK supports automatic function calling, which automatically converts Python functions to declarations, handles the function call execution and response cycle for you. Following is an example for the disco use case.

In the manual way we were doing:

```
tools = types.Tool(function_declarations=[set_light_values_declaration])
config = types.GenerateContentConfig(tools=[tools])
```

Now with automatic function calling we can do

```
config = types.GenerateContentConfig(
    tools=[power_disco_ball_impl, start_music_impl, dim_lights_impl]
)
```

In [13]:
# Module-level records of the most recent result of each tool implementation.
# Each *_impl function in this cell overwrites its own variable when it runs.
power_disco_ball_state = None
start_music_state = None
dim_lights_state = None


# Actual function implementations
def power_disco_ball_impl(power: bool) -> dict:
    """Powers the spinning disco ball.

    Args:
        power: Whether to turn the disco ball on or off.

    Returns:
        A status dictionary indicating the current state.
    """
    # NOTE(review): docstring left verbatim -- with automatic function calling the SDK
    # presumably derives the tool declaration from it; confirm before rewording.
    global power_disco_ball_state
    print("Function called: power_disco_ball_impl")
    if power:
        message = "Disco ball powered on"
    else:
        message = "Disco ball powered off"
    # Record the latest state at module level, and hand the caller its own dict.
    power_disco_ball_state = {"status": message}
    return {"status": message}

def start_music_impl(energetic: bool, loud: bool) -> dict:
    """Play some music matching the specified parameters.

    Args:
        energetic: Whether the music is energetic or not.
        loud: Whether the music is loud or not.

    Returns:
        A dictionary containing the music settings.
    """
    # NOTE(review): docstring left verbatim -- with automatic function calling the SDK
    # presumably derives the tool declaration from it; confirm before rewording.
    global start_music_state
    print("Function called: start_music_impl")
    settings = {}
    if energetic:
        settings["music_type"] = "energetic"
    else:
        settings["music_type"] = "chill"
    if loud:
        settings["volume"] = "loud"
    else:
        settings["volume"] = "quiet"
    # Keep one dict as module state and return a separate copy to the caller.
    start_music_state = settings
    return dict(settings)

def dim_lights_impl(brightness: float) -> dict:
    """Dim the lights.

    Args:
        brightness: The brightness of the lights, 0.0 is off, 1.0 is full.

    Returns:
        A dictionary containing the new brightness setting.
    """
    # NOTE(review): docstring left verbatim -- with automatic function calling the SDK
    # presumably derives the tool declaration from it; confirm before rewording.
    global dim_lights_state
    print("Function called: dim_lights_impl")
    # One dict is stored as module state; the caller receives a separate one.
    dim_lights_state = dict(brightness=brightness)
    return dict(brightness=brightness)

# Request config for automatic function calling: the Python callables themselves
# are passed as tools (no hand-written declarations).
config = types.GenerateContentConfig(
    tools=[power_disco_ball_impl, start_music_impl, dim_lights_impl]
)

In [14]:
# Make the request with an open-ended prompt; the "Function called: ..." lines in the
# output come from the tool implementations running during this call.
response = client.models.generate_content(
    model="gemini-2.5-flash",
    contents="Do everything you need to this place into party!",
    config=config,
)

print("\nAutomatic function calling")
print(response.text)

Function called: power_disco_ball_impl
Function called: start_music_impl
Function called: dim_lights_impl

Automatic function calling
Alright, the disco ball is spinning, the music is pumping, and the lights are set for a party!


In [15]:
# Make the request, this time spelling out the order and arguments of the three actions.
response = client.models.generate_content(
    model="gemini-2.5-flash",
    contents="Do everything you need to this place into party! First start the music loudly and energetically, then turn on the disco ball, and finally dim the lights to 50% brightness.",
    config=config,
)

print("\nAutomatic function calling")
print(response.text)

Function called: start_music_impl
Function called: power_disco_ball_impl
Function called: dim_lights_impl

Automatic function calling
The party is ready! The music is loud and energetic, the disco ball is on, and the lights are at 50% brightness.


In [16]:
# Make the request with two consecutive rounds of actions, so each tool is asked for twice.
response = client.models.generate_content(
    model="gemini-2.5-flash",
    contents="Do everything you need to this place into party! First start the music loudly and energetically, then turn on the disco ball, and finally dim the lights to 50% brightness. Then again start the music quietly and chill, turn off the disco ball, and set the lights to full brightness.",
    config=config,
)

print("\nAutomatic function calling")
print(response.text)

Function called: start_music_impl
Function called: power_disco_ball_impl
Function called: dim_lights_impl
Function called: start_music_impl
Function called: power_disco_ball_impl
Function called: dim_lights_impl

Automatic function calling
Alright, I've set up the party: loud, energetic music is playing, the disco ball is on, and the lights are at 50% brightness.

Then, I've returned things to normal: quiet, chill music is playing, the disco ball is off, and the lights are at full brightness.


### Compositional function calling ( Automatic )

Passing the output of one function as the input to another function. This is useful when the functions are dependent on each other and the output of one function is needed as input for another function. For example, you can first retrieve user information and then use that information to generate a personalized recommendation.

In [18]:
import os
from google import genai
from google.genai import types

# Example Functions
def get_weather_forecast(location: str) -> dict:
    """Gets the current weather temperature for a given location."""
    # Canned reply standing in for a real weather API call (TODO: make the API call).
    dummy_forecast = {"temperature": 25, "unit": "celsius"}
    print("Tool Call: get_weather_forecast(location={})".format(location))
    print("Tool Response: {'temperature': 25, 'unit': 'celsius'}")
    return dummy_forecast

def set_thermostat_temperature(temperature: int) -> dict:
    """Sets the thermostat to a desired temperature."""
    # Placeholder outcome until a real thermostat API is wired in (TODO).
    outcome = {"status": "success"}
    print("Tool Call: set_thermostat_temperature(temperature={})".format(temperature))
    print("Tool Response: {'status': 'success'}")
    return outcome

# Request config exposing both Python functions as tools, so the model can use the
# weather result to decide the thermostat value.
config = types.GenerateContentConfig(
    tools=[get_weather_forecast, set_thermostat_temperature]
)

# Make the request
response = client.models.generate_content(
    model="gemini-2.5-flash",
    contents="If it's warmer than 20Â°C in London, set the thermostat to 20Â°C, otherwise set it to 18Â°C.",
    config=config,
)

# Print the final, user-facing response
print(response.text)

Tool Call: get_weather_forecast(location=London)
Tool Response: {'temperature': 25, 'unit': 'celsius'}
Tool Call: set_thermostat_temperature(temperature=20)
Tool Response: {'status': 'success'}
The thermostat has been set to 20Â°C.


### Code Execution

The Gemini API provides a code execution tool that enables the model to generate and run Python code. The model can then learn iteratively from the code execution results until it arrives at a final output. You can use code execution to build applications that benefit from code-based reasoning. For example, you can use code execution to solve equations or process text. You can also use the libraries included in the code execution environment to perform more specialized tasks.

Gemini is only able to execute code in Python. You can still ask Gemini to generate code in another language, but the model can't use the code execution tool to run it.

https://ai.google.dev/gemini-api/docs/code-execution

- Supported libraries
The code execution environment includes the following libraries:

attrs
chess
contourpy
fpdf
geopandas
imageio
jinja2
joblib
jsonschema
jsonschema-specifications
lxml
matplotlib
mpmath
numpy
opencv-python
openpyxl
packaging
pandas
pillow
protobuf
pylatex
pyparsing
PyPDF2
python-dateutil
python-docx
python-pptx
reportlab
scikit-learn
scipy
seaborn
six
striprtf
sympy
tabulate
tensorflow
toolz
xlrd

###### You can't install your own libraries.



In [19]:
# Chat session with the code-execution tool enabled.
chat = client.chats.create(
    model="gemini-2.5-pro",
    config=types.GenerateContentConfig(
        tools=[types.Tool(code_execution=types.ToolCodeExecution)]
    ),
)

response = chat.send_message("I have a math question for you.")
print(response.text)

response = chat.send_message(
    "What is the sum of the first 50 prime numbers? "
    "Generate and run code for the calculation, and make sure you get all 50."
)

# The reply is a sequence of parts; print whichever of text, generated code,
# and execution output each part carries.
for part in response.candidates[0].content.parts:
    if part.text is not None:
        print(part.text)
    if part.executable_code is not None:
        print(part.executable_code.code)
    if part.code_execution_result is not None:
        print(part.code_execution_result.output)

Of course! Please share your math question with me. I'll do my best to help you solve it.
Of course. Here is the plan to find the sum of the first 50 prime numbers:

1.  **Define a Prime Number:** A prime number is a natural number greater than 1 that has no positive divisors other than 1 and itself.
2.  **Generate Primes:** I will write a script to find prime numbers sequentially, starting from 2.
3.  **Collect 50 Primes:** The script will continue generating primes until it has found exactly 50 of them.
4.  **Calculate the Sum:** Once the list of the first 50 prime numbers is complete, I will calculate their sum.
5.  **Present Findings:** I will display the list of the 50 prime numbers found and their final sum.

Here is the Python code to perform the calculation.


def is_prime(n):
    """Checks if a number is prime."""
    if n <= 1:
        return False
    if n <= 3:
        return True
    if n % 2 == 0 or n % 3 == 0:
        return False
    i = 5
    while i * i <= n:
        

# Using Langchain

In [8]:
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_core.messages import HumanMessage

# LangChain chat model pointed at the same project, region and local proxy.
llm = ChatGoogleGenerativeAI(
    model="gemini-2.5-pro",

    # Force the Vertex AI backend (no API key required)
    vertexai=True,

    project="cloud-ai-police",
    location="us-central1",

    # Route via your proxy
    # NOTE(review): a bare URL string is passed as client_options -- confirm this is the
    # form the installed langchain_google_genai version expects.
    client_options="http://localhost:8080/gemini",

    temperature=0.2,
    timeout=120,
)

resp = llm.invoke([HumanMessage(content="Hello via LangChain + proxy + Vertex!")])
print(resp.content)

Hello there! It's great to connect with you.

Your message has successfully traveled from your LangChain application, through your proxy, and has reached me here on Google's Vertex AI.

That's a cool setup! What can I help you with today?


# Using REST

In [20]:
import requests

# generateContent endpoint for gemini-2.5-pro, addressed through the local proxy.
url = "http://localhost:8080/gemini/v1beta1/projects/cloud-ai-police/locations/us-central1/publishers/google/models/gemini-2.5-pro:generateContent"

# Minimal payload: a single user turn with one text part.
# `body` is reused by the base64 cell further down.
body = {
  "contents": [
    {
      "role": "user",
      "parts": [
        { "text": "hello from proxy" }
      ]
    }
  ]
}

# requests applies no timeout by default, so a stalled proxy would hang the cell
# forever; bound the call at 120 s, matching the timeout used by the other clients.
res = requests.post(url, json=body, timeout=120)
print(res.json())

{'candidates': [{'content': {'role': 'model', 'parts': [{'text': 'Hello! Message received loud and clear from your proxy.\n\nHow can I help you today?'}]}, 'finishReason': 'STOP', 'avgLogprobs': -12.031432302374588}], 'usageMetadata': {'promptTokenCount': 3, 'candidatesTokenCount': 19, 'totalTokenCount': 1190, 'trafficType': 'ON_DEMAND', 'promptTokensDetails': [{'modality': 'TEXT', 'tokenCount': 3}], 'candidatesTokensDetails': [{'modality': 'TEXT', 'tokenCount': 19}], 'thoughtsTokenCount': 1168}, 'modelVersion': 'gemini-2.5-pro', 'createTime': '2026-01-23T06:01:02.533564Z', 'responseId': 'ng5zabzIIO6XmecPzd6EoQ0'}


# Using REST b64

In [73]:
# Base64-encode the request body (the `body` dict built in the REST cell)
import base64,json

encoded_body = base64.b64encode(json.dumps(body).encode()).decode()

# Same model endpoint, but on the proxy route that takes a base64 text payload.
url = "http://localhost:8080/geminib64/v1beta1/projects/cloud-ai-police/locations/us-central1/publishers/google/models/gemini-2.5-pro:generateContent"

# requests applies no timeout by default; bound the call so a stalled proxy
# cannot hang the cell indefinitely.
res = requests.post(
    url,
    headers={"Content-Type": "text/plain"},
    data=encoded_body,
    timeout=120,
)

print(res.json())

{'body_b64': 'ewogICJjYW5kaWRhdGVzIjogWwogICAgewogICAgICAiY29udGVudCI6IHsKICAgICAgICAicm9sZSI6ICJtb2RlbCIsCiAgICAgICAgInBhcnRzIjogWwogICAgICAgICAgewogICAgICAgICAgICAidGV4dCI6ICJIZWxsbyEgTWVzc2FnZSByZWNlaXZlZCBsb3VkIGFuZCBjbGVhci4gSXQgc2VlbXMgeW91ciBwcm94eSBpcyB3b3JraW5nIHBlcmZlY3RseS5cblxuSG93IGNhbiBJIGhlbHAgeW91IHRvZGF5PyIKICAgICAgICAgIH0KICAgICAgICBdCiAgICAgIH0sCiAgICAgICJmaW5pc2hSZWFzb24iOiAiU1RPUCIsCiAgICAgICJhdmdMb2dwcm9icyI6IC04LjY4MzI0NTk3Njc2NTk1MTEKICAgIH0KICBdLAogICJ1c2FnZU1ldGFkYXRhIjogewogICAgInByb21wdFRva2VuQ291bnQiOiAzLAogICAgImNhbmRpZGF0ZXNUb2tlbkNvdW50IjogMjQsCiAgICAidG90YWxUb2tlbkNvdW50IjogMTAxOSwKICAgICJ0cmFmZmljVHlwZSI6ICJPTl9ERU1BTkQiLAogICAgInByb21wdFRva2Vuc0RldGFpbHMiOiBbCiAgICAgIHsKICAgICAgICAibW9kYWxpdHkiOiAiVEVYVCIsCiAgICAgICAgInRva2VuQ291bnQiOiAzCiAgICAgIH0KICAgIF0sCiAgICAiY2FuZGlkYXRlc1Rva2Vuc0RldGFpbHMiOiBbCiAgICAgIHsKICAgICAgICAibW9kYWxpdHkiOiAiVEVYVCIsCiAgICAgICAgInRva2VuQ291bnQiOiAyNAogICAgICB9CiAgICBdLAogICAgInRob3VnaHRzVG9rZW5Db3VudCI6IDk5MgogIH0sCiAgIm

In [74]:
# Treat the raw response text as if it were captured stdout, to exercise the parser below.
stdout = res.text

In [75]:
from typing import Any, Dict

def decode_b64_body(body_b64: str) -> bytes:
    """Decode a base64 string into raw bytes, rejecting non-base64 characters.

    Args:
        body_b64: Base64 text to decode.

    Returns:
        The decoded bytes.

    Raises:
        binascii.Error: If the input is not valid base64 (strict validation is on).
    """
    encoded = body_b64.encode("utf-8")
    decoded = base64.b64decode(encoded, validate=True)
    return decoded

def extract_json_from_stdout(stdout: str) -> Dict[str, Any]:
    """Pull a JSON object out of pod stdout that may also contain log lines.

    Candidates are tried in this order, and the first one that parses to a
    JSON *object* wins:
      1. the whole of ``stdout``
      2. each non-blank line, from the last line upwards

    Values that are valid JSON but not objects (for example a log line that is
    just ``42`` or ``"done"``) are skipped, because callers rely on the
    declared ``Dict`` return type.

    Args:
        stdout: Captured output text.

    Returns:
        The parsed JSON object.

    Raises:
        RuntimeError: If ``stdout`` is empty, or no candidate parses to a JSON
            object. The message includes the last 800 characters for context.
    """
    if not stdout:
        raise RuntimeError("Empty stdout from pod exec")

    candidates = [stdout]
    candidates.extend(
        ln.strip() for ln in reversed(stdout.splitlines()) if ln.strip()
    )

    for text in candidates:
        try:
            parsed = json.loads(text)
        except (ValueError, RecursionError):
            # Not JSON (json.JSONDecodeError is a ValueError) or too deeply nested; try the next one.
            continue
        if isinstance(parsed, dict):
            return parsed

    # Fail with context
    raise RuntimeError(f"Could not parse JSON from pod stdout. Tail:\n{stdout[-800:]}")

In [76]:
# Parse the captured text into the proxy's JSON envelope.
result = extract_json_from_stdout(stdout)

In [77]:
# Display the parsed envelope.
result

{'body_b64': 'ewogICJjYW5kaWRhdGVzIjogWwogICAgewogICAgICAiY29udGVudCI6IHsKICAgICAgICAicm9sZSI6ICJtb2RlbCIsCiAgICAgICAgInBhcnRzIjogWwogICAgICAgICAgewogICAgICAgICAgICAidGV4dCI6ICJIZWxsbyEgTWVzc2FnZSByZWNlaXZlZCBsb3VkIGFuZCBjbGVhci4gSXQgc2VlbXMgeW91ciBwcm94eSBpcyB3b3JraW5nIHBlcmZlY3RseS5cblxuSG93IGNhbiBJIGhlbHAgeW91IHRvZGF5PyIKICAgICAgICAgIH0KICAgICAgICBdCiAgICAgIH0sCiAgICAgICJmaW5pc2hSZWFzb24iOiAiU1RPUCIsCiAgICAgICJhdmdMb2dwcm9icyI6IC04LjY4MzI0NTk3Njc2NTk1MTEKICAgIH0KICBdLAogICJ1c2FnZU1ldGFkYXRhIjogewogICAgInByb21wdFRva2VuQ291bnQiOiAzLAogICAgImNhbmRpZGF0ZXNUb2tlbkNvdW50IjogMjQsCiAgICAidG90YWxUb2tlbkNvdW50IjogMTAxOSwKICAgICJ0cmFmZmljVHlwZSI6ICJPTl9ERU1BTkQiLAogICAgInByb21wdFRva2Vuc0RldGFpbHMiOiBbCiAgICAgIHsKICAgICAgICAibW9kYWxpdHkiOiAiVEVYVCIsCiAgICAgICAgInRva2VuQ291bnQiOiAzCiAgICAgIH0KICAgIF0sCiAgICAiY2FuZGlkYXRlc1Rva2Vuc0RldGFpbHMiOiBbCiAgICAgIHsKICAgICAgICAibW9kYWxpdHkiOiAiVEVYVCIsCiAgICAgICAgInRva2VuQ291bnQiOiAyNAogICAgICB9CiAgICBdLAogICAgInRob3VnaHRzVG9rZW5Db3VudCI6IDk5MgogIH0sCiAgIm

In [78]:
# Unpack the envelope fields. A missing status_code falls back to 502, and a
# missing or null headers entry becomes an empty dict.
status_code = int(result.get("status_code", 502))
pod_headers = result.get("headers") or {}
body_b64_out = result.get("body_b64")

In [79]:
# Decode and show the wrapped response body when one is present.
if body_b64_out:
    body_bytes = decode_b64_body(body_b64_out)
    # errors="replace" keeps the cell from failing on bytes that are not valid UTF-8.
    body_str = body_bytes.decode("utf-8", errors="replace")
    print("Body:", body_str)
    print("Status Code:", status_code)
else:
    print("No body received from pod.")

Body: {
  "candidates": [
    {
      "content": {
        "role": "model",
        "parts": [
          {
            "text": "Hello! Message received loud and clear. It seems your proxy is working perfectly.\n\nHow can I help you today?"
          }
        ]
      },
      "finishReason": "STOP",
      "avgLogprobs": -8.6832459767659511
    }
  ],
  "usageMetadata": {
    "promptTokenCount": 3,
    "candidatesTokenCount": 24,
    "totalTokenCount": 1019,
    "trafficType": "ON_DEMAND",
    "promptTokensDetails": [
      {
        "modality": "TEXT",
        "tokenCount": 3
      }
    ],
    "candidatesTokensDetails": [
      {
        "modality": "TEXT",
        "tokenCount": 24
      }
    ],
    "thoughtsTokenCount": 992
  },
  "modelVersion": "gemini-2.5-pro",
  "createTime": "2026-01-23T06:39:51.102591Z",
  "responseId": "txdzab-hBp7f-O4PgcqGkA0"
}

Status Code: 200
