##### Copyright 2025 Google LLC.



```
# @title Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
```



# Cost Estimation and Health Monitoring with Gemini

<a target="_blank" href="https://colab.research.google.com/github/google-gemini/cookbook/blob/main/examples/Cost_Estimation_and_Health_Monitoring.ipynb"><img src="https://colab.research.google.com/assets/colab-badge.svg" height=30/></a>

## Overview

Cost observability is key for scaling Gemini API applications. Without tracking token usage and costs, you can't optimize your spending or identify expensive operations.

This notebook demonstrates how to build an observability layer for Gemini API applications. You will learn how to:

1. Extract token usage metadata from API responses.
2. Calculate real-time USD costs based on model pricing.
3. Perform health checks to verify API availability and quota.

By the end of this notebook, you'll have a reusable class that you can integrate into any Gemini API application.

In [None]:
# Install (or upgrade, -U) the "google-genai" package quietly (-q) before the cells below import from it.
%pip install -U -q "google-genai"

### Grab an API Key

Before you can use the Gemini API, you must obtain an API key.

**In Colab:** Add the key to the secrets manager under the "🔑" icon in the left panel. Give it the name `GOOGLE_API_KEY`.

In [None]:
from google import genai
from google.genai import types
from typing import Dict, Optional
from google.colab import userdata

# Get API key from Colab secrets (stored under the name GOOGLE_API_KEY)
api_key = userdata.get('GOOGLE_API_KEY')
# Build the shared client; later cells pass it to GeminiObservability and
# call client.models.generate_content on it directly.
client = genai.Client(api_key=api_key)
print("‚úÖ Gemini Client configured successfully!")

In [None]:
class GeminiObservability:
    """Monitor the cost and health of Gemini API usage.

    Wraps a Gemini client to (a) probe whether a model is reachable and
    (b) turn the usage metadata of a response into token counts and an
    estimated USD cost, using a per-model price table.
    """

    def __init__(self, client, prices: Optional[Dict] = None):
        """Store the client and build the price table.

        Args:
            client: Client object whose `models.generate_content(...)` is
                called by `check_health`.
            prices: Optional mapping of model name to
                `{"input": usd, "output": usd}` rates per 1 million tokens.
                Entries override or extend the built-in table, so rates can
                be kept current without editing this class.
        """
        self.client = client
        # 2025 Standard Pricing (USD per 1 Million tokens)
        # NOTE(review): these rates are hard-coded and pricing changes over
        # time -- confirm each entry against the official pricing page, or
        # pass up-to-date values through `prices`.
        self.prices = {
            "gemini-2.0-flash": {"input": 0.10, "output": 0.40},
            "gemini-2.5-flash": {"input": 0.075, "output": 0.30},
            "gemini-2.5-pro":   {"input": 3.50,  "output": 10.50},
            "gemini-3.0-flash-preview": {"input": 0.15, "output": 0.60},
        }
        if prices:
            self.prices.update(prices)

    @staticmethod
    def _token_counts(usage_metadata):
        """Return `(input_tokens, output_tokens)` read from usage metadata.

        The google-genai usage metadata exposes `prompt_token_count`,
        `candidates_token_count` and `thoughts_token_count`, each of which
        may be None. The older `input_token_count` / `output_token_count`
        names are still accepted as a fallback so duck-typed objects written
        against the previous version of this class keep working. A missing
        or None field (or a None `usage_metadata`) counts as 0.
        """
        def first_count(*names):
            for name in names:
                value = getattr(usage_metadata, name, None)
                if value is not None:
                    return value
            return 0

        input_tokens = first_count("prompt_token_count", "input_token_count")
        output_tokens = first_count("candidates_token_count", "output_token_count")
        # Thinking tokens are billed at the output rate, so count them as output.
        output_tokens += first_count("thoughts_token_count")
        return input_tokens, output_tokens

    def check_health(self, model_name: str = "gemini-2.0-flash") -> bool:
        """Verify API connectivity with a minimal generation request.

        Sends the prompt "ping" capped at one output token.

        Args:
            model_name: Model to probe.

        Returns:
            True if the request completed without raising; False otherwise
            (the error is printed).
        """
        try:
            self.client.models.generate_content(
                model=model_name,
                contents="ping",
                config=types.GenerateContentConfig(max_output_tokens=1)
            )
            return True
        # Deliberately broad: a health probe reports any failure as "unhealthy"
        # instead of propagating it to the caller.
        except Exception as e:
            print(f"Health Check Failed for {model_name}: {e}")
            return False

    def estimate_cost(self, usage_metadata, model_name: str) -> float:
        """Estimate the USD cost of one response from its token usage.

        Args:
            usage_metadata: Usage metadata of a response (see
                `_token_counts` for the fields that are read).
            model_name: Key into `self.prices`.

        Returns:
            Input cost plus output cost in USD, or 0.0 (after printing a
            warning) when `model_name` has no pricing entry.
        """
        if model_name not in self.prices:
            print(f"Warning: No pricing data for {model_name}.")
            return 0.0

        rates = self.prices[model_name]
        input_tokens, output_tokens = self._token_counts(usage_metadata)
        in_cost = (input_tokens / 1_000_000) * rates["input"]
        out_cost = (output_tokens / 1_000_000) * rates["output"]
        return in_cost + out_cost

    def get_usage_summary(self, usage_metadata, model_name: str) -> Dict:
        """Return a flat dictionary of token usage and estimated cost.

        Args:
            usage_metadata: Usage metadata of a response.
            model_name: Model that produced the response.

        Returns:
            Dict with keys `model`, `input_tokens`, `output_tokens`,
            `total_tokens` (input + output) and `estimated_cost_usd`
            (rounded to 6 decimal places).
        """
        cost = self.estimate_cost(usage_metadata, model_name)
        input_tokens, output_tokens = self._token_counts(usage_metadata)
        return {
            "model": model_name,
            "input_tokens": input_tokens,
            "output_tokens": output_tokens,
            "total_tokens": input_tokens + output_tokens,
            "estimated_cost_usd": round(cost, 6)
        }

In [None]:
# Wrap the configured client in the observability helper.
observability = GeminiObservability(client)

# Model ID used for both the health probe and the generation request.
active_model = "gemini-2.0-flash"

# Bail out early when the probe fails; otherwise run one request and report it.
if not observability.check_health(active_model):
    print("‚ùå API Unhealthy. Please check your API key and model permissions.")
else:
    print(f"API Health: ‚úÖ Healthy ({active_model})")

    # One ordinary generation call whose usage metadata feeds the report.
    response = client.models.generate_content(
        model=active_model,
        contents="Briefly explain why token tracking is important for software scaling."
    )

    # Summarize token usage and cost, then print one "Label: value" line per field.
    report = observability.get_usage_summary(response.usage_metadata, active_model)
    print("\n--- Observability Report ---")
    for field, value in report.items():
        label = field.replace('_', ' ').title()
        print(f"{label}: {value}")

In [None]:
import json
from datetime import datetime, timezone


def log_usage_to_file(report, path="gemini_usage_log.jsonl"):
    """Append one usage report to `path` as a timestamped JSON line.

    Args:
        report: Dictionary returned by `get_usage_summary`.
        path: File to append to; created if it does not exist.
    """
    record = {"logged_at": datetime.now(timezone.utc).isoformat(), **report}
    with open(path, "a", encoding="utf-8") as log_file:
        log_file.write(json.dumps(record) + "\n")


# 1. Initialize report so it is defined even when the health check fails
report = None
active_model = "gemini-2.0-flash"

# 2. Run the health check. check_health returns a bool (True when the probe
#    request succeeded), so branch on its truth value rather than comparing
#    it to status strings.
status = observability.check_health(active_model)

if status:
    response = client.models.generate_content(
        model=active_model,
        contents="Give me a 1-sentence tip for saving cloud costs."
    )

    report = observability.get_usage_summary(response.usage_metadata, active_model)

    print("\n--- Usage Summary ---")
    print(report)

    # 3. Persist the report now that it has been created
    log_usage_to_file(report)
else:
    # check_health has already printed the underlying error; it cannot tell
    # rate limiting apart from other failures, so mention both remedies.
    print("‚ùå API is unhealthy. Check your credentials.")
    print("If the error above mentions rate limits or quota, wait 60 seconds and try again.")

## Next Steps

1. **Production Integration**: Use the `GeminiObservability` class as a wrapper around your generation calls.
2. **Alerting**: Store the output of `estimate_cost` in a database to set budget alerts.
3. **Pricing Updates**: Keep the `prices` dictionary on your `GeminiObservability` instance in sync with the latest [official pricing](https://ai.google.dev/pricing); the rates hard-coded in this notebook are a snapshot and will go out of date.