In [None]:
# Copyright 2023 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Measure Height of Utility Poles with Gemini 2.5 Flash

## Description

This notebook demonstrates how to estimate the height of utility poles using Google Cloud Vertex AI's Gemini 2.5 Flash model and imagery data stored in BigQuery and Google Cloud Storage. The workflow includes querying relevant image observations, grouping images by asset, and applying a multi-image AI-powered analysis to estimate asset heights with structured reasoning.

## Prerequisites

- Access to a Google Cloud Platform (GCP) project with billing enabled.
- BigQuery dataset containing utility pole imagery observations.
- Google Cloud Storage bucket with image files referenced in BigQuery.
- Vertex AI API enabled in your GCP project.
- Service account or user credentials with permissions for BigQuery, Vertex AI, and Cloud Storage.
- Python 3.9+ environment with internet access (the code uses built-in generic type hints such as `list[str]`).

## Install Required Libraries

In [None]:
!pip install --upgrade google-cloud-bigquery google-genai

## Configuration

**Important**: Replace the placeholder values below with your actual GCP Project ID and Region.

In [None]:
# Google Cloud project ID passed to the BigQuery, Vertex AI and Gen AI clients below.
# It is empty by default and must be filled in before running the rest of the notebook.
PROJECT_ID = ''  # @param {type:"string"}
# Region passed as `location` when initializing Vertex AI and the Gen AI client.
REGION = 'us-central1'      # @param {type:"string"}

## Imports and Vertex AI Initialization

In [None]:
import vertexai
from google.cloud import bigquery
from google import genai
from google.genai.types import Content, Part

# Point the Vertex AI SDK at the configured project and region.
vertexai.init(
    project=PROJECT_ID,
    location=REGION,
)

# Gen AI client configured to use Vertex AI with the same project and region.
client = genai.Client(
    vertexai=True,
    project=PROJECT_ID,
    location=REGION,
)

In [None]:
# Selects every utility-pole observation belonging to the first 10 assets (ordered by
# asset_id) that have more than one observation.
# NOTE(review): the table reference is hard-coded to one specific project and dataset;
# point it at your own imagery dataset before running.
BIGQUERY_SQL_QUERY = """
SELECT
  t1.gcs_uri,
  t1.asset_id,
  t1.observation_id,
  t1.detection_time,
  t1.location
FROM
  `sarthaks-lab`.`imagery_insights___preview___us`.`all_observations` AS t1
WHERE
  t1.asset_type = "ASSET_CLASS_UTILITY_POLE"
  AND t1.asset_id IN (
  SELECT
    asset_id
  FROM
    `sarthaks-lab`.`imagery_insights___preview___us`.`all_observations`
  WHERE
    asset_type = "ASSET_CLASS_UTILITY_POLE"
  GROUP BY
    asset_id
  HAVING
    COUNT(observation_id) > 1
  ORDER BY
    asset_id  -- Add an ORDER BY for deterministic LIMIT behavior
  LIMIT
    10 );
"""

# Start from an empty result so that a failed query never leaves rows from an earlier
# run of this cell behind for the downstream cells to pick up.
query_response_data = []

# Execute BigQuery Query
try:
    bigquery_client = bigquery.Client(project=PROJECT_ID)
    query_job = bigquery_client.query(BIGQUERY_SQL_QUERY)
    # result() waits for the job to finish and raises if the query failed.
    query_response_data = [dict(row) for row in query_job.result()]

    print(f"Successfully fetched {len(query_response_data)} observations:")
    for item in query_response_data:
        print(f"Asset ID: {item['asset_id']}, GCS URI: {item['gcs_uri']}")
except Exception as e:
    # Best-effort: report the problem and leave `query_response_data` empty.
    print(f"An error occurred while querying BigQuery: {e}")

## Step 2: Group Images by Asset

### Subtask: Group Images by Asset

This cell processes the query results and groups the image GCS URIs by their corresponding `asset_id`.
**Reasoning**: With the observations fetched from BigQuery, grouping the image URIs by `asset_id` prepares the data for the next step, where all of an asset's images are analyzed together.

In [None]:
from collections import defaultdict

# Build a mapping of asset_id -> list of image GCS URIs.
assets = defaultdict(list)
if 'query_response_data' not in locals():
    print("No query response data found to process.")
else:
    for row in query_response_data:
        aid, uri = row.get('asset_id'), row.get('gcs_uri')
        # Rows lacking an asset id or an image URI are ignored.
        if not (aid and uri):
            continue
        assets[aid].append(uri)

    # Summarize how many assets and observations were collected.
    print(f"Found {len(assets)} unique assets.")
    for asset_id, uris in assets.items():
        print(f"Asset ID: {asset_id}, Observations: {len(uris)}")

## Step 3: Define Height Estimation Function

### Subtask: Define Height Estimation Function

In [None]:
import json

def estimate_asset_height(gcs_uris: list[str], model: str = "gemini-2.5-pro") -> dict:
    """
    Estimates the height of an asset from a list of images using a Gemini model.

    Args:
        gcs_uris: URIs of the images showing the asset. Every image is sent to the
            model with the MIME type ``image/jpeg``.
        model: Name of the Gemini model to call.
            NOTE(review): the notebook title mentions Gemini 2.5 Flash while this
            default is ``gemini-2.5-pro`` -- confirm which model is intended.

    Returns:
        The JSON object parsed from the model response (presumably carrying the same
        keys as the error result below -- the prompt text is elided in this cell).
        On any failure nothing is raised; instead a dict with
        ``estimated_height_meters`` set to ``None``, ``confidence_score`` set to
        ``"Error"`` and ``reasoning_notes`` set to the error message is returned.
    """
    prompt = """...""" # Same long prompt as before

    if not gcs_uris:
        # Nothing to analyse, so do not call the model with a prompt and no images.
        return {
            "estimated_height_meters": None,
            "confidence_score": "Error",
            "reasoning_notes": "No image URIs were provided."
        }

    try:
        image_parts = [Part(file_data={'file_uri': uri, 'mime_type': 'image/jpeg'}) for uri in gcs_uris]
        contents = [prompt] + image_parts
        # The model name and the contents have to be passed by keyword; passing the
        # contents positionally (and no model) raises a TypeError.
        response = client.models.generate_content(model=model, contents=contents)

        response_text = response.text
        if not response_text:
            raise ValueError("The model returned an empty response.")

        # Drop Markdown code fences the model may wrap around its JSON answer.
        response_text = response_text.replace("```json", "").replace("```", "").strip()
        result = json.loads(response_text)
        if not isinstance(result, dict):
            # Callers read the result with .get(), so anything but an object is an error.
            raise ValueError(f"Expected a JSON object but got {type(result).__name__}.")
        return result
    except Exception as e:
        # Best-effort: report the failure and hand back a well-formed error result so
        # that one bad asset does not abort processing of the remaining ones.
        print(f"Error estimating height for URIs {gcs_uris}: {e}")
        return {
            "estimated_height_meters": None,
            "confidence_score": "Error",
            "reasoning_notes": str(e)
        }

print("Height estimation function `estimate_asset_height` has been defined.")

## Step 4: Process Assets and Generate DataFrame

### Subtask: Process Assets and Generate DataFrame

Iterate through the grouped assets, call the height estimation function for each, and compile the results (asset ID, number of observations, estimated height, confidence score, and reasoning notes) into a pandas DataFrame.

In [None]:
import pandas as pd
import warnings
from IPython.display import display

# Silence the Vertex AI SDK deprecation notice so it does not clutter the cell output.
warnings.filterwarnings(
    "ignore",
    category=UserWarning,
    message="This feature is deprecated as of June 24, 2025",
)

# Accumulates one record per asset for the summary table.
results_data = []

print("--- Processing Assets and Estimating Height ---\n")

if not ('assets' in locals() and assets):
    print("No assets found to process.")
else:
    # Estimate the height of every asset from all of its images.
    for asset_id, image_uris in assets.items():
        estimate = estimate_asset_height(image_uris)

        # Missing keys fall back to "N/A" so the table always has every column.
        height = estimate.get("estimated_height_meters", "N/A")
        confidence = estimate.get("confidence_score", "N/A")
        notes = estimate.get("reasoning_notes", "N/A")

        # Show the per-asset outcome as soon as it is available.
        print(f"Asset ID: {asset_id}\n  - Height: {height} meters\n  - Confidence: {confidence}\n")

        record = {
            "asset_id": asset_id,
            "num_observations": len(image_uris),
            "estimated_height_meters": height,
            "confidence_score": confidence,
            "reasoning_notes": notes,
        }
        results_data.append(record)

    # Turn the collected records into a table and render it.
    results_df = pd.DataFrame(results_data)

    print("\n--- Final Results Summary ---")
    display(results_df)