# Measure height of assets with Gemini



## Install Required Libraries

In [None]:
!pip install --upgrade google-cloud-bigquery google-cloud-aiplatform

## Configuration

**Important**: Replace the placeholder values below with your actual GCP Project ID and Region.

In [None]:
# Google Cloud project and region; both are passed to vertexai.init() in this notebook.
PROJECT_ID = ''  # @param {type:"string"}
REGION = ''      # @param {type:"string"}

# BigQuery Configuration
BIGQUERY_DATASET_ID = '' # @param {type:"string"}
# NOTE(review): the query cell in this notebook reads the `all_observations` table by name
# and does not reference BIGQUERY_TABLE_ID — confirm whether this setting is still needed.
BIGQUERY_TABLE_ID = 'latest_observations' # @param {type:"string"}
QUERY_LIMIT = 10 # @param {type:"integer"}
ASSET_TYPE = "ASSET_CLASS_UTILITY_POLE" # @param {type:"string"}
# Model name handed to GenerativeModel() inside estimate_asset_height.
MODEL = "gemini-2.5-pro" # @param {type:"string"}

## Imports and Vertex AI Initialization

In [None]:
import vertexai
from google.cloud import bigquery
from vertexai.generative_models import GenerativeModel, Part

# Initialize Vertex AI SDK
# Uses the PROJECT_ID and REGION values assigned in the configuration cell.
vertexai.init(project=PROJECT_ID, location=REGION)

In [None]:
from google.cloud import bigquery

# Selects every observation of assets of type ASSET_TYPE that have more than one
# observation, restricted to the first QUERY_LIMIT such assets (ordered by asset_id).
#
# The dataset comes from BIGQUERY_DATASET_ID in the configuration cell; no `DATASET_ID`
# variable is defined anywhere in this notebook. The asset type is bound as the
# @asset_type query parameter instead of being pasted into the SQL text, and QUERY_LIMIT
# is coerced to int before being interpolated.
# NOTE(review): the table name `all_observations` is hard-coded here while the
# configuration cell sets BIGQUERY_TABLE_ID = 'latest_observations' — confirm which
# table is intended.
BIGQUERY_SQL_QUERY = f"""

SELECT
  t1.gcs_uri,
  t1.asset_id,
  t1.observation_id,
  t1.detection_time,
  t1.location
FROM
  `{PROJECT_ID}.{BIGQUERY_DATASET_ID}.all_observations` AS t1
WHERE
  t1.asset_type = @asset_type
  AND t1.asset_id IN (
  SELECT
    asset_id
  FROM
    `{PROJECT_ID}.{BIGQUERY_DATASET_ID}.all_observations`
  WHERE
    asset_type = @asset_type
  GROUP BY
    asset_id
  HAVING
    COUNT(observation_id) > 1
  ORDER BY
    asset_id  -- Add an ORDER BY for deterministic LIMIT behavior
  LIMIT
    {int(QUERY_LIMIT)} );
"""

# Execute BigQuery Query
try:
    bigquery_client = bigquery.Client(project=PROJECT_ID)
    # Bind the asset type as a named parameter so its value never becomes part of the SQL text.
    query_job_config = bigquery.QueryJobConfig(
        query_parameters=[
            bigquery.ScalarQueryParameter("asset_type", "STRING", ASSET_TYPE),
        ]
    )
    query_job = bigquery_client.query(BIGQUERY_SQL_QUERY, job_config=query_job_config)
    # result() blocks until the job finishes; each row is converted to a plain dict.
    query_response_data = [dict(row) for row in query_job.result()]

    print(f"Successfully fetched {len(query_response_data)} observations:")
    for item in query_response_data:
        print(f"Asset ID: {item['asset_id']}, GCS URI: {item['gcs_uri']}")
except Exception as e:
    # Best-effort cell: report the failure and let the notebook continue.
    print(f"An error occurred while querying BigQuery: {e}")

Successfully fetched 39 observations:
Asset ID: t1:000a9efecdc1a18d6cb19d45715c5410:ffff01ee, GCS URI: gs://geoai_published_ba3433e0-d709-4622-9577-66ab23587729__us/ef8ed4fa-a74e-4179-a0d1-8f091caa71bb/v0/o1:6d7f00a5459347e98d4103c124784faf:00250082.jpg
Asset ID: t1:000e68b2cce6da8c9fc30d2471192b7a:ffff01ee, GCS URI: gs://geoai_published_ba3433e0-d709-4622-9577-66ab23587729__us/ef8ed4fa-a74e-4179-a0d1-8f091caa71bb/v0/o1:acbd0f22e6870040050e3e79cb721c45:00250082.jpg
Asset ID: t1:000e68b2cce6da8c9fc30d2471192b7a:ffff01ee, GCS URI: gs://geoai_published_ba3433e0-d709-4622-9577-66ab23587729__us/ef8ed4fa-a74e-4179-a0d1-8f091caa71bb/v0/o1:5d58739d7017ddf37998ed7056b6ddf5:00250082.jpg
Asset ID: t1:000c345da88196b1502312ade92f5ca8:ffff01ee, GCS URI: gs://geoai_published_ba3433e0-d709-4622-9577-66ab23587729__us/ef8ed4fa-a74e-4179-a0d1-8f091caa71bb/v0/o1:24440c557bcbd5cc5e565a3fd59757c0:00250082.jpg
Asset ID: t1:000c345da88196b1502312ade92f5ca8:ffff01ee, GCS URI: gs://geoai_published_ba3433e0-d70

## Step 2: Group Images by Asset

### Subtask: Group Images by Asset
Process the query results and group the image GCS URIs by their corresponding `asset_id`.

**Reasoning**: With the observations fetched from BigQuery, the next cell groups the image GCS URIs by `asset_id`, so that all images of one asset can be processed together in the following steps.

In [None]:
from collections import defaultdict

# Build a mapping of asset_id -> list of image GCS URIs from the query rows.
assets = defaultdict(list)

if 'query_response_data' not in locals():
    # The query cell has not produced any rows in this kernel session.
    print("No query response data found to process.")
else:
    for row in query_response_data:
        row_asset, row_uri = row.get('asset_id'), row.get('gcs_uri')
        # Rows missing either field are left out of the grouping.
        if not (row_asset and row_uri):
            continue
        assets[row_asset].append(row_uri)

    # Summarise how many images were collected per asset.
    print(f"Found {len(assets)} unique assets.")
    for asset_id, uris in assets.items():
        print(f"Asset ID: {asset_id}, Observations: {len(uris)}")

Found 10 unique assets.
Asset ID: t1:000a9efecdc1a18d6cb19d45715c5410:ffff01ee, Observations: 2
Asset ID: t1:000e68b2cce6da8c9fc30d2471192b7a:ffff01ee, Observations: 2
Asset ID: t1:000c345da88196b1502312ade92f5ca8:ffff01ee, Observations: 8
Asset ID: t1:000bd5b990190de1dd5724294f6cef38:ffff01ee, Observations: 4
Asset ID: t1:00008b06bd849d50b01f9cfa65361b0d:ffff01ee, Observations: 2
Asset ID: t1:00078fcd5eab571c585eaa3b6e13b6d9:ffff01ee, Observations: 9
Asset ID: t1:000a16e094346b97dbbcaae27575671e:ffff01ee, Observations: 2
Asset ID: t1:0006b4ab2f44e8b37029ebd21d7fcda2:ffff01ee, Observations: 5
Asset ID: t1:000d5e714eca02cb526f08a01e985156:ffff01ee, Observations: 3
Asset ID: t1:000abf641ab75e3c10a87d7a6ee057e0:ffff01ee, Observations: 2


## Step 3: Define Height Estimation Function

### Subtask: Define Height Estimation Function


In [None]:
import json
import vertexai
from vertexai.generative_models import GenerativeModel, Part

def _parse_model_json(response_text: str) -> dict:
    """Parse the model's text reply into a dict, tolerating code fences and stray prose."""
    # Drop Markdown code-fence markers the model may wrap around its JSON.
    cleaned = response_text.strip().replace("```json", "").replace("```", "")
    try:
        parsed = json.loads(cleaned)
    except json.JSONDecodeError:
        # The reply may carry text before/after the JSON; retry on the outermost {...} span.
        start, end = cleaned.find("{"), cleaned.rfind("}")
        if start == -1 or end < start:
            raise
        parsed = json.loads(cleaned[start:end + 1])
    if not isinstance(parsed, dict):
        # Callers use .get() on the result, so anything but a JSON object is an error.
        raise ValueError(
            f"Expected a JSON object from the model, got {type(parsed).__name__}"
        )
    return parsed


def estimate_asset_height(gcs_uris: list[str]) -> dict:
    """
    Estimates the height of an asset from a list of images using a Gemini model.

    The images are referenced by Cloud Storage URI and sent, together with a
    structured JSON prompt, to the model named by the module-level ``MODEL``.

    Args:
        gcs_uris: Cloud Storage URIs of the images showing the asset. Each one is
            attached to the request with the ``image/jpeg`` MIME type.

    Returns:
        The JSON object returned by the model, parsed into a dict. The prompt asks
        for the keys ``estimated_height_meters``, ``confidence_score``,
        ``modeling_technique_applied`` and ``reasoning_notes``.
        If ``gcs_uris`` is empty, or the request or the parsing fails, a dict with
        ``estimated_height_meters`` set to None, ``confidence_score`` set to
        "Error" and the failure text in ``reasoning_notes`` is returned instead.
    """
    # Nothing to analyse: skip the model call entirely.
    if not gcs_uris:
        return {
            "estimated_height_meters": None,
            "confidence_score": "Error",
            "reasoning_notes": "No image URIs were provided."
        }

    # Initialize Vertex AI SDK if it hasn't been already
    # NOTE(review): confirm that `vertexai.get_initialized_project` exists in the installed
    # SDK; if it does not, the AttributeError is caught here and init() runs on every call.
    try:
        vertexai.get_initialized_project()
    except Exception:
        vertexai.init(project=PROJECT_ID, location=REGION)

    prompt = """
{
  "task_description": "You will be provided with two or more images of the same utility asset. Your task is to estimate the height of the primary asset shown across all images as accurately and concisely as possible. Your reasoning must explicitly follow one of the provided reference analysis flows, leveraging geometrical principles, image understanding, and known reference values. Output must be in a structured JSON format suitable for database entry.",
  "instructions": [
    {
      "step": 1,
      "description": "Thoroughly analyze all provided images of the utility asset. Identify the primary utility asset and observe its placement within the environment in each image, noting any contextual clues."
    },
    {
      "step": 2,
      "description": "Before applying specific flows, perform a general pre-estimation assessment for each image:"
    },
    {
      "sub_step": "A. **Ground Plane Inference**: Identify the ground plane or base level of the asset. Note if it's flat, sloped, or obscured. If a horizon line is visible (especially in multiple images), infer camera tilt and relative elevation. If known parallel lines (e.g., road markings, building edges) converge, identify vanishing points to infer perspective. If the asset's base isn't ground level, determine its elevation from the ground.",
      "mathematical_principles_applicable": ["Perspective Geometry", "Vanishing Point Analysis"]
    },
    {
      "sub_step": "B. **Image Quality and Distortion Assessment**: Note any significant lens distortion (fish-eye, barrel/pincushion), blur, poor lighting, or occlusions that might affect measurement accuracy. These factors will directly influence your confidence score."
    },
    {
      "step": 3,
      "description": "Scan all images for potential reference objects that have a generally known and consistent size. Prioritize references that are clearly visible and in useful proximity to the primary asset. Note their presence and estimated position relative to the ground plane in each image."
    },
    {
      "step": 4,
      "description": "Apply the following reference analysis flows in order. Choose the first flow that matches the available references across all images. Proceed only with the chosen flow."
    },
    {
      "flow": 1,
      "flow_id": "multi_image_correlated_reference",
      "condition": "The *same specific reference object* (e.g., a uniquely identifiable vehicle, a particular person, a distinct piece of street furniture) is clearly identifiable and present in two or more images.",
      "strategy": "Leverage the multiple views of this identical reference object to derive its most precise possible height, then use this refined reference height to estimate the asset's height. This method enhances accuracy by cross-validating the reference's dimensions. The underlying technique is often analogous to **Photogrammetric Triangulation or Multi-view Scale Calibration**, using known objects as 'control points' across views.",
      "sub_steps": [
        "A. **Derive Reference Object Height**: Mentally (or hypothetically, if pixel data were available) analyze the identified correlated reference object across all images where it appears. Use its perceived size relative to any other *stable reference* (e.g., a door height in a background image) or its dimensions from a clearer view to establish a more precise height for *this specific reference object*. Use perspective geometry inferred in Step 2.A to correct for camera angle effects on the reference's apparent height. If a shadow of the reference object is visible next to a shadow of the asset, consider **Shadow Triangulation** (see principles below) as an independent method to validate or refine asset height.",
        "B. **Estimate Asset Height**: Using the newly derived (or confirmed) precise height of the correlated reference object, apply proportionality (distance-adjusted pixel height ratios) in one or more images where both the reference and asset are clear and at a similar depth. Account explicitly for perspective using inferred camera parameters from Step 2.A if objects are at different depths. If multiple images offer good views, use them to cross-validate or average the asset height estimates, incorporating potential uncertainty from each measurement."
      ],
      "mathematical_principles_applicable": [
        {"name": "Proportionality and Scaling", "description": "H_asset = H_ref * (P_asset / P_ref) where H is true height and P is pixel height, adjusted for perceived depth and camera angle/perspective. Robust H_ref from multiple views prior to this step improves accuracy."},
        {"name": "Trigonometry (Base-Height Relationship)", "description": "H = D * tan(angle_of_elevation_to_top), where D (horizontal distance) and angle can be more precisely inferred using the known reference object in multiple views relative to the inferred ground plane and camera perspective."},
        {"name": "Shadow Triangulation", "description": "If sun angle (altitude and azimuth) is known or can be estimated (e.g., from time/date/location), H = Shadow_Length / tan(Sun_Altitude_Angle). If a reference object's height and its shadow are known, and the asset's shadow is known, asset_height = ref_height * (asset_shadow_length / ref_shadow_length). Leverage this method if shadows are clear and conditions allow."}
      ]
    },
    {
      "flow": 2,
      "flow_id": "multiple_distinct_references",
      "condition": "The same specific reference object (as in Flow 1) is NOT identifiable across multiple images, BUT two or more *different* reference objects (or references in only one image) are identifiable across the dataset (e.g., a car in Image 1 and a person in Image 2, or both a car and a person in Image 1).",
      "strategy": "Utilize each available reference object independently to estimate the asset's height. Then, combine these individual estimates for a more robust final height. The underlying technique is often **Weighted Averaging or Consensus Aggregation** of multiple independent measurements, potentially with explicit consideration for measurement uncertainty.",
      "sub_steps": [
        "A. **Individual Asset Height Estimates**: For each identifiable reference object across all images, use its known (or assumed average) height to estimate the asset's height in the specific image(s) where it appears clearly and is in useful proximity. Account explicitly for perspective using inferred camera parameters from Step 2.A if objects are at different depths. Record each individual estimate along with its perceived trustworthiness (e.g., based on reference clarity, proximity, and ground plane visibility). Consider **Shadow Triangulation** if feasible for any reference/asset pair.",
        "B. **Combine Estimates**: Aggregate all individual asset height estimates. Prioritize estimates derived from references that were clearer, closer to the asset, and less affected by perspective distortion (reflecting an implicit or explicit 'weighting'). If estimates vary significantly, assess the sources of discrepancy based on Step 2.B (image quality/distortion) and the specific reference characteristics."
      ],
      "mathematical_principles_applicable": [
        {"name": "Proportionality and Scaling", "description": "H_asset = H_ref * (P_asset / P_ref), adjusted for perspective."},
        {"name": "Shadow Triangulation", "description": "H = Shadow_Length / tan(Sun_Altitude_Angle), if conditions are met."}
      ]
    },
    {
      "flow": 3,
      "flow_id": "single_reference_single_image",
      "condition": "Only one single primary reference object can be clearly identified in only one of the provided images, or references are too unclear/distant in other images to be useful. OR, no suitable reference objects are found, but *contextual asset standards* are strong.",
      "strategy": "Estimate the asset's height based solely on this single available reference, or if no references are truly suitable, use general knowledge of typical asset heights. Acknowledge the inherently lower confidence due to limited cross-validation and fewer data points. The underlying technique is **Direct Proportional Scaling from a Single Reference** or **Contextual Heuristic Estimation**.",
      "sub_steps": [
        "A. **Estimate Asset Height**: Use the known (or assumed average) height of the single identifiable reference object to estimate the asset's height. Account explicitly for perspective using inferred camera parameters from Step 2.A. If **Shadow Triangulation** is feasible, attempt it. If no suitable references are found at all, estimate based on **Contextual Asset Standards** (e.g., 'this looks like a standard distribution pole, which are typically 10-12m').",
        "B. **Confidence Adjustment**: Reflect the lower confidence due to the solitary nature of the reference or reliance on heuristic knowledge."
      ],
      "mathematical_principles_applicable": [
        {"name": "Proportionality and Scaling", "description": "H_asset = H_ref * (P_asset / P_ref), adjusted for perspective."},
        {"name": "Shadow Triangulation", "description": "H = Shadow_Length / tan(Sun_Altitude_Angle), if possible."},
        {"name": "Contextual Heuristics", "description": "Leveraging typical dimensions for known asset types when direct measurement references are insufficient."}
      ]
    },
    {
      "step": 5,
      "description": "Provide your final findings exclusively in the following JSON format. Ensure no extraneous text, notes, or markdown formatting appear outside of the JSON block itself. Your 'reasoning_notes' must be concise and structured."
    }
  ],
  "reference_objects_guidelines": {
    "priority": [
      "Utility Poles (e.g., common wooden utility poles often have standardized heights, typically ranging from 10-15 meters depending on voltage lines and context)",
      "Vehicles (e.g., passenger cars are approximately 1.4-1.8 meters high, pickup trucks are approximately 1.8-2.1 meters high, semi-trailer trucks have varying heights but the trailer body is often around 4.1 meters)",
      "Human Figures (e.g., an average adult height is 1.6-1.8 meters; note if it appears to be a child or a significantly taller/shorter adult)",
      "Doors (e.g., standard residential doors are typically 2.0-2.1 meters high, commercial doors can be larger)",
      "Windows (e.g., a standard window pane height can vary, but common residential windows might be 1.2-1.5 meters tall)",
      "Building Levels (e.g., a single-story residential building is typically 3-4 meters high; a commercial floor can be 3.5-5 meters high)",
      "Manhole Covers (e.g., diameter typically around 0.6-0.75 meters)",
      "Fire Hydrants (e.g., height varies but generally around 0.6-1 meter)",
      "Street Furniture (e.g., benches, trash cans, traffic signs - provide specific types and estimated sizes if possible)",
      "Consistent Ground Level Features (e.g., curb height, sidewalk thickness, road markings)"
    ],
    "notes": "When using a reference object, be as specific as possible about its type and estimated dimensions. If its size is variable, break down the asset's height into proportional segments. When combining images, analyze how the reference object appears relative to the asset in different views. Crucially, if visual cues suggest a measurable geometric relationship (e.g., distinct angles, proportional pixel heights), describe the relevant mathematical principle and the values you would hypothetically input to perform the calculation. **Explicitly comment on how ground plane inference, perspective, and potential image distortions were considered.**"
  },
  "output_format": {
    "json_structure": {
      "estimated_height_meters": "<estimated_height_as_a_float>",
      "confidence_score": "<Low/Medium/High>",
      "modeling_technique_applied": "<Name of the modeling technique used based on the flow_id, e.g., 'Multi-Image Scale Calibration', 'Weighted Averaging', 'Direct Proportional Scaling'>",
      "reasoning_notes": {
        "flow_id_followed": "<ID of the strategy flow chosen from 'instructions.step 4'>",
        "pre_estimation_assessment": {
          "ground_plane_inference": "<Brief note on ground plane (e.g., 'flat, clearly visible in img1/2', 'sloped, inferred from road markings', 'obscured, assumed from asset base'). Mention horizon/vanishing points if used.>",
          "image_quality_issues": "<Concise notes on any significant distortions, blur, or occlusions identified (e.g., 'minor barrel distortion img1', 'asset part obscured img2', 'clear throughout').>"
        },
        "primary_references_details": "[{\"object_type\": \"<type_of_object>\", \"identifier\": \"<specific_id_if_correlated_e.g._'blue_sedan'>\", \"images_present\": [\"<img_id_1>\", \"<img_id_2>\"], \"assumed_or_derived_height_m\": <float_height>, \"derivation_notes\": \"<brief_justification_for_height_e.g._'standard_avg'/'derived_from_imgA_compared_to_door'/'cross-validated_across_img1/2'>\"}]",
        "secondary_references_details": "[{\"object_type\": \"<type_of_object>\", \"images_present\": [\"<img_id_1>\"], \"assumed_or_derived_height_m\": <float_height>, \"derivation_notes\": \"<brief_justification_for_height>\"}]",
        "calculation_summary": "<Concise summary of how the final height was derived, e.g., 'Proportional scaling from correlated blue sedan (1.65m) in img1/2, adjusted for inferred perspective' or 'Averaged estimates from Car (1.5m) in img1 and Person (1.7m) in img2, favoring car due to proximity.'>",
        "mathematical_principles_applied": ["<Principle1>", "<Principle2>"],
        "confidence_factors": "[{'factor': 'Reference Clarity', 'impact': 'positive', 'details': 'All references very sharp and close'}, {'factor': 'Perspective Distortion', 'impact': 'minor_negative', 'details': 'Slight camera tilt in img2, corrected'}, {'factor': 'Shadow Availability', 'impact': 'positive', 'details': 'Sun angle allowed triangulation in img1'}]",
        "confidence_justification": "<Brief overall statement, e.g., 'High due to multiple clear, consistent references across views with perspective correction and shadow validation.'>"
      }
    },
    "fallback_if_no_estimation": {
      "estimated_height_meters": null,
      "confidence_score": "Low",
      "modeling_technique_applied": "No Estimation",
      "reasoning_notes": {
        "flow_id_followed": "no_estimation",
        "pre_estimation_assessment": {
          "ground_plane_inference": "Unable to clearly identify.",
          "image_quality_issues": "Severe blur and occlusion."
        },
        "primary_references_details": [],
        "secondary_references_details": [],
        "calculation_summary": "Unable to make a reasonable estimation due to insufficient clear reference objects, inconsistent data across images, poor image quality, or inability to identify applicable mathematical relationships or perform multi-image correlation.",
        "mathematical_principles_applied": [],
        "confidence_factors": [{"factor": "Reference Availability", "impact": "negative", "details": "No reliable references present."}],
        "confidence_justification": "Lack of reliable data and poor image quality."
      }
    }
  }
}
    """

    try:
        model = GenerativeModel(MODEL)

        # One image part per URI; every image is declared as JPEG.
        image_parts = [Part.from_uri(uri=uri, mime_type="image/jpeg") for uri in gcs_uris]

        # The prompt goes first, followed by all images of the asset.
        content = [prompt] + image_parts

        responses = model.generate_content(content)

        return _parse_model_json(responses.text)

    except Exception as e:
        # Best-effort: report the failure and hand back a placeholder result so the
        # caller can keep processing the remaining assets.
        print(f"Error estimating height for URIs {gcs_uris}: {e}")
        return {
            "estimated_height_meters": None,
            "confidence_score": "Error",
            "reasoning_notes": str(e)
        }

# Confirmation message printed once the cell, including the definition above, has run.
print("Height estimation function `estimate_asset_height` has been defined.")

Height estimation function `estimate_asset_height` has been defined.


## Step 4: Process Assets and Generate DataFrame

### Subtask: Process Assets and Generate DataFrame
Iterate through the grouped assets, call the height estimation function for each, and compile the results (asset ID, number of observations, estimated height, confidence score, and reasoning notes) into a pandas DataFrame.

In [None]:
import pandas as pd
import warnings
from IPython.display import display

# Silence the one known deprecation notice emitted by the Vertex AI SDK.
warnings.filterwarnings(
    "ignore",
    category=UserWarning,
    message="This feature is deprecated as of June 24, 2025",
)

# Accumulates one record per asset; becomes the DataFrame below.
results_data = []

print("--- Processing Assets and Estimating Height ---\n")

if 'assets' not in locals() or not assets:
    # Either the grouping cell has not run, or it produced nothing.
    print("No assets found to process.")
else:
    for asset_id, image_uris in assets.items():
        # One model call per asset, covering all of its images.
        estimate = estimate_asset_height(image_uris)

        height = estimate.get("estimated_height_meters", "N/A")
        confidence = estimate.get("confidence_score", "N/A")

        # Show each result as soon as it is available.
        print(f"Asset ID: {asset_id}\n  - Height: {height} meters\n  - Confidence: {confidence}\n")

        record = {
            "asset_id": asset_id,
            "num_observations": len(image_uris),
            "estimated_height_meters": height,
            "confidence_score": confidence,
            "reasoning_notes": estimate.get("reasoning_notes", "N/A"),
        }
        results_data.append(record)

    # Build the summary table once, from the collected records.
    results_df = pd.DataFrame(results_data)

    print("\n--- Final Results Summary ---")
    display(results_df)

--- Processing Assets and Estimating Height ---

Asset ID: t1:000a9efecdc1a18d6cb19d45715c5410:ffff01ee
  - Height: 9.5 meters
  - Confidence: Medium

Asset ID: t1:000e68b2cce6da8c9fc30d2471192b7a:ffff01ee
  - Height: 10.7 meters
  - Confidence: Medium

Asset ID: t1:000c345da88196b1502312ade92f5ca8:ffff01ee
  - Height: 10.0 meters
  - Confidence: High

Asset ID: t1:000bd5b990190de1dd5724294f6cef38:ffff01ee
  - Height: 10.0 meters
  - Confidence: High

Asset ID: t1:00008b06bd849d50b01f9cfa65361b0d:ffff01ee
  - Height: 8.0 meters
  - Confidence: High

Asset ID: t1:00078fcd5eab571c585eaa3b6e13b6d9:ffff01ee
  - Height: 12.3 meters
  - Confidence: High

Asset ID: t1:000a16e094346b97dbbcaae27575671e:ffff01ee
  - Height: 8.3 meters
  - Confidence: High

Asset ID: t1:0006b4ab2f44e8b37029ebd21d7fcda2:ffff01ee
  - Height: 11.2 meters
  - Confidence: High

Asset ID: t1:000d5e714eca02cb526f08a01e985156:ffff01ee
  - Height: 10.0 meters
  - Confidence: Medium

Asset ID: t1:000abf641ab75e3c10a87d7a6e

Unnamed: 0,asset_id,num_observations,estimated_height_meters,confidence_score,reasoning_notes
0,t1:000a9efecdc1a18d6cb19d45715c5410:ffff01ee,2,9.5,Medium,{'flow_id_followed': 'multiple_distinct_refere...
1,t1:000e68b2cce6da8c9fc30d2471192b7a:ffff01ee,2,10.7,Medium,{'flow_id_followed': 'multiple_distinct_refere...
2,t1:000c345da88196b1502312ade92f5ca8:ffff01ee,8,10.0,High,{'flow_id_followed': 'single_reference_single_...
3,t1:000bd5b990190de1dd5724294f6cef38:ffff01ee,4,10.0,High,{'flow_id_followed': 'multiple_distinct_refere...
4,t1:00008b06bd849d50b01f9cfa65361b0d:ffff01ee,2,8.0,High,{'flow_id_followed': 'multi_image_correlated_r...
5,t1:00078fcd5eab571c585eaa3b6e13b6d9:ffff01ee,9,12.3,High,{'flow_id_followed': 'multi_image_correlated_r...
6,t1:000a16e094346b97dbbcaae27575671e:ffff01ee,2,8.3,High,{'flow_id_followed': 'multiple_distinct_refere...
7,t1:0006b4ab2f44e8b37029ebd21d7fcda2:ffff01ee,5,11.2,High,{'flow_id_followed': 'multi_image_correlated_r...
8,t1:000d5e714eca02cb526f08a01e985156:ffff01ee,3,10.0,Medium,{'flow_id_followed': 'multi_image_correlated_r...
9,t1:000abf641ab75e3c10a87d7a6ee057e0:ffff01ee,2,6.5,Medium,{'flow_id_followed': 'multiple_distinct_refere...
