In [2]:
from streetlevel import streetview
from config import Config
from src.inference.segment import detect_trees
from src.inference.depth import estimate_depth
from src.utils.unwrap import divide_panorama
from src.utils.masks import add_masks, remove_duplicates, make_image
from src.utils.transformation import get_point
from src.utils.geodesic import get_coordinates, localize_pixel_with_depth, get_depth_at_pixel
from cli import build_config
from models.DepthAnything.depth_anything_v2.dpt import DepthAnythingV2
from models.CalibrateDepth.model import DepthCalibrator
from config import Config
import torch
from ultralytics import YOLO
import pandas as pd
import folium
import os
import cv2
import numpy as np
import nest_asyncio
from IPython.display import display
import matplotlib.pyplot as plt
from concurrent.futures import ThreadPoolExecutor
IO_EXECUTOR = ThreadPoolExecutor(max_workers=4)


xFormers not available
xFormers not available


In [2]:
# Load the CSV file
streetviews_df = pd.read_csv('streetviews/chandigarh_28_29.csv')
groundtruth_df = pd.read_csv("eval/28_29_groundtruth.csv")

# Get the center of the map (mean of lat/lng)
center_lat = streetviews_df['lat'].mean()
center_lng = streetviews_df['lng'].mean()

# Create a folium map
m = folium.Map(location=[center_lat, center_lng], zoom_start=13, max_zoom=25)

# Add markers for each point
for idx, row in streetviews_df.iterrows():
    folium.CircleMarker(
        [row['lat'], row['lng']],
        radius=2,
        color='blue',
        fill=True,
        fill_color='blue',
        popup=str(row['pano_id'])
    ).add_to(m)

# Display the map
m


In [3]:
def load_models(config: Config):
    depth_model = DepthAnythingV2(**{**config.MODEL_CONFIGS["vitl"], "max_depth": 80})
    depth_model.load_state_dict(torch.load(config.DEPTH_MODEL_PATH, map_location="cpu"))
    depth_model.to(config.DEVICE).eval()
    tree_model = YOLO(config.TREE_MODEL_PATH)
    depth_calibrator = DepthCalibrator(config.DEPTH_CALIBRATION_MODEL_PATH)
    
    return depth_model, tree_model, depth_calibrator

class MockArgs:
    def __init__(self):
        self.input_csv = 'streetviews/chandigarh_28_29.csv'
        self.output_csv = 'chandigarh_28_29_processed.csv'
        self.fov = 90
        self.width = 1024
        self.height = 720

args = MockArgs()
print(f"Using mock arguments for notebook environment:")
print(f"- Input CSV: {args.input_csv}")
print(f"- Output CSV: {args.output_csv}")
print(f"- FOV: {args.fov}")
print(f"- Width: {args.width}")
print(f"- Height: {args.height}")

config = build_config(args)

Using mock arguments for notebook environment:
- Input CSV: streetviews/chandigarh_28_29.csv
- Output CSV: chandigarh_28_29_processed.csv
- FOV: 90
- Width: 1024
- Height: 720


In [4]:
depth_model, tree_model, depth_calibrator = load_models(config)

https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


In [5]:
def rescale_point(x, y, orig_w=13000, orig_h=6000, new_w=512, new_h=256):
    """
    Rescale a (x, y) point from original image size to new image size.
    x = horizontal coordinate
    y = vertical coordinate
    """
    new_x = x * (new_w / orig_w)
    new_y = y * (new_h / orig_h)
    return int(new_x), int(new_y)


In [12]:
m = folium.Map(location=["30.71131375446979", "76.80056971925278"], zoom_start=15, max_zoom=25)
for index, row in streetviews_df.iterrows():
    folium.CircleMarker(
        location=[row["lat"], row["lng"]],
        popup=f'Pano {row["pano_id"]}',
        radius=1,
        color='purple',
        fill=False
    ).add_to(m)

from tqdm import tqdm
skipped_trees = 0
skipped_panos = 0
trees = []
for index, row in tqdm(streetviews_df.iterrows(), total=len(streetviews_df)):
    nest_asyncio.apply()
    pano_id = row["pano_id"]
    pano = streetview.find_panorama_by_id(pano_id, download_depth=True)
    if pano is None:
        continue
    image = streetview.get_panorama(pano)
    image = np.array(image)
    views = divide_panorama(image, config.HEIGHT, config.WIDTH, config.FOV)

    groundtruth_row = groundtruth_df[groundtruth_df["pano_id"] == pano_id]

    for i, (view, theta) in enumerate(views):
        tree_data = detect_trees(view, tree_model, config.DEVICE)
        if tree_data is None:
            continue
        
        for j, tree in enumerate(tree_data):
            masks = tree.masks
            boxes = tree.boxes
            if masks is not None:
                for k, mask in enumerate(masks):
                    image_path = f"{pano.id}_tree{j}_box{k}.jpg"
                    conf = boxes[k].conf.item()
                    if conf < 0.5:
                        continue
                    orig_point, pers_point = get_point(mask, theta, pano, config.HEIGHT, config.WIDTH, config.FOV)
                    depth_map = pano.depth.data  # 2D (H, W)
                    img_h, img_w = image.shape[:2]
                    dep_h, dep_w = depth_map.shape[:2]

                    if len(groundtruth_row) > 0:
                        gt_x = float(groundtruth_row["image_x"].values[0])
                        gt_y = float(groundtruth_row["image_y"].values[0])
                        gt_new_x = gt_x * (dep_w / img_w)
                        gt_new_y = gt_y * (dep_h / img_h)
                        pano_distance_ = depth_map[int(gt_new_y), int(gt_new_x)]
                    else:
                        pano_distance_ = -1
                    
                    # FIXED: Pass orig_point directly (it's already (x, y))
                    distance_pano = get_depth_at_pixel(pano.depth, orig_point[0], orig_point[1], image.shape[1], image.shape[0], flipped=True, method="bilinear")
                    if distance_pano is None:
                        continue
                    else:
                        lat_pano, lon_pano = localize_pixel_with_depth(pano, orig_point[0], orig_point[1], image.shape[1], image.shape[0], distance_pano)
                    
                    tree = {
                            "image_path": image_path,
                            "pano_id": pano.id,
                            "stview_lat": pano.lat,
                            "stview_lng": pano.lon,
                            "tree_lat": lat_pano,
                            "tree_lng": lon_pano,
                            "image_x": float(orig_point[0]),  # Store x coordinate
                            "image_y": float(orig_point[1]),  # Store y coordinate
                            "theta": theta,
                            "mask": mask,
                            "conf": conf,
                            "distance_pano": distance_pano,
                            "distance_pano_": pano_distance_,
                        }
                    trees.append(tree)
                    if distance_pano >=0 and distance_pano <= 20:
                        folium.CircleMarker(
                            location=[lat_pano, lon_pano],
                            popup=f'Conf: {conf:.2f}',
                            radius=2,
                            color='blue',
                            fill=True,
                            fill_color='blue',
                        ).add_to(m)
                    else:
                        skipped_trees += 1
                    if not pano_distance_ == -1:
                        folium.CircleMarker(
                            location=[groundtruth_row["tree_lat"].values[0], groundtruth_row["tree_lng"].values[0]],
                            popup=f'Ground Truth',
                            radius=2,
                            color='yellow',
                            fill=True,
                            fill_color='yellow',
                        ).add_to(m)
                    else:
                        skipped_panos += 1

print(f"Skipped {skipped_trees} trees and {skipped_panos} panoramas")

100%|██████████| 238/238 [17:33<00:00,  4.42s/it]

Skipped 413 trees and 1717 panoramas





In [13]:
m

In [3]:
# Load the CSV file
streetviews_df = pd.read_csv('tree_data.csv')
groundtruth_df = pd.read_csv("eval/28_29_groundtruth.csv")

# Get the center of the map (mean of lat/lng)
center_lat = streetviews_df['tree_lat'].mean()
center_lng = streetviews_df['tree_lng'].mean()

print("Ground Truth:", len(groundtruth_df))
print("Predictions:", len(streetviews_df))


Ground Truth: 113
Predictions: 807


In [4]:
def drop_duplicate_trees(streetviews_df, threshold_m=2.0,
                         lat_col="tree_lat", lon_col="tree_lng",
                         conf_col="conf"):
    """
    Remove duplicate trees by clustering points within threshold_m meters and
    keeping the row with the highest confidence in each cluster.

    Parameters
    ----------
    streetviews_df : pandas.DataFrame
        Must contain columns lat_col, lon_col, conf_col.
    threshold_m : float
        Distance threshold in meters for duplicates (default 2.0).
    lat_col, lon_col : str
        Column names for latitude / longitude (WGS84 degrees).
    conf_col : str
        Column name for confidence; higher is better.

    Returns
    -------
    pandas.DataFrame
        Pruned DF (same columns), duplicates removed.
    """
    import numpy as np
    import pandas as pd
    import geopandas as gpd
    from shapely.geometry import Point

    # 0) guard & copy
    df = streetviews_df.copy()
    # Drop rows without valid coordinates
    m = np.isfinite(df[lat_col]) & np.isfinite(df[lon_col])
    df = df.loc[m].copy()
    if df.empty:
        return df

    # 1) Make GeoDataFrame in WGS84
    gdf = gpd.GeoDataFrame(
        df,
        geometry=gpd.points_from_xy(df[lon_col], df[lat_col]),
        crs="EPSG:4326",
    )

    # 2) Project to a local metric CRS (Azimuthal Equidistant centered on dataset)
    mean_lat = float(gdf[lat_col].mean())
    mean_lon = float(gdf[lon_col].mean())
    metric_crs = f"+proj=aeqd +lat_0={mean_lat} +lon_0={mean_lon} +ellps=WGS84 +units=m +no_defs"
    gdf_m = gdf.to_crs(metric_crs)

    # 3) Build spatial index and union-find over neighbors within threshold
    sindex = gdf_m.sindex
    geom = gdf_m.geometry.values
    n = len(gdf_m)

    parent = np.arange(n, dtype=int)
    def find(x):
        # path compression
        while parent[x] != x:
            parent[x] = parent[parent[x]]
            x = parent[x]
        return x
    def union(a, b):
        ra, rb = find(a), find(b)
        if ra != rb:
            parent[rb] = ra

    # Use bounding-box query + precise distance check
    for i in range(n):
        gi = geom[i]
        if gi is None:
            continue
        # quick bbox query using a buffer (meters)
        for j in sindex.intersection(gi.buffer(threshold_m).bounds):
            if j <= i:
                continue
            gj = geom[j]
            if gi.distance(gj) <= threshold_m:
                union(i, j)

    # 4) Component id per row
    comp_id = np.fromiter((find(i) for i in range(n)), dtype=int, count=n)
    gdf_m["__component"] = comp_id

    # 5) Keep the highest-confidence in each connected component
    #    (stable tie-breaker: original order / lowest index)
    winners = (
        gdf_m
        .sort_values([ "__component", conf_col ], ascending=[True, False])
        .groupby("__component", sort=False, as_index=False)
        .head(1)
    )

    # 6) Return pruned *pandas* DF with original columns (drop geometry helper)
    pruned = gdf.loc[winners.index].drop(columns=["geometry"])
    # (Optional) sort back to original row order
    pruned = pruned.sort_index()

    return pruned


In [6]:
pruned_df = drop_duplicate_trees(streetviews_df, threshold_m=3.0)
print(len(pruned_df))

379


In [7]:
# Create a folium map
m = folium.Map(location=[center_lat, center_lng], zoom_start=15, max_zoom=25)

for idx, row in pruned_df.iterrows():
    folium.CircleMarker(
        [row['tree_lat_model'], row['tree_lng_model']],
        radius=0.2,
        color='green',
        fill=True,
        fill_color='green',
        popup=str(row['pano_id'])
    ).add_to(m)

for idx, row in groundtruth_df.iterrows():
    folium.CircleMarker(
        [row['tree_lat'], row['tree_lng']],
        radius=0.2,
        color='red',
        fill=True,
        fill_color='red',
        popup=str(row['pano_id'])
    ).add_to(m)

# Display the map
m

In [8]:
pruned_df = pruned_df.drop(columns=["mask"])
pruned_df.to_csv("pruned_df.csv", index=None)