In [1]:
import os
import argparse
import time

import pystac_client
import planetary_computer
import stackstac

import geopandas as gpd
import numpy as np

import rasterio
from rasterio.merge import merge
from rasterio import mask
from rasterio import plot
import shapely
import ast

In [18]:

def read_raster(item_collection):
    #extract raster data
    raster_paths = [i.assets["data"].href for i in item_collection]
    rasters = [rasterio.open(raster_path) for raster_path in raster_paths]
    assert [raster.nodata == -9999 for raster in rasters]
    return rasters


def ensure_raster_tank_intersect(rasters, tank_geometry):
    raster_geoms = [shapely.box(raster.bounds.left,raster.bounds.bottom, raster.bounds.right, raster.bounds.top) for raster in rasters]
    raster_intersects = [tank_geometry.intersects(raster_geom) for raster_geom in raster_geoms]
    return np.array(rasters)[np.array(raster_intersects)]


def calculate_height(rasters, tank_geometry):
    # tank_geometry : geomtry of tank in utm
    #calculate the height for each raster
    h = []
    w = []
    for raster in rasters:
        clipped_image, clipped_transform = rasterio.mask.mask(raster, [tank_geometry], crop=True)
        #clipped_image.shape
        arr = np.array(clipped_image[clipped_image != -9999])
        if len(arr) > 0:
            h.append(np.quantile(arr.flatten(), 0.9))
            w.append(arr.size)
    [raster.close() for raster in rasters] #close rasters
    # average height
    if len(h) > 0:
        return np.average(h, weights=w)
    else:
        return None

def height_estimation_by_tank(row, catalog, args):
    #create utm geometry
    tank_geometry = shapely.geometry.box(*row["utm_coords"], ccw=True) #utm
    #search catalog using lat lon geometry
    item_collection = catalog.search(collections=[args.collection], 
                            intersects=row.geometry.buffer(0.001)).item_collection()

    if len(item_collection) > 0:
        rasters = read_raster(item_collection)
        # ensure tank the data intersects
        rasters = ensure_raster_tank_intersect(rasters, tank_geometry)
        # calculate height
        height = calculate_height(rasters, tank_geometry)
        return height
    else:
        return None
    

def height_estimation_handle_errors(row, catalog, attempt, args):
    
    try:
        height = height_estimation_by_tank(row, catalog, args)
        return height
    except Exception as err:
        print(type(err))    # the exception type
        time.sleep(args.backoff_factor * (2 ** attempt))
        
        
def height_estimation_retry(row, catalog, args): 
    print(args)
    for attempt in range(args.max_retries):
        try:
            height = height_estimation_handle_errors(row, catalog, attempt, args)
            return height
            print(attempt)
            break
        except:
            if attempt == args.max_retries - 1:
                print(f"Failed after {args.max_retries} attempts!")
            else: 
                wait_time = 2 ** attempt # Exponential backoff
                print(f"Failed, retrying in {wait_time} seconds...")
                time.sleep(wait_time)

                
def height_estimation(detected_tanks, args):
    start = time.time()
    height_list = [None] * len(detected_tanks) #height list
    catalog = pystac_client.Client.open("https://planetarycomputer.microsoft.com/api/stac/v1",
                                        modifier = planetary_computer.sign_inplace,)
    
    for i, (tank_id, row) in enumerate(detected_tanks.iterrows()):
        #create utm geometry
        height_list[i] = height_estimation_retry(row, catalog, args)
   
    print(time.time() - start)
    return height_list

def get_args_parse():
    parser = argparse.ArgumentParser("Height Estimation")
    parser.add_argument("--prediction_dir", type=str, help="path to the directory storing predictions")
    parser.add_argument("--collection", type=str, help="the name of the planetary computer collection")
    parser.add_argument("--chunk_id",  type=int)
    parser.add_argument("--backoff_factor", default=10, type=float)
    parser.add_argument("--max_retries", default=10, type=int)
    args = parser.parse_args()
    return args

In [19]:
import sys
sys.argv = ['my_notebook']
args = get_args_parse()
args.chunk_id = 0
args.imgsz=640
args.model_path="/work/csr33/object_detection/runs/detect/train_w_tuned_hyperparameters3/weights/best.pt"
args.prediction_dir="/hpc/group/borsuklab/csr33/object_detection/predictions"
args.tile_dir="/hpc/group/borsuklab/csr33/object_detection/naip_tiles"
args.collection="3dep-lidar-hag"

In [32]:
height_estimation_dir="/hpc/group/borsuklab/csr33/object_detection/height_estimation"

In [35]:
height_paths = [os.path.join(height_estimation_dir, f"merged_predictions_height_{i}.parquet") for i in range(173)]

In [39]:
import pandas as pd

In [50]:
# Open each Parquet file as a PyArrow table
tables = [gpd.read_parquet(f) for f in height_paths]
detected_tanks_gdf = gpd.GeoDataFrame(pd.concat(tables, ignore_index=True), crs=tables[0].crs)
# Concatenate the PyArrow tables into one table
detected_tanks_gdf = detected_tanks_gdf.sort_values("confidence", ascending=False)

In [None]:
More targeted assessments at known industrial sites
where height estimation is less than 1, we ignore the height,

In [51]:
has_height_gdf = detected_tanks_gdf[~detected_tanks_gdf['height'].isnull()]
has_height_gdf = has_height_gdf[has_height_gdf["height"] > 0.0]#.sort_values("height")
has_height_gdf["capacity"] = np.pi * (has_height_gdf["diameter"]/2)**2 * has_height_gdf["height"]
has_height_gdf.loc[has_height_gdf['class_name'] == "spherical_tank", 'capacity'] = None 

In [83]:
has_height_gdf

Unnamed: 0,confidence,class_name,bbox_pixel_coords,tile_names,utm_coords,diameter,utm_proj,geometry,height,capacity
15715,"[0.939806342124939, 0.7305512428283691]",['closed_roof_tank'],"[4350, 3556, 4469, 3650]",m_2909006_sw_15_030_20211130,"[754485.15, 3314029.0500000003, 754520.85, 331...",28.2,EPSG:26915,"POLYGON ((-90.36336 29.93070, -90.36336 29.930...",8.280000,5171.522527
20986,"[0.9394638538360596, 0.8525862693786621]",['closed_roof_tank'],"[7655, 9606, 7773, 9721]",m_2909515_se_15_060_20201130,"[292857.3, 3294452.1, 292928.1, 3294383.1]",69.0,EPSG:26915,"POLYGON ((-95.14165 29.76304, -95.14165 29.762...",27.370001,102344.114691
3400,"[0.9393055438995361, 0.8024093508720398]",['closed_roof_tank'],"[4834, 1903, 4914, 1976]",m_2909309_nw_15_060_20201102,"[406070.7, 3304413.9000000004, 406118.7, 33043...",43.8,EPSG:26915,"POLYGON ((-93.97207 29.86671, -93.97207 29.866...",15.969999,24062.624830
23226,"[0.9361604452133179, 0.5756282806396484]",['closed_roof_tank'],"[1360, 8251, 1443, 8329]",m_2909523_nw_15_060_20201130,"[282900.3, 3288455.1, 282950.1, 3288408.300000...",46.8,EPSG:26915,"POLYGON ((-95.24359 29.70725, -95.24359 29.706...",15.836000,27241.253162
4848,"[0.9354996681213379, 0.9061532616615295]",['closed_roof_tank'],"[6727, 4444, 6828, 4542]",m_2909512_sw_15_060_20201030,"[249982.5, 3298431.3000000003, 250043.09999999...",58.8,EPSG:26915,"POLYGON ((-95.58577 29.79099, -95.58577 29.790...",7.620000,20691.858428
...,...,...,...,...,...,...,...,...,...,...
6709,0.5002272725105286,closed_roof_tank,"[2430, 1961, 2445, 1976]",m_4107205_nw_18_060_20211001,"[708209.7000000001, 4652076.9, 708218.70000000...",9.0,EPSG:26918,"POLYGON ((-72.48617 41.99319, -72.48617 41.993...",3.332000,211.972679
8929,0.500196635723114,closed_roof_tank,"[1538, 7964, 1547, 7972]",m_3807531_nw_18_060_20210624,"[478943.1, 4270617.3, 478948.5, 4270612.5]",4.8,EPSG:26918,"POLYGON ((-75.24170 38.58378, -75.24170 38.583...",6.800000,123.049904
29911,0.5001552104949951,external_floating_roof_tank,"[4178, 3411, 4195, 3427]",m_4406928_sw_19_060_20210904,"[452521.5, 4932849.9, 452531.7, 4932840.3]",9.6,EPSG:26919,"POLYGON ((-69.59758 44.54742, -69.59758 44.547...",1.464000,105.967678
11245,0.5001063346862793,closed_roof_tank,"[9850, 7522, 9859, 7530]",m_2909442_sw_15_060_20201116,"[323508.3, 3239752.5, 323513.7, 3239747.7]",4.8,EPSG:26915,"POLYGON ((-94.81664 29.27437, -94.81664 29.274...",6.798800,123.028189


In [76]:
class_name_merge = ['class1', 'class2'] 
conf = [0.7, 0.9]

max_conf_idx = np.argmax(conf)
max_conf_class = class_name_merge[max_conf_idx]

print(max_conf_class)

class2


In [77]:
np.mean(conf)

0.8

In [75]:
has_height_gdf.class_name.unique()

array(["['closed_roof_tank']", "['external_floating_roof_tank']",
       "['closed_roof_tank' 'external_floating_roof_tank']",
       "['spherical_tank']", "['closed_roof_tank' 'spherical_tank']",
       'closed_roof_tank', 'external_floating_roof_tank',
       'spherical_tank'], dtype=object)

In [30]:
detected_tanks = gpd.read_parquet(os.path.join("/hpc/group/borsuklab/csr33/object_detection/height_estimation", f"merged_predictions_height_{0}.parquet"))

In [20]:
print(args)
detected_tanks = gpd.read_parquet(os.path.join(args.prediction_dir, f"merged_predictions_{args.chunk_id}.parquet"))
#reformat     
detected_tanks['utm_coords'] = detected_tanks['utm_coords'].apply(lambda x: ast.literal_eval(x))

Namespace(prediction_dir='/hpc/group/borsuklab/csr33/object_detection/predictions', collection='3dep-lidar-hag', chunk_id=0, backoff_factor=10, max_retries=10, imgsz=640, model_path='/work/csr33/object_detection/runs/detect/train_w_tuned_hyperparameters3/weights/best.pt', tile_dir='/hpc/group/borsuklab/csr33/object_detection/naip_tiles')


In [13]:
detected_tanks["height"] = height_estimation(detected_tanks, args)

Namespace(prediction_dir='/hpc/group/borsuklab/csr33/object_detection/predictions', collection='3dep-lidar-hag', chunk_id=0, backoff_factor=10, max_retries=10, imgsz=640, model_path='/work/csr33/object_detection/runs/detect/train_w_tuned_hyperparameters3/weights/best.pt', tile_dir='/hpc/group/borsuklab/csr33/object_detection/naip_tiles')
<class 'NameError'> handle
retry
Failed, retrying in 1 seconds...
<class 'NameError'> handle
retry
Failed, retrying in 2 seconds...
<class 'NameError'> handle
Namespace(prediction_dir='/hpc/group/borsuklab/csr33/object_detection/predictions', collection='3dep-lidar-hag', chunk_id=0, backoff_factor=10, max_retries=10, imgsz=640, model_path='/work/csr33/object_detection/runs/detect/train_w_tuned_hyperparameters3/weights/best.pt', tile_dir='/hpc/group/borsuklab/csr33/object_detection/naip_tiles')
<class 'NameError'> handle
retry
Failed, retrying in 1 seconds...


KeyboardInterrupt: 

In [21]:
for i, row in detected_tanks.iterrows():
    break

In [24]:
    catalog = pystac_client.Client.open("https://planetarycomputer.microsoft.com/api/stac/v1",
                                        modifier = planetary_computer.sign_inplace,)

In [26]:
height_estimation_by_tank(row, catalog, args)

In [None]:
# Get the arguments
detected_tanks['utm_coords'] = detected_tanks['utm_coords'].apply(lambda x: str(x))

detected_tanks.to_parquet(os.path.join(args.prediction_dir, f"merged_predictions_height_{args.chunk_id}.parquet"))