In [1]:
import os
import argparse
import math
import time
import ast
from copy import copy
from glob import glob

import pandas as pd
import numpy as np

import pyproj
from pyproj import Proj
from shapely.ops import transform
from shapely.geometry import Point
import rioxarray

import ultralytics
from ultralytics import YOLO

In [2]:
def chunk_df(img_paths, num_chunks=10):
    # Calculate the number of rows per chunk
    rows_per_chunk = len(img_paths) // num_chunks
    df_chunks = [np.array(img_paths[i : i + rows_per_chunk]) for i in range(0, len(img_paths), rows_per_chunk)]
    return df_chunks


def tile_dimensions_and_utm_coords(tile_path): #used
    """ Obtain tile band, height and width and utm coordinates
    Args: tile_path(str): the path of the tile 
    Returns: 
    utmx(np array): the x utm coordinates corresponding with the tile coordinate convention (origin in upper left hand corner)
    utmy(np array): the y utm coordinates corresponding with the tile coordinate convention (origin in upper left hand corner)
    tile_band(int): the number of bands
    tile_height(int): the height of the tile (in pixels)
    tile_width(int): the width of the tile (in pixels)
    """
    ## Get tile locations
    da = rioxarray.open_rasterio(tile_path) ## Read the data
    # Compute the lon/lat coordinates with rasterio.warp.transform
    # lons, lats = np.meshgrid(da['x'], da['y'])
    tile_band, tile_height, tile_width = da.shape[0], da.shape[1], da.shape[2]
    utmx = np.array(da['x'])
    utmy = np.array(da['y'])
    crs =  str(rioxarray.open_rasterio(tile_path).rio.crs)
    return(utmx, utmy, crs, tile_band, tile_height, tile_width)
    del da


def process_results(results, tile_height, tile_width, item_dim):
    #xyxys = []
    bbox_pixel_coords_list = [] #xyxy coords with repsect to the tile
    conf_list = [] #probability
    class_name_list = [] #class name
    image_names_list = []
    lat_lons = []
    for result in results:
        boxes = result.boxes
        image_name = os.path.splitext(os.path.basename(result.path))[0]
        if len(boxes) > 0: 
            #get valeus for all bounding boxes
            #class name
            class_name_list.extend([result.names[class_number] for class_number in boxes.cls.cpu().detach().tolist()])
            
            conf_list.extend(boxes.conf.cpu().detach().tolist())

            xyxy = boxes.xyxy.cpu().detach().numpy() - 1 #read xmin,ymin,xmax,ymax coordinates to memory as a numpy array
            xyxy = np.round(xyxy).astype(np.int32).tolist()  # round so that it can be used for utm to lonlat conversion, check if zero indexed
            
            image_names_list.extend([image_name]*len(xyxy)) # The index is a six-digit number like '000023'.
            #calculate the tile level pixel coordinates
            bbox_pixel_coords_list.extend([calculate_tile_level_bbox(image_name, box, item_dim,
                                                                     tile_width, tile_height) for box in xyxy])
        del boxes
    return pd.DataFrame({"confidence":conf_list, "class_name": class_name_list,
                       "image_names": image_names_list, "bbox_pixel_coords": bbox_pixel_coords_list})#,  dtype=dtypes


def predict_process(img_paths, tile_height, tile_width, model, args):
    # obtain predictions over the dataframe
    results_df = pd.DataFrame({})
    num_chunks = len(img_paths)//50
    for df_chunk in chunk_df(img_paths, num_chunks=num_chunks):
        results = model.predict(df_chunk.tolist(), save=False, imgsz=args.imgsz)#, conf=0.5)
        #process_results(results, utmx, utmy, utm_proj, tile_height, tile_width, item_dim=args.imgsz)
        results_df = pd.concat([results_df, process_results(results, tile_height, tile_width, item_dim=args.imgsz)])
        del results, df_chunk
    return results_df


def calculate_tile_level_bbox(image_name, xyxy, item_dim, tile_width, tile_height):
    obj_xmin, obj_ymin, obj_xmax, obj_ymax = xyxy
    #identify rows and columns
    y, x = image_name.split("_")[-2:] #name of tif with the extension removed; y=row;x=col
    # Each chip xml goes from 1 - item_dim, specify the "0", or the end point of the last xml
    image_minx = int(x)*item_dim
    image_miny = int(y)*item_dim

    #add the bounding boxes
    obj_xmin = image_minx + obj_xmin
    obj_ymin = image_miny + obj_ymin
    obj_xmax = image_minx + obj_xmax
    obj_ymax = image_miny + obj_ymax
    
    # correct bboxes that extend past the bounds of the tile width/height
    if int(obj_xmin) >= tile_width:
        obj_xmin = tile_width - 1
    if int(obj_xmax) >= tile_width:
        obj_xmax = tile_width - 1
    if int(obj_ymin) >= tile_height:
        obj_ymin = tile_height - 1
    if int(obj_ymax) >= tile_height:
        obj_ymax = tile_height - 1
    
    return [obj_xmin, obj_ymin, obj_xmax, obj_ymax]


def transform_point_utm_to_wgs84(utm_proj, utm_xcoord, utm_ycoord): #used
    """ Convert a utm pair into a lat lon pair 
    Args: 
    utm_proj(str): the UTM string as the in term of EPSG code
    utmx(int): the x utm coordinate of a point
    utmy(int): the y utm coordinates of a point
    Returns: 
    (wgs84_pt.x, wgs84_pt.y): the 'EPSG:4326' x and y coordinates 
    """
    #https://gis.stackexchange.com/questions/127427/transforming-shapely-polygon-and-multipolygon-objects
    #get utm projection
    utm = pyproj.CRS(utm_proj)
    #get wgs84 proj
    wgs84 = pyproj.CRS('EPSG:4326')
    #specify utm point
    utm_pt = Point(utm_xcoord, utm_ycoord)
    #transform utm into wgs84 point
    project = pyproj.Transformer.from_crs(utm, wgs84, always_xy=True).transform
    wgs84_pt = transform(project, utm_pt)
    return wgs84_pt.x, wgs84_pt.y
    
    
def get_crs_coords(pixel_coords, utmx, utmy, utm_proj):
    minx, miny, maxx, maxy = pixel_coords
    #determine the lat/lon
    nw_lon, nw_lat = transform_point_utm_to_wgs84(utm_proj, utmx[minx], utmy[miny])
    se_lon, se_lat = transform_point_utm_to_wgs84(utm_proj, utmx[maxx], utmy[maxy]) 
    return pd.Series({'utm_coords': [utmx[minx], utmy[miny], utmx[maxx], utmy[maxy]],
                      'latlon_coords': [nw_lon, nw_lat, se_lon, se_lat]})

        
def merge_boxes(bbox1, bbox2): #used
    """ 
    Generate a bounding box that covers two bounding boxes
    Called in merge_algo
    Arg:
    bbox1(list): a list of the (xmin, ymin, xmax, ymax) coordinates for box 1 
    bbox2(list): a list of the (xmin, ymin, xmax, ymax) coordinates for box 2
    Returns:
    merged_bbox(list): a list of the (xmin, ymin, xmax, ymax) coordinates for the merged bbox

    """
    return [min(bbox1[0], bbox2[0]), 
            min(bbox1[1], bbox2[1]),
            max(bbox1[2], bbox2[2]),
            max(bbox1[3], bbox2[3])]


def calc_sim(bbox1, bbox2, dist_limit): #used
    """Determine the similarity of distances between bboxes to determine whether bboxes should be merged
    Computer a Matrix similarity of distances of the text and object
    Called in merge_algo
    Arg:
    bbox1(list): a list of the (xmin, ymin, xmax, ymax) coordinates for box 1 
    bbox2(list): a list of the (xmin, ymin, xmax, ymax) coordinates for box 2
    dist_list(int): the maximum threshold (pixel distance) to merge bounding boxes
    Returns:
    (bool): to indicate whether the bboxes should be merged 
    """

    # text: ymin, xmin, ymax, xmax
    # obj: ymin, xmin, ymax, xmax
    bbox1_xmin, bbox1_ymin, bbox1_xmax, bbox1_ymax = bbox1
    bbox2_xmin, bbox2_ymin, bbox2_xmax, bbox2_ymax = bbox2
    x_dist = min(abs(bbox2_xmin-bbox1_xmax), abs(bbox2_xmax-bbox1_xmin))
    y_dist = min(abs(bbox2_ymin-bbox1_ymax), abs(bbox2_ymax-bbox1_ymin))
        
    #define distance if one object is inside the other
    if (bbox2_xmin <= bbox1_xmin) and (bbox2_ymin <= bbox1_ymin) and (bbox2_xmax >= bbox1_xmax) and (bbox2_ymax >= bbox1_ymax):
        return True
    elif (bbox1_xmin <= bbox2_xmin) and (bbox1_ymin <= bbox2_ymin) and (bbox1_xmax >= bbox2_xmax) and (bbox1_ymax >= bbox2_ymax):
        return True
    #determine if both bboxes are close to each other in 1d, and equal or smaller length in the other
    elif (x_dist <= dist_limit) and (bbox1_ymin <= bbox2_ymin) and (bbox1_ymax >= bbox2_ymax): #bb1 bigger
        return True
    elif (x_dist <= dist_limit) and (bbox2_ymin <= bbox1_ymin) and (bbox2_ymax >= bbox1_ymax): #bb2 bigger
        return True
    elif (y_dist <= dist_limit) and (bbox1_xmin <= bbox2_xmin) and (bbox1_xmax >= bbox2_xmax): #bb1 bigger
        return True
    elif (y_dist <= dist_limit) and (bbox2_xmin <= bbox1_xmin) and (bbox2_xmax >= bbox1_xmax): #bb2 bigger
        return True
    else: 
        return False

    
def merge_predicted_bboxes(results_df, dist_limit = 5):
    class_names = results_df.class_name.to_list()
    bbox_pixel_coords = results_df.bbox_pixel_coords.to_list()
    confidences = results_df.confidence.to_list()
   
    merge_bools = [False] * len(class_names)
    for i, (conf1, class_name1, bbox1) in enumerate(zip(confidences, class_names, bbox_pixel_coords)):
        for j, (conf2, class_name2, bbox2) in enumerate(zip(confidences, class_names, bbox_pixel_coords)):
            if j <= i: #only consider the remaining bboxes
                continue
            # Create a new box if a distances is less than distance limit defined 
            merge_bool = calc_sim(bbox1, bbox2, dist_limit) 
            if merge_bool == True:
                # Create a new box  
                new_box = merge_boxes(bbox1, bbox2)   
                bbox_pixel_coords[i] = new_box
                #delete previous text boxes
                del bbox_pixel_coords[j]
                class_name_merge = np.unique([class_name1, class_name2])
                conf = [conf1, conf2]

                class_names[i] = class_name_merge
                confidences[i] = conf

                conf
                #delete previous text 
                del class_names[j],  confidences[i]
    return pd.DataFrame({"confidence":confidences, "class_name": class_names,
                         "bbox_pixel_coords": bbox_pixel_coords})#,  dtype=dtypes


def write(predictions, predictions_file_path):
    # remove file if it exists 
    if os.path.exists(predictions_file_path):
        predictions.to_parquet(predictions_file_path, engine='fastparquet', append=True)
    else:
        predictions.to_parquet(predictions_file_path, engine='fastparquet')
        

def calculate_diameter(bbox, resolution = 0.6): #used
    """ Calculate the diameter of a given bounding bbox (in Pascal Voc Format) for imagery of a given resolution
    Arg:
    bbox(list): a list of the (xmin, ymin, xmax, ymax) coordinates for box. Utm coordinates are provided as [nw_x_utm, se_y_utm, se_x_utm, nw_y_utm] to conform with Pascal Voc Format.
    resolution(float): the (gsd) resolution of the imagery
    Returns:
    (diameter): the diameter of the bbox of interest
    """
    obj_xmin, obj_ymin, obj_xmax, obj_ymax = bbox
    obj_width = obj_xmax - obj_xmin
    obj_height = obj_ymin - obj_ymax 
    diameter = min(obj_width, obj_height) * resolution #meter
    return diameter

        
def get_args_parse():
    parser = argparse.ArgumentParser("Predict on images")    
    parser.add_argument("--chunk_id",  type=int)
    parser.add_argument("--tile_dir", default="/work/csr33/images_for_predictions/naip_tiles", type=str)
    parser.add_argument("--tilename_chunks_path", default='/hpc/home/csr33/ast_object_detection/tilename_chunks.npz', type=str)
    parser.add_argument("--model_path", default="/work/csr33/object_detection/runs/detect/baseline_train/weights/best.pt", type=str)
    parser.add_argument("--prediction_dir", default="/work/csr33/images_for_predictions/predictions", type=str)
    parser.add_argument("--imgsz", default=640, type=int)
    parser.add_argument('--img_dir', type=str, default="/work/csr33/images_for_predictions/naip_imgs")
    parser.add_argument('--classification_threshold', type=float, default=0.5)

    args = parser.parse_args()
    return args

In [3]:
import sys
sys.argv = ['my_notebook']
args = get_args_parse()
args.chunk_id=0

In [4]:
os.chdir("/work/csr33/object_detection")
#determine chunk-number   
os.makedirs(args.prediction_dir, exist_ok=True)
model = YOLO(args.model_path)  # custom trained model 

In [7]:
# load a subset of the tile paths to predict on
tile_paths = np.load(args.tilename_chunks_path)[str(args.chunk_id)][:2]
tile_names = [os.path.splitext(os.path.basename(tile_path))[0] for tile_path in tile_paths]

In [8]:
#intialize dataframes
predict_df = pd.DataFrame({})
merged_df = pd.DataFrame({})

# obtain predictions over the dataframe
for tile_name in tile_names:
    print("tile_name", tile_name)
    start_time = time.time()
    img_paths = glob(os.path.join(args.img_dir,"*"+tile_name+"*")) #identify the imgs correspondig to a given tile
    tile_path = os.path.join(args.tile_dir, tile_name +".tif") # specify the tile path
    #obtain tile information
    utmx, utmy, utm_proj, tile_band, tile_height, tile_width = tile_dimensions_and_utm_coords(tile_path) #used
    #predict on images
    predict_df_by_tank = predict_process(img_paths, tile_height, tile_width, model, args)
    #remove predictions with low confidence scores
    predict_df_by_tank = predict_df_by_tank[predict_df_by_tank.confidence > args.classification_threshold]
    print(len(predict_df_by_tank))
    #merge neighboring images
    merged_df_by_tank = merge_predicted_bboxes(predict_df_by_tank, dist_limit = 5)
    print(len(merged_df_by_tank))
    # calculate utm and lat lon coords
    merged_df_by_tank[["utm_coords","latlon_coords"]] = merged_df_by_tank["bbox_pixel_coords"].apply(\
                                                        lambda box: get_crs_coords(box, utmx, utmy, utm_proj))
    print(len(merged_df_by_tank))
    merged_df_by_tank["diameter"] = merged_df_by_tank["utm_coords"].apply(lambda utm_coord: calculate_diameter(utm_coord, resolution = 1))
    #specify the projection used 
    merged_df_by_tank["utm_proj"] = [utm_proj] * len(merged_df_by_tank)
  #update dataframes
    predict_df = pd.concat([predict_df, predict_df_by_tank], ignore_index=True)
    merged_df = pd.concat([merged_df, merged_df_by_tank], ignore_index=True)
    #delete temp dataframe to conserve memory
    del predict_df_by_tank, merged_df_by_tank
    end_time = time.time()
    execution_time = end_time - start_time
    print("Execution time:", execution_time, "seconds")        

predict_df.to_csv(os.path.join(args.prediction_dir, f"predictions_{args.chunk_id}.csv"))
merged_df.to_csv(os.path.join(args.prediction_dir, f"merged_predictions_{args.chunk_id}.csv"))


tile_name m_3008929_ne_16_030_20211119

0: 640x640 (no detections), 1: 640x640 (no detections), 2: 640x640 (no detections), 3: 640x640 (no detections), 4: 640x640 (no detections), 5: 640x640 (no detections), 6: 640x640 (no detections), 7: 640x640 (no detections), 8: 640x640 (no detections), 9: 640x640 (no detections), 10: 640x640 (no detections), 11: 640x640 (no detections), 12: 640x640 (no detections), 13: 640x640 3 narrow_closed_roof_tanks, 14: 640x640 (no detections), 15: 640x640 (no detections), 16: 640x640 (no detections), 17: 640x640 (no detections), 18: 640x640 (no detections), 19: 640x640 (no detections), 20: 640x640 (no detections), 21: 640x640 (no detections), 22: 640x640 (no detections), 23: 640x640 1 external_floating_roof_tank, 24: 640x640 (no detections), 25: 640x640 (no detections), 26: 640x640 (no detections), 27: 640x640 (no detections), 28: 640x640 (no detections), 29: 640x640 (no detections), 30: 640x640 (no detections), 31: 640x640 (no detections), 32: 640x640 (no d

In [None]:
    try:
        point = geolocator.geocode(row).point
        return pd.Series({'Latitude': point.latitude, 'Longitude': point.longitude})
    except:
        return None, None

In [None]:
git clone --mirror https://github.com/celinerobi/ast_object_detection.git ast_object_detection_clone

In [27]:
merged_df.to_csv(os.path.join("/hpc/home/csr33", f"merged_predictions_{args.chunk_id}.csv"))
predict_df.to_csv(os.path.join("/hpc/home/csr33", f"predictions_{args.chunk_id}.csv"))


In [17]:
merged_df_0 = pd.read_csv(os.path.join("/hpc/home/csr33", f"merged_predictions_{args.chunk_id}.csv"))

In [18]:
merged_df_0

Unnamed: 0.1,Unnamed: 0,confidence,class_name,bbox_pixel_coords,utm_coords,latlon_coords,diameter,utm_proj
0,0,0.709970,narrow_closed_roof_tank,"[1413, 7656, 1421, 7663]","[266384.85000000003, 3388577.85, 266387.25, 33...","[-89.43680726604093, 30.606986808105322, -89.4...",2.1,EPSG:26916
1,1,0.691709,narrow_closed_roof_tank,"[1416, 7662, 1424, 7669]","[266385.75, 3388576.05, 266388.15, 3388573.949...","[-89.43679747890906, 30.606970755865152, -89.4...",2.1,EPSG:26916
2,2,0.685344,narrow_closed_roof_tank,"[1440, 7665, 1448, 7672]","[266392.95, 3388575.15, 266395.35000000003, 33...","[-89.43672223065883, 30.60696404828831, -89.43...",2.1,EPSG:26916
3,3,0.674716,narrow_closed_roof_tank,"[1447, 7664, 1455, 7671]","[266395.05, 3388575.4499999997, 266397.45, 338...","[-89.4367004102816, 30.606967163169212, -89.43...",2.1,EPSG:26916
4,4,0.660369,['narrow_closed_roof_tank'],"[1442, 7671, 1456, 7678]","[266393.55, 3388573.35, 266397.75, 3388571.25]","[-89.4367155704176, 30.60694793743995, -89.436...",2.1,EPSG:26916
...,...,...,...,...,...,...,...,...
404,404,0.614811,narrow_closed_roof_tank,"[8319, 4501, 8327, 4509]","[492965.7, 3531017.1, 492970.5, 3531012.300000...","[-81.07440366461059, 31.915004851399, -81.0743...",4.8,EPSG:26917
405,405,0.544820,narrow_closed_roof_tank,"[8818, 806, 8827, 814]","[493265.1, 3533234.1, 493270.5, 3533229.300000...","[-81.07125225425202, 31.93500824881911, -81.07...",4.8,EPSG:26917
406,406,0.678150,narrow_closed_roof_tank,"[9157, 119, 9166, 127]","[493468.5, 3533646.3000000003, 493473.89999999...","[-81.06910315607813, 31.938728263710118, -81.0...",4.8,EPSG:26917
407,407,0.572326,['narrow_closed_roof_tank'],"[9158, 103, 9167, 118]","[493469.1, 3533655.9000000004, 493474.5, 35336...","[-81.06909687289001, 31.93881487735055, -81.06...",5.4,EPSG:26917


In [26]:
merged_df["bbox_pixel_coords"].apply(lambda box: get_crs_coords(box, utmx, utmy, utm_proj))

IndexError: index 12726 is out of bounds for axis 0 with size 12230

In [15]:
from random import shuffle



[[0]]


In [None]:
impor