# 6 - Post-processing


In [None]:
# --- User settings -----------------------------------------------------------
# Width (in map units, metres for a UTM projection) of the tile overlap that is
# trimmed away later on; half of it is removed from every side of each tile.
buffer_size_m = 1

# Folder holding the predicted tiles (.tif) of one orthomosaic.
path_to_predicted = "/content/drive/MyDrive/NOVA_course_deep_learning/data/tiles/10m_ortho_hobol_042222_mavic_sun"

# Shapefile with one polygon per tile, stored inside the folder above.
path_to_tile_index = (
    path_to_predicted
    + "/ortho_hobol_042222_mavic_sun_tile_index.shp"
)

### 6.1 Loading required libraries and mounting Google Drive

In [None]:
!pip install geopandas

# general libraries

import os, glob
from pathlib import Path

# Geospatial libraries

import geopandas as gpd
from osgeo import gdal, osr
from shapely.geometry import Polygon


In [None]:
# Mount Google Drive so that the data folders under /content/drive become
# available to this Colab session (the paths configured in the first cell
# point inside this mount point).

from google.colab import drive
drive.mount('/content/drive')

### 6.2 Getting necessary files


In [None]:
# Get the list of images (.tif) and of YOLO label files (.txt).
# glob returns files in arbitrary, file-system dependent order; sorting makes
# the processing order (and the tile picked as gtiffs[0] for the raster
# metadata) the same on every run.

gtiffs = sorted(glob.glob(path_to_predicted + "/*.tif"))
labels = sorted(glob.glob(path_to_predicted + "/predict_tiles/labels/*.txt"))
print("There are a total of "+str(len(gtiffs))+" tif files")
print("There are a total of "+str(len(labels))+" label files")

In [None]:
# Load the tile index shapefile into a GeoDataFrame.
# Later cells select rows of it through its 'ID' column, which is compared
# against "<tile name>.tif".

tile_index = gpd.read_file(path_to_tile_index)

In [None]:
# Get raster metadata from the first tile:
# pixel resolution (in map units, metres for a UTM projection) and tile size in pixels.
# NOTE(review): this assumes every tile shares the resolution and CRS of the first one - confirm.

if not gtiffs:
    # fail early with a clear message instead of an IndexError on gtiffs[0]
    raise FileNotFoundError("No .tif files found in " + path_to_predicted)

src_ds = gdal.Open(gtiffs[0])                       # get raster datasource
if src_ds is None:
    # gdal.Open returns None (instead of raising) when GDAL exceptions are disabled
    raise RuntimeError("GDAL could not open " + gtiffs[0])

_, xres, _, _, _, yres  = src_ds.GetGeoTransform()  # pixel width and pixel height (yres is negative for north-up rasters)
tile_size_m=round(src_ds.RasterXSize*xres)          # tile width in map units, rounded to a whole number
tile_size_px= round(tile_size_m/abs(xres))          # calculate the tile size in pixels

# Get EPSG code

proj = osr.SpatialReference(wkt=src_ds.GetProjection())
EPSG_code= proj.GetAttrValue('AUTHORITY',1)
if EPSG_code is None:
    # the projection carries no authority code; later cells need it to build the CRS
    print("WARNING: no EPSG code found in the projection of " + gtiffs[0])

print("Resolution: "+str(round(xres,2))+" m")
print("EPSG: "+str(EPSG_code))

### 6.3 Parsing from YOLO to UTM coordinates

In [None]:
##########################################################################################################################################################################
# YOLO to x1, y1, x2, y2 parser: converts the YOLO format (x_center, y_center, width, height, all expressed as fractions of the image size) to the corner format (x1, y1, x2, y2) in pixels.
# Arguments:
# - label_file: path to a text file with one YOLO prediction per line: class, x, y, width, height, probability
# - img_width: width of the input image in pixels
# - img_height: height of the input image in pixels
# Returns:
# - a list with one entry per predicted bounding box, each holding: class, x1, y1, x2, y2, probability (note that the coordinates are still image coordinates and NOT GEOGRAPHICAL ONES)

##########################################################################################################################################################################

def yolo2xy(label_file, img_width, img_height):
    """
    Convert YOLO predictions from normalised centre format to pixel corners.

    Every non-blank line of ``label_file`` must hold at least six
    whitespace-separated fields: class, x_center, y_center, width, height,
    probability. The four box values are fractions of the image size.

    Parameters:
        label_file: path to the YOLO label (.txt) file.
        img_width: width of the image in pixels.
        img_height: height of the image in pixels.

    Returns:
        A list with one ``[class, x1, y1, x2, y2, probability]`` entry per
        prediction. The class and the corner coordinates are ints (pixel
        coordinates are truncated by ``int()``), the probability is a float.
        Coordinates are image coordinates, NOT geographical ones.

    Raises:
        IndexError: if a non-blank line has no probability field.
        ValueError: if a field cannot be parsed as a number.
    """
    all_coords = []
    # the context manager closes the file even when a line fails to parse
    with open(label_file) as lfile:
        for line in lfile:
            fields = line.split()
            if not fields:
                # tolerate blank lines (e.g. a trailing newline at the end of the file)
                continue

            # Parse the whole token: iterating over the string would only keep
            # the first digit of multi-digit class ids ("12" -> 1).
            label = int(float(fields[0]))
            prob = float(fields[5])
            x_c, y_c, box_w, box_h = (float(value) for value in fields[1:5])

            # centre +/- half the size, scaled from fractions to pixels
            x1 = float(img_width) * (2.0 * x_c - box_w) / 2.0
            y1 = float(img_height) * (2.0 * y_c - box_h) / 2.0
            x2 = float(img_width) * (2.0 * x_c + box_w) / 2.0
            y2 = float(img_height) * (2.0 * y_c + box_h) / 2.0
            all_coords.append([label, int(x1), int(y1), int(x2), int(y2), prob])
    return all_coords


In [None]:
# Convert every YOLO label file to georeferenced bounding boxes.
# For each label file the matching GeoTIFF and tile-index polygon are looked up,
# the predictions are converted from image to map coordinates, and only the
# boxes whose centroid falls inside the tile shrunk by buffer_size_m/2 are kept
# (this removes duplicates in the overlap between neighbouring tiles).
# The result, all_bboxes, is one GeoDataFrame with the columns class, prob and
# geometry, or None when no box was kept.

all_bboxes = None
iter_all=0    # setup counter (never updated in this cell; kept in case other cells read it)

# CRS of the output, given as an authority string (the {'init': 'epsg:...'} dict form is deprecated)
crs = "EPSG:" + str(EPSG_code)

# Look-up table tile name -> GeoTIFF path, built once instead of scanning gtiffs for every label file
gtiff_by_stem = {Path(p).stem: p for p in gtiffs}

# Boxes are collected in plain lists and turned into a GeoDataFrame once at the
# end: DataFrame.append no longer exists in pandas >= 2.0 and growing a frame
# row by row is quadratic.
bbox_classes = []
bbox_probs = []
bbox_geoms = []

n_labels = len(labels)
for lab, label_file in enumerate(labels):
    print(str(round(lab/n_labels*100))+" % done!", end="\r")

    # Select the geotiff image that corresponds to this label file

    label_file_name = Path(label_file).stem       # tile name
    gtiff_file = gtiff_by_stem.get(label_file_name)
    if gtiff_file is None:
        # never fall back to the GeoTIFF of a previous iteration
        print("no file")
        continue

    # Determine image width and height

    try:
        r = gdal.Open(gtiff_file)
    except RuntimeError:
        # raised instead of returning None when GDAL exceptions are enabled
        r = None
    if r is None:
        print("no file")
        continue

    img_width = r.RasterXSize
    img_height = r.RasterYSize

    # Convert from yolo coordinates to x1, y1, x2, y2

    coords = yolo2xy(label_file, img_width, img_height)               # class, x1, y1, x2, y2, probability

    # Select the tile polygon (from the tile index) whose ID equals "<label_file_name>.tif"

    one_tile = tile_index[tile_index['ID']==label_file_name+".tif"]
    if one_tile.empty:
        # without a polygon the bounds would be NaN and so would every box
        print("no tile index entry for " + label_file_name + ".tif")
        continue

    ## get tile bounding box geographical coordinates (UTM)

    one_tile_XminUTM, one_tile_YminUTM, one_tile_XmaxUTM, one_tile_YmaxUTM = one_tile.total_bounds

    ## take inner buffer equal to the buffer_size_m
    ## (computed on a separate GeoSeries so that the selected tile is not modified)

    one_tile_innerB = one_tile.geometry.buffer(-(buffer_size_m/2))

    ## get inner tile bounding box

    (one_tile_inner_XminUTM, one_tile_inner_YminUTM,
     one_tile_inner_XmaxUTM, one_tile_inner_YmaxUTM) = one_tile_innerB.total_bounds

    # Now iterate through each bounding box and assign UTM coordinates

    for i in coords:

        # Convert bounding box coordinates from image to geographical coords.
        # Image rows are counted downwards from the upper edge of the tile, so
        # the Y origin is the tile's own Ymax (yres is negative for north-up
        # rasters). Using Ymax instead of Ymin + tile_size_m avoids the rounding
        # of tile_size_m and stays correct for tiles that are not exactly
        # tile_size_m high.

        X1_UTM = (i[1]*xres)+one_tile_XminUTM
        Y1_UTM = (i[2]*yres)+one_tile_YmaxUTM
        X2_UTM = (i[3]*xres)+one_tile_XminUTM
        Y2_UTM = (i[4]*yres)+one_tile_YmaxUTM

        # skip bounding box if its centroid is NOT within the inner tile (removing the overlap)

        X_UTM = (X1_UTM+X2_UTM)/2
        Y_UTM = (Y1_UTM+Y2_UTM)/2
        if X_UTM<one_tile_inner_XminUTM or X_UTM>one_tile_inner_XmaxUTM or Y_UTM<one_tile_inner_YminUTM or Y_UTM>one_tile_inner_YmaxUTM:
            continue

        # Create polygon shape from geographical coords (closed ring)

        try:
            polygon_geom = Polygon([
                (X1_UTM, Y1_UTM),
                (X2_UTM, Y1_UTM),
                (X2_UTM, Y2_UTM),
                (X1_UTM, Y2_UTM),
                (X1_UTM, Y1_UTM),
            ])
        except Exception as err:
            # best effort: report the box that could not be built and move on
            print("could not build polygon for " + label_file_name + ": " + str(err))
            continue

        bbox_classes.append(i[0])
        bbox_probs.append(i[5])
        bbox_geoms.append(polygon_geom)

# Build the output GeoDataFrame in one go (stays None when no box was kept)

if bbox_geoms:
    all_bboxes = gpd.GeoDataFrame({'class': bbox_classes, 'prob': bbox_probs}, crs=crs, geometry=bbox_geoms)


### 6.4 Exporting predictions shapefile

In [None]:
# Write all predicted bounding boxes to a shapefile named after the orthomosaic
# folder, inside that same folder.
ortho_name= os.path.basename(path_to_predicted)
if all_bboxes is None or all_bboxes.empty:
    # nothing was predicted (or every box was filtered out): there is nothing to write
    print("No predicted bounding boxes to export")
else:
    all_bboxes.to_file(path_to_predicted + "/" + ortho_name + "_predictions_AM.shp", driver='ESRI Shapefile') # turn this off if it's not needed