In [None]:
import numpy as np
import pandas as pd
from osgeo import gdal
import math
import os
from scipy.stats import linregress
from scipy.stats import pearsonr
from sklearn.metrics import accuracy_score, matthews_corrcoef, confusion_matrix, r2_score
from sklearn.linear_model import LinearRegression
import higra as hg
import scipy.ndimage as si
from skimage import measure
import glob
import matplotlib.pyplot as plt
np.random.seed(42)

In [32]:
def InstSegm(extent, boundary, t_ext=0.4, t_bound=0.2):
    """
    Derive an instance label image from extent and boundary predictions.

    INPUTS:
    extent : extent prediction
    boundary : boundary prediction (same height/width as ``extent``)
    t_ext : threshold for extent; pixels with ``extent >= t_ext`` are kept
    t_bound : threshold for boundary; altitude at which the hierarchy is cut
    OUTPUT:
    instances : label array from the horizontal cut, with every pixel below
                the extent threshold overwritten by -1
    """
    # Binary extent mask, plus the complementary "non-field" selector that is
    # needed twice below.
    is_field = np.uint8(extent >= t_ext)
    non_field = is_field == 0

    # Work on a copy so the caller's boundary array stays untouched, and give
    # non-field pixels the strongest possible boundary value.
    hws_input = np.copy(boundary)
    hws_input[non_field] = 1

    # 8-connected pixel graph whose edge weights are the mean of the two
    # incident pixel values.
    height_width = hws_input.shape[:2]
    adjacency = hg.get_8_adjacency_graph(height_width)
    weights = hg.weight_graph(adjacency, hws_input, hg.WeightFunction.mean)

    # Watershed hierarchy (by dynamics), then cut it at the boundary threshold
    # to obtain one label per field.
    hierarchy, node_altitudes = hg.watershed_hierarchy_by_dynamics(adjacency, weights)
    labels = hg.labelisation_horizontal_cut_from_threshold(
        hierarchy,
        node_altitudes,
        threshold=t_bound,
    )

    # Mark non-field pixels so callers can treat -1 as background.
    labels[non_field] = -1
    return labels

def get_IoUs(extent_true, extent_pred, boundary_pred, t_ext=0.4,
             t_bound=0.2, plot=False, sample_size=0.05, border_limit=10):
    """
    Compute the best IoU per sampled reference field.

    INPUTS:
    extent_true : reference extent raster; values > 0 are treated as field
    extent_pred : extent prediction, passed to InstSegm
    boundary_pred : boundary prediction, passed to InstSegm
    t_ext : extent threshold for InstSegm
    t_bound : boundary threshold for InstSegm
    plot : if True, show the label images and, per field, the true field next
           to the predicted instances that overlap it
    sample_size : fraction of the unique reference labels (background label 0
                  included in the draw) that is sampled without replacement
    border_limit : fields for which TooCloseToBorder(...) is truthy are skipped
    OUTPUT:
    best_IoUs : per evaluated field, the maximum IoU over all overlapping
                predicted instances, or 0 if no predicted instance overlaps
    field_IDs : reference label of each evaluated field
    field_sizes : pixel count of each evaluated field

    The sample is drawn from numpy's global random state, so the selected
    fields depend on that state at call time.
    """
    # Predicted instances. InstSegm marks non-field pixels with -1, hence -1 is
    # the background value here; after labelling, background becomes 0.
    instances_pred = InstSegm(extent_pred, boundary_pred, t_ext=t_ext, t_bound=t_bound)
    instances_pred = measure.label(instances_pred, background=-1)

    # Reference instances: connected components of the binary reference mask
    binary_true = extent_true > 0
    instances_true = measure.label(binary_true, background=0, connectivity=1)

    if plot:
        fig, ax = plt.subplots(1, 2, figsize=(8, 4))
        ax[0].imshow(instances_true)
        ax[1].imshow(instances_pred)
        plt.show()

    # Random subset of the reference labels
    field_values = np.unique(instances_true)
    field_values = np.random.choice(field_values, size=int(sample_size * len(field_values)), replace=False)

    best_IoUs = []
    field_IDs = []
    field_sizes = []

    for field_value in field_values:
        if field_value == 0:
            continue  # background, not a field

        this_field = instances_true == field_value
        # Discard fields that are too close to the tile border
        if TooCloseToBorder(this_field, border_limit):
            continue

        field_IDs.append(field_value)
        field_sizes.append(np.sum(this_field))

        # Predicted labels found inside the true field. Indexing with the mask
        # avoids building a full-size product image, and 0 (predicted
        # background) is filtered by value instead of by position, so a field
        # that is completely covered by predictions keeps all of its labels.
        intersect_values = np.unique(instances_pred[this_field])
        intersect_values = intersect_values[intersect_values != 0]

        if plot:
            # Only needed for the figure, so only computed when plotting
            intersect_fields = np.isin(instances_pred, intersect_values)
            fig, ax = plt.subplots(1, 2, figsize=(8, 4))
            ax[0].imshow(this_field)
            ax[1].imshow(intersect_fields)
            plt.show()

        # IoU of the true field with every overlapping predicted instance
        field_IoUs = []
        for intersect_value in intersect_values:
            pred_field = instances_pred == intersect_value
            union = np.logical_or(this_field, pred_field)
            intersection = np.logical_and(this_field, pred_field)
            field_IoUs.append(np.sum(intersection) / np.sum(union))

        # The best-matching predicted instance defines the IoU of this field
        if len(field_IoUs) > 0:
            best_IoUs.append(np.max(field_IoUs))
        else:
            best_IoUs.append(0)

    return best_IoUs, field_IDs, field_sizes


def makeTif_np_to_matching_tif(array, tif_path, out_folder, ending):
    """
    Write ``array`` as a single-band Float32 GeoTIFF on the grid of ``tif_path``.

    INPUTS:
    array : array written to band 1 of the new file
    tif_path : raster whose size, geotransform and projection are copied
    out_folder : folder the new file is created in
    ending : suffix (including extension) appended to the template's base name
    OUTPUT:
    None; the file ``{out_folder}/{base name}{ending}`` is created, where the
    base name is the template file name up to its first dot.

    Raises RuntimeError if the template cannot be opened or the output cannot
    be created.
    """
    ds = gdal.Open(tif_path)
    # gdal.Open returns None on failure unless GDAL exceptions are enabled
    if ds is None:
        raise RuntimeError(f'GDAL could not open template raster: {tif_path}')

    gtiff_driver = gdal.GetDriverByName('GTiff')
    file_name = tif_path.split('/')[-1].split('.')[0]
    out_path = f'{out_folder}/{file_name}{ending}'
    out_ds = gtiff_driver.Create(out_path, ds.RasterXSize, ds.RasterYSize, 1, gdal.GDT_Float32)
    if out_ds is None:
        raise RuntimeError(f'GDAL could not create output raster: {out_path}')

    out_ds.SetGeoTransform(ds.GetGeoTransform())
    out_ds.SetProjection(ds.GetProjection())
    out_ds.GetRasterBand(1).WriteArray(array)

    # Flush pending writes, then drop the references so GDAL closes the files
    out_ds.FlushCache()
    del out_ds
    ds = None

def TooCloseToBorder(numbered_array, border_limit):
    """
    Tell whether the marked pixels of a 2-D mask come too close to its edges.

    INPUTS:
    numbered_array : 2-D array; pixels equal to True form the field
    border_limit : minimum allowed margin in pixels
    OUTPUT:
    True if any of the four margins is smaller than ``border_limit``,
    otherwise False. A mask without any marked pixel returns False.

    Margins are measured as ``min index`` towards the top/left and as
    ``size - max index`` towards the bottom/right, i.e. the bottom/right
    margin counts one pixel more than the top/left one.
    """
    mask = np.asarray(numbered_array) == True
    rows, cols = np.nonzero(mask)

    # Nothing marked: there is no pixel that could be near the border
    if rows.size == 0:
        return False

    n_rows, n_cols = mask.shape
    margins = (rows.min(), n_rows - rows.max(), cols.min(), n_cols - cols.max())
    # Always hand back a real bool instead of falling through to None
    return bool(min(margins) < border_limit)

In [34]:
# Grid search over the extent / boundary thresholds: for every tile and every
# threshold combination, compute per-field IoUs and export them as one CSV.

# load the predictions and labels
predictions =  '/data/fields/output/predictions/FORCE/BRANDENBURG/vrt/256_20_masked_chipsvrt.vrt' # predictions straight from GPU 
reference =  '/data/fields/IACS/Auxiliary/GSA-DE_BRB-2019_All_agromask_linecrop_prediction_extent.tif' # mask from IACS

result_dir = '/data/fields/Auxiliary/grid_search/' + predictions.split('/')[-1].split('.')[0] + '_' + reference.split('/')[-1].split('.')[0]

# make a dictionary for export: one row per evaluated reference field
k = ['tile','t_ext','t_bound', 'IoU', 'field_IDs', 'field_sizes']
v = [list() for _ in range(len(k))]
res = dict(zip(k, v))

# Open both rasters once; gdal.Open returns None on failure unless GDAL
# exceptions are enabled, so fail early with a readable message.
pred_ds = gdal.Open(predictions)
if pred_ds is None:
    raise RuntimeError(f'GDAL could not open predictions: {predictions}')
ref_ds = gdal.Open(reference)
if ref_ds is None:
    raise RuntimeError(f'GDAL could not open reference: {reference}')

# tile predictions in prds --> total extent encompasses 90 Force Tiles (+ a few rows and cols that will be neglected as they are outside of study area)
rows, cols = pred_ds.RasterYSize, pred_ds.RasterXSize
tiles = 10

# Tile edges. The stop of the *_end ranges is size + 1 so that the last full
# tile is kept when the raster size is an exact multiple of the tile size;
# a remainder smaller than one tile is still dropped on purpose.
row_step = math.floor(rows/tiles)
row_start = [r for r in range(0, rows, row_step)]
row_end = [r for r in range(row_step, rows + 1, row_step)]
row_start = row_start[:len(row_end)]

col_step = math.floor(cols/tiles)
col_start = [c for c in range(0, cols, col_step)]
col_end = [c for c in range(col_step, cols + 1, col_step)]
col_start = col_start[:len(col_end)]

# parameter combinations to test (loop-invariant, so built once)
t_exts = [t/100 for t in range(10,55,5)]
t_bounds = [t/100 for t in range(10,55,5)]

# read in vrt in tiles
for i in [6]:  # use range(len(row_end)) to process every tile row

    print(f'Starting on {i+1}. row from {len(row_end)} rows')

    for j in [3]:  # use range(len(col_end)) to process every tile column

        print(f'Starting on {j+1}.column from {len(col_end)} columns')
        # make identifier for tile for csv
        tile = f'{i}_{j}'

        # pixel window of this tile: (xoff, yoff, xsize, ysize)
        window = (col_start[j], row_start[i], col_end[j] - col_start[j], row_end[i] - row_start[i])

        # subset of the predicted extent (band 1) --> goes into InstSegm
        extent_pred = pred_ds.GetRasterBand(1).ReadAsArray(*window)
        # skip tiles whose prediction holds a single value, i.e. no data
        if len(np.unique(extent_pred)) == 1:
            continue
        # subset of the predicted boundary probability (band 2) --> goes into InstSegm
        boundary_pred = pred_ds.GetRasterBand(2).ReadAsArray(*window)
        # subset of the IACS reference mask, read with the same pixel window
        # NOTE(review): this assumes the reference raster is on the same pixel
        # grid as the predictions - confirm.
        extent_true = ref_ds.GetRasterBand(1).ReadAsArray(*window)

        for t_ext in t_exts:
            for t_bound in t_bounds:
                print('thresholds: ' + str(t_ext) + ', ' +str(t_bound))

                # NOTE(review): get_IoUs samples fields from numpy's global
                # random state, so each threshold combination is evaluated on
                # a different random subset of fields - confirm this is intended.
                img_IoUs, field_IDs, field_sizes = get_IoUs(extent_true, extent_pred, boundary_pred, t_ext=t_ext, t_bound=t_bound,
                                    sample_size=0.01 ,border_limit=10)

                for IoU, field_ID, field_size in zip(img_IoUs, field_IDs, field_sizes):
                    res['tile'].append(tile)
                    res['t_ext'].append(t_ext)
                    res['t_bound'].append(t_bound)
                    res['IoU'].append(IoU)
                    res['field_IDs'].append(field_ID)
                    res['field_sizes'].append(field_size)

df  = pd.DataFrame(data = res)
df.to_csv(result_dir + '_IoU_hyperparameter_tuning_full.csv', index=False)

Starting on 7. row from 10 rows
Starting on 4.column from 10 columns
thresholds: 0.1, 0.1
thresholds: 0.1, 0.15
thresholds: 0.1, 0.2
thresholds: 0.1, 0.25
thresholds: 0.1, 0.3
thresholds: 0.1, 0.35
thresholds: 0.1, 0.4
thresholds: 0.1, 0.45
thresholds: 0.1, 0.5
thresholds: 0.15, 0.1
thresholds: 0.15, 0.15
thresholds: 0.15, 0.2
thresholds: 0.15, 0.25
thresholds: 0.15, 0.3
thresholds: 0.15, 0.35
thresholds: 0.15, 0.4
thresholds: 0.15, 0.45
thresholds: 0.15, 0.5
thresholds: 0.2, 0.1
thresholds: 0.2, 0.15
thresholds: 0.2, 0.2
thresholds: 0.2, 0.25
thresholds: 0.2, 0.3
thresholds: 0.2, 0.35
thresholds: 0.2, 0.4
thresholds: 0.2, 0.45
thresholds: 0.2, 0.5
thresholds: 0.25, 0.1
thresholds: 0.25, 0.15
thresholds: 0.25, 0.2
thresholds: 0.25, 0.25
thresholds: 0.25, 0.3
thresholds: 0.25, 0.35
thresholds: 0.25, 0.4
thresholds: 0.25, 0.45
thresholds: 0.25, 0.5
thresholds: 0.3, 0.1
thresholds: 0.3, 0.15
thresholds: 0.3, 0.2
thresholds: 0.3, 0.25
thresholds: 0.3, 0.3
thresholds: 0.3, 0.35
thresholds: 

In [None]:
def TooCloseToBorder(numbered_array, border_limit):
    """
    Tell whether the marked pixels of a 2-D mask come too close to its edges.

    INPUTS:
    numbered_array : 2-D array; pixels equal to True form the field
    border_limit : minimum allowed margin in pixels
    OUTPUT:
    True if any of the four margins is smaller than ``border_limit``,
    otherwise False. A mask without any marked pixel returns False.

    Margins are measured as ``min index`` towards the top/left and as
    ``size - max index`` towards the bottom/right, i.e. the bottom/right
    margin counts one pixel more than the top/left one.
    """
    mask = np.asarray(numbered_array) == True
    rows, cols = np.nonzero(mask)

    # Nothing marked: there is no pixel that could be near the border
    if rows.size == 0:
        return False

    # The shape must come from the argument itself, not from a notebook-global
    # variable that happens to exist in the kernel.
    n_rows, n_cols = mask.shape
    margins = (rows.min(), n_rows - rows.max(), cols.min(), n_cols - cols.max())
    # Always hand back a real bool instead of falling through to None
    return bool(min(margins) < border_limit)
    

113

buh
gfgdf
buh
gfgdf
buh
gfgdf
buh
gfgdf
buh
gfgdf
