In [1]:
from sklearn.linear_model import LinearRegression
from skimage import measure
from joblib import Parallel, delayed

import sys
sys.path.append('/home/potzschf/repos/')
from helperToolz.helpsters import *


#########################  parameter settings
# Input rasters: the network predictions (VRT) and the IACS crop mask used as reference.
predictions = '/data/fields/output/predictions/FORCE/BRANDENBURG/vrt/256_20_chips.vrt'  # predictions straight from GPU
reference = '/data/fields/IACS/4_Crop_mask/GSA-DE_BRB-2019_cropMask_lines_touch_false_lines_touch_false_linecrop.tif'  # mask from IACS

# Results are written to a folder named "<prediction file stem>_<reference file stem>".
pred_stem, ref_stem = (path.split('/')[-1].split('.')[0] for path in (predictions, reference))
result_dir = '/data/fields/Auxiliary/grid_search/Brandenburg/' + f'{pred_stem}_{ref_stem}'
os.makedirs(result_dir, exist_ok=True)

# Tiling and sampling:
#   slicer       - rows and cols are each divided by this number --> determines the number of tiles
#   border_limit - width (in pixels) of the strip along tile borders in which fields are not sampled
#   sample_size  - total number of reference fields to sample
slicer, border_limit, sample_size = 10, 10, 10000

# Number of cores for parallel processing, and the seed that makes the sampling reproducible.
ncores = 15
np.random.seed(42)

# Switch: also export intermediate label rasters as GeoTIFF.
make_tifs_from_intermediate_step = False
######### prepare job-list


# Per-tile containers that are later combined by index into the job list.
# Each name gets its own, independent empty list.
(tile_list, extent_true_list, extent_pred_list,
 boundary_pred_list, result_dir_list, row_col_start) = ([] for _ in range(6))


# Tile the predictions --> total extent encompasses 90 FORCE tiles (+ a few rows and cols
# that will be neglected as they are outside of the study area).
pred_ds = gdal.Open(predictions)
if pred_ds is None:
    # gdal.Open returns None instead of raising when the file cannot be opened
    raise FileNotFoundError(f'GDAL could not open prediction raster: {predictions}')
rows, cols = pred_ds.RasterYSize, pred_ds.RasterXSize

# Edge length of one tile along each axis.
row_step = rows // slicer
col_step = cols // slicer
if row_step == 0 or col_step == 0:
    raise ValueError(f'raster of {rows} x {cols} pixels is too small to be divided by slicer={slicer}')

# End offsets of all complete tiles. The stop value is `size + 1` so that a tile ending
# exactly on the raster edge is kept; with `size` as stop the last tile was lost whenever
# the raster size was an exact multiple of the step. Leftover pixels beyond the last
# complete tile are still ignored.
row_end = list(range(row_step, rows + 1, row_step))
row_start = [end - row_step for end in row_end]

col_end = list(range(col_step, cols + 1, col_step))
col_start = [end - col_step for end in col_end]

In [2]:
# Read the IACS reference mask (band 1) into memory.
ref_ds = gdal.Open(reference)
ref_band = ref_ds.GetRasterBand(1)
extent_true = ref_band.ReadAsArray()

# Every pixel > 0 counts as field. Connected field pixels receive a common integer ID;
# with connectivity=1 scikit-image links only direct (non-diagonal) neighbours, and
# pixels equal to `background` are labelled 0.
binary_true = extent_true > 0
instances_true = measure.label(binary_true, background=0, connectivity=1)

# sample fields
# Build a mask (1 = excluded zone) of pixels within `border_limit` of a tile border.
# Every labelled patch that touches this mask is skipped when sampling.
# uint8 holds the same 0/1 information as the default float64 at 1/8 of the memory,
# which matters because the mask has the size of the full reference raster.
power_mask = np.zeros(instances_true.shape, dtype=np.uint8)
if row_end and col_end:  # no tiles --> nothing to mask (same as the empty nested loop)
    # Strip of `border_limit` pixels after the start of every tile row / tile column.
    # Each strip is written exactly once instead of once per (row, col) tile combination.
    for tile_row_start in row_start[:len(row_end)]:
        power_mask[tile_row_start:tile_row_start + border_limit, :] = 1
    for tile_col_start in col_start[:len(col_end)]:
        power_mask[:, tile_col_start:tile_col_start + border_limit] = 1
    # From `border_limit` pixels before the end of the last tile up to the raster edge.
    power_mask[row_end[-1] - border_limit:, :] = 1
    power_mask[:, col_end[-1] - border_limit:] = 1
# NOTE(review): for inner tile borders only the strip AFTER the border is masked, so a field
# ending just before a border without crossing it is not excluded - confirm this is intended.

# makeTif_np_to_matching_tif(power_mask, reference, result_dir, 'powermask.tif',0)
# IDs of all labelled patches (including the background label) that touch the mask
IDs_to_skip = np.unique(instances_true[power_mask == 1])

# Pixel count per labelled patch, i.e. the distribution of field sizes after segmentation.
unique_IDs, counts = np.unique(instances_true, return_counts=True)

# Exclude fields that are too close to tile borders (all IDs touching the border mask).
mask = np.isin(unique_IDs, IDs_to_skip, invert=True)
unique_IDs, counts = unique_IDs[mask], counts[mask]

if make_tifs_from_intermediate_step:
    # Export the label raster with every excluded ID set to 0.
    # NOTE(review): this call passes three arguments, while the other active
    # makeTif_np_to_matching_tif call in this notebook passes an additional trailing 0 -
    # confirm against the helper's signature.
    filtered_instances = np.where(np.isin(instances_true, unique_IDs), instances_true, 0)
    makeTif_np_to_matching_tif(filtered_instances, reference, '/data/fields/output/predictions/FORCE/BRANDENBURG/auxiliary/256_20_chips_border_cut.tif')

# Exclude the background label (ID 0) and super-small fields (3 pixels or fewer) from the sample.
mask = (counts > 3) & (unique_IDs != 0)
unique_IDs, counts = unique_IDs[mask], counts[mask]

# Stratified sample: split the field sizes at their deciles and draw the same number of
# fields from every size class.
deciles = list(range(10, 100, 10))
deciles_values = np.percentile(counts, deciles)
# Class edges; each class is (lower, upper], so the largest field falls into the last class.
decs = [0] + deciles_values.tolist() + [np.max(counts)]
n_bins = len(decs) - 1
per_bin = int(sample_size / n_bins)

bin_ids = []
for lower, upper in zip(decs[:-1], decs[1:]):
    # IDs of those fields whose count (size) is within this class
    candidates = unique_IDs[(counts > lower) & (counts <= upper)]
    # Sampling without replacement cannot draw more than is available. Tied decile values
    # can even leave a class empty. Draw what is there instead of raising.
    n_draw = min(per_bin, len(candidates))
    if n_draw < per_bin:
        print(f'size class ({lower}, {upper}]: only {len(candidates)} fields available, '
              f'drawing {n_draw} instead of {per_bin}')
    if n_draw == 0:
        continue
    bin_ids.append(np.random.choice(candidates, n_draw, replace=False))

sampled_IDs = np.concatenate(bin_ids) if bin_ids else np.array([], dtype=unique_IDs.dtype)
mask = np.isin(instances_true, sampled_IDs)
# set everything to 0 except samples
# (in place: after this line instances_true only contains the sampled fields)
instances_true[~mask] = 0

if make_tifs_from_intermediate_step:
    # Create filtered array with only valid IDs preserved for export
    filtered_instances = np.where(np.isin(instances_true, unique_IDs), instances_true, 0)
    makeTif_np_to_matching_tif(filtered_instances, reference, '/data/fields/output/predictions/FORCE/BRANDENBURG/auxiliary/256_20_chips_valid_IDs.tif', 0)


In [None]:
# Read the prediction VRT tile by tile and assemble one job per usable tile.
# Each job is a list:
#   [tile id "<i>_<j>", pixel offset "<row>_<col>", reference labels,
#    extent prediction, boundary prediction, output folder, border limit]
# The job list is rebuilt from scratch on every run and nothing is deleted afterwards, so
# this cell can be re-run, and `border_limit` stays available to the cells that need it.
band_extent = pred_ds.GetRasterBand(1)    # goes into InstSegm --> image of crop probability
band_boundary = pred_ds.GetRasterBand(2)  # goes into InstSegm --> image of boundary probability

jobs = []
for i in range(len(row_end)):
    for j in range(len(col_end)):
        # check if tile contains a sample of reference/label data
        # (array is already in memory, so test it before reading predictions from disk)
        extent_true = instances_true[row_start[i]:row_end[i], col_start[j]:col_end[j]]
        if len(np.unique(extent_true)) == 1:
            continue

        # GDAL read window: x offset, y offset, x size, y size
        window = (col_start[j], row_start[i], col_end[j] - col_start[j], row_end[i] - row_start[i])

        # check if prediction subset of fields actually contains data
        extent_pred = band_extent.ReadAsArray(*window)
        if len(np.unique(extent_pred)) == 1:
            continue

        boundary_pred = band_boundary.ReadAsArray(*window)

        jobs.append([
            f'{i}_{j}',                          # identifier for tile for csv
            f'{row_start[i]}_{col_start[j]}',    # pixel offset of the tile in the full raster
            extent_true,
            extent_pred,
            boundary_pred,
            result_dir,                          # output folder
            border_limit,
        ])

In [4]:
# Pick a single job and unpack the entries that the following cells work with.
ind = 0
job = jobs[ind]

# positions 1-4: pixel offset string, reference labels, extent and boundary prediction
row_col_start, extent_true, extent_pred, boundary_pred = job[1:5]
border_limit = job[6]

# thresholds passed to InstSegm
t_ext = t_bound = 0.2

In [None]:
# Pixel offset of this tile in the full raster, parsed from the "<row>_<col>" job string.
# NOTE(review): this rebinds row_start / col_start, which hold the lists of tile offsets in
# the tiling cells - re-running the tiling loop after this cell would fail on `row_start[i]`.
# Consider distinct names.
offset_parts = row_col_start.split('_')
row_start, col_start = int(offset_parts[0]), int(offset_parts[1])

# get predicted instance segmentation
segmentation = InstSegm(extent_pred, boundary_pred, t_ext=t_ext, t_bound=t_bound)
# Relabel connected regions; pixels equal to -1 are what measure.label treats as background.
instances_pred = measure.label(segmentation, background=-1)

def export_intermediate_products(intermediate_aray, dummy_ds, folder_out, filename,
                                 row_offset=None, col_offset=None):
    '''
    Write a 2-D array as a single-band Int32 GeoTIFF, georeferenced as a tile of dummy_ds.

    intermediate_aray: array to be exported (2-D; its shape defines the raster size)
    dummy_ds: a gdal.Open object that contains desired geoinformation
              (geotransform and projection are both taken from it)
    folder_out: path to FOLDER, where intermediate product will be stored
    filename: name of the output file inside folder_out
    row_offset, col_offset: position (in pixels) of the array's upper-left corner within
              dummy_ds. If None, the module-level row_start / col_start are used, which
              keeps existing four-argument calls working.
    '''
    if row_offset is None:
        row_offset = row_start
    if col_offset is None:
        col_offset = col_start

    if not folder_out.endswith('/'):
        folder_out = folder_out + '/'

    # Size the raster from the array that is written, not from an unrelated global array.
    n_rows, n_cols = intermediate_aray.shape[0], intermediate_aray.shape[1]
    out_ds = gdal.GetDriverByName('GTiff').Create(f'{folder_out}{filename}',
                                                n_cols, n_rows, 1, gdal.GDT_Int32)
    if out_ds is None:
        # Create returns None (instead of raising) when the file cannot be created
        raise RuntimeError(f'GDAL could not create {folder_out}{filename}')

    # Shift the origin of the geotransform to the upper-left corner of this chip.
    # Only pixel width/height are used, i.e. rotation terms are ignored.
    geotf = list(dummy_ds.GetGeoTransform())
    geotf[0] = geotf[0] + geotf[1] * col_offset
    geotf[3] = geotf[3] + geotf[5] * row_offset
    out_ds.SetGeoTransform(tuple(geotf))
    out_ds.SetProjection(dummy_ds.GetProjection())

    out_ds.GetRasterBand(1).WriteArray(intermediate_aray)

    # dropping the last reference closes the dataset and writes it to disk
    del out_ds

 intersecting_fields



4294967295

In [None]:
# The reference tile taken from the job is used directly as the instance raster;
# the re-labelling below is disabled.
# binary_true = extent_true > 0
# instances_true = measure.label(binary_true, background=0, connectivity=1)
# NOTE(review): this rebinds instances_true, which the tiling cell slices per tile -
# re-running that cell afterwards would slice this single tile instead. Consider a distinct name.
instances_true = extent_true

# all label values present in this tile; the loop over true fields runs over these
field_values = np.unique(instances_true)

# result containers, one independent empty list per name
(best_IoUs, field_IDs, field_sizes,
 centroid_rows, centroid_cols, centroid_IoUS,
 intersect_rows, intersect_cols,
 temp_rows, temp_cols) = ([] for _ in range(10))

In [20]:
# for field_value in field_values:
#     if field_value == 0:
#         continue # move on to next value

# Loop body executed for one field only: the second-smallest label value in the tile.
field_value = field_values[1]
this_field = instances_true == field_value
# # check if field is close to border and throw away if too close
# if TooCloseToBorder(this_field, border_limit):
#     continue

# centroid = mean (row, col) of all pixels of the field, truncated to integer indices
field_pixel_coords = np.column_stack(np.where(this_field))
this_field_centroid = np.mean(field_pixel_coords, axis=0).astype(int)

# fill lists with info
centroid_rows.append(this_field_centroid[0])
centroid_cols.append(this_field_centroid[1])
field_IDs.append(field_value)
field_sizes.append(np.sum(this_field))

# predicted labels inside the true field; outside the field the product is 0
intersecting_fields = this_field * instances_pred
intersect_values = np.unique(intersecting_fields)

# predicted label under the centroid (does not change inside the loop)
label_at_centroid = instances_pred[this_field_centroid[0], this_field_centroid[1]]

# compute IoU for each intersecting field
field_IoUs = []
center_IoU = 0
for intersect_value in intersect_values:
    if intersect_value == 0:
        continue # move on to next value

    pred_field = instances_pred == intersect_value
    # pixel positions of the predicted field, shifted to full-raster coordinates
    r, c = np.nonzero(pred_field)
    temp_rows.append(r + row_start)
    temp_cols.append(c + col_start)
    intersection = np.logical_and(this_field, pred_field)
    union = np.logical_or(this_field, pred_field)
    IoU = np.sum(intersection) / np.sum(union)
    field_IoUs.append(IoU)
    # check for centroid condition
    if label_at_centroid == intersect_value:
        center_IoU = IoU

# # take maximum IoU - this is the IoU for this true field
# if len(field_IoUs) != 0:
#     best_IoUs.append(np.max(field_IoUs))
#     # fill centroid list
#     centroid_IoUS.append(center_IoU)
#     max_index = np.argmax(field_IoUs)
#     intersect_rows.append(temp_rows[max_index])
#     intersect_cols.append(temp_cols[max_index])
#     temp_rows = []
#     temp_cols = []
# else:
#     best_IoUs.append(0)
#     # fill centroid list
#     centroid_IoUS.append(0)
#     intersect_rows.append(0)
#     intersect_cols.append(0)
#     temp_rows = []
#     temp_cols = []

In [22]:
r

array([1760, 1760, 1760, ..., 1910, 1911, 1911])

In [14]:
intersecting_fields.shape

(2690, 2997)