# Utility functionality for datasets

### References : TODO

### Imports

In [1]:
import os
import cv2
import numpy as np
import sys

## Functions

In [9]:
# Recursively search a directory, applying a given function to each file
def rec_dir(dir, rel_path, datatype, dir_out, f, fparams):
    current_dir = os.path.join(dir, rel_path)

    for fname in os.listdir(current_dir):
        fpath = os.path.join(current_dir, fname)

        if os.path.isfile(fpath):
            f(dir, rel_path, fpath, datatype, dir_out, fparams)
        else:
            rec_dir(dir, os.path.join(rel_path, fname), datatype, dir_out, f, fparams)

In [10]:
# Thresholding util function
def threshold_val(value, inclusive_upper, threshold):
    if inclusive_upper:
        if value >= threshold:
            return max
        else:
            return min
    else:
        if value > threshold:
            return max
        else:
            return min

# Check if a file is a binary change map
def isBinaryFile(dir, rel_path, fpath, dataset):
    if dataset == 'AirChange':
        dtype = os.path.basename(fpath).split('.')[0]
        return dtype == 'gt'
    else:
        # TODO adjust this to other datasets
        sys.exit('Invalid dataset selected')

In [24]:
# downsample image and threshold binary maps
def downsampleAndThreshold(dir, rel_path, fpath, datatype, dir_out, params):
    ftype = os.path.splitext(fpath)[-1].lower()

    # Create a resized file, if it is of the correct type
    if ftype == '.' + datatype:
        dir_file_out = os.path.join(dir_out, rel_path)
        if not os.path.exists(dir_file_out):
            os.makedirs(dir_file_out)
        
        image = cv2.imread(fpath)
        
        if isBinaryFile(dir, rel_path, fpath, params['dataset']):
            img_max = np.maximum.reduce(image, 2)
            img_min = np.minimum.reduce(image, 2)

            # Safety check
            if not np.all(img_max == img_min):
                sys.exit('Invalid binary result file')

            # 'Reduce' the image
            image = img_max

        # Resize the image
        size_1 = int(image.shape[0] * 1 / params['downsample_factor'])
        size_2 = int(image.shape[1] * 1 / params['downsample_factor'])
        resized = cv2.resize(image, dsize=(size_2, size_1), interpolation=params['interpolation_alg'])

        # Apply threshold
        if isBinaryFile(dir, rel_path, fpath, params['dataset']):
            with np.nditer(resized, op_flags=['readwrite']) as iterator:
                for value in iterator:
                    value[...] = threshold_val(value, params['inclusive_upper'], params['threshold'])

        # Write to file
        cv2.imwrite(os.path.join(dir_file_out, os.path.basename(fpath)), resized)

## Downsample and threshold

In [25]:
# ------ Settings ------
# => These variables should be set to the desired settings!

# Thresholding binary maps
min = 0     # Minimum value of a band in one pixel
max = 255   # Maximum value of a band in one pixel
threshold = max / 2     # Threshold for setting a pixel to be "true" (i.e., change)
inclusive_upper = True  # Whether a value equal to the threshold value should be set to "true" (i.e., change)

# Downsampling
interpolation_alg = cv2.INTER_AREA  # The interpolation algorithm for downsampling
downsample_factor = 8   # The inverse factor of downsampling (e.g., 2 results in 1/2 of the size)

# Dataset and directories
dir_in = '' # The directory of the dataset
dir_out = '' # The directory to save the modified dataset to
add_size_info = True # Whether to add the downsampling info to the directory name (form of "sample_facor_" + factor)
datatype = 'bmp' # The image datatype
dataset = 'AirChange' # The dataset name, selected from: AirChange

In [26]:
fparams = {
    'downsample_factor' : downsample_factor,
    'interpolation_alg' : interpolation_alg,
    'threshold' : threshold,
    'inclusive_upper': inclusive_upper,
    'add_size_info' : add_size_info,
    'dataset' : dataset
}

rec_dir(dir_in, '', datatype, dir_out, downsampleAndThreshold, fparams)