# Document for the retrieval of the correct Interquartile range for the dataset

In this document we provide the code to identify the correct interquartile range for both S1 SAR and DEM images downloaded by the dataset.   
Since the values can be calculated once for all, the codde has been saved in a notebook format, so to have an easy retrieval of the charts and the results.

In [1]:
import logging
from pathlib import Path

import numpy as np
from plotille import histogram
from tqdm import tqdm
from glob import glob
import rasterio
from rasterio.io import DatasetReader
from rasterio.windows import Window

In [2]:
def imread(path: Path, channels_first: bool = True) -> np.ndarray:
    """Wraps rasterio open functionality to read the numpy array and exit the context.

    Args:
        path (Path): path to the geoTIFF image
        channels_first (bool, optional): whether to return it channels first or not. Defaults to True.

    Returns:
        np.ndarray: image array
    """
    with rasterio.open(str(path), mode="r", driver="GTiff") as src:
        image = src.read()
    return image if channels_first else image.transpose(1, 2, 0)

def getStats(data: np.array) -> (float, float):
    print(histogram(data))
    return np.percentile(data, [25, 75])

def find_IQR(dataset_path: str):
    # get the images from the folder
    sar_files = glob(dataset_path + '/train/sar/*.tif')
    dem_files = glob(dataset_path + '/train/dem/*.tif')
    mask_files = glob(dataset_path + '/train/mask/*.tif')
    
    # Take only a sample 
    vv_list = np.zeros(shape=(1))
    vh_list = np.zeros(shape=(1))
    dem_list = np.zeros(shape=(1))
    
    assert len(sar_files) == len(dem_files), f'Number of files not matching, SAR: {len(sar_files)}, DEM: {len(dem_files)}'
    for i, sar_path in enumerate(tqdm(sar_files)):
        if(i % 100 == 0):
            sar = imread(Path(sar_path))
            dem = imread(Path(dem_files[i]))
            mask = imread(Path(mask_files[i]))

            valid = mask.squeeze(0) != 255
            dem = dem[:, valid]
            sar = sar[:, valid]

            vv_list = np.concatenate((vv_list, sar[0]), axis=0)
            vh_list = np.concatenate((vh_list, sar[1]), axis=0)
            dem_list = np.concatenate((dem_list, dem[0]), axis=0)
    
    print('Histogram VV:')
    print('0,1,2,5,50,95,98,99,100:')
    print(*getStats(vv_list))
    print('Histogram VH:')
    print('0,1,2,5,50,95,98,99,100:')
    print(*getStats(vh_list))
    print('Histogram DEM:')
    print('0,1,2,5,50,95,98,99,100:')
    print(*getStats(dem_list))
    return

In [3]:
find_IQR('/mnt/userdata/montello_data/shub/imgs_zoom11/ready-to-train')

100%|██████████| 6181/6181 [00:08<00:00, 737.28it/s]


Histogram VV:
0,1,2,5,50,95,98,99,100:
 (Counts)  ^
4063237.20 |
3961656.27 | ⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀
3860075.34 | ⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀
3758494.41 | ⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀
3656913.48 | ⠀⠀⠀⠀⠀⠀⢠⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀
3555332.55 | ⠀⠀⠀⠀⠀⠀⢸⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀
3453751.62 | ⠀⠀⠀⠀⠀⠀⢸⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀
3352170.69 | ⠀⠀⠀⠀⠀⠀⢸⡆⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀
3250589.76 | ⠀⠀⠀⠀⠀⠀⢸⡇⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀
3149008.83 | ⠀⠀⠀⠀⠀⠀⢸⡇⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀
3047427.90 | ⠀⠀⠀⠀⠀⠀⢸⡇⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀

In [11]:
import numpy as np

x = np.random.randn(4,3,5)
x

array([[[-0.4344677 , -1.095227  ,  0.42567934,  0.15096658,
          0.87492166],
        [-1.19999112,  0.81430589, -1.44150181, -0.45749366,
          2.15861198],
        [ 1.03527601, -0.63985133,  0.2483588 , -1.55654529,
         -1.06102868]],

       [[-0.59763519,  1.34258182,  0.14423194,  0.2861469 ,
          0.15337352],
        [-0.35800013,  0.44029719,  1.02040698,  1.02243335,
          2.14194299],
        [-2.28513943, -2.71614581,  0.0207223 , -0.5661338 ,
          1.20403061]],

       [[-0.62458829, -0.58938719, -0.08661486,  0.68891359,
          0.99117743],
        [-0.89520722,  0.14355694,  0.73345299, -0.75128514,
         -1.08590449],
        [-0.63247045,  1.0052778 ,  0.64927364,  1.14170523,
          0.05160039]],

       [[-0.59506675,  0.08508217, -0.9017707 ,  0.23207057,
         -0.60004209],
        [-1.32030128,  0.84076993,  0.31683703,  1.0721066 ,
          0.94464328],
        [ 0.74566789, -0.07517316,  0.4579626 ,  0.55514486,
         

In [12]:
np.moveaxis(x, 1, 0)

array([[[-0.4344677 , -1.095227  ,  0.42567934,  0.15096658,
          0.87492166],
        [-0.59763519,  1.34258182,  0.14423194,  0.2861469 ,
          0.15337352],
        [-0.62458829, -0.58938719, -0.08661486,  0.68891359,
          0.99117743],
        [-0.59506675,  0.08508217, -0.9017707 ,  0.23207057,
         -0.60004209]],

       [[-1.19999112,  0.81430589, -1.44150181, -0.45749366,
          2.15861198],
        [-0.35800013,  0.44029719,  1.02040698,  1.02243335,
          2.14194299],
        [-0.89520722,  0.14355694,  0.73345299, -0.75128514,
         -1.08590449],
        [-1.32030128,  0.84076993,  0.31683703,  1.0721066 ,
          0.94464328]],

       [[ 1.03527601, -0.63985133,  0.2483588 , -1.55654529,
         -1.06102868],
        [-2.28513943, -2.71614581,  0.0207223 , -0.5661338 ,
          1.20403061],
        [-0.63247045,  1.0052778 ,  0.64927364,  1.14170523,
          0.05160039],
        [ 0.74566789, -0.07517316,  0.4579626 ,  0.55514486,
         -0