In [1]:
import os
from concurrent.futures import ProcessPoolExecutor
from itertools import repeat
from tqdm import tqdm
import warnings
import multiprocessing
from glob import glob
from utils import reclassify_month, dilate_image, preprocess_s1, preprocess_s2, stratify_data, calcuate_mean_std

warnings.simplefilter("ignore")

## Pre-processing 

* *Sentinel-1* and *Sentinel-2* imagery were preprocessed into 6 cloud-free median composites to reduce data dimensionality while preserving the maximum amount of information. 
* I tested multiple median image compositing strategies and incorporation of vegetation indices for Sentinel-2 and VV/VH ratio for Sentinel-1 imagery.

### Image compositing strategies:
2S (two seasons): 
* Sentinel-2 (20 channels): cloud-masked median for (May, Jun, Jul, Aug, Sep, Oct) and (Nov, Dec, Jan, Feb, Mar, Apr)
* Sentinel-1 (8 channels): median for (May, Jun, Jul, Aug, Sep, Oct) and (Nov, Dec, Jan, Feb, Mar, Apr)

2SI (two seasons + indices):
* Sentinel-2 (38 channels): cloud-masked median for (May, Jun, Jul, Aug, Sep, Oct) and (Nov, Dec, Jan, Feb, Mar, Apr) + 9 vegetation indices per season
* Sentinel-1 (12 channels): median for (May, Jun, Jul, Aug, Sep, Oct) and (Nov, Dec, Jan, Feb, Mar, Apr) + 2 VV/VH ratios per season

3S (three seasons):
* Sentinel-2 (30 channels): cloud-masked median for (Sep, Oct, Nov, Dec), (Jan, Feb, Mar, Apr), (May, Jun, Jul, Aug)
* Sentinel-1 (12 channels): median for (Sep, Oct, Nov, Dec), (Jan, Feb, Mar, Apr), (May, Jun, Jul, Aug)

4S (four seasons):
* Sentinel-2 (40 channels): cloud-masked median for (Sep, Oct, Nov), (Dec, Jan, Feb), (Mar, Apr, May), and (Jun, Jul, Aug)
* Sentinel-1 (16 channels): median for (Sep, Oct, Nov), (Dec, Jan, Feb), (Mar, Apr, May), and (Jun, Jul, Aug)

4SI (four seasons + indices):
* Sentinel-2 (48 channels): cloud-masked median for (Sep, Oct, Nov), (Dec, Jan, Feb), (Mar, Apr, May), and (Jun, Jul, Aug) + 2 vegetation indices per season 
* Sentinel-1 (24 channels): median for (Sep, Oct, Nov), (Dec, Jan, Feb), (Mar, Apr, May), and (Jun, Jul, Aug) + 1 VV/VH ratio per season

6S (six seasons):
* Sentinel-2 (60 channels): cloud-masked median for (Sep, Oct), (Nov, Dec), (Jan, Feb), (Mar, Apr), (May, Jun), (Jul, Aug)
* Sentinel-1 (24 channels): median for (Sep, Oct), (Nov, Dec), (Jan, Feb), (Mar, Apr), (May, Jun), (Jul, Aug)

In [2]:
root_dir = os.getcwd() # Change to the root folder where you downloaded raw data


In [3]:

train_img_dir = f"{root_dir}/train_features"
test_img_dir = f"{root_dir}/test_features"

CORES = multiprocessing.cpu_count() // 2

S1_S2_TRAIN_TIFs = glob(f"{train_img_dir}/*.tif")
uIDs_train = sorted(set([os.path.basename(name).split('_')[0] for name in S1_S2_TRAIN_TIFs]))

S1_S2_TEST_TIFs = glob(f"{test_img_dir}/*.tif")
uIDs_test = sorted(set([os.path.basename(name).split('_')[0] for name in S1_S2_TEST_TIFs]))

Here we simply iterate through different compositing strategies to generate training data using <code>multiprocessing</code>

In [4]:
if __name__ == '__main__':


    for SUFFIX in  ['2S', '2SI', '3S', '4S', '4SI', '6S']:

        train_img_dir_s1 = f"{root_dir}/train_features_s1_{SUFFIX}"
        if not os.path.exists(train_img_dir_s1):
            os.mkdir(train_img_dir_s1)
        with ProcessPoolExecutor(CORES) as pool:
            print(f'Pre-processing {train_img_dir_s1} data')
            result = list(tqdm(pool.map(preprocess_s1, uIDs_train, repeat(train_img_dir), repeat(train_img_dir_s1), 
                                        repeat(SUFFIX)), total=len(uIDs_train)))
        try:
            result
        except Exception:
            raise Exception(f"preprocess_s1() on {train_img_dir_s1} failed")
       

        train_img_dir_s2 = f"{root_dir}/train_features_s2_{SUFFIX}"
        if not os.path.exists(train_img_dir_s2):
            os.mkdir(train_img_dir_s2)
        with ProcessPoolExecutor(CORES) as pool:
            print(f'Pre-processing {train_img_dir_s2} data')
            result = list(tqdm(pool.map(preprocess_s2, uIDs_train, repeat(train_img_dir), repeat(train_img_dir_s2), 
                                        repeat(SUFFIX)), total=len(uIDs_train)))
        try:
            result
        except Exception:
            raise Exception(f"preprocess_s2() on {train_img_dir_s2} failed")
        
        
        test_img_dir_s1 = f"{root_dir}/test_features_s1_{SUFFIX}"
        if not os.path.exists(test_img_dir_s1):
            os.mkdir(test_img_dir_s1)
        with ProcessPoolExecutor(CORES) as pool:
            print(f'Pre-processing {test_img_dir_s1} data')
            result = list(tqdm(pool.map(preprocess_s1, uIDs_test, repeat(test_img_dir), repeat(test_img_dir_s1), 
                                        repeat(SUFFIX)), total=len(uIDs_test)))
        try:
            result
        except Exception:
            raise Exception(f"preprocess_s1() on {test_img_dir_s1} failed")
        
        
        test_img_dir_s2 = f"{root_dir}/test_features_s2_{SUFFIX}"
        if not os.path.exists(test_img_dir_s2):
            os.mkdir(test_img_dir_s2)
        with ProcessPoolExecutor(CORES) as pool:
            print(f'Pre-processing {test_img_dir_s2} data')
            result = list(tqdm(pool.map(preprocess_s2, uIDs_test, repeat(test_img_dir), repeat(test_img_dir_s2), 
                                        repeat(SUFFIX)), total=len(uIDs_test)))
        try:
            result
        except Exception:
            raise Exception(f"preprocess_s2() on {test_img_dir_s2} failed")


Pre-processing /home/ubuntu/the-biomassters/train_features_s1_2S data


0it [00:00, ?it/s]


Pre-processing /home/ubuntu/the-biomassters/train_features_s2_2S data


0it [00:00, ?it/s]


Pre-processing /home/ubuntu/the-biomassters/test_features_s1_2S data


0it [00:00, ?it/s]


Pre-processing /home/ubuntu/the-biomassters/test_features_s2_2S data


0it [00:00, ?it/s]


Pre-processing /home/ubuntu/the-biomassters/train_features_s1_2SI data


0it [00:00, ?it/s]


Pre-processing /home/ubuntu/the-biomassters/train_features_s2_2SI data


0it [00:00, ?it/s]


Pre-processing /home/ubuntu/the-biomassters/test_features_s1_2SI data


0it [00:00, ?it/s]


Pre-processing /home/ubuntu/the-biomassters/test_features_s2_2SI data


0it [00:00, ?it/s]


Pre-processing /home/ubuntu/the-biomassters/train_features_s1_3S data


0it [00:00, ?it/s]


Pre-processing /home/ubuntu/the-biomassters/train_features_s2_3S data


0it [00:00, ?it/s]


Pre-processing /home/ubuntu/the-biomassters/test_features_s1_3S data


0it [00:00, ?it/s]


Pre-processing /home/ubuntu/the-biomassters/test_features_s2_3S data


0it [00:00, ?it/s]


Pre-processing /home/ubuntu/the-biomassters/train_features_s1_4S data


0it [00:00, ?it/s]


Pre-processing /home/ubuntu/the-biomassters/train_features_s2_4S data


0it [00:00, ?it/s]


Pre-processing /home/ubuntu/the-biomassters/test_features_s1_4S data


0it [00:00, ?it/s]


Pre-processing /home/ubuntu/the-biomassters/test_features_s2_4S data


0it [00:00, ?it/s]


Pre-processing /home/ubuntu/the-biomassters/train_features_s1_4SI data


0it [00:00, ?it/s]


Pre-processing /home/ubuntu/the-biomassters/train_features_s2_4SI data


0it [00:00, ?it/s]


Pre-processing /home/ubuntu/the-biomassters/test_features_s1_4SI data


0it [00:00, ?it/s]


Pre-processing /home/ubuntu/the-biomassters/test_features_s2_4SI data


0it [00:00, ?it/s]


Pre-processing /home/ubuntu/the-biomassters/train_features_s1_6S data


0it [00:00, ?it/s]


Pre-processing /home/ubuntu/the-biomassters/train_features_s2_6S data


0it [00:00, ?it/s]


Pre-processing /home/ubuntu/the-biomassters/test_features_s1_6S data


0it [00:00, ?it/s]


Pre-processing /home/ubuntu/the-biomassters/test_features_s2_6S data


0it [00:00, ?it/s]
