## Set up directories

In [178]:
import os

# Root of all Labelbox working data; the cloud-labelling files live one level below it.
labelbox_dir = "./data/labelbox"
labelbox_clouds_dir = f'{labelbox_dir}/clouds'

# One sub-directory per artefact type.
labelbox_clouds_rgb_dir = f'{labelbox_clouds_dir}/rgb'
labelbox_clouds_labels_dir = f'{labelbox_clouds_dir}/labels'
labelbox_clouds_source_dir = f'{labelbox_clouds_dir}/source'
labelbox_clouds_temp_dir = f'{labelbox_clouds_dir}/temp'

# Create the whole tree up front; exist_ok makes re-running this cell harmless.
for _clouds_subdir in (
    labelbox_clouds_labels_dir,
    labelbox_clouds_rgb_dir,
    labelbox_clouds_source_dir,
    labelbox_clouds_temp_dir,
):
    os.makedirs(_clouds_subdir, exist_ok=True)
del _clouds_subdir  # keep the notebook namespace free of the loop variable

# Path of the shelve database that records per-chip upload progress.
state_path = f'{labelbox_clouds_dir}/uploadState'


## Create chips from bounding box and dates

In [172]:
%reload_ext autoreload
%autoreload 2

import multiprocessing
import numpy as np
from patchify import patchify
import rasterio
import rasterio.transform

from common.constants import NODATA_BYTE, NODATA_FLOAT32, RES, S2_BANDS_TIFF_ORDER
import common.utilities.download as download
import common.utilities.imagery as imagery

# Edge length, in pixels, of the square chips cut from every downloaded scene.
PATCH_SIZE = 512


def create_chips_from_bbox(bbox, start_date, end_date):
    """Download scenes for a bounding box / date range and cut them into chips.

    For each scene returned by ``download.download_collection`` the stacked
    raster is split into non-overlapping ``PATCH_SIZE`` x ``PATCH_SIZE`` patches.
    Every patch is written twice:

    * the full band stack as float32 into ``labelbox_clouds_source_dir``;
    * an 8-bit, 3-band rendering (bands 2, 1, 0 of the stack) as a COG into
      ``labelbox_clouds_rgb_dir``.

    Scenes narrower or shorter than ``PATCH_SIZE`` are skipped.

    Args:
        bbox: sequence of numeric coordinates forwarded to the download helpers
            and used (rounded to 2 decimals) in the chip file names.
        start_date: start of the search window, forwarded to
            ``download.get_collection``.
        end_date: end of the search window, forwarded to
            ``download.get_collection``.

    Returns:
        None. The chips on disk are the result.
    """
    collection_path = f'{labelbox_clouds_temp_dir}/collection.json'
    collection = download.get_collection(
        start_date, end_date, bbox, collection_path,
        max_cloud_cover=100, max_tile_count=1, min_tile_count=1,
    )
    original_scenes = download.download_collection(
        collection, bbox, S2_BANDS_TIFF_ORDER, labelbox_clouds_temp_dir, RES
    )

    # Identical for every scene, so build it once: coordinates rounded to two
    # decimals, concatenated, with the decimal points removed.
    bbox_str = ''.join([str(round(coord, 2)) for coord in bbox]).replace('.', '')

    for scene in original_scenes:
        print(f'\tpatchifying... {scene}')

        stack_path = original_scenes[scene]["stack_original_tif_path"]
        # Open the raster once: check its size, then read pixels and transform.
        with rasterio.open(stack_path) as src:
            if src.width < PATCH_SIZE or src.height < PATCH_SIZE:
                print(f'\t\tskipping... {scene}')
                continue

            # rasterio reads (bands, rows, cols); patchify wants bands last.
            stack_data = src.read().transpose((1, 2, 0))
            transform = src.transform

        source_patches = patchify(
            stack_data, (PATCH_SIZE, PATCH_SIZE, stack_data.shape[2]), step=PATCH_SIZE
        )

        for irow in range(source_patches.shape[0]):
            for icol in range(source_patches.shape[1]):
                source_data = source_patches[irow, icol, 0, :, :, :]

                # Bands 2, 1, 0 of the stack become the RGB rendering
                # (presumably red, green, blue in S2_BANDS_TIFF_ORDER -- TODO confirm).
                rgb_data = source_data[:, :, [2, 1, 0]]
                # Clip BEFORE the uint8 cast: casting first lets values above 254
                # (e.g. very bright clouds) wrap around, and the later clamp can
                # no longer see them. Assumes the stack is scaled to roughly 0..1
                # and that 255 is reserved for NODATA_BYTE -- TODO confirm.
                rgb_data_norm = np.clip(rgb_data * 254, 0, 254).astype(np.uint8)

                rgb_path = f'{labelbox_clouds_rgb_dir}/{scene}_{bbox_str}_{irow}_{icol}.tif'
                source_path = f'{labelbox_clouds_source_dir}/{scene}_{bbox_str}_{irow}_{icol}.tif'

                # offset='ul' returns pixel corners. The default ('center') would
                # shift the chip bounds by half a pixel.
                x_min, y_min = rasterio.transform.xy(
                    transform, PATCH_SIZE * (irow + 1), PATCH_SIZE * icol, offset='ul'
                )
                x_max, y_max = rasterio.transform.xy(
                    transform, PATCH_SIZE * irow, PATCH_SIZE * (icol + 1), offset='ul'
                )
                chip_bbox = [x_min, y_min, x_max, y_max]

                imagery.write_array_to_tif(source_data, source_path, chip_bbox, dtype=np.float32, nodata=NODATA_FLOAT32)
                imagery.write_array_to_tif(rgb_data_norm, rgb_path, chip_bbox, dtype=np.uint8, nodata=NODATA_BYTE, is_cog=True)
                    

In [173]:
import common.constants as constants
import common.aws.s3 as s3_utils


def save_rgb_chip_to_s3(rgb_path):
    """Upload one RGB chip to the S3 data bucket and return the URL built for it.

    The object key is ``training/clouds/<file name>``, where the file name is
    whatever follows the last ``/`` in ``rgb_path``.

    Args:
        rgb_path: local path of the chip file to upload.

    Returns:
        The ``https://data.smartcarte.earth/<object key>`` URL string.
    """
    # Everything after the final '/' (the whole string when there is none).
    chip_file = rgb_path.rpartition('/')[2]
    s3_key = 'training/clouds/' + chip_file

    s3_utils.put_item(rgb_path, constants.S3_DATA_BUCKET, s3_key)
    return 'https://data.smartcarte.earth/' + s3_key


In [174]:
import datetime
from labelbox import Client, Dataset, DataRow
import glob
import os
import shelve


# Prefer the key from the SageMaker environment module; when that module (or the
# name inside it) cannot be imported, fall back to the LABELBOX_API_KEY
# environment variable.
try:
    from common.sagemaker_env import LABELBOX_API_KEY
except ImportError:
    # Only an import failure triggers the fallback. A bare ``except`` would also
    # swallow KeyboardInterrupt and unrelated errors.
    LABELBOX_API_KEY = os.environ['LABELBOX_API_KEY']

# Labelbox project that receives the cloud-labelling datasets.
CLOUD_PROJECT_ID = "cleamnf3q398707ug5s2z4rp6"


# Module-level client and project handle shared by the functions in this notebook.
client = Client(api_key=LABELBOX_API_KEY)
project = client.get_project(CLOUD_PROJECT_ID)


def create_labelbox_dataset(prefix=""):
    """Upload pending RGB chips to S3 and register them in a new Labelbox dataset.

    Every ``*[0-9].tif`` file in ``labelbox_clouds_rgb_dir`` is looked up in the
    shelve database at ``state_path``. Chips already flagged
    ``uploaded_to_labelbox`` are skipped. The rest are uploaded with
    ``save_rgb_chip_to_s3`` and collected into one payload. If the payload is
    non-empty, a dataset named ``"Clouds {prefix} {YYYYmmdd_HHMM}"`` is created
    and the rows are added to it. The per-chip state is written after the S3
    upload and again once the Labelbox task has finished.

    Args:
        prefix: text inserted into the dataset name (e.g. a region name).

    Raises:
        ValueError: if a dataset with the generated name already exists. The
            name only has minute resolution.
    """
    today = datetime.datetime.today().strftime('%Y%m%d_%H%M')
    clouds_dataset_name = f"Clouds {prefix} {today}"

    clouds_dataset = client.get_datasets(where=(Dataset.name==clouds_dataset_name)).get_one()
    if clouds_dataset is not None:
        raise ValueError("cloud dataset already exists; wait a minute")

    rgb_paths = glob.glob(f'{labelbox_clouds_rgb_dir}/*[0-9].tif')
    print(f'{len(rgb_paths)} total chips')

    with shelve.open(state_path) as upload_state:

        payload = []
        for rgb_path in rgb_paths:
            chip_id = rgb_path.split('/')[-1].replace('.tif', '')
            chip_state = upload_state.get(chip_id)

            if chip_state and chip_state.get('uploaded_to_labelbox', False):
                continue

            s3_href = save_rgb_chip_to_s3(rgb_path)

            payload.append({
                "chip_id": chip_id,
                "min_zoom": 10,
                "max_zoom": 14,
                "tile_layer_url": s3_href
            })

            # Record the S3 upload immediately so the href survives a failure
            # in the Labelbox step below.
            upload_state[chip_id] = {
                'rgb_cog_href': s3_href,
                'uploaded_to_s3': True,
                'uploaded_to_labelbox': False
            }

        if payload:
            print(f'{len(payload)} chips to add to Labelbox')

            clouds_dataset = client.create_dataset(name=clouds_dataset_name)
            datarow_payload = [{DataRow.row_data: row} for row in payload]
            task = clouds_dataset.create_data_rows(datarow_payload)
            # NOTE(review): the task outcome is not inspected, so chips are marked
            # as uploaded even if Labelbox rejected some rows -- confirm whether
            # that is acceptable.
            task.wait_till_done()

            for row in payload:
                # Use each row's own href. The variable left over from the upload
                # loop holds the href of the last chip processed, which would be
                # stored for every chip.
                upload_state[row["chip_id"]] = {
                    'rgb_cog_href': row["tile_layer_url"],
                    'uploaded_to_s3': True,
                    'uploaded_to_labelbox': True
                }
            

# Create new chips 🐿️🐿️🐿️

In [175]:

def create_new_chips(bbox, region_name, dates):
    """Create chips for every date window, then publish them as one dataset.

    Args:
        bbox: bounding box forwarded unchanged to ``create_chips_from_bbox``.
        region_name: passed to ``create_labelbox_dataset`` as the name prefix.
        dates: iterable of indexable items; element 0 is used as the start
            date and element 1 as the end date of each window.
    """
    for date_range in dates:
        create_chips_from_bbox(bbox, date_range[0], date_range[1])

    # One dataset per call, created only after all windows have been chipped.
    create_labelbox_dataset(prefix=region_name)
    

In [169]:
import datetime as dt

# One search window per sampled year, each running from the 1st of the given
# month to the 1st of the following month.
dates = [
    (dt.datetime(year, month, 1), dt.datetime(year, month + 1, 1))
    for year, month in ((2019, 1), (2020, 3), (2021, 6), (2022, 9))
]


## Boma National Park

In [179]:
import datetime as dt

# Bounding box for the Boma run; presumably [min_lon, min_lat, max_lon, max_lat]
# in degrees -- TODO confirm against download.get_collection.
bbox_boma = [33.884583, 6.180003, 34.083033, 6.344253]
# Chips every window in `dates` for this box, then creates a Labelbox dataset
# whose name is prefixed with "Boma".
create_new_chips(bbox_boma, "Boma", dates)

print('done')

./data/labelbox/clouds/temp/collection.json: {'WM': 1, 'XM': 1, 'XN': 1, 'WN': 1}
	downloading... S2A_36NWM_20190105_0_L2A
	downloading... S2A_36NXM_20190115_0_L2A
	downloading... S2A_36NXN_20190115_0_L2A
	downloading... S2A_36NWN_20190115_0_L2A
	patchifying... S2A_36NWM_20190105_0_L2A
	patchifying... S2A_36NXM_20190115_0_L2A
	patchifying... S2A_36NXN_20190115_0_L2A
	patchifying... S2A_36NWN_20190115_0_L2A
./data/labelbox/clouds/temp/collection.json: {'WN': 1, 'WM': 1, 'XN': 1, 'XM': 1}
	downloading... S2B_36NWN_20200325_0_L2A
	downloading... S2B_36NWM_20200325_0_L2A
	downloading... S2A_36NXN_20200310_0_L2A
	downloading... S2B_36NXM_20200325_0_L2A
	patchifying... S2B_36NWN_20200325_0_L2A
	patchifying... S2B_36NWM_20200325_0_L2A
	patchifying... S2A_36NXN_20200310_0_L2A
	patchifying... S2B_36NXM_20200325_0_L2A
./data/labelbox/clouds/temp/collection.json: {'WM': 1, 'WN': 1, 'XN': 1, 'XM': 1}
	downloading... S2B_36NWM_20210608_0_L2A
	downloading... S2B_36NWN_20210608_0_L2A


KeyboardInterrupt: 

## Virunga National Park (Gorilla Sector)

In [146]:
import datetime as dt

# Bounding box for the Virunga gorilla-sector run; presumably
# [min_lon, min_lat, max_lon, max_lat] in degrees -- TODO confirm against
# download.get_collection.
bbox_virunga_gorilla_sector = [ 29.397261, -1.464377, 29.497164, -1.392410]
# Chips every window in `dates` for this box, then creates a Labelbox dataset
# whose name is prefixed with "Virunga Gorilla Sector".
create_new_chips(bbox_virunga_gorilla_sector, "Virunga Gorilla Sector", dates)

print('done')

./data/labelbox/clouds/temp/collection.json: {'QU': 1}
	downloading... S2A_35MQU_20200123_0_L2A
	patchifying... S2A_35MQU_20200123_0_L2A
	34 total chips
	6 chips to add to Labelbox
done
