# Cloudbutton Geospatial Use Case: Sentinel-2 Satellite Image Processing

In [None]:
import sys
# Add the parent directory to the module search path.
# NOTE(review): presumably this is what makes the local `cloudbutton_geospatial`
# package (imported in the next cell) resolvable — confirm against the repo layout.
sys.path.append('../')

In [None]:
import fiona
import sentinelsat
import ipywidgets as widgets
import numpy as np
import os
import time
import lithops
import random
import shutil
import rasterio
import re
import tempfile
import zipfile
import subprocess
import glob
import multiprocessing as mp
import json
from rio_cogeo import cogeo

from collections import defaultdict
from datetime import date
from fiona.io import ZipMemoryFile
from matplotlib import pyplot as plt
from rasterio.io import MemoryFile
from zipfile import ZipFile
from ipyleaflet import Map, basemaps, basemap_to_tiles
from lithops import Storage

from cloudbutton_geospatial.utils import notebook as notebook_utils
from cloudbutton_geospatial.io_utils.ndvi import get_ndvi_params, ndvi_calculation, ndvi_tile_sentinel, get_subset_raster, lonlat_to_utm, get_poly_within
from cloudbutton_geospatial.io_utils.plot import tiff_overview, plot_map

# Point the CURL_CA_BUNDLE environment variable at a CA certificate bundle file.
# NOTE(review): this is the usual Debian/Ubuntu location — confirm it exists on the machine running the notebook.
os.environ['CURL_CA_BUNDLE'] = '/etc/ssl/certs/ca-certificates.crt'

Set the environment variables *SENTINEL_USERNAME* and *SENTINEL_PASSWORD* to match your Sentinel-2 credentials. You can register and access data for free at https://sentinel.esa.int/web/sentinel/sentinel-data-access/registration.

In [None]:
# Sentinel-2 credentials. They are taken from the environment when available so
# that secrets do not have to be written into (and committed with) the notebook.
# The '' fallback keeps the previous default when the variables are not set.
SENTINEL_USERNAME = os.environ.get('SENTINEL_USERNAME', '')
SENTINEL_PASSWORD = os.environ.get('SENTINEL_PASSWORD', '')

# Lithops storage backend and the bucket that receives the downloaded archives
# and the rasters derived from them.
STORAGE_BACKEND = 'aws_s3'
STORAGE_BUCKET = 'cloudbutton-geospatial-sentinel'

# Backend and runtime of the executor used for the download and
# atmospheric-correction jobs.
BATCH_BACKEND = 'aws_batch'
BATCH_RUNTIME = 'cloudbutton-geospatial-sentinel:01'

# Function-as-a-Service backend and runtime (the runtime is left empty here).
FAAS_BACKEND = 'aws_lambda'
FAAS_RUNTIME = ''

In [None]:
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

In [None]:
# Lithops storage client for the configured backend; used at the end of the
# notebook to read the selected NDVI raster back from the bucket.
cloud_storage = Storage(backend=STORAGE_BACKEND)

## Input parameters

Select the date interval in which tiles will be processed:

In [None]:
# Date-range pickers; `from_day.value` and `to_day.value` are read later as the
# date filter of the Sentinel-2 query.
from_day, to_day = notebook_utils.pick_date_range()

Select the tile's cloud percentage threshold:

In [None]:
# Cloud-cover picker; `percentage.value` is used later as the upper bound of the
# cloud-cover filter of the Sentinel-2 query.
percentage = notebook_utils.pick_percentage_slider()
# NOTE(review): this bare expression displays the selected start date, not the
# percentage — it looks like a leftover debugging line; confirm before removing.
from_day.value

Select the area that delimits the tiles you want to process (left click to mark a point on the map, right click to erase the current selection):

In [None]:
# Map helper from notebook_utils, created at zoom level 8.
# NOTE(review): presumably renders the interactive map used to draw the area of interest — confirm.
map_region = notebook_utils.MapRegion(zoom=8)

## Get Sentinel-2 metadata

In [None]:
# To use the region selected on the map instead of the hard-coded polygon below,
# uncomment the following line:
# locations = map_region.get_region()

# Tarragona data: hard-coded polygon. The first and last coordinate pairs are
# identical, which closes the ring.
# NOTE(review): pairs appear to be [longitude, latitude] (GeoJSON order) — confirm.
locations = [[1.5481363013595886, 41.16508628430497],
 [0.8177319989996914, 40.62111912603713],
 [0.6008074129604647, 40.60652433834119],
 [0.4552757286556909, 40.868742532626996],
 [0.3811369460853299, 41.03883697553436],
 [0.427816920296289, 41.247740935856484],
 [0.694167361382423, 41.33441592882952],
 [1.097811844265526, 41.39831645175795],
 [1.452030472101722, 41.365343372983396],
 [1.5481363013595886, 41.16508628430497]]


# debug: alternative polygon, kept commented out
#locations = [[-1.32110595703125, 37.57329031970199],
#   [-2.0681762695312504, 37.684227882053044],
#   [-1.636962890625, 38.24289903439589],
#   [-0.7745361328125, 38.12199840979802],
#   [-1.32110595703125, 37.57329031970199]]

# Echo the polygon that will be used for the query.
print(locations)

In [None]:
# Wrap the selected polygon in a GeoJSON FeatureCollection holding a single
# Polygon feature (the polygon has one ring: `locations`).
geo_json_area = dict(
    features=[
        dict(
            geometry=dict(coordinates=[locations], type="Polygon"),
            properties={},
            type="Feature",
        )
    ],
    type="FeatureCollection",
)

Using the selected parameters, get the identifiers of the selected tiles from Sentinel-2:

In [None]:
# Open a Sentinel API session and query the products that match the selected
# area, date range and cloud-cover bound.
sentinel_api = sentinelsat.SentinelAPI(
    user=SENTINEL_USERNAME,
    password=SENTINEL_PASSWORD,
    show_progressbars=False,
)
footprint = sentinelsat.geojson_to_wkt(geo_json_area)
# NOTE(review): two-element tuples are passed for both `producttype` and
# `cloudcoverpercentage`; confirm the product-type filter selects what is intended.
products = sentinel_api.query(
    footprint,
    date=(from_day.value, to_day.value),
    platformname='Sentinel-2',
    producttype=('S2MS2Ap', 'S2MSI1C'),
    cloudcoverpercentage=(0, percentage.value),
)

# Parallel lists, in the iteration order of `products`.
tiles_ids = [meta['level1cpdiidentifier'] for meta in products.values()]
products_ids = [meta['identifier'] for meta in products.values()]

# GeoJSON view of the same query result; its "features" feed the correction step.
geojson_products = sentinel_api.to_geojson(products)

In [None]:
# Report how many products the query returned.
print(f"Number of tiles: {len(geojson_products['features'])}")

In [None]:
#from sentinelhub import AwsProductRequest
#from sentinelhub import AwsTile

#product_id = 'S2A_MSIL1C_20210902T105031_N0301_R051_T31TBE_20210902T130744'
#data_folder = './AwsData'
#request = AwsProductRequest(product_id=product_id, data_folder=data_folder, safe_format=True)

# Uncomment the following line to download the data:
#data_list = request.get_data(save_data=True)

## Prepare dataset

In [None]:
# Display the keys of the `products` mapping returned by the query.
products.keys()

In [None]:
def download_dataset(product_id, product_meta, storage):
    """
    Download one Sentinel-2 product and upload its zip archive to cloud storage.

    The archive is stored in ``STORAGE_BUCKET`` under
    ``sentinel/<identifier>.zip``. If that key is already present, nothing is
    downloaded.

    The Sentinel credentials are read from the ``SENTINEL_USERNAME`` and
    ``SENTINEL_PASSWORD`` environment variables of the process running this
    function.

    :param product_id: key of the product in the Sentinel query result
    :param product_meta: metadata of that product; must contain ``'identifier'``
    :param storage: storage client providing ``list_keys`` and ``put_object``
    :return: the product identifier
    :raises KeyError: if ``'identifier'`` or a credential variable is missing
    """
    storage_prefix = 'sentinel'

    # Use the metadata that was passed in rather than looking the product up in
    # the notebook-level `products` dict: the function then depends only on its
    # arguments and does not drag the whole query result along to the worker.
    tile_id = product_meta['identifier']
    product_storage_key = os.path.join(storage_prefix, tile_id + '.zip')

    # Skip the download when the archive is already in the bucket.
    tiles_in_storage = storage.list_keys(bucket=STORAGE_BUCKET, prefix=storage_prefix)
    if product_storage_key in tiles_in_storage:
        return tile_id

    sentinel_api = sentinelsat.SentinelAPI(user=os.environ["SENTINEL_USERNAME"],
                                           password=os.environ["SENTINEL_PASSWORD"],
                                           show_progressbars=False)

    # The archive is expected at <tmpdir>/<identifier>.zip after the download.
    tmpdir = tempfile.gettempdir()
    sentinel_api.download_all({product_id: product_meta}, directory_path=tmpdir)

    print(f"Tile name: {tile_id}")
    product_local_file = os.path.join(tmpdir, tile_id + '.zip')
    print(f"Uploading tile {tile_id} to Storage")
    with open(product_local_file, 'rb') as zip_file:
        storage.put_object(bucket=STORAGE_BUCKET, key=product_storage_key, body=zip_file)

    return tile_id

In [None]:
# One (product_id, product_meta) tuple per product returned by the query.
iterdata = list(products.items())

In [None]:
# Credentials handed to the download calls as extra environment variables.
extra_env = dict(
    SENTINEL_USERNAME=SENTINEL_USERNAME,
    SENTINEL_PASSWORD=SENTINEL_PASSWORD,
)

# Executor used for the download and atmospheric-correction jobs.
fexec = lithops.FunctionExecutor(
    backend=BATCH_BACKEND,
    storage=STORAGE_BACKEND,
    runtime=BATCH_RUNTIME,
)

In [None]:
# Needs to be sequential
dw_data_fs = []
for product in iterdata:
    fut = fexec.call_async(download_dataset, product, extra_env=extra_env)
    fexec.wait(fs=fut)
    dw_data_fs.append(fut)

## Atmospheric correction using Serverful Lithops

Here we will download tile images from Sentinel-2 using the previously selected configuration and apply atmospheric correction.

This process is not parallelizable and lasts for over 25 minutes, so it is not suited for serverless functions. We will use Lithops Standalone instead, which uses serverful instances that have no time limits.

In [None]:
def jp2_to_cog(band_src_path):
    """
    Convert a Sentinel-2 band (.jp2) into a tiled, DEFLATE-compressed GeoTIFF.

    The output file is named after the source file (the text between the last
    '/' and the last '.' of ``band_src_path``) with a ``.tif`` extension. It is
    a bare file name, so it is written relative to the current working directory.

    :param band_src_path: path of the source band file
    :return: path of the generated .tif file
    """
    name_start = band_src_path.rfind('/') + 1
    name_end = band_src_path.rfind('.')
    cog_path = band_src_path[name_start:name_end] + '.tif'

    # Options forwarded to cog_translate as its `config` argument.
    gdal_config = {'NUM_THREADS': 100, 'GDAL_TIFF_OVR_BLOCKSIZE': 128}

    # Creation profile of the output: 256x256 tiles, pixel interleaving.
    output_profile = dict(
        driver="GTiff",
        interleave="pixel",
        tiled=True,
        blockxsize=256,
        blockysize=256,
        compress="DEFLATE",
    )

    cogeo.cog_translate(
        band_src_path,
        cog_path,
        output_profile,
        nodata=0,
        in_memory=False,
        config=gdal_config,
        quiet=True,
    )
    return cog_path

In [None]:
def perform_atmospheric_correction(product_geojson, storage):
    """
    Atmospherically correct one Sentinel-2 product and upload its B04/B08 bands
    as a single two-band GeoTIFF.

    Steps:
      1. Copy ``sentinel/<identifier>.zip`` from ``STORAGE_BUCKET`` to the local
         temporary directory (skipped if the file is already there) and extract it.
      2. Run ``L2A_Process --resolution 10`` on the extracted product, unless
         corrected 10 m bands are already present. The command is retried up to
         three times on a non-zero exit status.
      3. Convert bands B04 and B08 to GeoTIFF with ``jp2_to_cog`` and write them
         as bands 1 and 2 of ``<name>_COMBINED.tif``.
      4. Upload the combined GeoTIFF and a ``.meta.json`` file holding the
         product properties.

    :param product_geojson: GeoJSON feature of the product; its ``'properties'``
        must contain ``'filename'`` and ``'identifier'``
    :param storage: storage client providing ``get_object`` and ``put_object``
    :return: storage key of the combined GeoTIFF
    :raises subprocess.CalledProcessError: if ``L2A_Process`` keeps failing
    :raises FileNotFoundError: if a corrected band file cannot be found
    """
    product = product_geojson['properties']
    # Fixed-position fields of the product file name. For example
    # 'S2A_MSIL1C_20210902T105031_N0301_R051_T31TBE_20210902T130744.SAFE'
    # yields date '20210902' and tile '31TBE'.
    tile = product['filename'][39:44]
    date = product['filename'][11:19]

    tmpdir = tempfile.gettempdir()

    product_local_file = os.path.join(tmpdir, product['identifier'] + '.zip')
    product_storage_key = os.path.join('sentinel', product['identifier'] + '.zip')

    # Reuse a previously copied archive if one is already on local disk.
    if not os.path.isfile(product_local_file):
        print(f'Copying {product_storage_key} to local disk')
        obj_stream = storage.get_object(bucket=STORAGE_BUCKET, key=product_storage_key, stream=True)
        with open(product_local_file, 'wb') as zip_out:
            shutil.copyfileobj(obj_stream, zip_out)
        print(f'Finished copying {product_storage_key} to local disk')

    # Extract the archive; the context manager closes it even if extraction fails.
    print('Extracting zip file')
    with zipfile.ZipFile(product_local_file) as zip_ref:
        zip_ref.extractall(tmpdir)

    # Atmospheric correction
    print('Starting atmospheric correction')
    sentinel_product_dir = os.path.join(tmpdir, product['filename'])
    # Anchor the lookup at tmpdir (where the archive was extracted) instead of
    # the current working directory, so existing corrected bands are detected.
    corrected_pattern = os.path.join(
        tmpdir, f"*2A_{date}*_T{tile}_*.SAFE/GRANULE/*/IMG_DATA/R10m/*B0[48]*.jp2")

    if not glob.glob(corrected_pattern):
        print(f'Doing the atmospheric correction for {sentinel_product_dir}')
        # Argument list without a shell: the product path is passed verbatim,
        # whatever characters it contains.
        cmd = ['L2A_Process', '--resolution', '10', sentinel_product_dir]
        max_retries = 3
        for attempt in range(max_retries + 1):
            try:
                val = subprocess.check_output(cmd, stderr=subprocess.STDOUT, universal_newlines=True)
                print(f'Atmospheric correction finished {val}')
                break
            except subprocess.CalledProcessError as e:
                if attempt == max_retries:
                    print(e.returncode)
                    print(e.output)
                    raise
                time.sleep(2)

    # Locate both corrected bands before converting anything, and fail with an
    # explicit message when one is missing.
    band_sources = []
    for band in ('B04', 'B08'):
        band_pattern = os.path.join(
            tmpdir, '*L2A_{}*_T{}*.SAFE/GRANULE/*/IMG_DATA/R10m/*{}*'.format(date, tile, band))
        matches = glob.glob(band_pattern)
        if not matches:
            raise FileNotFoundError(f'No corrected {band} band found for pattern {band_pattern}')
        band_sources.append(matches[-1])

    # Translate bands in .jp2 to GeoTiff format; jp2_to_cog returns the .tif path.
    band_files = [jp2_to_cog(band_src) for band_src in band_sources]
    print(band_files)

    # Merge both bands into a single geotiff (B04 -> band 1, B08 -> band 2).
    # The first 22 characters of the band file name are used as the common prefix.
    combined_geotiff_key = band_files[0][0:22] + '_COMBINED.tif'
    with rasterio.open(band_files[0]) as src:
        profile = src.profile
        profile.update(count=len(band_files))

    with rasterio.open(combined_geotiff_key, 'w', **profile) as dst:
        for band_index, band_file in enumerate(band_files, start=1):
            with rasterio.open(band_file) as src:
                dst.write(src.read(1), band_index)

    # Upload generated files to Cloud Storage. The keyword names match the other
    # storage calls in this notebook (bucket / key / body).
    with open(combined_geotiff_key, 'rb') as combined_geotiff_f:
        storage.put_object(bucket=STORAGE_BUCKET, key=combined_geotiff_key, body=combined_geotiff_f)
    product_meta_key = combined_geotiff_key + '.meta.json'
    storage.put_object(bucket=STORAGE_BUCKET, key=product_meta_key, body=json.dumps(product))

    return combined_geotiff_key

In [None]:
# print(geojson_products["features"])

In [None]:
# fexec = lithops.FunctionExecutor(backend='aws_batch', storage='aws_s3', runtime=BATCH_RUNTIME)

# Run the atmospheric correction once per product feature.
correction_fs = fexec.map(perform_atmospheric_correction, geojson_products["features"])

# Ask for the results of this map only: `fexec` was also used for the download
# calls above, and an unscoped get_result() is not restricted to this job.
combined_keys = fexec.get_result(fs=correction_fs)

In [None]:
# Display the values returned by the atmospheric-correction jobs
# (perform_atmospheric_correction returns the storage key of each combined GeoTIFF).
combined_keys

## NDVI Computation using Serverless Lithops

Now we will calculate the NDVI index of the tiles that have been downloaded and pre-processed before.

This process can be executed in parallel (for every tile) and in serverless functions.

In [None]:
# debug
# combined_keys = ['T30SXG_20201229T110451_COMBINED.tif']

In [None]:
def ndvi(combined_key, storage):
    """
    Compute the NDVI raster of a combined two-band GeoTIFF and upload it.

    Band 1 of the input is read as red and band 2 as near-infrared. For each
    pixel, NDVI = (nir - red) / (nir + red), and 0 where ``nir + red`` is 0.
    The result is written as a single-band float32 raster, block by block, and
    uploaded to ``STORAGE_BUCKET`` under ``<prefix>_NDVI.tif``, where ``<prefix>``
    is ``combined_key`` up to its last underscore.

    :param combined_key: storage key of the combined GeoTIFF
    :param storage: storage client providing ``get_object`` and ``put_object``
    :return: storage key of the NDVI raster
    """
    dat = storage.get_object(bucket=STORAGE_BUCKET, key=combined_key, stream=True)

    prefix = combined_key.rsplit('_', 1)[0]
    output_key = prefix + '_NDVI.tif'

    # A private scratch directory avoids clashes between invocations that share
    # a host (a fixed '<tmp>/out.tif' would be overwritten) and is removed when
    # the block exits.
    with tempfile.TemporaryDirectory() as workdir:
        out = os.path.join(workdir, 'out.tif')

        with rasterio.open(dat) as src:
            profile = src.profile
            profile.update(dtype='float32', count=1)
            with rasterio.open(out, 'w', **profile) as dst:
                for _, window in src.block_windows(1):
                    red = src.read(1, window=window).astype('float32')
                    nir = src.read(2, window=window).astype('float32')
                    band_sum = nir + red
                    # Divide only where the denominator is non-zero; the other
                    # pixels keep the 0 they were initialised with. This gives
                    # the same values as a where() over an eager division
                    # without emitting divide-by-zero warnings.
                    ndvi_block = np.divide(nir - red, band_sum,
                                           out=np.zeros_like(band_sum),
                                           where=band_sum != 0)
                    dst.write(ndvi_block, 1, window=window)

        with open(out, 'rb') as output_f:
            storage.put_object(bucket=STORAGE_BUCKET, key=output_key, body=output_f)

    return output_key

In [None]:
# Serverless executor for the NDVI step. The FaaS settings from the
# configuration cell are used here: COMPUTE_BACKEND and RUNTIME are not defined
# anywhere in this notebook. An empty FAAS_RUNTIME is passed as None.
fexec = lithops.FunctionExecutor(backend=FAAS_BACKEND, storage=STORAGE_BACKEND,
                                 runtime=FAAS_RUNTIME or None, runtime_memory=2048, log_level='DEBUG')
# One invocation per combined GeoTIFF key.
fexec.map(ndvi, combined_keys, timeout=60)
ndvi_keys = fexec.get_result()

In [None]:
# Display the values returned by the NDVI jobs (ndvi returns the storage key of each NDVI raster).
ndvi_keys

In [None]:
# debug
# ndvi_keys = ['T30SXG_20201229T110451_NDVI.tif']

In [None]:
# Picker over the NDVI keys; `tile_select.value` is read in the next cell as the key to plot.
tile_select = notebook_utils.pick_tile(ndvi_keys)

In [None]:
# Fetch the selected NDVI raster from storage and draw its first band.
obj = cloud_storage.get_object(bucket=STORAGE_BUCKET, key=tile_select.value, stream=True)

fig, axs = plt.subplots(figsize=(20, 15))

with rasterio.open(obj) as src:
    # Alternative kept from the original: read a single random block instead of
    # the whole band.
    #     ij, window = random.choice(list(src.block_windows()))
    #     arr = src.read(1, window=window)
    arr = src.read(1)
    axs.imshow(arr)