# OTSU Big Data Cloud notebook

In [None]:
import gcsfs
import rioxarray
import rasterio
import os
import numpy as np
from distributed import Client
import xarray as xr

In [None]:
import os

def set_env():
    """Export ``GS_NO_SIGN_REQUEST=YES`` into this process's environment.

    NOTE(review): presumably read by GDAL (through rasterio/rioxarray) so that
    public ``gs://`` objects can be opened without credentials -- confirm
    against the GDAL /vsigs/ documentation.
    """
    os.environ.update({"GS_NO_SIGN_REQUEST": "YES"})


# Must run before the first raster is opened further down in the notebook.
set_env()

## Accès à un bucket GCP

In [None]:
# gcsfs is already imported in the first cell; re-importing here is harmless.
import gcsfs
# Anonymous (token='anon') filesystem handle used by the listing cells below.
# NOTE(review): `bucket_name` does not appear among the parameters listed in
# the gcsfs GCSFileSystem documentation -- confirm it is not silently ignored.
fs = gcsfs.GCSFileSystem(bucket_name="supaero", token='anon')

In [None]:
# List the entries stored under the 31TCH prefix of the supaero bucket.
fs.ls('supaero/31TCH')

In [None]:
# List the files of a single product directory under that prefix.
fs.ls('supaero/31TCH/SENTINEL2B_20191224-104910-788_L2A_T31TCH_C_V2-2')

## Lecture d'une bande avec rioxarray, et affichage d'un sous-ensemble

Seuls les pixels sélectionnés sont chargés.

In [None]:
# Open band B11 straight from the bucket, then keep only a 1000 x 1000 pixel
# window (rows and columns 4000..4999) across every band.
xds_11 = rioxarray.open_rasterio(
    "gs://supaero/31TCH/"
    "SENTINEL2B_20191224-104910-788_L2A_T31TCH_C_V2-2/"
    "SENTINEL2B_20191224-104910-788_L2A_T31TCH_C_V2-2_FRE_B11.tif",
    nodata=-10000.0,
).isel(y=slice(4000, 5000), x=slice(4000, 5000))
xds_11

In [None]:
# Quick-look plot of the band-11 window selected in the previous cell.
xds_11.plot()

## Construction d'un Dataset monotemporel

Sans Dask : bandes GREEN et SWIR, sous-ensemble, rééchantillonnage, calcul du NDSI

In [None]:
# Green band (B3): open remotely and keep a 2000 x 2000 pixel window.
green = rioxarray.open_rasterio(
    "gs://supaero/31TCH/"
    "SENTINEL2B_20191224-104910-788_L2A_T31TCH_C_V2-2/"
    "SENTINEL2B_20191224-104910-788_L2A_T31TCH_C_V2-2_FRE_B3.tif"
)
green = green[:, 8000:10000, 8000:10000]
# Mask no-data BEFORE resampling. If the mask were applied after the mean,
# a 2x2 block mixing valid pixels with -10000 would average to a value that
# is no longer exactly -10000, and the contaminated pixel would slip through.
green = green.where(green != -10000)
# Resample to 20 m: average 2x2 pixel blocks (halves the resolution).
# The mean skips NaN, so partially masked blocks use their valid pixels only.
green = green.coarsen(x=2, y=2, boundary='pad').mean()
# Record -10000 as the encoded fill value for later writes.
green.rio.write_nodata(-10000, encoded=True, inplace=True)
green

In [None]:
# Quick-look plot of the resampled green band.
green.plot()

In [None]:
# SWIR band (B11): open remotely and keep rows/columns 4000..4999.
swir = rioxarray.open_rasterio(
    "gs://supaero/31TCH/"
    "SENTINEL2B_20191224-104910-788_L2A_T31TCH_C_V2-2/"
    "SENTINEL2B_20191224-104910-788_L2A_T31TCH_C_V2-2_FRE_B11.tif"
).isel(y=slice(4000, 5000), x=slice(4000, 5000))
# Turn the -10000 no-data marker into NaN, then record it as the fill value.
swir = swir.where(swir != -10000)
swir.rio.write_nodata(-10000, encoded=True, inplace=True)
swir

In [None]:
# Quick-look plot of the SWIR window.
swir.plot()

In [None]:
# NDSI as a normalized difference: (green - SWIR) / (green + SWIR).
# Pixels that are NaN in either band stay NaN in the result.
ndsi = (green - swir) / (green + swir)
ndsi

In [None]:
# Plot the NDSI values.
ndsi.plot()

In [None]:
# Plot the boolean mask of pixels whose NDSI exceeds 0.4.
(ndsi > 0.4).plot()

In [None]:
# Bundle both bands into a single Dataset so derived variables can sit
# alongside them.
sub_ds = xr.Dataset(data_vars=dict(green=green, swir=swir))
sub_ds

In [None]:
# Derived variables: NDSI from the two stored bands, then a boolean mask of
# the pixels whose NDSI is above 0.4.
sub_ds["ndsi"] = (sub_ds["green"] - sub_ds["swir"]) / (sub_ds["green"] + sub_ds["swir"])
sub_ds["snow"] = sub_ds["ndsi"] > 0.4
sub_ds

In [None]:
# Plot the boolean snow mask.
sub_ds.snow.plot()

In [None]:
# Snow fraction over VALID pixels only. `NaN > 0.4` evaluates to False, so
# masked (no-data) pixels are never counted as snow; dividing by the full
# array size would also count them as observed non-snow and bias the ratio
# downward. Divide by the number of pixels with a defined NDSI instead.
sub_ds.snow.sum() / sub_ds.ndsi.notnull().sum()

## Maintenant, calcul sur une image complète, avec Dask

Pour éviter les problèmes de mémoire