<b>Sargassum Sensing (demo)</b>  
Towards an application for detecting and forecasting Sargassum in open sea

* Supervised DTC and MLC classifications were used in addition to density slicing of GNDVI
* Full scene classification requires a lot of RAM. Tiling or cropping the scene prior to classification is recommended. 
* Sentinel-hub Playground used for previewing images
* HYCOM + NCODA Global 1/12° Analysis (3 hourly) ([reference](https://www.hycom.org/dataserver/gofs-3pt1/analysis)) used for sea surface current model
* OceanParcels tool used for particle tracking [reference](https://oceanparcels.org/)
* Download HYCOM data used in this demo : [Google Drive download](https://drive.google.com/uc?export=download&confirm=aLlF&id=1vCtr2pYqa6Ih2ChaiZ4t6IS5qXyYdCiK)   
    * The demo netCDF data has global coverage and a time component: <b>2020-04-02T15:00 to 2020-04-03T00:00</b> (4 layers)
    * The OceanParcels tool gives an out-of-bounds error when the HYCOM data is a spatial subset (cropped data). Therefore, only global coverage data was used for this demo.
    * Download your own data through this [link](https://ncss.hycom.org/thredds/ncss/grid/GLBy0.08/expt_93.0/uv3z/dataset.html)
        * Select water_u and water_v variables
        * Choose a Time Subset
        * Choose Vertical Level = 0 (sea current at surface)
        * Select Add Lat/Lon variables
        * Output format = netcdf4 

         
        
    

In [None]:
import os 
import re
import getpass
import numpy as np
import pandas as pd
from glob import glob
import rasterio as rio
from joblib import load
import geopandas as gpd
from zipfile import ZipFile
import matplotlib.pyplot as plt
from tqdm import tqdm,tqdm_notebook
from sentinelsat import SentinelAPI
from shapely.geometry import LineString
from parcels import FieldSet, Field,ParticleSet, Variable, JITParticle, AdvectionRK4, plotTrajectoriesFile, ErrorCode
from datetime import timedelta
import xarray as xr

#custom functions
from Python.misc import retrieve_product,semi_sv_pred,get_band_paths,dbscan_cluster
from Python.prep_raster import stack_bands,polygonize,get_tiles

#specify I/O dir: a 'demo' folder inside the parent of the current working directory
data_dir = os.path.join(os.path.abspath('..'),'demo')
#create it up front so later cells can write into it (no error if it already exists)
os.makedirs(data_dir,exist_ok=True)

<b>Preview Sentinel-2 image(s) via Sentinel Hub Playground</b>  
* Example for a scene taken on 4 April 2020

In [None]:
import IPython
#import from the public IPython.display module (IPython.core.display.display is deprecated)
from IPython.display import display, HTML

#specify url and view in notebook cell
url = 'https://apps.sentinel-hub.com/sentinel-playground/?source=S2L2A&lat=12.176447977999016&lng=-68.12845230102539&zoom=12&preset=5_MOISTURE_INDEX&layers=B01,B02,B03&maxcc=100&gain=1.0&gamma=1.0&time=2019-10-01%7C2020-04-06&atmFilter=&showDates=true'
#widen the notebook container to 90% so the embedded page has more room
display(HTML("<style>.container { width:90% !important; }</style>"))
#last expression of the cell: the notebook renders the returned IFrame
IPython.display.IFrame(url,width='100%',height='600')

<b>Download Sentinel-2 image(s)</b>  

In [None]:
#user authentication (Copernicus account)
#both values are read with getpass, so neither is echoed in the notebook output
username = getpass.getpass("Username:")
pswd = getpass.getpass("Password:")

#NOTE(review): the scihub.copernicus.eu hub may have been retired - confirm the endpoint is still reachable
api_auth = SentinelAPI(
    user=username,
    password=pswd,
    api_url='https://scihub.copernicus.eu/dhus',
)

In [None]:
# specify tile(s) and date(s) (T19PEP covers Bonaire and T19PFP covers eastern sea)
date_tiles = { "T19PFP":[20200402]}

#download product(s) into the demo directory
df_products = retrieve_product(date_tiles,api_auth)
api_auth.download_all(products=df_products.index,directory_path=data_dir)

#extract all zips to SAFE files
#the pattern is anchored inside data_dir; the previous data_dir+'*/...' form also
#matched sibling folders whose name merely starts with the same prefix
zip_files = sorted(glob(os.path.join(data_dir,'S2*_MSIL*.zip')))
if not zip_files:
    raise FileNotFoundError(f'No Sentinel-2 archive (S2*_MSIL*.zip) found in {data_dir}')
for zip_file in zip_files:
    with ZipFile(zip_file) as zip_obj:
        zip_obj.extractall(data_dir)

<b>Preprocess Sentinel-2 image(s)</b>  

In [None]:
#safe file (first *.SAFE entry found in the demo directory)
safe_file = glob(f"{data_dir}/*.SAFE")[0]

#pull the 8-digit date stamp and the tile id out of the SAFE path (first match of each)
date_matches = re.findall(r"(\d{8})T", safe_file)
date = date_matches[0]
tile_matches = re.findall(r"(T\d{2}[A-Z]{3})", safe_file)
tile_id = tile_matches[0]

#get image paths (12 multiband .jp2 images)
img_paths = get_band_paths(safe_file)

#stack bands (computationally intensive)
#NOTE(review): img_paths[1] is presumably the reference band for the output grid - confirm in stack_bands
stack_name = f'stack_{tile_id}_{date}.tif'
outfile = os.path.join(data_dir,stack_name)
stack_bands(img_paths,img_paths[1],outfile)

<b> Classification </b>
* `semi_sv_pred()` returns a numpy.ndarray which represents the consensus (effective areal overlap) between the DTC, MLC and GNDVI (density slicing) classifications
     *  Default parameters: `mlc_thr=7.79,gndvi_thr=0.05,b02_thr=0.15,sieve_size=10`:
         * `mlc_thr` is based on the chi-square probability (n=4) (lower number = stricter classification)
         * `gndvi_thr` is based on the GNDVI image histogram (larger number= stricter classification)
             * Similar to NDVI, with possible values between -1 and 1
         * `b02_thr` is based on Sentinel-2 blue band B02 (larger number = stricter cloud mask classification)
         * `sieve_size` refers to the minimum sieve filter size (reduce speckles in the classification)

In [None]:
#load models
#NOTE: joblib files are pickle-based - only load model files that come from a trusted source
#forward slashes keep these paths valid on Linux/macOS as well as on Windows
dtc = load("./data/models/dtc_model_sargassum.joblib")
mlc = load("./data/models/mlc_model_sargassum.joblib")

#get stack file
stack_file = glob(data_dir+"/stack*.tif")[0]
date_tile_id = re.findall(r"(T\d{2}[A-Z]{3}_\d{8})", stack_file)[0]

#collect geodata (one GeoDataFrame per tile that yields a classification)
geodata = []

#open stack file
with rio.open(stack_file) as src:
    #copy of the raster metadata; not used further in this cell
    meta = src.meta.copy()

    #compute tiles (available nr of tiles: 4,9,16,25,36)
    tiles = get_tiles(src,16)
    for i in tqdm(range(len(tiles)),position=0, leave=True):
        tile_window, tile_affine = tiles[i]
        #read only the current window to keep memory usage bounded
        tile_arr = src.read(window=tile_window)

        #predict each tile and convert valid classifications into points
        semi_pred = semi_sv_pred(tile_arr,mlc,dtc)
        if semi_pred is not None:
            gdf = polygonize(semi_pred,1,tile_affine,src.crs,out_file=None,centroid=True)
            geodata.append(gdf)

#export geodata
#fail with an explicit message instead of the generic pandas concat error
if not geodata:
    raise ValueError(f'No valid classification found in any tile of {stack_file}')
geodata = pd.concat(geodata,ignore_index=True)
atoll_mask = gpd.read_file(r'./data/demo/demo_atoll_mask.geojson')                              #mask a few islands in the eastern sea
geodata = gpd.overlay(geodata, atoll_mask, how='difference')
geodata.to_file(os.path.join(data_dir,f'sargassum_{date_tile_id}.geojson'),driver='GeoJSON')


<b> Particle tracking </b>
* Spatial clustering to label groups of Sargassum points (DBSCAN) [reference](https://geoffboeing.com/2014/08/clustering-to-reduce-spatial-data-set-size/)

In [None]:
#load geodata
geodata = gpd.read_file(r'./data/demo/demo_sargassum_20200402.geojson')
bonaire = gpd.read_file(r'./data/demo/demo_curacao_bonaire.geojson')

#spatial cluster distance = 15km
geodata['label'] = dbscan_cluster(geodata.geometry,15).astype(str)

#plot Sargassum clusters on a map
plt.rcParams['axes.facecolor'] = '#97e5ef'
plt.rcParams["figure.figsize"] = (10,10)
base = bonaire.plot(color='#323232')
geodata.plot(ax=base,column='label',markersize=20,legend=True,cmap='viridis')

#place the island name at a fixed offset from the centroid of the first feature
label_anchor = bonaire.centroid[0]
base.text(label_anchor.x-10000,label_anchor.y-15000, 'Bonaire')

plt.xlabel('Easting')
plt.ylabel('Northing')
#the points come from the 20200402 file, i.e. the 2 April 2020 scene
plt.title('Sargassum clusters 2 April 2020')
plt.show()

* Perform a 5-day simulation (with time extrapolation) 

In [None]:
#get lat lon from geodata (reproject to EPSG:4326 once and reuse it for both coordinates)
geodata = gpd.read_file(r'./data/demo/demo_sargassum_20200402.geojson')
geodata_wgs84 = geodata.to_crs(4326)
lons = geodata_wgs84.geometry.x
lats = geodata_wgs84.geometry.y

#define fieldset variables from HYCOM data
hycom_files = glob(data_dir+"/*.nc4")                #the one downloaded from Google Drive
if not hycom_files:
    raise FileNotFoundError(f'No HYCOM netCDF file (*.nc4) found in {data_dir}')
hycom_file = hycom_files[0]
variables = {'U': 'water_u','V': 'water_v'}
dimensions = {'lon': 'lon','lat': 'lat','time': 'time'}

#construct fieldset
#time extrapolation is enabled because the 5-day run exceeds the time range of the demo file
fieldset = FieldSet.from_netcdf(hycom_file, variables, dimensions,allow_time_extrapolation=True)

#release particles (one per Sargassum point)
pset = ParticleSet.from_list(fieldset=fieldset,pclass=JITParticle,lon=lons,lat=lats)

#run advection and save trajectory to file (positions written every 3 hours)
output_fname = r'./data/demo/demo_simulation_20200402.nc'
output_file = pset.ParticleFile(name=output_fname, outputdt=timedelta(hours=3))
pset.execute(AdvectionRK4,runtime=timedelta(days=5),dt=timedelta(minutes=5),output_file=output_file)
output_file.export()

* Plot trajectory of Sargassum

In [None]:
#open the simulated data and pull the arrays into memory; the context manager closes the file
#NOTE(review): assumes lon/lat/time are stored as (trajectory, observation) arrays - confirm against the Parcels output
with xr.open_dataset(r'./data/demo/demo_simulation_20200402.nc') as simulated:
    traj_lons = simulated.lon.values
    traj_lats = simulated.lat.values
    traj_times = simulated.time.values

#load geodata
geodata = gpd.read_file(r'./data/demo/demo_sargassum_20200402.geojson')
bonaire = gpd.read_file(r'./data/demo/demo_curacao_bonaire.geojson')

#get simulated points (one dataframe per trajectory)
data = []
for i,(lon,lat,obs_time) in enumerate(zip(traj_lons,traj_lats,traj_times)):
    #each trajectory gets its own time stamps (previously the first trajectory's times were reused for all)
    df = pd.DataFrame({'lon':lon,'lat':lat,'id':f'P{i+1}','time':obs_time})
    data.append(df)

#convert to geodataframe (points are created in EPSG:4326, then projected to the CRS of the Sargassum points)
data =pd.concat(data)
geo_points_traj = gpd.GeoDataFrame(data,geometry=gpd.points_from_xy(x=data.lon, y=data.lat),crs=4326).to_crs(geodata.crs)
geo_points_traj['day'] = geo_points_traj['time'].dt.strftime('%m-%d')

#get initial points (all observations at the earliest time stamp)
initial_points = geo_points_traj[geo_points_traj['time']==geo_points_traj['time'].min()]

#plot Sargassum trajectory on a map
plt.rcParams['axes.facecolor'] = '#97e5ef'
plt.rcParams["figure.figsize"] = (10,10)
base = bonaire.plot(color='#323232')
geo_points_traj.plot(ax=base,markersize=5,column='day',legend=True,cmap='viridis')
initial_points.plot(ax=base,color='red',zorder=1,markersize=5)
plt.xlabel('Easting')
plt.ylabel('Northing')
#the simulation starts from the 2 April 2020 points and runs for 5 days
plt.title('Sargassum trajectory 2 to 7 April 2020')
plt.show()