# S2Cloudless long-term percentiles


In [None]:
# !pip list | grep odc

In [None]:
# !pip uninstall s2_gm_tools -y
# !pip install s2_gm_tools/

# !pip install --no-deps git+https://github.com/opendatacube/odc-stats@1.0.77

# !pip uninstall odc-algo -y
# !pip install --no-deps git+https://github.com/opendatacube/odc-algo@adb1856

# !pip uninstall odc-dscache -y
# !pip install --no-deps ~/git/odc-dscache/

In [None]:
import os
import json
import warnings
import xarray as xr
import rioxarray as rxr
import geopandas as gpd
import matplotlib.pyplot as plt
from odc.geo.xr import assign_crs
from odc.stats.tasks import TaskReader
from odc.stats.model import OutputProduct

# Suppress every Python warning raised from this point on.
# NOTE(review): this is a blanket filter, so deprecation and other library
# warnings are hidden too - narrow it if a run needs debugging.
warnings.filterwarnings("ignore")

## Analysis Parameters


In [None]:
# tiles = ['x30y34','x36y52','x61y30','x58y22','x57y28', 'x61y29', 'x64y32', 'x65y40', 'x60y53' ,'x55y51', 'x46y58', 'x46y46', 'x36y34']
# gdf = gpd.read_file('~/gdata1/projects/s2_gm/testing_tile_suite.geojson')

# gdf = gdf[gdf['region_code'].isin(tiles)]
# gdf.reset_index(drop=True).to_file('~/gdata1/projects/s2_gm/testing_tile_suite_13tiles.geojson')

In [None]:
# --- Time ---
year = '2022'
t_range = '2020--P3Y'  # temporal range; also forms part of the task database file name

# --- Tile ---
t = (175, 85)  # (x, y) index of the tile to run
resolution = 10  # can be coarsened to speed up testing

# --- Inputs / outputs ---
products = 'ga_s2am_ard_3-ga_s2bm_ard_3-ga_s2cm_ard_3'  # use all S2 observations
name = 'ga_s2cloudless_percentiles_3'  # output product name
version = '0-0-1'  # output product version
results = '/gdata1/projects/s2_gm/results/'  # root directory for outputs

# --- Compute resources handed to odc-stats (--threads / --memory-limit) ---
ncpus = 30
mem = '230Gi'

## Save tasks database etc.

In [None]:
# os.system("odc-stats save-tasks "\
#           "--grid au_extended_s2_10 "\
#           f"--input-products {products} "\
#           f"--temporal-range {t_range} "\
#           f"--frequency all"
#          )

## Find the tile ID to run

We'll pass this index to `odc-stats run` in a later cell to tell it which tile to process.

In [None]:
## Open the task database and find the position of the tile we want to run.
# The output product is only needed so that TaskReader can build task objects;
# its location points at a dummy bucket and nothing is written there by this cell.
op = OutputProduct(
    name=name,
    version=version,
    short_name=name,
    location=f"s3://dummy-bucket/{name}/{version}",
    properties={"odc:file_format": "GeoTIFF"},
    measurements=['nbart_red'],
)

taskdb = TaskReader(f'{products}_{t_range}.db', product=op)

# Task keys are (temporal range, x, y); `task` is reused below to plot the tile extent.
task = taskdb.load_task((t_range, t[0], t[1]))

# Collect the position(s) of our tile within the database's tile listing.
# We'll pass this position to odc-stats next to tell it to run this tile.
all_tiles = list(taskdb.all_tiles)
tile_index_to_run = [
    position
    for position, tile in enumerate(all_tiles)
    if tile[1] == t[0] and tile[2] == t[1]
]

for position in tile_index_to_run:
    print(position)

# Make a failed lookup visible here rather than as an IndexError in the run cell.
if not tile_index_to_run:
    print(f'No task found for tile x={t[0]}, y={t[1]} in {products}_{t_range}.db')

### Optionally view tile to check location

The next cell plots the tile extent on an interactive map so you can confirm it's the tile you want to run.

In [None]:
# with open('task_tile_check.geojson', 'w') as fh:
#     json.dump(task.geobox.extent.to_crs('epsg:4326').json, fh, indent=2)

# Reproject the task's footprint to EPSG:4326 and wrap it in a one-row GeoDataFrame.
tile_extent_4326 = task.geobox.extent.to_crs('epsg:4326')
gdf = gpd.GeoDataFrame(
    geometry=[tile_extent_4326.geom],
    index=[0],
    crs='epsg:4326',
)

# Kept as the last expression of the cell so the interactive map is displayed.
gdf.explore()

## Run the s2Cloudless percentiles algorithm using odc-stats

Paste this link into the Dask dashboard to view progress, replacing the email address with your own: https://app.sandbox.dea.ga.gov.au/user/chad.burton@ga.gov.au/proxy/8787/status

In [None]:
# Remove any installed copy of s2_gm_tools, then install it again from the
# local s2_gm_tools/ directory.
# NOTE(review): presumably done so that local edits to the package and its
# config files are picked up by the odc-stats run below - confirm.
!pip uninstall s2_gm_tools -y
!pip install s2_gm_tools/

In [None]:
%%time
# Fail with a clear message if the tile lookup produced no index to run.
if not tile_index_to_run:
    raise IndexError(
        f"tile {t} was not found in {products}_{t_range}.db; nothing to run"
    )

# NOTE(review): `results` starts with '/', so the location below expands to
# file:///home/jovyan//gdata1/..., while the loading cell further down reads
# from `results` directly (no /home/jovyan prefix) - confirm both resolve to
# the same directory.
odc_stats_cmd = (
    "odc-stats run "
    f"{products}_{t_range}.db "
    "--config=s2_gm_tools/s2_gm_tools/config/config_s2Cloudless_percentiles.yaml "
    # Use the `resolution` analysis parameter; this was previously hard-coded
    # to 10, so changing the parameter had no effect on the run.
    f"--resolution={resolution} "
    f"--threads={ncpus} "
    f"--memory-limit={mem} "
    f"--location=file:///home/jovyan/{results}{name}/{version} "
    f"{tile_index_to_run[0]}"
)

# os.system returns the command's exit status; it stays the last expression so
# the status is still shown as the cell result.
os.system(odc_stats_cmd)

## Plot the percentile outputs

In [None]:
# Optionally override the tile to inspect, e.g.:
# t = 3,19  # tile id

# Product identifiers and results root used to locate the outputs on disk.
# These repeat the values assigned in the Analysis Parameters cell.
name = 'ga_s2cloudless_percentiles_3'
version = '0-0-1'
results = '/gdata1/projects/s2_gm/results/'

In [None]:
# Folder / file-name components for the selected tile.
# Both indices are zero-padded to three digits instead of hard-coding a single
# '0' in front of y. For the current tile (175, 85) this still gives
# 'x175' / 'y085', and it stays consistent for tiles whose indices have a
# different number of digits.
# NOTE(review): assumes odc-stats writes tile folders with 3-digit zero-padded
# indices (as the previous hard-coded 'y0' prefix implied) - confirm against
# the region code format of the output product.
x = f'x{t[0]:03d}'
y = f'y{t[1]:03d}'

## Interactively explore results

In [None]:
import numpy as np

# Output variable name -> GeoTIFF written by the odc-stats run for this tile.
layer_paths = {
    's2cloudless_prob_pc_5': f'{results}{name}/{version}/{x}/{y}/{t_range}/{name}_{x}{y}_{t_range}_final_oa_s2cloudless_prob_pc_5.tif',
    's2cloudless_prob_pc_10': f'{results}{name}/{version}/{x}/{y}/{t_range}/{name}_{x}{y}_{t_range}_final_oa_s2cloudless_prob_pc_10.tif',
    's2cloudless_prob_pc_25': f'{results}{name}/{version}/{x}/{y}/{t_range}/{name}_{x}{y}_{t_range}_final_oa_s2cloudless_prob_pc_25.tif',
}

# Open each raster, squeeze out length-1 dimensions, drop the leftover 'band'
# coordinate, tag the CRS and name the array after its percentile.
layers = [
    assign_crs(
        rxr.open_rasterio(tif_path).squeeze().drop_vars('band'),
        crs='EPSG:3577',
    ).rename(var_name)
    for var_name, tif_path in layer_paths.items()
]

# Combine the three layers into a single dataset for plotting and exploring.
ds = assign_crs(xr.merge(layers), crs='EPSG:3577')

In [None]:
fig, ax = plt.subplots(1, 3, sharey=True, figsize=(15, 5), layout='constrained')
vmin, vmax = 0, 0.4

# (dataset variable, panel title) for each subplot, left to right.
panels = (
    ('s2cloudless_prob_pc_5', '2020-22 5th pc of cloud proba'),
    ('s2cloudless_prob_pc_10', '2020-22 10th pc of cloud proba'),
    ('s2cloudless_prob_pc_25', '2020-22 25th pc of cloud proba'),
)
last_col = len(panels) - 1

# Draw all three images first. The colourbar is switched off explicitly on
# every panel except the right-most one, which is left on the plotting default.
for col, (var_name, _) in enumerate(panels):
    colorbar_kw = {} if col == last_col else {'add_colorbar': False}
    ds[var_name].plot.imshow(
        vmin=vmin,
        vmax=vmax,
        ax=ax[col],
        cmap='magma',
        add_labels=False,
        **colorbar_kw,
    )

# Then title each panel and blank out its tick labels.
for axis, (_, title) in zip(ax, panels):
    axis.set_title(title, fontsize=10)
    axis.set_yticklabels([])
    axis.set_xticklabels([])

In [None]:
# Basemap settings: Esri World Imagery tiles with their attribution and layer name.
basemap = dict(
    tiles='https://server.arcgisonline.com/ArcGIS/rest/services/World_Imagery/MapServer/tile/{z}/{y}/{x}',
    attr='Esri',
    name='Esri Satellite',
)

# Kept as the last expression of the cell so the interactive map of the
# 10th-percentile layer is displayed.
ds['s2cloudless_prob_pc_10'].odc.explore(**basemap)

## Remove all files

In [None]:
# !rm -r -f results/ga_s2cloudless_percentiles_3/