# Run Sentinel-2 Geomedian with odc-stats over a larger testing tile suite


In [None]:
# !pip uninstall odc-dscache -y
# !pip install --no-deps ~/git/odc-dscache/

In [None]:
# !pip install -U odc-stats
# !pip uninstall odc-algo -y
# !pip install --no-deps git+https://github.com/opendatacube/odc-algo@adb1856

# !pip uninstall odc-dscache -y
# !pip install --no-deps ~/git/odc-dscache/

#### !pip install git+https://github.com/opendatacube/odc-algo.git

In [None]:
import os
import json
import warnings
import xarray as xr
import rioxarray as rxr
import geopandas as gpd
import matplotlib.pyplot as plt
from odc.geo.xr import assign_crs
from odc.stats.tasks import TaskReader
from odc.stats.model import OutputProduct

warnings.filterwarnings("ignore")

## Analysis Parameters


In [None]:
# Temporal range passed to `odc-stats save-tasks --temporal-range` and used in
# the task database filename.
t_range = '2020--P3Y'

# Output resolution handed to `odc-stats run --resolution`; it can be
# coarsened to speed up testing.
resolution = 10

# Input products, joined with '-' so that all S2 observations are used.
products = 'ga_s2am_ard_3-ga_s2bm_ard_3-ga_s2cm_ard_3'

# Output product name and version.
name = 'ga_s2_gm_cyear_3'
version = '0-0-1'

# Directory (appended to the home directory in the run cells) where results
# are written.
results = '/gdata1/projects/s2_gm/results/'

# Resources for the serial run: thread count and memory limit. `mem` is a
# string that the run cell formats as f"{mem}Gi".
ncpus = 35
mem = '470'

## Save tasks database etc.

In [None]:
%%time
# Assemble the `odc-stats save-tasks` shell command from the analysis
# parameters. Adjacent string literals are concatenated, so every piece except
# the last ends with a space.
save_tasks_cmd = (
    "odc-stats save-tasks "
    "--grid au_extended_small_10 "
    f"--input-products {products} "
    f"--temporal-range {t_range} "
    "--frequency all"
)

# Run it through the shell; the call's return value is the cell's result.
os.system(save_tasks_cmd)

## Find the list of tile indexes to run

In [None]:
# gdf_test_tiles = gpd.read_file('~/gdata1/projects/s2_gm/data/testing_tile_suite.geojson')
# gdf_mini_tiles = gpd.read_file('/home/jovyan/gdata1/projects/s2_gm/data/au_extented_smalltiles.geojson')
# gdf_mini_tiles_tests = gdf_mini_tiles.overlay(gdf_test_tiles[['geometry']], how='intersection')

# gdf_mini_tiles_tests['area'] = gdf_mini_tiles_tests.area
# gdf_mini_tiles_tests = gdf_mini_tiles_tests[gdf_mini_tiles_tests['area'] > 0.01]
# gdf_mini_tiles_tests = gdf_mini_tiles_tests.drop('area', axis=1)

# gdf_mini_tiles_tests.to_file('~/gdata1/projects/s2_gm/data/testing_minitile_suite.geojson')
# gdf_mini_tiles_tests

In [None]:
# Load the mini-tile testing suite. The `region_code` attribute of each
# feature is read later when looking up tile indexes in the task database.
minitile_suite_path = '/home/jovyan/gdata1/projects/s2_gm/data/testing_minitile_suite.geojson'
gdf = gpd.read_file(minitile_suite_path)

In [None]:
## Open the task database and find the index of every test tile
# TaskReader needs an output product definition; the location is a dummy
# bucket because this cell only reads the list of tiles.
op = OutputProduct(
    name=name,
    version=version,
    short_name=name,
    location=f"s3://dummy-bucket/{name}/{version}",
    properties={"odc:file_format": "GeoTIFF"},
    measurements=['nbart_red'],
)

taskdb = TaskReader(f'{products}_{t_range}.db', product=op)

# Materialise the tile list once (it does not change between features) and map
# each (x, y) pair to the positions it occupies in that list. Positions are
# kept in ascending order, so the result matches a linear scan per feature.
all_tiles = list(taskdb.all_tiles)
positions_by_xy = {}
for position, tile_idx in enumerate(all_tiles):
    positions_by_xy.setdefault((tile_idx[1], tile_idx[2]), []).append(position)

n_features = len(gdf)
tile_indexes_to_run = []
for i, (_, row) in enumerate(gdf.iterrows(), start=1):
    # '\r' with end="" rewrites the same line to act as a progress counter
    print(f'Feature: {i}/{n_features}\r', end="")

    # region_code looks like 'x115y056': characters 1-3 are x, the last three are y
    t = int(row['region_code'][1:4]), int(row['region_code'][-3:])

    # NOTE(review): the loaded task is not used afterwards; presumably this
    # call is kept as a check that the tile exists in the task database -
    # confirm, and drop it if it is not needed.
    task = taskdb.load_task([f'{t_range}', t[0], t[1]])

    # Record the position(s) of this tile in the task list; these positions
    # are what the run cells pass to `odc-stats run`.
    tile_indexes_to_run.extend(positions_by_xy.get(t, []))

## Run tiles in serial

Dask dashboard (available while a run is active): https://app.sandbox.dea.ga.gov.au/user/chad.burton@ga.gov.au/proxy/8787/status

In [None]:
# Reinstall the s2_gm_tools package from the local `s2_gm_tools/` directory:
# remove any installed copy first, then install the local one.
!pip uninstall s2_gm_tools -y
!pip install s2_gm_tools/

In [None]:
# Run the selected tiles through `odc-stats run`, one tile at a time.
import subprocess  # imported here so this cell can be run on its own

# `results` starts with '/', so strip it before joining to avoid a '//' in the
# middle of the output URL.
output_location = f"file:///home/jovyan/{results.lstrip('/')}{name}/{version}"

# Tiles whose odc-stats process exited with a non-zero status.
failed_tiles = []

# NOTE(review): the [100:] slice skips the first 100 tile indexes, presumably
# because they were already processed - confirm before re-running.
for tile in tile_indexes_to_run[100:]:
    print(f"Starting tile: {tile}")

    # Argument list rather than a shell string: no quoting issues, and it
    # matches the form used by the parallel cell.
    cmd = [
        "odc-stats", "run",
        f"{products}_{t_range}.db",
        "--config=s2_gm_tools/s2_gm_tools/config/config_gm_s2_annual_s2Cloudless_enhanced.yaml",
        f"--resolution={resolution}",
        f"--threads={ncpus}",
        f"--memory-limit={mem}Gi",
        f"--location={output_location}",
        f"{tile}",
    ]

    completed = subprocess.run(cmd)
    if completed.returncode != 0:
        # Keep going with the remaining tiles, but do not lose the failure.
        print(f"Tile {tile} failed with exit code {completed.returncode}")
        failed_tiles.append(tile)

if failed_tiles:
    print(f"{len(failed_tiles)} tile(s) failed: {failed_tiles}")


In [None]:
# ds = assign_crs(xr.open_dataarray('results/ga_s2_gm_cyear_3/0-0-1/x60/y68/2020--P3Y/ga_s2_gm_cyear_3_x60y68_2020--P3Y_final_nbart_red.tif'), crs='EPSG:3577')

In [None]:
# ds.squeeze().odc.explore()

In [None]:
# ds.squeeze().plot.imshow(vmin=10, vmax=90, cmap='magma', size=10)

## Run tiles in parallel

In [None]:
# Total CPUs and memory available; the parallel run cell divides both evenly
# between the worker processes (memory is formatted there with a "Gi" suffix).
total_cpus, total_mem = 95, 705

# Maximum number of odc-stats processes allowed to run at the same time.
MAX_PROCESSES = 3

In [None]:
# Run the selected tiles through `odc-stats run`, keeping up to MAX_PROCESSES
# processes alive at once.
import subprocess
import time

# Each process gets an equal share of the total CPUs and memory; the split
# is the same for every tile, so compute it once.
threads_per_process = int(total_cpus / MAX_PROCESSES)
mem_per_process = int(total_mem / MAX_PROCESSES)

# `results` starts with '/', so strip it before joining to avoid a '//' in the
# middle of the output URL.
output_location = f"file:///home/jovyan/{results.lstrip('/')}{name}/{version}"

active_processes = []
tile_of_process = {}  # Popen object -> tile index, used to report failures
failed_tiles = []     # tiles whose process exited with a non-zero status

# NOTE(review): the [2:] slice skips the first two tile indexes, presumably
# because they were already processed - confirm before re-running.
for tile in tile_indexes_to_run[2:]:
    print(f"Starting tile: {tile}")

    cmd = [
        "odc-stats", "run", f"{products}_{t_range}.db",
        "--threads", f"{threads_per_process}",
        "--memory-limit", f"{mem_per_process}Gi",
        "--config", "s2_gm_tools/s2_gm_tools/config/config_gm_s2_annual_s2Cloudless_enhanced.yaml",
        "--location", output_location,
        # No trailing space: each list element is passed to the program verbatim
        "--resolution", f"{resolution}",
        f"{tile}",
    ]

    # Start process
    p = subprocess.Popen(cmd)
    active_processes.append(p)
    tile_of_process[p] = tile

    # Wait while every slot is taken
    while len(active_processes) >= MAX_PROCESSES:
        still_running = []
        for proc in active_processes:
            if proc.poll() is None:
                still_running.append(proc)
            elif proc.returncode != 0:
                # Finished with an error: record it and carry on
                print(f"Tile {tile_of_process[proc]} failed with exit code {proc.returncode}")
                failed_tiles.append(tile_of_process[proc])
        active_processes = still_running

        # Only sleep if no slot was freed by the check above
        if len(active_processes) >= MAX_PROCESSES:
            time.sleep(1)

# Wait for any remaining processes to finish
for p in active_processes:
    if p.wait() != 0:
        print(f"Tile {tile_of_process[p]} failed with exit code {p.returncode}")
        failed_tiles.append(tile_of_process[p])

if failed_tiles:
    print(f"{len(failed_tiles)} tile(s) failed: {failed_tiles}")


## Sync results to AWS

In [None]:
# aws --profile user1 s3 sync results/ga_s2_gm_cyear_3  s3://dea-public-data-dev/ga_s2_gm_cyear_3/

### Remove files from AWS

In [None]:
# !aws --profile user1 s3 rm --recursive s3://dea-public-data-dev/ga_s2_gm_cyear_3/ --dryrun

## Remove all local files

In [None]:
# !rm -r -f results/ga_s2_gm_cyear_3/