In [1]:
import glob
import json
import logging
import math
import os
import os.path as osp
import time

import psutil
from datacube.utils.dask import start_local_dask
from odc.io.cgroups import get_cpu_quota, get_mem_quota
from odc.stats._cli_common import setup_logging

from dea_ml.core.merge_tifs_to_ds import PredictFromFeature
from dea_ml.core.product_feature_config import FeaturePathConfig
from distributed import Client, LocalCluster

In [6]:
import json
from typing import Dict, Tuple

def geofeature_to_tileidx(feature: Dict) -> Tuple[int, int]:
    """Parse the tile index stored in a feature's ``properties.title``.

    The title is split on ``","`` and must yield exactly two pieces (otherwise
    the unpacking raises ``ValueError``); each piece is converted with ``int``
    and the pair is returned in the order it appears in the title.
    """
    first, second = feature["properties"]["title"].split(",")
    tile_idx = (int(first), int(second))
    return tile_idx


# Read the tile GeoJSON; only the parsing needs the file handle open.
# NOTE(review): the path is an absolute, user-specific location — confirm it
# exists in the environment where this notebook is re-run.
with open("/home/jovyan/wa/u23/crop-mask/eastern_cropmask/data/s2_tiles_eastern_aez.geojson") as fhin:
    raw = json.load(fhin)

# One (x, y) tile index per feature, in file order.
tile_indicies = list(map(geofeature_to_tileidx, raw["features"]))

# Task keys look like "x+047/y+010": explicit sign, zero-padded to width 4.
tasks = [f'x{x:+04d}/y{y:+04d}' for x, y in tile_indicies]

In [7]:
# Display the last two task keys as a quick check of the generated format.
tasks[-2:]

['x+047/y+010', 'x+048/y+010']

In [2]:
def get_max_mem() -> int:
    """
    Max available memory, taking pod resource allocation into account.

    Returns the total reported by ``psutil.virtual_memory()``, capped by the
    value from ``get_mem_quota()`` whenever a quota is reported.
    """
    physical = psutil.virtual_memory().total
    quota = get_mem_quota()
    # No quota reported: the machine total is the only ceiling.
    return physical if quota is None else min(quota, physical)


def get_max_cpu() -> int:
    """
    Max available CPU (rounded up if fractional), takes into account pod
    resource allocation.

    When ``get_cpu_quota()`` reports a quota, it is rounded up to the next
    whole number. Otherwise the count from ``psutil.cpu_count()`` is used;
    psutil returns ``None`` when the count cannot be determined, and that case
    is mapped to ``1`` so callers always receive an ``int``.
    """
    ncpu = get_cpu_quota()
    if ncpu is not None:
        return int(math.ceil(ncpu))
    # psutil.cpu_count() may be None (undetermined); fall back to a single CPU.
    return psutil.cpu_count() or 1


# Configure logging first, then grab the logger used by the rest of the cell.
setup_logging()
_log = logging.getLogger(__name__)

# Resource ceilings detected for this session (CPU first, then memory).
# NOTE(review): neither value is consumed by the code visible in this
# notebook — confirm whether they were meant to size the Dask cluster.
nthreads, memory_limit = get_max_cpu(), get_max_mem()

# Task list read from the JSON file next to the notebooks.
with open("/home/jovyan/wa/u23/notebooks/s2_tiles_eastern_aez_tasks.json") as fhin:
    tasks = json.load(fhin)

# Output root: <DATA_PATH>/<product name>/<product version>.
output_fld = osp.join(
    FeaturePathConfig.DATA_PATH, FeaturePathConfig.product.name, FeaturePathConfig.product.version
)


# Manual task selection.
# NOTE(review): this replaces (does not extend) the list loaded from JSON
# above — confirm that is the intent.
tasks = ["x+029/y+000/2019-P6M", "x+048/y+010"]

# Run the prediction for every task on a local Dask cluster. The combined
# with-statement exits the client first and then the cluster, whether the loop
# finishes or raises.
# NOTE(review): LocalCluster() is created with defaults; the `nthreads` and
# `memory_limit` values computed earlier are not passed in — confirm intent.
with LocalCluster() as cluster, Client(cluster) as client:
    worker = PredictFromFeature(client=client)
    for task in tasks:
        # A task may carry extra path parts (e.g. "x+029/y+000/2019-P6M");
        # only the first two parts name the tile's output folder.
        tile_indx = "/".join(task.split("/")[:2])
        output_path = osp.join(output_fld, tile_indx, "*")

        # Any existing entry under the tile folder counts as "already done".
        if glob.glob(output_path):
            # Logger arguments are formatted lazily, only if the record is emitted.
            _log.warning("tile %s is done already. Skipping...", output_path)
            continue
        _log.info("processing tiles for task %s.", output_path)

        # perf_counter is a monotonic timer, so the measured duration is not
        # affected by system clock adjustments during a long run.
        t0 = time.perf_counter()
        worker.run(task)
        wall_time = (time.perf_counter() - t0) / 60  # elapsed minutes
        _log.info("time used %.4f", wall_time)

[2021-03-16 23:29:30,460] {<ipython-input-2-313edbf98f4f>:54} INFO - proessing tiles for task /g/data/u23/data/crop_mask_eastern/v0.1.5/x+029/y+000/*.


  result_data = func(*input_data)
  result_data = func(*input_data)
  result_data = func(*input_data)
  result_data = func(*input_data)




  result_data = func(*input_data)
  result_data = func(*input_data)
  result_data = func(*input_data)
  result_data = func(*input_data)


   predicting...
   probabilities...
[2021-03-16 23:29:58,574] {merge_tifs_to_ds.py:141} INFO - ... Dask computing ...
[2021-03-16 23:30:06,453] {merge_tifs_to_ds.py:177} INFO - collecting mask and write cog.
[2021-03-16 23:31:00,202] {merge_tifs_to_ds.py:184} INFO - collecting prob and write cog.
[2021-03-16 23:31:43,244] {merge_tifs_to_ds.py:191} INFO - collecting the stac json and write out.
[2021-03-16 23:31:43,299] {<ipython-input-2-313edbf98f4f>:60} INFO - time used 2.2140
[2021-03-16 23:31:43,308] {<ipython-input-2-313edbf98f4f>:54} INFO - proessing tiles for task /g/data/u23/data/crop_mask_eastern/v0.1.5/x+048/y+010/*.
   predicting...
   probabilities...
[2021-03-16 23:32:10,338] {merge_tifs_to_ds.py:141} INFO - ... Dask computing ...
[2021-03-16 23:32:17,509] {merge_tifs_to_ds.py:177} INFO - collecting mask and write cog.
[2021-03-16 23:33:03,287] {merge_tifs_to_ds.py:184} INFO - collecting prob and write cog.
[2021-03-16 23:33:48,333] {merge_tifs_to_ds.py:191} INFO - collect