In [1]:
from skmap.modeler import RFRegressor, RFRegressorTrees
from skmap.catalog import DataCatalog
from skmap.tiled_data import TiledData, TiledDataLoader, TiledDataExporter
from skmap.misc import TimeTracker, ttprint
import skmap_bindings as sb
import sys, os, warnings
import numpy as np
warnings.filterwarnings("ignore", module="sklearn")
import time

# ---------------------------------------------------------------------------
# Run configuration
# ---------------------------------------------------------------------------
# Production version tag; embedded in the catalog, model and output file names.
version = '20250122'

# Prediction years, target depths and quantile levels handed to the catalog
# and to the exporter.
YEARS = [2000, 2005, 2010, 2015, 2020, 2022]
DEPTHS = [0, 30, 60, 100]
QUANTILES = [0.16, 0.84]

# Shared job folder holding the catalog definition, tile lists and models.
BASE_PATH = '/mnt/slurm/jobs/global_soc'
CATALOG_PATH = BASE_PATH + '/global_soil_mapping_v' + version + '.csv'
TILES_PATH = BASE_PATH + '/ard2_final_status.gpkg'
MODEL_PATH = BASE_PATH
TILES_IDS = BASE_PATH + '/global_tile_ids.list'

# Per-tile mask raster; `{tile_id}` is left as a placeholder to be filled later.
MASK_TEMPLATE_PATH = 'http://192.168.49.30:8333/global/tiled.masks/mask_landsat_glad.lc.landmask_glc.desert.ice/{tile_id}.tif'
GDAL_OPTS = dict(GDAL_HTTP_VERSION='1.0', CPL_VSIL_CURL_ALLOWED_EXTENSIONS='.tif')

# One endpoint per storage host, last address octet running from 30 to 46.
GAIA_ADDRS = ['http://192.168.49.%d:8333' % host for host in range(30, 47)]
THREADS = 96

# Name of the covariate that is filled on the fly with each target depth.
DEPTH_VAR = 'hzn_dep'
# Resampling strategy name handed to the tile loader.
RESAMPLING_STRATEGY = 'GRA_CubicSpline'

# S3 connection settings handed to the exporter.
# Credentials are read from the environment (S3_ACCESS_KEY / S3_SECRET_KEY) so
# that they are never stored in the notebook or its version history; a missing
# variable fails immediately with a KeyError naming it.
# NOTE(review): the key pair that used to be hard-coded here must be treated as
# exposed and rotated on the storage side.
S3_PARAMS = {
    's3_addresses': GAIA_ADDRS,
    's3_access_key': os.environ['S3_ACCESS_KEY'],
    's3_secret_key': os.environ['S3_SECRET_KEY'],
    's3_prefix': '/tmp-gpw/global_soc_v5',
}
# S3_PARAMS = None

# Prediction mode, the model wrapper class to instantiate and the suffix of the
# serialized model file. Switch by swapping which of the two lines is commented.
# MODE, MODEL_TYPE, MSF = ('depths_years_quantiles', RFRegressorTrees, '.joblib')
MODE, MODEL_TYPE, MSF = ('depths_years', RFRegressor, '.so')

# Aggregation factor applied to the 30 m grid (None keeps the native 30 m).
SPATIAL_AGGREGATION = 8
# SPATIAL_AGGREGATION = None

spatial_res = f'{30*SPATIAL_AGGREGATION}m' if SPATIAL_AGGREGATION else '30m'
out_files_suffix = f'g_epsg.4326_v{version}'

# Half-open slice [start_tile:end_tile] of the tile list handled by this run.
start_tile = 0
end_tile = 1
with open(TILES_IDS, 'r') as file:
    # Skip blank lines (e.g. a trailing newline) so they never become empty tile ids.
    tile_ids = [line.strip() for line in file if line.strip()]
tile_ids = tile_ids[start_tile:end_tile]
if not tile_ids:
    # Fail here with a clear message instead of a bare IndexError further down.
    raise ValueError(f"No tile ids selected from {TILES_IDS} for range [{start_tile}:{end_tile}]")

server_name='ripley'
base_dir = f'/mnt/{server_name}/global_soc/tmp_data'
# makedirs creates the intermediate '/mnt/<server>/global_soc' folder as well.
os.makedirs(base_dir, exist_ok=True)

# Soil organic carbon (soc): model wrapper plus the settings attached to it.
# 'expm1' is forwarded to the exporter's derive_* calls in the prediction loop;
# the remaining entries are not consumed by the cells shown in this notebook.
soc_params = {
    'model': MODEL_TYPE(model_path=f'{MODEL_PATH}/model_rf.soc_production_v{version}{MSF}'),
    'expm1': True,
    'scale': 10,
    'nodata': 32767,
    'dtype': 'int16',
    'prop_file_name': 'oc_iso.10694.1995.wpct',
}

# Every property to predict; add further *_params dicts to this list.
models_params = [soc_params]

# Union of the covariates required by all configured models.
properties_features = set().union(*(entry['model'].model_covs for entry in models_params))

# The catalog is queried with the years given as strings.
YEARS_srt = list(map(str, YEARS))
catalog = DataCatalog.create_catalog(
    catalog_def=CATALOG_PATH,
    years=YEARS,
    base_path=GAIA_ADDRS,
    verbose=False,
)
catalog.query(properties_features, YEARS_srt)

# Loader for the per-tile covariates and exporter for the derived outputs.
properties_data = TiledDataLoader(
    catalog,
    MASK_TEMPLATE_PATH,
    SPATIAL_AGGREGATION,
    RESAMPLING_STRATEGY,
    verbose=False,
)
export_data = TiledDataExporter(
    spatial_res=spatial_res,
    s3_params=S3_PARAMS,
    tile_id=tile_ids[0],
    mode=MODE,
    years=YEARS,
    depths=DEPTHS,
    quantiles=QUANTILES,
)


MI INM
Feature hzn_dep is missing in the original catalog, adding is in the otf (on the fly) common group
Added `g1` successfully.
Added `g2` successfully.
Added `g3` successfully.
Added `g4` successfully.
Added `g5` successfully.
Added `g6` successfully.
Added `g7` successfully.
Added `g8` successfully.
Added `g9` successfully.
Added `g10` successfully.
Added `g11` successfully.
Added `g12` successfully.
Added `g13` successfully.
Added `g14` successfully.
Added `g15` successfully.
Added `g16` successfully.
Added `g17` successfully.


In [2]:
# Predict every configured soil property for each selected tile:
#   1. skip the tile when all of its outputs already exist,
#   2. load the covariates and fill missing values,
#   3. run each model once per target depth,
#   4. derive the per-block statistics for the configured MODE.
for tile_id in tile_ids:
    # The exporter receives its tile at construction time, so build one per tile.
    # Reusing the instance created for tile_ids[0] would check and derive outputs
    # against the first tile as soon as more than one tile is processed.
    export_data = TiledDataExporter(spatial_res=spatial_res, s3_params=S3_PARAMS, tile_id=tile_id,
                                    mode=MODE, years=YEARS, depths=DEPTHS, quantiles=QUANTILES)

    if all(export_data.check_all_exported(params['prop_file_name'], out_files_suffix) for params in models_params):
        ttprint(f"All properties for tile {tile_id} already computed, skipping")
        continue

    try:
        with TimeTracker(f" o Reading data for tile {tile_id}", False):
            properties_data.load_tile_data(tile_id)
            if properties_data.n_pixels_valid == 0:
                ttprint("No pixels to predict in this tile, skipping")
                continue
            # Fill gaps with the median first, then zero out whatever is still NaN.
            properties_data.convert_nan_to_median()
            properties_data.convert_nan_to_value(0.0)
        print("--------------------------------------------------------------")

        with TimeTracker(f" o Processing tile {tile_id}", False):
            for params in models_params:
                if export_data.check_all_exported(params['prop_file_name'], out_files_suffix):
                    ttprint(f"Property {params['prop_file_name']} for tile {tile_id} already computed, skipping")
                    continue
                with TimeTracker(f"   # Modeling {params['prop_file_name']}", False):
                    properties_model = params['model']
                    with TimeTracker(f"     - Getting predictions", False):
                        pred_depths = []
                        for depth in DEPTHS:
                            with TimeTracker(f"       - Depth {depth}", False):
                                # The depth covariate is constant over the tile for each pass.
                                properties_data.fill_otf_constant(DEPTH_VAR, depth)
                                pred_depths.append(properties_model.predict(properties_data))
                with TimeTracker(f"   # Deriving statistics", False):
                    if MODE == 'depths_years_quantiles':
                        export_data.derive_block_quantiles_and_mean(pred_depths, params['expm1'])
                    elif MODE == 'depths_years':
                        export_data.derive_block_mean(pred_depths, params['expm1'])
                    else:
                        # An unknown mode used to fall through and silently derive nothing.
                        raise ValueError(f"Unsupported MODE: {MODE!r}")
    finally:
        # Run the loader's cleanup for this tile on every path, including the
        # empty-tile skip and failures, before moving to the next tile.
        # NOTE(review): presumably releases the loaded tile buffers - confirm in skmap.
        properties_data.__exit__(None, None, None)
    print("--------------------------------------------------------------")



[11:29:54]    Prepare mask for tile 141E_37S: 0.00 secs
[11:30:07]    Read rasters and compute whales for tile 141E_37S: 13.42 secs
[11:30:08]  o Reading data for tile 141E_37S: 14.28 secs
--------------------------------------------------------------
[11:30:08]           Transpose data (96 threads): 0.19 secs
[11:30:08]           Model prediction (96 threads): 0.41 secs
[11:30:08]        - Depth 0: 0.60 secs
[11:30:09]           Transpose data (96 threads): 0.19 secs
[11:30:09]           Model prediction (96 threads): 0.24 secs
[11:30:09]        - Depth 30: 0.43 secs
[11:30:09]           Transpose data (96 threads): 0.20 secs
[11:30:09]           Model prediction (96 threads): 0.24 secs
[11:30:09]        - Depth 60: 0.44 secs
[11:30:10]           Transpose data (96 threads): 0.18 secs
[11:30:10]           Model prediction (96 threads): 0.23 secs
[11:30:10]        - Depth 100: 0.42 secs
[11:30:10]      - Getting predictions: 1.90 secs
[11:30:10]    # Modeling oc_iso.10694.1995.wpct: 1.

In [4]:

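# NOTE: everything below in this cell is commented-out legacy configuration kept
# for reference only. It relies on names that the cells shown in this notebook
# neither import nor define (RFRegressorDepths, S3_PREFIX, predictTrees) and has
# to be adapted to the `soc_params` layout before it can be re-enabled.
# # bulk density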
# bulk_density_params = {
#     'model':RFRegressorDepths(
#         model_name='bulk.density',
#         model_path=f'{MODEL_PATH}/model_rf.bulk.density_production_v{version}.joblib',
#         model_covs_path=None,
#         DEPTH_VAR='hzn_dep',
#         depths=DEPTHS,
#         predict_fn=lambda predictor, data: predictor.predict(data)
#     ),
#     'expm1':False,
#     'scale':100,
#     'nodata':32767,
#     'dtype':'int16',
#     'prop_file_name':'bd.core_iso.11272.2017.g.cm3',
#     's3_prefix':S3_PREFIX
# }

# ocd


# ocd_params = {
#     'model':RFRegressorDepths(
#         model_name='ocd', 
#         model_path=f'{MODEL_PATH}/model_rf.ocd_production_v{version}.joblib',
#         model_covs_path=None,
#         DEPTH_VAR='hzn_dep',
#         depths=DEPTHS,
#         predict_fn=predictTrees
#     ),
#     'expm1':True,
#     'scale':10,
#     'nodata':32767, 
#     'dtype':'int16',
#     'prop_file_name':'oc_iso.10694.1995.mg.cm3',
#     's3_prefix':S3_PREFIX
# }
# # ph h2o
# ph_h2o_params = {
#     'model':RFRegressorDepths(
#         model_name='ph.h2o',
#         model_path=f'{MODEL_PATH}/model_rf.ph.h2o_production_v{version}.joblib',
#         model_covs_path=None,
#         DEPTH_VAR='hzn_dep',
#         depths=DEPTHS,
#         predict_fn=lambda predictor, data: predictor.predict(data)
#     ),
#     'expm1':False,
#     'scale':10,
#     'nodata':255,
#     'dtype':'uint8',
#     'prop_file_name':'ph.h2o_iso.10390.2021.index',
#     's3_prefix':S3_PREFIX
# }