This notebook is prepared to run within the **Dymaxion Labs Platform**. It uses pre-trained weights to run predictions and then post-processes the results.

## Preparation

In [None]:
# Task
# Remote locations of this task's artifacts: the "images/" and "post/" folders under
# INPUT_ARTIFACTS_URL are downloaded, and the results folder is uploaded to
# OUTPUT_ARTIFACTS_URL.
INPUT_ARTIFACTS_URL = "gs://dym-temp/immap-informal-settlements/test-task/input"
OUTPUT_ARTIFACTS_URL = "gs://dym-temp/immap-informal-settlements/test-task/output"

# Pre-trained model weights, copied to WEIGHTS_PATH before predicting.
WEIGHTS_URL = (
    "gs://dym-ml-models/dymaxionlabs/immap-informal-settlements/v0.1.0/weights.h5"
)

# Extract chips
# Passed to satproc_extract_chips as --size and --step-size. Using the same value for
# both means the step equals the chip size.
SIZE = 100
STEP_SIZE = SIZE

# Post-process
# MIN_AREA is currently disabled; nothing in this notebook reads it.
# MIN_AREA = 1000
# Probability threshold given to both post-processing steps. The raster step rescales
# it to 0-255 before comparing.
THRESHOLD = 0.6

In [None]:
import os

from labfunctions.shortcuts import settings

In [None]:
# Predict
# Local working paths, all rooted at settings.BASE_PATH.
# WEIGHTS_PATH: destination of the downloaded model weights.
WEIGHTS_PATH = os.path.join(settings.BASE_PATH, "data", "weights.h5")
# IMAGES_DIR / POST_DIR: destinations of the downloaded "images/" and "post/" input artifacts.
IMAGES_DIR = os.path.join(settings.BASE_PATH, "data/input/images")
POST_DIR = os.path.join(settings.BASE_PATH, "data/input/post")
# PREDICT_CHIPS_DIR: output of chip extraction and input of the prediction step.
PREDICT_CHIPS_DIR = os.path.join(settings.BASE_PATH, "data/chips")
# PREDICT_RESULTS_DIR: where the prediction step writes its results.
PREDICT_RESULTS_DIR = os.path.join(settings.BASE_PATH, "data/predict")
# PREDICT_RASTER_PATH: mosaic of the prediction results.
# NOTE: this file lives inside PREDICT_RESULTS_DIR.
PREDICT_RASTER_PATH = os.path.join(settings.BASE_PATH, "data/predict/prob.tif")

# Output
# Everything under OUTPUT_DIR is uploaded to OUTPUT_ARTIFACTS_URL at the end.
OUTPUT_DIR = os.path.join(settings.BASE_PATH, "data/results/")
OUTPUT_RASTER_PATH = os.path.join(settings.BASE_PATH, "data/results/prob.tif")
OUTPUT_GRID_PATH = os.path.join(settings.BASE_PATH, "data/results/grid.gpkg")

### Configure Google Application credentials

In [None]:
import json
import base64

from labfunctions.shortcuts import secrets

In [None]:
# Path where the decoded service-account key file is written.
# NOTE: "aplication" is misspelled in the file name. It is kept unchanged because the
# path is only referenced through this constant.
GOOGLE_APPLICATION_CREDENTIALS_PATH = os.path.join(settings.BASE_PATH, ".google_aplication_credentials.json")

# The secret stores the key file base64-encoded. The decoded file is a credential
# (it is handed to `gcloud auth activate-service-account`), so create it readable and
# writable by the owner only instead of relying on the process umask.
credentials_fd = os.open(
    GOOGLE_APPLICATION_CREDENTIALS_PATH,
    os.O_WRONLY | os.O_CREAT | os.O_TRUNC,
    0o600,
)
with os.fdopen(credentials_fd, "w") as f:
    f.write(base64.b64decode(secrets["GOOGLE_APPLICATION_CREDENTIALS"]).decode())
# os.open() only applies the mode when it creates the file; tighten a pre-existing one too.
os.chmod(GOOGLE_APPLICATION_CREDENTIALS_PATH, 0o600)

In [None]:
!gcloud auth activate-service-account --key-file=$GOOGLE_APPLICATION_CREDENTIALS_PATH

### Download pre-trained weights

In [None]:
os.makedirs(os.path.dirname(WEIGHTS_PATH), exist_ok=True)

In [None]:
!gsutil cp -n $WEIGHTS_URL $WEIGHTS_PATH

### Download input artifacts

In [None]:
os.makedirs(IMAGES_DIR, exist_ok=True)

In [None]:
!gsutil -m cp -r $INPUT_ARTIFACTS_URL/images/* $IMAGES_DIR

In [None]:
os.makedirs(POST_DIR, exist_ok=True)

In [None]:
!gsutil -m cp -r $INPUT_ARTIFACTS_URL/post/* $POST_DIR

## Prepare prediction dataset

In [None]:
!satproc_extract_chips $IMAGES_DIR/*.tif \
    -o $PREDICT_CHIPS_DIR \
    --size $SIZE \
    --step-size $STEP_SIZE \
    --rescale \
    --rescale-mode values --min -15 --max 5

## Predict

In [None]:
from unetseg.predict import PredictConfig, predict
from unetseg.evaluate import plot_data_results
import subprocess
import glob

In [None]:
# Prediction configuration: read chips from PREDICT_CHIPS_DIR, write results to
# PREDICT_RESULTS_DIR, using the downloaded weights.
cfg = PredictConfig(
    images_path=PREDICT_CHIPS_DIR,
    results_path=PREDICT_RESULTS_DIR,
    batch_size=16,
    model_path=WEIGHTS_PATH,
    # NOTE(review): chips are extracted with SIZE = 100, but height/width here are 160.
    # Presumably unetseg resizes chips to the model input size — TODO confirm.
    height=160,
    width=160,
    n_channels=3,
    n_classes=1,
    class_weights=[1],
)

In [None]:
predict(cfg)

In [None]:
def build_prediction_raster(*, input_dir, output_path):
    """Mosaic every ``*.tif`` in *input_dir* into one compressed GeoTIFF.

    The chips are first combined into a temporary VRT with ``gdalbuildvrt`` and
    then converted to a DEFLATE-compressed GeoTIFF with ``gdal_translate``.

    Args:
        input_dir: Directory containing the ``*.tif`` files to mosaic.
        output_path: Path of the GeoTIFF to write. Its parent directory is
            created if needed. If this file already sits inside *input_dir*
            (e.g. from a previous run) it is not used as an input.

    Raises:
        FileNotFoundError: If *input_dir* contains no ``*.tif`` file to mosaic.
        subprocess.CalledProcessError: If a GDAL command exits with an error.
    """
    import tempfile  # local import: only this function needs it

    # A previous run may have left the mosaic inside input_dir; feeding it back in
    # would make gdal_translate read the very file it is writing.
    output_abspath = os.path.abspath(output_path)
    filenames = sorted(
        path
        for path in map(os.path.abspath, glob.glob(os.path.join(input_dir, "*.tif")))
        if path != output_abspath
    )
    if not filenames:
        raise FileNotFoundError(f"No .tif files found in {input_dir!r}")

    output_dir = os.path.dirname(output_path)
    if output_dir:  # os.makedirs("") fails for a bare file name
        os.makedirs(output_dir, exist_ok=True)

    # Private scratch directory instead of fixed /tmp names, so concurrent or repeated
    # runs cannot clobber each other and nothing is left behind.
    with tempfile.TemporaryDirectory() as tmp_dir:
        vrt_path = os.path.join(tmp_dir, "predict.vrt")
        list_path = os.path.join(tmp_dir, "list.txt")
        with open(list_path, "w") as f:
            f.writelines(f"{name}\n" for name in filenames)

        # Build virtual raster of all chip tifs into a single VRT.
        # Argument lists (no shell) keep paths containing spaces intact.
        subprocess.run(
            ["gdalbuildvrt", "-input_file_list", list_path, vrt_path],
            check=True,
        )

        # Convert VRT to GeoTiff with DEFLATE compression enabled
        subprocess.run(
            [
                "gdal_translate",
                "-of", "GTiff",
                "-co", "COMPRESS=DEFLATE",
                "-co", "PREDICTOR=2",
                "-co", "ZLEVEL=9",
                vrt_path,
                output_path,
            ],
            check=True,
        )

In [None]:
# Mosaic the per-chip prediction results into a single probability raster.
# NOTE(review): PREDICT_RASTER_PATH is located inside PREDICT_RESULTS_DIR, so on a re-run
# the previous prob.tif also matches the "*.tif" glob of the input directory.
build_prediction_raster(
    input_dir=PREDICT_RESULTS_DIR,
    output_path=PREDICT_RASTER_PATH,
)

## Post-process

In [None]:
import geopandas as gpd
import pandas as pd

In [None]:
def run_command(cmd, cwd=None):
    """Execute *cmd* through the shell and fail loudly on a non-zero exit.

    Args:
        cmd: Command line, interpreted by the shell.
        cwd: Optional working directory for the command.

    Raises:
        subprocess.CalledProcessError: If the command exits with a non-zero status.
    """
    subprocess.check_call(cmd, shell=True, cwd=cwd)


def run_otb_command(cmd, cwd=None):
    """Run *cmd*, first sourcing the OTB environment profile when one is configured.

    If the ``OTBENV_PROFILE_PATH`` environment variable is set, the command is run
    inside ``bash -c`` after sourcing that file; otherwise it is run unchanged.

    Args:
        cmd: Shell command line to execute.
        cwd: Optional working directory for the command.

    Raises:
        subprocess.CalledProcessError: If the command exits with a non-zero status
            (raised by ``run_command``).
    """
    import shlex  # local import: only this function needs it

    otb_profile_path = os.getenv("OTBENV_PROFILE_PATH")
    if otb_profile_path:
        # Quote the whole script for the extra `bash -c` layer. Hand-written single
        # quotes would be terminated early by any single quote inside *cmd*.
        # The profile path itself is left unquoted so shell expansion in it still works.
        script = f"source {otb_profile_path}; {cmd}"
        cmd = f"bash -c {shlex.quote(script)}"
    run_command(cmd, cwd=cwd)


def zonal_statistics(image_file, vector_file, out_vector_file):
    """Run ``otbcli_ZonalStatistics`` for *image_file* over the zones in *vector_file*.

    Args:
        image_file: Raster passed as ``-in``.
        vector_file: Vector file passed as ``-inzone.vector.in``.
        out_vector_file: Vector file passed as ``-out.vector.filename``.
    """
    fragments = (
        "otbcli_ZonalStatistics ",
        f"-in {image_file} ",
        f"-inzone.vector.in {vector_file} ",
        f"-out.vector.filename {out_vector_file} ",
    )
    # Each fragment carries its own trailing space, so a plain concatenation
    # yields the full command line.
    run_otb_command("".join(fragments))

In [None]:
def run_postprocess_raster(
    path_unet_results,
    lulc_masks,
    rendered_raster_path,
    thr=0.5,
):
    """Threshold and mask the probability raster, then render it as RGB.

    Steps:
      1. Zero out pixels whose value is below the threshold.
      2. Align the land-use mask to the thresholded raster.
      3. Zero out pixels where the aligned mask is not 1.
      4. Set 0 as nodata and compress the result into ``/tmp/thr.tif``.
      5. Render ``/tmp/thr.tif`` to *rendered_raster_path* with ``script/render_rgb.py``.

    ``/tmp/thr.tif`` is intentionally left in place after the call; the other
    intermediate rasters are removed.

    Args:
        path_unet_results: Probability raster to post-process.
        lulc_masks: Land-use mask raster.
        rendered_raster_path: Destination of the rendered raster. Its parent
            directory is created if needed.
        thr: Probability threshold in the 0-1 range; rescaled to 0-255 internally.

    Raises:
        subprocess.CalledProcessError: If any external command fails.
    """
    thr = thr * 255  # rescale to 0-255

    thresholded_path = "/tmp/raster_tmp_math.tif"
    aligned_mask_path = "/tmp/lulc.tif"
    masked_path = "/tmp/raster_tmp_math_2.tif"
    final_path = "/tmp/thr.tif"

    try:
        # Drop pixels with probability < threshold
        run_otb_command(
            f'otbcli_BandMath -il {path_unet_results} -out {thresholded_path} -exp "(im1b1>={thr})*im1b1 + (im1b1<{thr})*0"'
        )
        # Align the (cropped) land-use mask with the thresholded raster
        run_otb_command(
            f"otbcli_Superimpose -inr {thresholded_path} -inm {lulc_masks} -out {aligned_mask_path}"
        )
        # Drop pixels that do not fall in the built-up land-use class (mask value 1)
        run_otb_command(
            f'otbcli_BandMath -il {thresholded_path} {aligned_mask_path} -out {masked_path} -exp "((im2b1==1)*im1b1 + (im2b1==0)*0)"'
        )
        # Set the nodata value and compress the raster
        run_otb_command(
            f'gdal_translate -a_nodata 0 {masked_path} {final_path} -co "COMPRESS=LZW" -co "TILED=YES"'
        )

        rendered_dir = os.path.dirname(rendered_raster_path)
        if rendered_dir:  # os.makedirs("") fails for a bare file name
            os.makedirs(rendered_dir, exist_ok=True)
        # check=True so a failed render is reported instead of silently producing no output
        subprocess.run(
            f"python3 {settings.BASE_PATH}/script/render_rgb.py {final_path} {rendered_raster_path}",
            shell=True,
            check=True,
        )
    finally:
        # Remove the intermediates this function actually creates. final_path is kept
        # on purpose because it is part of this function's output.
        for tmp_path in (thresholded_path, aligned_mask_path, masked_path):
            try:
                os.remove(tmp_path)
            except FileNotFoundError:
                pass


def downscale_grid(grid_path, out_grid_path, level=17):
    """Dissolve the grid into coarser tiles keyed by a prefix of ``tile_id``.

    A ``tile_level`` column is added holding the first *level* characters of each
    ``tile_id`` (as a string); features sharing a ``tile_level`` are dissolved
    into one and the result is written to *out_grid_path*.

    Args:
        grid_path: Vector file with a ``tile_id`` column.
        out_grid_path: Destination vector file. Its parent directory is created
            if needed.
        level: Number of leading ``tile_id`` characters that identify a coarse tile.
    """
    gdf_grid = gpd.read_file(grid_path)
    # Honour the `level` argument instead of a hard-coded prefix length.
    gdf_grid["tile_level"] = gdf_grid["tile_id"].apply(lambda x: str(x)[:level])
    out_dir = os.path.dirname(out_grid_path)
    if out_dir:  # os.makedirs("") fails for a bare file name
        os.makedirs(out_dir, exist_ok=True)
    gdf_grid.dissolve(by="tile_level").to_file(out_grid_path)


def run_postprocess_grid(raster_path, grid_filt_name, thr, grid_downscaled_path):
    """Keep the grid tiles whose zonal ``max_0`` statistic reaches *thr*.

    Zonal statistics of *raster_path* are computed over the zones in
    *grid_downscaled_path*. The ``max_0`` values are aggregated per ``tile_level``
    (median), and tiles with a value >= *thr* are written to *grid_filt_name* as a
    GeoPackage. When no tile qualifies, nothing is written and a message is printed.

    Args:
        raster_path: Raster to compute the statistics from.
        grid_filt_name: Destination GeoPackage. Its parent directory is created
            if needed.
        thr: Minimum ``max_0`` value for a tile to be kept.
            NOTE(review): compared against raster values without rescaling —
            confirm the expected scale against the caller.
        grid_downscaled_path: Vector file with the zones; must provide ``tile_level``.
    """
    import tempfile  # local import: only this function needs it

    # Private scratch directory: the intermediate file no longer lands in the working
    # directory, is removed even if a step fails, and the cleanup cannot touch
    # unrelated files.
    with tempfile.TemporaryDirectory() as tmp_dir:
        stats_path = os.path.join(tmp_dir, "grid_tmp.gpkg")
        zonal_statistics(raster_path, grid_downscaled_path, stats_path)
        gdf_zs = gpd.read_file(stats_path)

    # One value per tile_level: the median of the zonal maxima.
    tile_scores = (
        gdf_zs[["tile_level", "max_0"]]
        .groupby("tile_level")
        .median()
        .reset_index(drop=False)
    )
    # Re-attach the geometries, which the aggregation dropped.
    merged = pd.merge(tile_scores, gdf_zs[["tile_level", "geometry"]], on="tile_level")
    merged = gpd.GeoDataFrame(merged, geometry=merged.geometry)
    gdf_zs_filt = merged[merged["max_0"] >= thr]
    if gdf_zs_filt.shape[0] > 0:
        out_dir = os.path.dirname(grid_filt_name)
        if out_dir:  # os.makedirs("") fails for a bare file name
            os.makedirs(out_dir, exist_ok=True)
        gdf_zs_filt.to_file(grid_filt_name, driver="GPKG")
    else:
        print("Empty grid or probability map")

In [None]:
# Post-processing inputs, expected among the downloaded "post/" artifacts.
MASK_PATH = os.path.join(POST_DIR, "lulc_mask.tif")
GRID_PATH = os.path.join(POST_DIR, "grid.gpkg")

In [None]:
# Threshold and mask the probability raster and render it to OUTPUT_RASTER_PATH.
# The function also writes /tmp/thr.tif, which the grid post-processing reads.
run_postprocess_raster(PREDICT_RASTER_PATH, MASK_PATH, OUTPUT_RASTER_PATH, THRESHOLD)

In [None]:
# Scratch location for the dissolved (coarser) grid.
downscaled_grid_path = "/tmp/grid_downscaled.gpkg"

# Uses the function's default `level`.
downscale_grid(GRID_PATH, downscaled_grid_path)

In [None]:
# Filter the dissolved grid using the thresholded raster that run_postprocess_raster
# left at /tmp/thr.tif.
# NOTE(review): THRESHOLD is passed here on the 0-1 scale, while the raster step rescales
# it to 0-255 before thresholding — confirm this is intended.
run_postprocess_grid("/tmp/thr.tif", OUTPUT_GRID_PATH, THRESHOLD, downscaled_grid_path)

## Export results as artifacts

In [None]:
!gsutil -m cp -r $OUTPUT_DIR/* $OUTPUT_ARTIFACTS_URL/