This notebook is intended to be run within the **Dymaxion Labs Platform**. It uses pre-trained weights to run a prediction over the input images and then post-processes the results.

## Preparation

In [1]:
# Task
# Bucket locations: input images are copied from INPUT_ARTIFACTS_URL and the
# final results are uploaded to OUTPUT_ARTIFACTS_URL.
INPUT_ARTIFACTS_URL = "gs://dym-temp/immap-flood-areas/test-task/input"
OUTPUT_ARTIFACTS_URL = "gs://dym-temp/immap-flood-areas/test-task/output"

# Location of the pre-trained model weights downloaded before predicting.
WEIGHTS_URL = (
    "gs://dym-ml-models/dymaxionlabs/immap-flood-areas/v0.1.0/weights.h5"
)

# Extract chips
# SIZE is passed as --size and STEP_SIZE as --step-size to satproc_extract_chips.
# NOTE(review): a step equal to the size presumably yields non-overlapping
# chips -- confirm against the satproc documentation.
SIZE = 160
STEP_SIZE = SIZE

# Post-process
# MIN_AREA is the minimum polygon area kept by the area filter; the area is
# measured after reprojecting each polygon to a UTM CRS (presumably square
# meters -- confirm).
MIN_AREA = 750000
# THRESHOLD is scaled by 255 and compared against band 1 of the prediction
# raster to build the binary mask.
THRESHOLD = 0.2

In [2]:
import os

from labfunctions.shortcuts import settings

In [3]:
# Predict
# Local working paths, all located under settings.BASE_PATH/data:
# downloaded weights, downloaded input images, extracted chips, per-chip
# prediction results and the mosaicked prediction raster.
WEIGHTS_PATH = os.path.join(settings.BASE_PATH, "data", "weights.h5")
IMAGES_DIR = os.path.join(settings.BASE_PATH, "data/images")
PREDICT_CHIPS_DIR = os.path.join(settings.BASE_PATH, "data/chips")
PREDICT_RESULTS_DIR = os.path.join(settings.BASE_PATH, "data/predict")
PREDICT_RASTER_PATH = os.path.join(settings.BASE_PATH, "data/predict/prob.tif")

# Post-process
# Intermediate files: binarized raster, its polygonized vector version and
# the vector left after filtering polygons by minimum area.
POST_BIN_RASTER_PATH = os.path.join(settings.BASE_PATH, "data/post/bin.tif")
POST_BIN_VECTOR_PATH = os.path.join(settings.BASE_PATH, "data/post/bin.gpkg")
POST_FILTERED_PATH = os.path.join(settings.BASE_PATH, "data/post/filtered_bin.gpkg")

# Output
# Everything inside OUTPUT_DIR is uploaded to OUTPUT_ARTIFACTS_URL at the end
# of the notebook.
OUTPUT_DIR = os.path.join(settings.BASE_PATH, "data/results/")
OUTPUT_RASTER_PATH = os.path.join(settings.BASE_PATH, "data/results/prob.tif")
OUTPUT_MASK_PATH = os.path.join(settings.BASE_PATH, "data/results/prob.gpkg")

### Configure Google Application credentials

In [4]:
import json
import base64

from labfunctions.shortcuts import secrets

In [5]:
GOOGLE_APPLICATION_CREDENTIALS_PATH = os.path.join(settings.BASE_PATH, ".google_aplication_credentials.json")

# Decode the base64-encoded service account key stored in the project secrets
# and write it to disk so that `gcloud` can read it as a key file.
# The file contains a private key, so it is created readable and writable by
# the owner only instead of relying on the process umask.
credentials_fd = os.open(
    GOOGLE_APPLICATION_CREDENTIALS_PATH,
    os.O_WRONLY | os.O_CREAT | os.O_TRUNC,
    0o600,
)
with os.fdopen(credentials_fd, "w") as f:
    f.write(base64.b64decode(secrets["GOOGLE_APPLICATION_CREDENTIALS"]).decode())

# The mode given to os.open() only applies to newly created files; tighten
# the permissions as well when the file was left over from a previous run.
os.chmod(GOOGLE_APPLICATION_CREDENTIALS_PATH, 0o600)

In [6]:
!gcloud auth activate-service-account --key-file=$GOOGLE_APPLICATION_CREDENTIALS_PATH

### Download pre-trained weights

In [7]:
os.makedirs(os.path.dirname(WEIGHTS_PATH), exist_ok=True)

In [8]:
!gsutil cp -n $WEIGHTS_URL $WEIGHTS_PATH

Skipping existing item: file:///home/munshkr/dym/immap-flood-areas/data/weights.h5


### Download input images

In [9]:
os.makedirs(IMAGES_DIR, exist_ok=True)

In [10]:
!gsutil -m cp -r $INPUT_ARTIFACTS_URL/* $IMAGES_DIR

Copying gs://dym-temp/immap-flood-areas/test-task/input/dataset_prediction_500m_epsg4326.tif...
/ [1/1 files][ 61.0 MiB/ 61.0 MiB] 100% Done                                    
Operation completed over 1 objects/61.0 MiB.                                     


## Prepare prediction dataset

In [11]:
!satproc_extract_chips $IMAGES_DIR/*.tif \
    -o $PREDICT_CHIPS_DIR \
    --size $SIZE \
    --step-size $STEP_SIZE \
    --rescale \
    --rescale-mode percentiles --lower-cut 0 --upper-cut 100

Rasters:   0%|                                            | 0/1 [00:00<?, ?it/s]
dataset_prediction_500m_epsg4326.tif windows:   0%|     | 0/216 [00:00<?, ?it/s][A
dataset_prediction_500m_epsg4326.tif windows:   7%| | 15/216 [00:00<00:01, 143.1[A
dataset_prediction_500m_epsg4326.tif windows:  16%|1| 34/216 [00:00<00:01, 161.6[A
dataset_prediction_500m_epsg4326.tif windows:  24%|2| 52/216 [00:00<00:00, 169.2[A
dataset_prediction_500m_epsg4326.tif windows:  32%|3| 69/216 [00:00<00:00, 167.6[A
dataset_prediction_500m_epsg4326.tif windows:  40%|4| 87/216 [00:00<00:00, 169.6[A
dataset_prediction_500m_epsg4326.tif windows:  48%|4| 104/216 [00:00<00:00, 169.[A
dataset_prediction_500m_epsg4326.tif windows:  56%|5| 122/216 [00:00<00:00, 168.[A
dataset_prediction_500m_epsg4326.tif windows:  64%|6| 139/216 [00:00<00:00, 167.[A
dataset_prediction_500m_epsg4326.tif windows:  72%|7| 156/216 [00:00<00:00, 164.[A
dataset_prediction_500m_epsg4326.tif windows:  81%|8| 174/216 [00:01<00:00, 167

## Predict

In [12]:
from unetseg.predict import PredictConfig, predict
from unetseg.evaluate import plot_data_results
import subprocess
import glob

2022-08-03 17:03:48.380756: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /home/munshkr/.local/lib/python3.10/site-packages/cv2/../../lib64:
2022-08-03 17:03:48.380792: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.


In [13]:
# Prediction settings. The chip height and width reuse SIZE (the value given
# to the chip extraction step) instead of repeating the literal, so both steps
# cannot drift apart when SIZE is changed.
# NOTE(review): the pre-trained weights presumably expect a specific input
# size and channel count -- confirm before changing SIZE or n_channels.
cfg = PredictConfig(
    images_path=PREDICT_CHIPS_DIR,
    results_path=PREDICT_RESULTS_DIR,
    batch_size=4,
    model_path=WEIGHTS_PATH,
    height=SIZE,
    width=SIZE,
    n_channels=6,
    n_classes=1,
    class_weights=[1],
)

In [14]:
predict(cfg)

Total images to predict (/home/munshkr/dym/immap-flood-areas/data/chips/images/*.tif): 216
After skipping existing results: 216


2022-08-03 17:03:52.628290: E tensorflow/stream_executor/cuda/cuda_driver.cc:271] failed call to cuInit: CUDA_ERROR_NO_DEVICE: no CUDA-capable device is detected
2022-08-03 17:03:52.628326: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (munshkr-x260): /proc/driver/nvidia/version does not exist
2022-08-03 17:03:52.628972: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2022-08-03 17:03:52.906050: W tensorflow/core/framework/cpu_allocator_impl.cc:82] Allocation of 37748736 exceeds 10% of free system memory.
2022-08-03 17:03:52.938150: W tensorflow/core/framework/cpu_allocator_impl.cc:82] Allocation of 37748736 exceeds 10% of free system memory.
2022-08-03 17



  2%|███                                                                                                                                                               | 1/54 [00:01<01:08,  1.28s/it]



  4%|██████                                                                                                                                                            | 2/54 [00:01<00:44,  1.17it/s]



  6%|█████████                                                                                                                                                         | 3/54 [00:02<00:36,  1.40it/s]



  7%|████████████                                                                                                                                                      | 4/54 [00:02<00:32,  1.52it/s]



  9%|███████████████                                                                                                                                                   | 5/54 [00:03<00:30,  1.60it/s]



 11%|██████████████████                                                                                                                                                | 6/54 [00:04<00:29,  1.65it/s]



 13%|█████████████████████                                                                                                                                             | 7/54 [00:04<00:27,  1.69it/s]



 15%|████████████████████████                                                                                                                                          | 8/54 [00:05<00:27,  1.66it/s]



 17%|███████████████████████████                                                                                                                                       | 9/54 [00:05<00:26,  1.71it/s]



 19%|█████████████████████████████▊                                                                                                                                   | 10/54 [00:06<00:25,  1.73it/s]



 20%|████████████████████████████████▊                                                                                                                                | 11/54 [00:06<00:24,  1.76it/s]



 22%|███████████████████████████████████▊                                                                                                                             | 12/54 [00:07<00:23,  1.77it/s]



 24%|██████████████████████████████████████▊                                                                                                                          | 13/54 [00:08<00:22,  1.78it/s]



 26%|█████████████████████████████████████████▋                                                                                                                       | 14/54 [00:08<00:22,  1.79it/s]



 28%|████████████████████████████████████████████▋                                                                                                                    | 15/54 [00:09<00:21,  1.79it/s]



 30%|███████████████████████████████████████████████▋                                                                                                                 | 16/54 [00:09<00:21,  1.79it/s]



 31%|██████████████████████████████████████████████████▋                                                                                                              | 17/54 [00:10<00:20,  1.80it/s]



 33%|█████████████████████████████████████████████████████▋                                                                                                           | 18/54 [00:10<00:19,  1.80it/s]



 35%|████████████████████████████████████████████████████████▋                                                                                                        | 19/54 [00:11<00:19,  1.81it/s]



 37%|███████████████████████████████████████████████████████████▋                                                                                                     | 20/54 [00:11<00:18,  1.79it/s]



 39%|██████████████████████████████████████████████████████████████▌                                                                                                  | 21/54 [00:12<00:18,  1.79it/s]



 41%|█████████████████████████████████████████████████████████████████▌                                                                                               | 22/54 [00:13<00:17,  1.80it/s]



 43%|████████████████████████████████████████████████████████████████████▌                                                                                            | 23/54 [00:13<00:17,  1.82it/s]



 44%|███████████████████████████████████████████████████████████████████████▌                                                                                         | 24/54 [00:14<00:16,  1.79it/s]



 46%|██████████████████████████████████████████████████████████████████████████▌                                                                                      | 25/54 [00:14<00:16,  1.80it/s]



 48%|█████████████████████████████████████████████████████████████████████████████▌                                                                                   | 26/54 [00:15<00:15,  1.80it/s]



 50%|████████████████████████████████████████████████████████████████████████████████▌                                                                                | 27/54 [00:15<00:14,  1.82it/s]



 52%|███████████████████████████████████████████████████████████████████████████████████▍                                                                             | 28/54 [00:16<00:14,  1.83it/s]



 54%|██████████████████████████████████████████████████████████████████████████████████████▍                                                                          | 29/54 [00:16<00:13,  1.83it/s]



 56%|█████████████████████████████████████████████████████████████████████████████████████████▍                                                                       | 30/54 [00:17<00:13,  1.83it/s]



 57%|████████████████████████████████████████████████████████████████████████████████████████████▍                                                                    | 31/54 [00:17<00:12,  1.84it/s]



 59%|███████████████████████████████████████████████████████████████████████████████████████████████▍                                                                 | 32/54 [00:18<00:11,  1.83it/s]



 61%|██████████████████████████████████████████████████████████████████████████████████████████████████▍                                                              | 33/54 [00:19<00:11,  1.82it/s]



 63%|█████████████████████████████████████████████████████████████████████████████████████████████████████▎                                                           | 34/54 [00:19<00:11,  1.81it/s]



 65%|████████████████████████████████████████████████████████████████████████████████████████████████████████▎                                                        | 35/54 [00:20<00:10,  1.81it/s]



 67%|███████████████████████████████████████████████████████████████████████████████████████████████████████████▎                                                     | 36/54 [00:20<00:10,  1.71it/s]



 69%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████▎                                                  | 37/54 [00:21<00:10,  1.60it/s]



 70%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎                                               | 38/54 [00:22<00:11,  1.41it/s]



 72%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎                                            | 39/54 [00:23<00:10,  1.38it/s]



 74%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎                                         | 40/54 [00:23<00:10,  1.36it/s]



 76%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏                                      | 41/54 [00:24<00:09,  1.43it/s]



 78%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏                                   | 42/54 [00:25<00:08,  1.47it/s]



 80%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏                                | 43/54 [00:25<00:07,  1.47it/s]



 81%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏                             | 44/54 [00:26<00:06,  1.45it/s]



 83%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏                          | 45/54 [00:27<00:05,  1.52it/s]



 85%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏                       | 46/54 [00:27<00:05,  1.59it/s]



 87%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏                    | 47/54 [00:28<00:04,  1.52it/s]



 89%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████                  | 48/54 [00:29<00:03,  1.52it/s]



 91%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████               | 49/54 [00:29<00:03,  1.52it/s]



 93%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████            | 50/54 [00:30<00:02,  1.54it/s]



 94%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████         | 51/54 [00:31<00:02,  1.44it/s]



 96%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████      | 52/54 [00:31<00:01,  1.49it/s]



 98%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████   | 53/54 [00:32<00:00,  1.54it/s]



100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 54/54 [00:33<00:00,  1.62it/s]

Done!





In [15]:
def build_prediction_raster(*, input_dir, output_path):
    """Mosaic every ``*.tif`` file found in ``input_dir`` into a single GeoTIFF.

    The files are first combined into a virtual raster (VRT) with
    ``gdalbuildvrt``; the VRT is then converted with ``gdal_translate`` into a
    GeoTIFF using DEFLATE compression.

    Args:
        input_dir: Directory that contains the ``*.tif`` files to mosaic.
        output_path: Path of the GeoTIFF to write. Its parent directory is
            created when it does not exist.

    Raises:
        ValueError: If ``input_dir`` contains no ``*.tif`` files.
        subprocess.CalledProcessError: If a GDAL command exits with a non-zero
            status.
    """
    import tempfile

    # Absolute paths keep the file list valid regardless of where the VRT is
    # written; sorting makes the mosaic order deterministic between runs.
    filenames = sorted(
        os.path.abspath(path)
        for path in glob.glob(os.path.join(input_dir, "*.tif"))
    )
    if not filenames:
        raise ValueError(f"No .tif files found in {input_dir}")

    output_dir = os.path.dirname(output_path)
    if output_dir:  # a bare filename has no directory to create
        os.makedirs(output_dir, exist_ok=True)

    # Intermediate files live in a private temporary directory so concurrent
    # runs do not overwrite each other and nothing is left behind afterwards.
    with tempfile.TemporaryDirectory() as tmp_dir:
        vrt_path = os.path.join(tmp_dir, "predict.vrt")
        list_path = os.path.join(tmp_dir, "list.txt")
        with open(list_path, "w") as f:
            f.writelines(f"{name}\n" for name in filenames)

        # Build virtual raster of all chip tifs into a single VRT.
        # Arguments are passed as a list (no shell) so that paths containing
        # spaces or shell metacharacters are handled correctly.
        subprocess.run(
            ["gdalbuildvrt", "-input_file_list", list_path, vrt_path],
            check=True,
        )

        # Convert VRT to GeoTiff with DEFLATE compression enabled
        subprocess.run(
            [
                "gdal_translate",
                "-of", "GTiff",
                "-co", "COMPRESS=DEFLATE",
                "-co", "PREDICTOR=2",
                "-co", "ZLEVEL=9",
                vrt_path,
                output_path,
            ],
            check=True,
        )

In [16]:
build_prediction_raster(
    input_dir=PREDICT_RESULTS_DIR,
    output_path=PREDICT_RASTER_PATH,
)

0...10...20...30...40...50...60...70...80...90...100 - done.
Input file size is 1915, 2785
0...10...20...30...40...50...60...70...80...90...100 - done.


## Post-process

### Binarize image with threshold to create mask

In [17]:
# Expression that selects the pixels above the threshold. THRESHOLD is
# converted to the value range of the prediction raster (which goes from 0 to
# 255); selected pixels are written as 199 and all others as 0, which is also
# declared as the nodata value.
exp = f"((A > {int(THRESHOLD * 255)})*199)"

os.makedirs(os.path.dirname(POST_BIN_RASTER_PATH), exist_ok=True)
# Arguments are passed as a list (no shell) so that paths containing spaces or
# shell metacharacters reach gdal_calc.py intact.
subprocess.run(
    [
        "gdal_calc.py",
        "--quiet",
        "--overwrite",
        "-A",
        PREDICT_RASTER_PATH,
        "--A_band=1",
        "--outfile",
        POST_BIN_RASTER_PATH,
        f"--calc={exp}",
        "--NoDataValue=0",
    ],
    check=True,
)

CompletedProcess(args='gdal_calc.py --quiet --overwrite -A /home/munshkr/dym/immap-flood-areas/data/predict/prob.tif --A_band=1 --outfile /home/munshkr/dym/immap-flood-areas/data/post/bin.tif --calc="((A > 51)*199)" --NoDataValue=0', returncode=0)

### Polygonize binary mask

In [18]:
# Polygonize the areas selected by the binary mask (probability above the
# threshold). Any previous output is removed first so the vector file is
# always rebuilt from scratch.
os.makedirs(os.path.dirname(POST_BIN_VECTOR_PATH), exist_ok=True)
if os.path.exists(POST_BIN_VECTOR_PATH):
    os.unlink(POST_BIN_VECTOR_PATH)
# Arguments are passed as a list (no shell) so that paths containing spaces or
# shell metacharacters are handled correctly.
subprocess.run(
    ["gdal_polygonize.py", POST_BIN_RASTER_PATH, POST_BIN_VECTOR_PATH],
    check=True,
)

0...10...20...30...40...50...60...70...80...90...Creating output /home/munshkr/dym/immap-flood-areas/data/post/bin.gpkg of format GPKG.
100 - done.


CompletedProcess(args='gdal_polygonize.py /home/munshkr/dym/immap-flood-areas/data/post/bin.tif /home/munshkr/dym/immap-flood-areas/data/post/bin.gpkg', returncode=0)

### Filter by min. area

In [19]:
import math

import fiona
from satproc.utils import reproject_shape
from shapely.geometry import shape
from tqdm import tqdm


def filter_by_min_area(src_file, dst_file, min_area=500, utm_code=None):
    """Copy to ``dst_file`` the features of ``src_file`` whose area is large enough.

    Each geometry is reprojected to ``utm_code`` before its area is compared
    against ``min_area``; the feature written to the output is the original,
    unprojected one. The output is a GeoPackage with the same CRS and schema
    as the source.

    Args:
        src_file: Path of the vector file to read.
        dst_file: Path of the GeoPackage to write.
        min_area: Minimum area (in the units of the ``utm_code`` CRS) a
            feature must have to be kept.
        utm_code: CRS used to measure areas. When falsy, it is derived from
            ``src_file`` with ``get_epsg_utm_from`` and printed.
    """
    if not utm_code:
        utm_code = get_epsg_utm_from(src_file)
        print(f"Using projected CRS {utm_code} for filtering by meters")

    with fiona.open(src_file) as src, fiona.open(
        dst_file, "w", driver="GPKG", crs=src.crs, schema=src.schema
    ) as dst:
        progress = tqdm(
            src, desc=f"Filtering polygons by area (>={min_area}m)", ascii=True
        )
        for feature in progress:
            # Measure the area on the projected geometry, not the source one.
            projected = reproject_shape(
                shape(feature["geometry"]), src.crs, utm_code
            )
            is_large_enough = projected.area >= min_area
            if is_large_enough:
                dst.write(feature)


def get_epsg_utm_from(vector_path):
    """Return the UTM EPSG code that corresponds to a vector file.

    Only the first feature of the file is inspected. Its geometry is
    reprojected to ``epsg:4326`` when the file uses a different CRS, and the
    result is handed to ``get_epsg_utm_from_wgs_geom``.

    Raises:
        ValueError: If the file has no features.
    """
    with fiona.open(vector_path) as src:
        first_feature = next(iter(src), None)
        if not first_feature:
            raise ValueError(f"{vector_path} has no features")

        wgs_geom = shape(first_feature["geometry"])
        already_wgs84 = src.crs["init"] == "epsg:4326"
        if not already_wgs84:
            wgs_geom = reproject_shape(wgs_geom, src.crs["init"], "epsg:4326")

        return get_epsg_utm_from_wgs_geom(wgs_geom)


def get_epsg_utm_from_wgs_geom(geom):
    """
    Calculate UTM zone from a geometry in WGS84 geographic coordinates and
    get corresponding EPSG code.

    The zone is derived from the longitude of the geometry centroid and the
    hemisphere from its latitude.

    Args:
        geom: Geometry exposing a ``centroid`` with ``x`` (longitude) and
            ``y`` (latitude) attributes, in degrees.

    Returns:
        A string such as ``"epsg:32618"``: prefix ``326`` for the northern
        hemisphere (latitude >= 0) or ``327`` for the southern one, followed
        by the two-digit UTM zone number.
    """
    centroid = geom.centroid
    lon, lat = centroid.x, centroid.y
    # The modulo wraps longitude 180 back into zone 1.
    utm_zone = (math.floor((lon + 180) / 6) % 60) + 1
    hemisphere_prefix = "326" if lat >= 0 else "327"
    # Zones 1-9 must be zero-padded to two digits, otherwise the result is an
    # invalid code (e.g. "epsg:3261" instead of "epsg:32601").
    return f"epsg:{hemisphere_prefix}{utm_zone:02d}"

In [20]:
filter_by_min_area(POST_BIN_VECTOR_PATH, POST_FILTERED_PATH, min_area=MIN_AREA)

Using projected CRS epsg:32618 for filtering by meters


  in_crs_string = _prepare_from_proj_string(in_crs_string)
Filtering polygons by area (>=750000m): 100%|####################################################################################################################| 5085/5085 [00:37<00:00, 137.10it/s]


### Clip raster with filtered vector mask

In [21]:
# -cutline names the vector file used to clip the original prediction raster;
# -crop_to_cutline shrinks the output extent to that cutline and -dstalpha
# adds an alpha band to the output.
os.makedirs(os.path.dirname(OUTPUT_RASTER_PATH), exist_ok=True)
# Arguments are passed as a list (no shell) so that paths containing spaces or
# shell metacharacters are handled correctly.
subprocess.run(
    [
        "gdalwarp",
        "-overwrite",
        "-cutline",
        POST_FILTERED_PATH,
        "-crop_to_cutline",
        "-dstalpha",
        PREDICT_RASTER_PATH,
        OUTPUT_RASTER_PATH,
    ],
    check=True,
)

Creating output file that is 1756P x 2638L.
Processing /home/munshkr/dym/immap-flood-areas/data/predict/prob.tif [1/1] : 0Using internal nodata values (e.g. 0) for image /home/munshkr/dym/immap-flood-areas/data/predict/prob.tif.
...10...20...30...40...50...60...70...80...90...100 - done.


CompletedProcess(args='gdalwarp -overwrite -cutline /home/munshkr/dym/immap-flood-areas/data/post/filtered_bin.gpkg -crop_to_cutline -dstalpha /home/munshkr/dym/immap-flood-areas/data/predict/prob.tif /home/munshkr/dym/immap-flood-areas/data/results/prob.tif', returncode=0)

In [22]:
import shutil

shutil.copyfile(POST_FILTERED_PATH, OUTPUT_MASK_PATH)

'/home/munshkr/dym/immap-flood-areas/data/results/prob.gpkg'

## Export results as artifacts

In [23]:
!gsutil -m cp -r $OUTPUT_DIR/* $OUTPUT_ARTIFACTS_URL/

Copying file:///home/munshkr/dym/immap-flood-areas/data/results//prob.gpkg [Content-Type=application/geopackage+sqlite3]...
Copying file:///home/munshkr/dym/immap-flood-areas/data/results//prob.tif [Content-Type=image/tiff]...
/ [2/2 files][ 10.0 MiB/ 10.0 MiB] 100% Done                                    
Operation completed over 2 objects/10.0 MiB.                                     
