In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
import ee
import geemap

from src.gee.utils import init_gee
# Project helper; presumably authenticates/initialises the Earth Engine client.
# It has to run before any of the `ee` calls in the cells below.
init_gee()

In [None]:
# Sentinel-1 relative orbit numbers used for the train AOIs, the test AOIs and the
# country-wide inference respectively.
# Note: the first two are server-side ee.List objects, the third is a plain Python list
# (it is wrapped in ee.List or passed to the pipeline at the point of use).
orbits_train = ee.List([14, 21, 36, 43, 65, 87, 94, 109, 116, 138, 145, 160, 167])
orbits_test = ee.List([14, 36, 65, 87, 116, 138, 160])
orbits_ukraine = [7, 14, 21, 29, 36, 43, 58, 65, 72, 80, 87, 94, 102, 109, 116, 131, 138, 145, 153, 160, 167]

# Train Classifier without intermediate steps (not working)

In [None]:
#from src.gee.constants import ASSETS_PATH
#ds_train = ee.FeatureCollection(ASSETS_PATH + 's1tsdd_Ukraine/unosat_points_train')
#ds_test = ee.FeatureCollection(ASSETS_PATH + 's1tsdd_Ukraine/unosat_points_test')

In [None]:
from src.gee.data.datasets import load_dataset, get_all_start_dates
from omegaconf import OmegaConf

# Date-range knobs; only consumed by the commented-out get_all_start_dates call below.
first_start_date = "2020-06-01"
last_start_date = "2022-05-01"
every_n_months = 1
#start_dates = get_all_start_dates(first_start_date, last_start_date, every_n_months=every_n_months)

# start_dates = ["2020-06-01", "2020-10-01", "2021-06-01", "2021-10-01"]
start_dates = ["2020-10-01", "2021-10-01"]
print(f"Start dates: {start_dates}")

# Settings common to both splits; only `split` differs between train and test.
shared_cfg = dict(
    fold=None,
    random_loc=0,
    keep_damage=[1, 2],
    n_tiles=32,
    extract_window=30,
    start_dates=start_dates,
    save_if_doesnt_exist=True,
)
cfg_train = OmegaConf.create(dict(split="train", **shared_cfg))
cfg_test = OmegaConf.create(dict(split="test", **shared_cfg))

# Keep only the identifying properties: the statistics are recomputed from the
# raw Sentinel-1 imagery further down in this section.
kept_properties = ['unosat_id', 'orbit', 'label']
ds_train = load_dataset(**cfg_train).select(kept_properties)
ds_test = load_dataset(**cfg_test).select(kept_properties)


In [None]:
from src.utils.geometry import load_country_boundaries
from src.utils.gee import shapely_to_gee
# Country outline from the project helper (presumably a shapely geometry, going by the
# name of the converter), then converted to an Earth Engine geometry.
ukraine_geo = load_country_boundaries('Ukraine')
ukraine_geo_ee = shapely_to_gee(ukraine_geo)

In [None]:
from src.gee.data.unosat import get_unosat_geo
from src.gee.constants import TRAIN_AOIS, TEST_AOIS

def get_fc_aois(aois):
    """Build a FeatureCollection holding one feature per AOI.

    Args:
        aois: iterable of AOI identifiers accepted by ``get_unosat_geo``.

    Returns:
        ee.FeatureCollection whose features use the geometry returned by
        ``get_unosat_geo`` and store the AOI identifier in their ``name`` property.
    """
    return ee.FeatureCollection(
        [ee.Feature(get_unosat_geo(aoi_name), {'name': aoi_name}) for aoi_name in aois]
    )

# AOI footprints of each split; used below to spatially filter the Sentinel-1 imagery.
geo_train = get_fc_aois(TRAIN_AOIS)
geo_test = get_fc_aois(TEST_AOIS)

In [None]:
# Combined reducer: mean, stdDev, median, max, min, skew, kurtosis and variance,
# all fed with the same inputs (sharedInputs=True).
stats_reducers = ee.Reducer.mean()
for _extra_reducer in (
    ee.Reducer.stdDev(),
    ee.Reducer.median(),
    ee.Reducer.max(),
    ee.Reducer.min(),
    ee.Reducer.skew(),
    ee.Reducer.kurtosis(),
    ee.Reducer.variance(),
):
    stats_reducers = stats_reducers.combine(reducer2=_extra_reducer, sharedInputs=True)

In [None]:
def compute_stats_orbit(orbit, geo, ds):
    """Attach Sentinel-1 temporal statistics to the features of ``ds`` seen from one orbit.

    Args:
        orbit: relative orbit number, matched against ``relativeOrbitNumber_start`` of the
            images and against the ``orbit`` property of the features in ``ds``.
        geo: Earth Engine geometry or collection used to spatially filter the imagery.
        ds: ee.FeatureCollection whose features carry ``orbit`` and ``label`` properties.

    Returns:
        Flattened ee.FeatureCollection. Features with label 0 receive statistics computed
        over 2020-10-01..2021-10-01, features with label 1 over 2021-10-01..2022-10-01.
    """
    # Sentinel-1A, IW mode, dual-pol (VV + VH) acquisitions of the requested orbit.
    s1 = ee.ImageCollection("COPERNICUS/S1_GRD")
    for s1_filter in (
        ee.Filter.listContains("transmitterReceiverPolarisation", "VV"),
        ee.Filter.listContains("transmitterReceiverPolarisation", "VH"),
        ee.Filter.eq("instrumentMode", "IW"),
        ee.Filter.eq("platform_number", "A"),
        ee.Filter.eq("relativeOrbitNumber_start", orbit),
    ):
        s1 = s1.filter(s1_filter)
    s1 = s1.filterBounds(geo).select(['VV', 'VH'])

    def compute_stats_labels(label):
        # ee.Algorithms.If returns its first branch when `label` is 1 and the second when it is 0.
        period_label_1 = s1.filterDate('2021-10-01', '2022-10-01')
        period_label_0 = s1.filterDate('2020-10-01', '2021-10-01')
        s1_period = ee.ImageCollection(ee.Algorithms.If(label, period_label_1, period_label_0))
        stats = s1_period.reduce(stats_reducers)

        def sample_region(f):
            # Copy the statistics found at the feature's geometry onto the feature itself.
            return f.set(
                stats.reduceRegion(
                    reducer=ee.Reducer.first(),
                    geometry=f.geometry(),
                    scale=10,
                )
            )

        matching = ds.filter(ee.Filter.eq('orbit', orbit)).filter(ee.Filter.eq('label', label))
        return matching.map(sample_region)

    per_label = ee.List([0, 1]).map(compute_stats_labels)
    return ee.FeatureCollection(per_label).flatten()

# Orbit lists repeated here so that this cell does not depend on the one at the top.
orbits_train = ee.List([14, 21, 36, 43, 65, 87, 94, 109, 116, 138, 145, 160, 167])
orbits_test = ee.List([14, 36, 65, 87, 116, 138, 160])

# One FeatureCollection per orbit, flattened into a single collection for each split.
per_orbit_train = orbits_train.map(lambda orbit: compute_stats_orbit(orbit, geo_train, ds_train))
fc_train = ee.FeatureCollection(per_orbit_train).flatten()

per_orbit_test = orbits_test.map(lambda orbit: compute_stats_orbit(orbit, geo_test, ds_test))
fc_test = ee.FeatureCollection(per_orbit_test).flatten()

In [None]:
# Number of training features; this is an Earth Engine object, not a Python int.
fc_train.size()

In [None]:
# Classifier inputs: the property names of the first training feature that start
# with "VV_" or "VH_".
bands = fc_train.first().propertyNames().filter(
    ee.Filter.Or(ee.Filter.stringStartsWith("item", "VV_"), ee.Filter.stringStartsWith("item", "VH_"))
)

In [None]:
# Fetch the list of input property names from the server to inspect it.
bands.getInfo()

In [None]:
# Random forest with 50 trees, trained to predict `label` from the properties listed in `bands`.
clf = ee.Classifier.smileRandomForest(50).train(fc_train, 'label', bands)

In [None]:
# Classify the test features that carry the freshly computed statistics.
preds = fc_test.classify(clf)

### Not working from here

In [None]:
from src.gee.classification.utils import compute_metrics
# Project helper; presumably compares the `label` and predicted properties of `preds` — TODO confirm.
compute_metrics(preds)

In [None]:
# Same trained forest, but emitting probabilities instead of hard class labels.
trained_clf = clf.setOutputMode('PROBABILITY')
# Classify `fc_test`, not `ds_test`: in this section `ds_test` was reduced to
# unosat_id/orbit/label when it was loaded, so it has none of the VV_*/VH_* input
# properties the classifier was trained on. `fc_test` carries those statistics.
# NOTE(review): aggregate_predictions() groups by a `startDate` property; confirm that the
# features of fc_test still carry it, given that only three properties were selected.
preds_proba = fc_test.classify(trained_clf)

In [None]:
def aggregate_predictions(preds):
    """Average the ``classification`` value of features sharing a start date and a UNOSAT id.

    Args:
        preds: ee.FeatureCollection with ``startDate``, ``unosat_id``, ``label`` and
            ``classification`` properties.

    Returns:
        Flattened ee.FeatureCollection with one feature per (startDate, unosat_id) pair.
        Each feature takes its geometry and ``label`` from the first feature of the group
        and stores the mean ``classification`` of the group; the date is written to
        ``start_date``.
    """

    def aggregate_date(date):
        same_date = preds.filter(ee.Filter.eq("startDate", date))

        def aggregate_id(unosat_id):
            group = same_date.filter(ee.Filter.eq("unosat_id", unosat_id))
            representative = group.first()
            return ee.Feature(
                ee.Geometry(representative.geometry()),
                {
                    "label": ee.String(representative.get("label")),
                    "unosat_id": ee.String(unosat_id),
                    "start_date": ee.String(date),
                    "classification": group.aggregate_mean("classification"),
                },
            )

        ids_for_date = same_date.aggregate_array("unosat_id").distinct()
        return ee.FeatureCollection(ids_for_date.map(aggregate_id))

    all_dates = preds.aggregate_array("startDate").distinct()
    return ee.FeatureCollection(all_dates.map(aggregate_date)).flatten()

In [None]:
# Aggregate the probabilities per (date, id), then binarise them at 0.5 for the metrics.
agg_preds = aggregate_predictions(preds_proba).map(
    lambda f: f.set("classification_bin", ee.Number(f.get("classification")).gte(0.5))
)
compute_metrics(agg_preds, preds_name="classification_bin")

# Train Classifier from precomputed features

## Dataset

In [None]:
from src.gee.data.datasets import load_dataset, get_all_start_dates
from omegaconf import OmegaConf

# first_start_date = "2020-06-01"
# last_start_date = "2022-05-01"
# every_n_months = 1
# start_dates = get_all_start_dates(first_start_date, last_start_date, every_n_months=every_n_months)

# start_dates = ["2020-06-01", "2020-10-01", "2021-06-01", "2021-10-01"]
start_dates = ["2020-10-01", "2021-10-01"]
print(f"Start dates: {start_dates}")


# Both splits use the same settings; only `split` changes.
cfg_train, cfg_test = (
    OmegaConf.create(
        dict(
            split=split_name,
            fold=None,
            random_loc=0,
            keep_damage=[1, 2],
            n_tiles=32,
            extract_window=30,
            start_dates=start_dates,
            save_if_doesnt_exist=True,
        )
    )
    for split_name in ("train", "test")
)

# Unlike the first section, every property is kept: the precomputed features are the inputs.
ds_train = load_dataset(**cfg_train)
ds_test = load_dataset(**cfg_test)

In [None]:
# def get_dataset_start_dates(
#     start_dates,
#     split='train',
#     fold=None,
#     random_loc=0,
#     keep_damage=[1,2],
#     n_tiles=32,
#     extract_window=30,
#     save_if_doesnt_exist=True
# ):
#     start_dates = start_dates if isinstance(start_dates, list) else [start_dates]
#     cfg = OmegaConf.create(
#         dict(
#             split=split,
#             fold=fold,
#             random_loc=random_loc,
#             keep_damage=keep_damage,
#             n_tiles=n_tiles,
#             extract_window=extract_window,
#             start_dates=start_dates,
#             save_if_doesnt_exist=save_if_doesnt_exist,
#         )
#     )
#     return load_dataset(**cfg)

## Classifier

In [None]:
n_trees = 50
#classifier = ee.Classifier.smileGradientTreeBoost(n_trees)
classifier = ee.Classifier.smileRandomForest(n_trees)
# Names of the precomputed feature properties used as classifier inputs.
features_names = [
    'VV_mean','VV_stdDev','VV_median','VV_max','VV_min','VV_skew',
    'VV_kurtosis','VV_variance','VH_mean','VH_stdDev','VH_median',
    'VH_max','VH_min','VH_skew','VH_kurtosis','VH_variance',
    'VV_ptp', 'VH_ptp',
    'VV_mean_slice0', 'VV_stdDev_slice0',
    'VV_mean_slice1', 'VV_stdDev_slice1', 'VV_mean_slice2',
    'VV_stdDev_slice2', 'VV_mean_slice3', 'VV_stdDev_slice3',
    'VV_mean_slice4', 'VV_stdDev_slice4', 'VV_mean_slice5',
    'VV_stdDev_slice5', 'VV_mean_slice6', 'VV_stdDev_slice6',
    'VV_mean_slice7', 'VV_stdDev_slice7',
    'VH_mean_slice0', 'VH_stdDev_slice0',
    'VH_mean_slice1', 'VH_stdDev_slice1', 'VH_mean_slice2',
    'VH_stdDev_slice2', 'VH_mean_slice3', 'VH_stdDev_slice3',
    'VH_mean_slice4', 'VH_stdDev_slice4', 'VH_mean_slice5',
    'VH_stdDev_slice5', 'VH_mean_slice6', 'VH_stdDev_slice6',
    'VH_mean_slice7', 'VH_stdDev_slice7'
]
# Train on the training split only. This classifier is evaluated on `ds_test` a few cells
# below, so fitting it on `ds_train.merge(ds_test)` would leak the test labels into the
# model and inflate the reported metrics. To fit a final model on all labelled data for the
# country-wide map, train a separate classifier after the evaluation.
trained_clf = classifier.train(features=ds_train, classProperty="label", inputProperties=ee.List(features_names))

In [None]:
# first_start_date = "2020-06-01"
# last_start_date = "2021-12-01"
# every_n_months = 2
# start_dates = get_all_start_dates(first_start_date, last_start_date, every_n_months=every_n_months)

# start_dates = ['2020-06-01', '2020-10-01', '2021-06-01', '2021-10-01']

# ds_test = get_dataset_start_dates(["2020-10-01", "2021-10-01"], split='test')
# classifier = ee.Classifier.smileRandomForest(50)
# ds_train = None
# for start_date in start_dates:
#     ds_train = ds_train.merge(get_dataset_start_dates(start_date)) if ds_train else get_dataset_start_dates(start_date)


# classifier = classifier.train(features=ds_train, classProperty="label", inputProperties=ee.List(features_names))

# preds = ds_test.classify(classifier)
# # compute_metrics(preds)

In [None]:
from src.gee.classification.utils import infer_and_compute_metrics
# Evaluate the trained classifier on the test split; `aggregate_preds=True` presumably
# averages the predictions per location before scoring — TODO confirm against the helper.
infer_and_compute_metrics(ds_test, trained_clf, aggregate_preds=True)

## Classification of the entire country

In [None]:
from src.utils.geometry import load_country_boundaries
from src.utils.gee import shapely_to_gee
# Country outline from the project helper, converted to an Earth Engine geometry;
# used to spatially filter the imagery and to centre the maps below.
ukraine_geo = load_country_boundaries('Ukraine')
ukraine_geo_ee = shapely_to_gee(ukraine_geo)

In [None]:
n_tiles = 32
start_date = ee.Date("2021-10-01")
# Length of the time window in days: 12 * (32 + 1) - 1 = 395.
n_days = 12*(n_tiles+1)-1
end_date = start_date.advance(n_days, 'day')

# Sentinel-1A, IW mode, dual-pol (VV + VH) acquisitions over Ukraine within the window.
s1_filter = ee.Filter.And(
    ee.Filter.listContains("transmitterReceiverPolarisation", "VV"),
    ee.Filter.listContains("transmitterReceiverPolarisation", "VH"),
    ee.Filter.eq("instrumentMode", "IW"),
    ee.Filter.eq("platform_number", "A"),
)
s1 = (
    ee.ImageCollection("COPERNICUS/S1_GRD")
    .filter(s1_filter)
    .filterDate(start_date, end_date)
    .filterBounds(ukraine_geo_ee)
    .select(['VV', 'VH'])
)

In [None]:
from src.gee.classification.features_extractor import manual_stats_from_s1

# Probabilities rather than hard labels, so that the per-orbit outputs can be averaged.
trained_clf = trained_clf.setOutputMode('PROBABILITY')


def infer_orbit(orbit):
    """Classify the features computed from the acquisitions of one relative orbit.

    Args:
        orbit: relative orbit number matched against ``relativeOrbitNumber_start``.

    Returns:
        The output of ``trained_clf`` applied to what ``manual_stats_from_s1`` returns
        for this orbit and the notebook-level ``start_date``.
    """
    orbit_filter = ee.Filter.eq('relativeOrbitNumber_start', orbit)
    orbit_stats = manual_stats_from_s1(s1.filter(orbit_filter), start_date)
    return orbit_stats.classify(trained_clf)


# Average the per-orbit outputs into a single image.
orbits = ee.List(orbits_ukraine)
per_orbit_preds = orbits.map(infer_orbit)
orbits_preds = ee.ImageCollection(per_orbit_preds).mean()

In [None]:
# Keep only the pixels whose averaged prediction exceeds the threshold.
# NOTE: `orbits_preds` is overwritten in place, so lowering `threshold` and re-running only
# this cell will not bring back pixels that are already masked; re-run the inference cell first.
threshold = 0.75
orbits_preds = orbits_preds.updateMask(orbits_preds.gt(threshold))

# Additionally restrict to pixels whose mean Dynamic World "built" value over
# 2020-02-24..2022-02-24 is above 0.1.
urban = ee.ImageCollection("GOOGLE/DYNAMICWORLD/V1").filterDate("2020-02-24", "2022-02-24").mean().select("built")
orbits_preds = orbits_preds.updateMask(urban.gt(0.1))

In [None]:
# Named `urban_map` rather than `map`, so that the Python builtin `map` is not
# shadowed for the rest of the kernel session.
urban_map = geemap.Map()
urban_map.centerObject(ukraine_geo_ee, 6)
# Show only the pixels whose mean "built" value exceeds 0.1.
urban_map.addLayer(urban.updateMask(urban.gt(0.1)), {'min': 0, 'max': 1, 'palette': ['yellow', 'red']}, 'urban')
urban_map

In [None]:
# Named `preds_map` rather than `map`, so that the Python builtin `map` is not
# shadowed for the rest of the kernel session.
preds_map = geemap.Map()
preds_map.centerObject(ukraine_geo_ee, 6)
# `orbits_preds` is already masked to values above `threshold`, hence the colour range.
preds_map.addLayer(orbits_preds, {'min': threshold, 'max': 1, 'palette': ['yellow', 'red']}, 'urban_preds_above')
preds_map

# Pipeline: export global predictions

In [None]:
from omegaconf import OmegaConf

from src.gee.data.datasets import load_dataset
from src.utils.geometry import load_country_boundaries
from src.utils.gee import shapely_to_gee
from src.gee.classification.features_extractor import manual_stats_from_s1

def preds_full_pipeline(start_dates_training, random_loc, n_tiles, start_date_inference, geo_inference, orbits_inference=None, verbose=1):
    """Train a random forest on the training split and run it over ``geo_inference``.

    Args:
        start_dates_training: start dates passed to ``load_dataset`` for the training split.
        random_loc: forwarded to ``load_dataset``.
        n_tiles: forwarded to ``load_dataset``; also sets the inference window length,
            ``12 * (n_tiles + 1) - 1`` days from ``start_date_inference``.
        start_date_inference: start of the inference window (anything ``ee.Date`` accepts).
        geo_inference: Earth Engine geometry used to spatially filter the Sentinel-1 imagery.
        orbits_inference: relative orbit numbers to use; when ``None`` the distinct
            orbits found in the filtered imagery are used.
        verbose: when truthy, progress messages are printed; also forwarded to ``load_dataset``.

    Returns:
        The mean, over the selected orbits, of the classifier output in PROBABILITY mode.
    """

    def log(message):
        # Progress messages are printed only when verbosity is enabled.
        if verbose:
            print(message)

    # --- Training dataset -------------------------------------------------
    ds_train = load_dataset(
        **OmegaConf.create(
            dict(
                split="train",
                fold=None,
                random_loc=random_loc,
                keep_damage=[1, 2],
                n_tiles=n_tiles,
                extract_window=30,
                start_dates=start_dates_training,
                save_if_doesnt_exist=True,
                verbose=verbose
            )
        )
    )
    log(f'start_dates_training: {start_dates_training} - random_loc: {random_loc} - n_tiles: {n_tiles}')

    # --- Classifier -------------------------------------------------------
    features_names = [
        # statistics over the whole window
        'VV_mean', 'VV_stdDev', 'VV_median', 'VV_max', 'VV_min', 'VV_skew', 'VV_kurtosis', 'VV_variance',
        'VH_mean', 'VH_stdDev', 'VH_median', 'VH_max', 'VH_min', 'VH_skew', 'VH_kurtosis', 'VH_variance',
        'VV_ptp', 'VH_ptp',
        # per-slice mean / stdDev, VV
        'VV_mean_slice0', 'VV_stdDev_slice0',
        'VV_mean_slice1', 'VV_stdDev_slice1',
        'VV_mean_slice2', 'VV_stdDev_slice2',
        'VV_mean_slice3', 'VV_stdDev_slice3',
        'VV_mean_slice4', 'VV_stdDev_slice4',
        'VV_mean_slice5', 'VV_stdDev_slice5',
        'VV_mean_slice6', 'VV_stdDev_slice6',
        'VV_mean_slice7', 'VV_stdDev_slice7',
        # per-slice mean / stdDev, VH
        'VH_mean_slice0', 'VH_stdDev_slice0',
        'VH_mean_slice1', 'VH_stdDev_slice1',
        'VH_mean_slice2', 'VH_stdDev_slice2',
        'VH_mean_slice3', 'VH_stdDev_slice3',
        'VH_mean_slice4', 'VH_stdDev_slice4',
        'VH_mean_slice5', 'VH_stdDev_slice5',
        'VH_mean_slice6', 'VH_stdDev_slice6',
        'VH_mean_slice7', 'VH_stdDev_slice7',
    ]
    trained_clf = ee.Classifier.smileRandomForest(50).train(
        features=ds_train,
        classProperty="label",
        inputProperties=ee.List(features_names),
    )
    log('Classifier trained.')

    # --- Sentinel-1 imagery for the inference window ------------------------
    start_date_ee = ee.Date(start_date_inference)
    end_date_ee = start_date_ee.advance(12*(n_tiles+1)-1, 'day')
    s1 = ee.ImageCollection("COPERNICUS/S1_GRD")
    for s1_filter in (
        ee.Filter.listContains("transmitterReceiverPolarisation", "VV"),
        ee.Filter.listContains("transmitterReceiverPolarisation", "VH"),
        ee.Filter.eq("instrumentMode", "IW"),
        ee.Filter.eq("platform_number", "A"),
    ):
        s1 = s1.filter(s1_filter)
    s1 = s1.filterDate(start_date_ee, end_date_ee).filterBounds(geo_inference).select(['VV', 'VH'])
    log(f'Sentinel-1 data loaded from {start_date_inference}')

    # --- Per-orbit inference, averaged over the orbits ------------------------
    trained_clf = trained_clf.setOutputMode('PROBABILITY')

    def infer_orbit(orbit):
        orbit_images = s1.filter(ee.Filter.eq('relativeOrbitNumber_start', orbit))
        return manual_stats_from_s1(orbit_images, start_date_inference).classify(trained_clf)

    if orbits_inference is None:
        # Fall back to every orbit present in the filtered imagery.
        orbits_inference = s1.aggregate_array('relativeOrbitNumber_start').distinct()
    per_orbit_preds = ee.List(orbits_inference).map(infer_orbit)
    orbits_preds = ee.ImageCollection(per_orbit_preds).mean()
    log('Inference done.')
    return orbits_preds

In [None]:
# Keyword arguments for preds_full_pipeline: training settings first, then the
# inference window, area and orbits.
cfg_inference = dict(
    # training
    start_dates_training = ["2020-10-01", "2021-10-01"],
    random_loc = 0,
    n_tiles = 32,
    # inference
    start_date_inference = "2021-10-01",
    geo_inference = shapely_to_gee(load_country_boundaries('Ukraine')),
    orbits_inference = orbits_ukraine,
)
# Country-wide prediction image (mean over the orbits listed in `orbits_ukraine`).
preds = preds_full_pipeline(**cfg_inference)

In [None]:
#  Export it
# Export to Google Drive at 10 m scale. The prediction is multiplied by 255 and cast to
# uint8 before export (presumably mapping probabilities in [0, 1] onto 0..255 — confirm).
task = ee.batch.Export.image.toDrive(
    image=preds.multiply(2**8-1).toUint8(),
    description=f'Predition-Ukraine-{cfg_inference["start_date_inference"]}',
    folder = 'S1tsdd-Ukraine',
    fileNamePrefix=f'preds_Ukraine_{cfg_inference["start_date_inference"]}_2dates',
    region=cfg_inference['geo_inference'],
    scale=10,
    maxPixels=1e13
)
# The export runs asynchronously on the Earth Engine servers once started.
task.start()

# Global predictions, but for each settlement separately

In [None]:
from pathlib import Path
from src.gee.constants import ASSETS_PATH


def infer_for_all_settlements(settlements, start_dates_training, random_loc, n_tiles, start_date_inference):
    """Run ``preds_full_pipeline`` for every settlement of a collection.

    Args:
        settlements: ee.FeatureCollection whose features carry a ``settlement_id`` property.
        start_dates_training, random_loc, n_tiles, start_date_inference: forwarded
            unchanged to ``preds_full_pipeline``.

    Returns:
        The collection obtained by mapping over ``settlements``: one prediction per
        settlement, tagged with the ``settlement_id`` of the settlement it was computed for.
    """

    def infer_one_settlements(f):
        # The orbits are left to the pipeline, which derives them from the imagery
        # that intersects this settlement.
        settlement_preds = preds_full_pipeline(
            start_dates_training,
            random_loc,
            n_tiles,
            start_date_inference,
            f.geometry(),
            orbits_inference=None,
            verbose=0
        )
        return settlement_preds.set('settlement_id', f.get('settlement_id'))

    return settlements.map(infer_one_settlements)

In [None]:
# Settlement polygons from the project assets; limited to 5 features here.
settlements = ee.FeatureCollection(ASSETS_PATH + 's1tsdd_Ukraine/ukraine_settlements')
settlements = settlements.limit(5)
# NOTE: this re-binds `cfg_inference` with a different set of keys than the
# country-wide configuration used earlier in the notebook.
cfg_inference = {
    "start_dates_training": ["2020-10-01", "2021-10-01"],
    "random_loc": 0,
    "n_tiles": 32,
    "start_date_inference": "2021-10-01",
}
# One prediction per settlement, each tagged with its `settlement_id`.
preds = infer_for_all_settlements(settlements, **cfg_inference)

In [None]:
# Pick one settlement by id, together with its prediction image and its geometry.
id_ = 3
folder = "settlements_preds_2021-10-01_2dates_32d"
name = f'settlement_{id_}'
description = f'Ukraine_settlement_{id_}_2021-10-01_2dates_32d'

same_id = ee.Filter.eq('settlement_id', id_)
pred_settlement = ee.Image(preds.filter(same_id).first())
geo = settlements.filter(same_id).first().geometry()
# geemap.ee_export_image(
#     pred_settlement.multiply(2**8-1).toUint8(),
#     filename=f'{name}.tif',
#     scale=10,
#     region=geo
# )
# task = ee.batch.Export.image.toDrive(
#     image=pred_settlement.multiply(2**8-1).toUint8(),
#     description=description,
#     folder=folder,
#     fileNamePrefix=name,
#     region=geo,
#     scale=10,
# )
# task.start()

In [None]:
# Named `settlement_pred_map` rather than `map`, so that the Python builtin `map`
# is not shadowed for the rest of the kernel session.
settlement_pred_map = geemap.Map()
settlement_pred_map.addLayer(pred_settlement, {'min': 0, 'max': 1, 'palette': ['yellow', 'red']}, 'urban_preds_above')
settlement_pred_map

In [None]:
# Start one Drive export per settlement. The ids are fetched to the client once,
# and every export is then configured from server-side lookups by id.
folder = "settlements_preds_2021-10-01_2dates_32d"
settlement_ids = settlements.aggregate_array('settlement_id').getInfo()
for id_ in settlement_ids:
    same_id = ee.Filter.eq('settlement_id', id_)
    name = f'settlement_{id_}'
    description = f'Ukraine_settlement_{id_}_2021-10-01_2dates_32d'
    pred_settlement = ee.Image(preds.filter(same_id).first())
    geo = settlements.filter(same_id).first().geometry()
    task = ee.batch.Export.image.toDrive(
        image=pred_settlement.multiply(2**8-1).toUint8(),
        description=description,
        folder=folder,
        fileNamePrefix=name,
        region=geo,
        scale=10,
    )
    task.start()

In [None]:
from src.gee.utils import draw_polygon_edges
threshold = 0.65
# `pred_settlement` and `geo` are the values left by the last cell that assigned them.
# The mask is applied in place because the next cell displays the masked image.
pred_settlement = pred_settlement.updateMask(pred_settlement.gt(threshold))

#urban = ee.ImageCollection("GOOGLE/DYNAMICWORLD/V1").filterDate("2020-02-24", "2022-02-24").mean().select("built")
#orbits_preds = orbits_preds.updateMask(urban.gt(0.1))

# Named `settlement_map` rather than `map`, so that the Python builtin `map`
# is not shadowed for the rest of the kernel session.
settlement_map = geemap.Map()
settlement_map.centerObject(geo, 12)
draw_polygon_edges(geo, settlement_map)
settlement_map.addLayer(pred_settlement, {'min': threshold, 'max': 1, 'palette': ['yellow', 'red']}, 'urban_preds_above')
settlement_map

In [None]:
# Named `settlement_uint8_map` rather than `map`, so that the Python builtin `map`
# is not shadowed for the rest of the kernel session.
settlement_uint8_map = geemap.Map()
settlement_uint8_map.centerObject(geo, 12)
# Same 0..255 uint8 scaling as the exported rasters; the colour range starts at 128.
settlement_uint8_map.addLayer(pred_settlement.multiply(2**8-1).toUint8(), {'min': 128, 'max': 255, 'palette': ['yellow', 'red']}, 'preds')
settlement_uint8_map

In [None]:
# Same per-settlement export as above, but one failing settlement does not stop the loop:
# names of started exports go to `list_tasks`, names of failed ones to `list_errors`.
list_tasks = []
list_errors = []
folder = "settlements_preds_2021-10-01_2dates_32d"
for id_ in settlements.aggregate_array('settlement_id').getInfo():

    name = f'settlement_{id_}'
    try:
        pred_settlement = ee.Image(preds.filterMetadata('settlement_id', 'equals', id_).first())
        geo = settlements.filterMetadata('settlement_id', 'equals', id_).first().geometry()
        description = f'Ukraine_settlement_{id_}_2021-10-01_2dates_32d'
        task = ee.batch.Export.image.toDrive(
            image=pred_settlement.multiply(2**8-1).toUint8(),
            description=description,
            folder=folder,
            fileNamePrefix=name,
            region=geo,
            scale=10,

        )
        task.start()
        # Recorded only once the task was started without raising.
        list_tasks.append(name)
        # Progress message for every tenth id (assumes the ids are integers).
        if id_ % 10 == 0:
            print(f'Exporting {name}...')
    except Exception as e:
        # Best effort: report the error, remember the settlement and carry on.
        print(e)
        list_errors.append(name)

In [None]:
from src.gee.utils import draw_polygon_edges
threshold = 0.65

# `preds` holds one prediction image per settlement, so a single settlement and its image
# have to be picked before masking and displaying. The previous version called
# `updateMask` on the whole collection and referenced an undefined `settlement`.
settlement = ee.Feature(settlements.first())
settlement_pred = ee.Image(
    preds.filter(ee.Filter.eq('settlement_id', settlement.get('settlement_id'))).first()
)
# A new name is used for the masked image so that re-running the cell stays idempotent.
settlement_pred_masked = settlement_pred.updateMask(settlement_pred.gt(threshold))

#urban = ee.ImageCollection("GOOGLE/DYNAMICWORLD/V1").filterDate("2020-02-24", "2022-02-24").mean().select("built")
#orbits_preds = orbits_preds.updateMask(urban.gt(0.1))

# Named `settlement_preview_map` rather than `map`, so that the Python builtin `map`
# is not shadowed for the rest of the kernel session.
settlement_preview_map = geemap.Map()
settlement_preview_map.centerObject(settlement, 12)
draw_polygon_edges(settlement.geometry(), settlement_preview_map)
settlement_preview_map.addLayer(settlement_pred_masked, {'min': threshold, 'max': 1, 'palette': ['yellow', 'red']}, 'urban_preds_above')
settlement_preview_map