In [None]:
%load_ext autoreload
%autoreload 2

# Evaluate Google Earth Engine model on test data

In [None]:
from src.data.unosat import get_unosat_geometry
from src.postprocessing.preds_buildings import vectorize_xarray_with_gdf
from src.postprocessing.utils import read_fp_within_geo
from src.constants import PREDS_PATH
from src.data.buildings.microsoft_unosat import load_buildings_aoi
from src.data import load_unosat_labels

# Identifier of the prediction run; used as a sub-directory of PREDS_PATH.
RUN_NAME = '240224'
# Test AOIs are UKR5 .. UKR18 (UKR1-UKR4 are left out of the evaluation).
AOIS_TEST = [f'UKR{i}' for i in range(5, 19)]
# Damage classes kept when loading labels / filtering buildings.
LABELS_TO_KEEP = [1, 2]

## Point-wise

In [None]:
from tqdm import tqdm

# Sample the prediction rasters of the "pre" and "post" periods at every
# UNOSAT label point of the test AOIs.
labels = load_unosat_labels(AOIS_TEST, labels_to_keep=LABELS_TO_KEEP)
labels_with_preds = labels[['damage', 'geometry', 'aoi']].copy()
for name_period, dates in dict(pre=("2021-02-24", "2022-02-23"), post=("2022-02-24", "2023-02-23")).items():
    print(name_period, dates)

    fp_preds = PREDS_PATH / RUN_NAME / '_'.join(dates) / 'ukraine_padded.tif'
    col_preds = f'preds_{name_period}'
    # Initialise as float: the column receives values divided by 255, and
    # writing floats into an integer column is a deprecated silent upcast in
    # recent pandas. 0.0 stays in place for points that are never filled and
    # is used further down as the "no prediction" sentinel.
    labels_with_preds[col_preds] = 0.0
    for aoi in tqdm(AOIS_TEST):
        labels_aoi = labels_with_preds[labels_with_preds.aoi == aoi]
        if labels_aoi.empty:
            # Nothing to sample for this AOI; skip the raster read.
            continue
        geo = get_unosat_geometry(aoi)
        preds = read_fp_within_geo(fp_preds, geo)
        # Nearest-pixel lookup per point. `.values[0]` takes the first element
        # of the selection -- presumably the single band of the raster (TODO
        # confirm). The division by 255 rescales the stored values, presumably
        # 8-bit encoded scores, to [0, 1] (TODO confirm).
        # The lambda argument is named `pt` so it does not shadow the AOI
        # geometry `geo`.
        labels_with_preds.loc[labels_aoi.index, col_preds] = labels_aoi.geometry.apply(
            lambda pt: preds.sel(x=pt.x, y=pt.y, method='nearest').values[0]
        ) / 255

In [None]:
def compute_metrics(labels_with_preds, threshold=0.5, verbose=0):
    """Compute precision, recall and F1 from pre/post prediction columns.

    Each row contributes two samples: its ``preds_post`` value is scored as a
    positive sample (counted as TP when ``>= threshold``, FN otherwise) and
    its ``preds_pre`` value as a negative sample (FP when ``>= threshold``,
    TN otherwise). NaN predictions satisfy neither comparison and are
    therefore not counted.

    Args:
        labels_with_preds: DataFrame-like with numeric columns ``preds_pre``
            and ``preds_post``.
        threshold: Decision threshold applied to both columns.
        verbose: If truthy, print a one-line summary.

    Returns:
        dict with keys ``precision``, ``recall``, ``f1``, ``threshold``,
        ``tp``, ``fn``, ``fp`` and ``tn``. A ratio whose denominator is zero
        (e.g. no sample reaches the threshold, or the input is empty) is
        reported as 0.0 instead of raising ``ZeroDivisionError``.
    """
    preds_post = labels_with_preds['preds_post']
    preds_pre = labels_with_preds['preds_pre']

    tp = int((preds_post >= threshold).sum())
    fn = int((preds_post < threshold).sum())
    fp = int((preds_pre >= threshold).sum())
    tn = int((preds_pre < threshold).sum())

    # Guard every ratio: high thresholds in a sweep can leave no predicted
    # positives at all, which used to abort the whole sweep.
    precision = tp / (tp + fp) if (tp + fp) else 0.0
    recall = tp / (tp + fn) if (tp + fn) else 0.0
    f1 = 2 * (precision * recall) / (precision + recall) if (precision + recall) else 0.0
    if verbose:
        print(f'precision: {precision:.2f}, recall: {recall:.2f}, f1: {f1:.2f}, n_support: {len(labels_with_preds)}')

    return {'precision': precision, 'recall': recall, 'f1': f1, 'threshold': threshold, 'tp': tp, 'fn': fn, 'fp': fp, 'tn': tn}


In [None]:
# Drop label points whose prediction is exactly 0 in either period and report
# the table size after each step. 0 is the value the prediction columns are
# created with, so it also marks points that were never filled.
print(labels_with_preds.shape)
labels_with_preds_ = labels_with_preds.loc[labels_with_preds['preds_post'].ne(0)]
print(labels_with_preds_.shape)
labels_with_preds_ = labels_with_preds_.loc[labels_with_preds_['preds_pre'].ne(0)]
print(labels_with_preds_.shape)

In [None]:
# Headline point-wise metrics at the default decision threshold, computed on
# the filtered label points.
threshold = 0.5
compute_metrics(labels_with_preds_, threshold=threshold, verbose=1);

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

# Precision / recall / F1 as a function of the decision threshold.
# The sweep runs on the filtered frame `labels_with_preds_` so that it is
# consistent with the headline metrics and with the building-wise sweep;
# the unfiltered frame would count the 0-valued (unfilled) points as FN/TN.
metrics = pd.DataFrame(
    [compute_metrics(labels_with_preds_, t, verbose=0) for t in np.arange(0.1, 0.9, 0.02)]
).set_index('threshold')[['precision', 'recall', 'f1']]
metrics.plot();

## Building-wise

In [None]:
# Attach per-building prediction statistics (pre and post period) to the
# labelled building footprints of every test AOI, then save the result.
preds_per_aoi = []
for aoi in tqdm(AOIS_TEST):
    gdf_buildings = load_buildings_aoi(aoi)
    if gdf_buildings.empty:
        print(f'no buildings for {aoi}')
        continue
    gdf_buildings = gdf_buildings[gdf_buildings.damage_5m.isin(LABELS_TO_KEEP)]
    if gdf_buildings.empty:
        # Every footprint was removed by the damage-class filter.
        print(f'no buildings with labels {LABELS_TO_KEEP} for {aoi}')
        continue
    gdf_buildings_with_preds = gdf_buildings.copy()

    # The AOI geometry does not depend on the period: compute it once.
    geo = get_unosat_geometry(aoi)
    for name_period, dates in dict(pre=("2021-02-24", "2022-02-23"), post=("2022-02-24", "2023-02-23")).items():

        fp_preds = PREDS_PATH / RUN_NAME / '_'.join(dates) / 'ukraine_padded.tif'
        preds = read_fp_within_geo(fp_preds, geo)

        preds_vectorized = vectorize_xarray_with_gdf(preds, gdf_buildings_with_preds, name_id="building_id", verbose=0)
        # Suffix the statistic columns with the period so that both periods
        # can live side by side after the merge.
        d_rename = {c: f'{c}_{name_period}' for c in ['weighted_mean', 'max']}
        preds_vectorized = preds_vectorized.rename(columns=d_rename)
        # Default (inner) merge: buildings missing from `preds_vectorized`
        # are dropped.
        gdf_buildings_with_preds = gdf_buildings_with_preds.merge(preds_vectorized, on="building_id")

    preds_per_aoi.append(gdf_buildings_with_preds)

# Concatenate once instead of growing a frame inside the loop, and fail with a
# clear message when no AOI produced any building.
assert preds_per_aoi, 'no buildings found in any test AOI'
all_preds = pd.concat(preds_per_aoi)
# File name derived from RUN_NAME so it always matches the predictions read above.
all_preds.to_file(f'{RUN_NAME}_preds_with_buildings.geojson', driver='GeoJSON')

In [None]:
import geopandas as gpd

# Statistic used as the per-building prediction score.
target_col = 'weighted_mean'
# Read the building-wise predictions of the current run (file name derived
# from RUN_NAME rather than a hard-coded run id) and expose the chosen
# statistic under the column names expected by `compute_metrics`.
buildings_with_preds = gpd.read_file(f'{RUN_NAME}_preds_with_buildings.geojson').rename(
    columns={f'{target_col}_pre': 'preds_pre', f'{target_col}_post': 'preds_post'}
)
# Same rescaling by 255 as applied to the point-wise predictions.
buildings_with_preds[['preds_pre', 'preds_post']] /= 255

In [None]:
# Drop buildings whose prediction is exactly 0 in either period and report
# the table size after each step (0 presumably means "no prediction" here as
# well -- TODO confirm).
print(buildings_with_preds.shape)
buildings_with_preds_ = buildings_with_preds.loc[buildings_with_preds['preds_post'].ne(0)]
print(buildings_with_preds_.shape)
buildings_with_preds_ = buildings_with_preds_.loc[buildings_with_preds_['preds_pre'].ne(0)]
print(buildings_with_preds_.shape)

In [None]:
# Headline building-wise metrics; `threshold` is the value set in the
# point-wise section.
compute_metrics(buildings_with_preds_, threshold=threshold, verbose=1);

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

# Building-wise precision / recall / F1 as a function of the decision threshold.
metrics = pd.DataFrame(
    [compute_metrics(buildings_with_preds_, t, verbose=0) for t in np.arange(0.1, 0.9, 0.02)]
).set_index('threshold')[['precision', 'recall', 'f1']]
metrics.plot();

## Check predictions in a settlement that was not affected

In [None]:
from src.data.settlements import load_gdf_settlements
# Settlement table. A later cell looks a row up with
# `.loc[settlement_id, 'geometry']`, so it is presumably indexed by
# settlement id -- TODO confirm.
gdf_settlements = load_gdf_settlements()

In [None]:
# Settlement used as a negative control: Chernivtsi, which does not seem to
# have been attacked.
settlement_id = 2802
geo = gdf_settlements['geometry'].loc[settlement_id]
# Uncomment to inspect the settlement outline on an interactive map:
# gdf_settlements.loc[[settlement_id]].explore()

In [None]:
from src.postprocessing.preds_buildings import vectorize_xarray_with_gdf
from src.postprocessing.utils import read_fp_within_geo
from src.data.settlements import MSFT_SETTLEMENTS_PATH

# Building footprints are precomputed per settlement and stored as GeoJSON.
gdf_buildings = gpd.read_file(MSFT_SETTLEMENTS_PATH / f'{settlement_id}.geojson')
assert not gdf_buildings.empty, f'no buildings for {settlement_id}'
gdf_buildings_with_preds = gdf_buildings.copy()

periods = {'pre': ("2021-02-24", "2022-02-23"), 'post': ("2022-02-24", "2023-02-23")}
for name_period, dates in periods.items():

    # Prediction raster of this period, restricted to the settlement geometry
    fp_preds = PREDS_PATH / RUN_NAME / "_".join(dates) / "ukraine_padded.tif"
    preds = read_fp_within_geo(fp_preds, geo)

    # Per-building statistics (weighted mean and max), rescaled by 255
    preds_vectorized = vectorize_xarray_with_gdf(preds, gdf_buildings, name_id="building_id", verbose=0)
    preds_vectorized[["weighted_mean", "max"]] /= 255

    # Suffix the statistic columns with the period before merging them in
    d_rename = {c: f'{c}_{name_period}' for c in ['weighted_mean', 'max']}
    gdf_buildings_with_preds = gdf_buildings_with_preds.merge(
        preds_vectorized.rename(columns=d_rename), on="building_id"
    )

In [None]:
# Number of buildings flagged as damaged in each period at the default threshold.
threshold = 0.5
# `>=` (not `>`) so the counts agree with `compute_metrics` and with the
# threshold sweep over the same frame, which both treat a score equal to the
# threshold as positive.
n_destroyed_post = int((gdf_buildings_with_preds['weighted_mean_post'] >= threshold).sum())
n_destroyed_pre = int((gdf_buildings_with_preds['weighted_mean_pre'] >= threshold).sum())
n_destroyed_pre, n_destroyed_post

In [None]:
# For each threshold, count the buildings flagged in the post period, in the
# pre period, and in the post period only.
n_destroyed_posts = []
n_destroyed_pres = []
n_destroyed_post_not_pres = []
thresholds = np.arange(0.5, 0.95, 0.05)
for t in thresholds:
    flagged_post = gdf_buildings_with_preds['weighted_mean_post'] >= t
    flagged_pre = gdf_buildings_with_preds['weighted_mean_pre'] >= t
    below_pre = gdf_buildings_with_preds['weighted_mean_pre'] < t

    n_destroyed_post = int(flagged_post.sum())
    n_destroyed_pre = int(flagged_pre.sum())
    n_destroyed_post_not_pre = int((flagged_post & below_pre).sum())

    # Echo the counts for the first (0.5) threshold only.
    if t == 0.5:
        print(n_destroyed_pre, n_destroyed_post, n_destroyed_post_not_pre)

    n_destroyed_posts.append(n_destroyed_post)
    n_destroyed_pres.append(n_destroyed_pre)
    n_destroyed_post_not_pres.append(n_destroyed_post_not_pre)

In [None]:
# Display the per-threshold counts of buildings flagged in the post period.
n_destroyed_posts

In [None]:
import matplotlib.pyplot as plt

# Grouped bar chart: for every threshold, three bars side by side (post period,
# pre period, post-only), each shifted by one bar width.
width = 0.01
x_adjustments = [-width, 0, width]
labels = [
    'Damaged in 2022',
    'Damaged in 2021',
    'Damaged in 2022 but not in 2021'
]
series = [n_destroyed_posts, n_destroyed_pres, n_destroyed_post_not_pres]

fig, ax = plt.subplots(figsize=(10, 5))
for offset, counts, label in zip(x_adjustments, series, labels):
    ax.bar(thresholds + offset, counts, width=width, label=label)
ax.legend()
ax.set(xlabel='threshold', ylabel='Number buildings damaged', title='Chernivtsi')
plt.show()