In [1]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

# Full country-wide scale

In [2]:
from src.utils.geometry import load_ukraine_admin_polygons

# Load the administrative polygons for levels 1-4, renaming the generic
# `admin_id` column to a level-specific `adm<level>_id` column.
d_adm = {
    level: load_ukraine_admin_polygons(adm_level=level).rename({'admin_id': f'adm{level}_id'}, axis=1)
    for level in range(1, 5)
}
# Keep the per-level names available alongside the level -> frame lookup.
adm1, adm2, adm3, adm4 = (d_adm[level] for level in range(1, 5))
tuple(frame.shape for frame in (adm1, adm2, adm3, adm4))

((27, 3), (139, 4), (1769, 5), (29708, 6))

In [3]:
import duckdb
from src.constants import PREDS_PATH
import geopandas as gpd

# Name of the prediction run; it selects the sub-directory of PREDS_PATH.
run_name = '240307'
# Location of the DuckDB database file for that run.
db_name = PREDS_PATH / run_name / 'buildings_preds.db'

# Pass the path to DuckDB as a plain string.
db = duckdb.connect(str(db_name))

In [4]:
def get_condition_damaged(threshold=0.65):
    """Build the SQL boolean expression that flags a row as damaged.

    A row matches when at least one of the eight 2022-2023 date columns is at
    or above ``255*threshold`` while every one of the four 2021 date columns
    is below it.

    Args:
        threshold: Number interpolated verbatim into the SQL text as
            ``255*<threshold>``. Presumably a probability cut-off for scores
            stored on a 0-255 scale -- TODO confirm against the table schema.

    Returns:
        str: ``(<c1> OR <c2> ...) AND (<n1> AND <n2> ...)``, suitable for use
        inside a WHERE clause.
    """
    quarters = ('02', '05', '08', '11')
    cutoff = f'255*{threshold}'

    # 2021 columns: none of them may reach the cutoff.
    below_in_2021 = [f'"2021-{month}-24" < {cutoff}' for month in quarters]
    # 2022 and 2023 columns: reaching the cutoff on any single one is enough.
    above_later = [
        f'"{year}-{month}-24" >= {cutoff}'
        for year in (2022, 2023)
        for month in quarters
    ]

    return f"({' OR '.join(above_later)}) AND ({' AND '.join(below_in_2021)})"

In [5]:
def get_df_adm_with_n_buildings(adm_level, threshold=0.65, save=False):
    """Aggregate total and damaged building counts/areas per administrative unit.

    Relies on notebook-level state: ``d_adm`` (admin polygons keyed by level),
    ``db`` (DuckDB connection) and, when saving, ``PREDS_PATH`` / ``run_name``.

    Args:
        adm_level: Key into ``d_adm``; also selects the ``adm<level>_id`` and
            ``ADM<level>_EN`` columns used for grouping and joining.
        threshold: Forwarded to ``get_condition_damaged``.
        save: When true, additionally write the result as GeoJSON under
            ``PREDS_PATH / run_name`` and print the destination path.

    Returns:
        geopandas.GeoDataFrame indexed by (id, name) holding ``geometry``,
        ``n_buildings``, ``area``, ``n_buildings_damaged`` and
        ``area_damaged``; missing values after the left joins are filled
        with 0.
    """
    keys = [f'adm{adm_level}_id', f'ADM{adm_level}_EN']
    adm_id, adm_en = keys
    units = d_adm[adm_level].set_index(keys)
    damaged_filter = get_condition_damaged(threshold)

    # Buildings with area > 50 that satisfy the damage condition, per unit.
    damaged_sql = f"""
            SELECT {adm_id}, {adm_en}, COUNT(*) as n_buildings_damaged, SUM(area) as area_damaged
            FROM buildings_preds
            WHERE {damaged_filter} AND area > 50
            GROUP BY {adm_id}, {adm_en}
        """
    # All buildings with area > 50 (damaged or not), per unit.
    totals_sql = f"""
            SELECT {adm_id}, {adm_en}, COUNT(*) as n_buildings, SUM(area) as area
            FROM buildings_preds
            WHERE area > 50
            GROUP BY {adm_id}, {adm_en}
        """
    damaged = db.execute(damaged_sql).fetchdf().set_index(keys)
    totals = db.execute(totals_sql).fetchdf().set_index(keys)

    # Left joins keep every admin unit, including those absent from the query results.
    merged = units[['geometry']].join(totals, how='left')
    merged = merged.join(damaged, how='left').fillna(0)
    gdf = gpd.GeoDataFrame(merged, crs=units.crs)

    if not save:
        return gdf

    # Dots in the threshold are replaced by underscores in the file name.
    threshold_tag = f'{threshold}'.replace('.', '_')
    fp = PREDS_PATH / run_name / f'n_buildings_damaged_adm{adm_level}_t{threshold_tag}.geojson'
    gdf.to_file(fp, driver='GeoJSON')
    print(f'Saved to {fp}')
    return gdf

In [6]:
# Per-level summaries for admin levels 1-4, using the default threshold and
# without writing any files.
d = {level: get_df_adm_with_n_buildings(level, save=False) for level in range(1, 5)}

# UNOSAT evaluation

In [12]:
# Damage predicate evaluated at threshold 0.5 for this comparison.
predicted_damaged = get_condition_damaged(0.5)
# Rows with unosat_damage equal to 1 or 2 are the reference positives here --
# presumably the damaged UNOSAT classes; TODO confirm the label encoding.
unosat_positive = '(unosat_damage = 1 OR unosat_damage = 2)'
count_sql = """
        SELECT COUNT(*) as n_buildings_damaged
        FROM buildings_preds
        WHERE {where}
    """

# Reference positives that the predicate also flags.
tp = db.execute(count_sql.format(where=f'{unosat_positive} AND {predicted_damaged}')).fetchone()[0]

# Reference positives that the predicate does not flag.
fn = db.execute(count_sql.format(where=f'{unosat_positive} AND NOT ({predicted_damaged})')).fetchone()[0]

# Share of reference positives that are flagged (recall).
tp / (tp + fn)

0.7560808830712122