In [None]:
%load_ext autoreload
%autoreload 2

# Split Ukraine into small quadkeys for global predictions

In [None]:
# Build the quadkey grid at zoom level 8, asking the loader to clip it to Ukraine.
from src.data.quadkeys import load_ukraine_quadkeys_grid
gdf_grid = load_ukraine_quadkeys_grid(zoom=8, clip_to_ukraine=True)
# Shape is (rows, columns); presumably one row per grid cell — confirm against the loader.
print(gdf_grid.shape)
# Last expression of the cell: show the grid on a map (geopandas' explore view, assuming a GeoDataFrame).
gdf_grid.explore()

# Adding Overture buildings

In [None]:
# Load the Overture-buildings quadkey table for zoom level 8.
from src.data.buildings.overture_quadkeys import load_gdf_overture_qk
gdf_qk = load_gdf_overture_qk(zoom=8)
print(gdf_qk.shape)
# Map the cells, coloured by their 'n_buildings' column.
gdf_qk.explore('n_buildings')

In [None]:
import warnings
from src.utils.geometry import load_country_boundaries

# Timings are in minutes, keyed by zoom level (presumably the run time measured
# for one cell at that zoom — confirm). The loop below adds 'area',
# 'area_one_cell' and 't_estimated' to every entry.
d_zoom = {
    7: {'time': 72},
    8: {'time': 19},
    9: {'time': 13},
    10: {'time': 15},
    11: {'time': 11}
}
with warnings.catch_warnings():
    # Area computations below emit UserWarnings that are deliberately silenced.
    warnings.simplefilter("ignore", category=UserWarning)

    area_ukr = load_country_boundaries('Ukraine').area
    # The dict keys are exactly the zoom levels 7..11, so iterate over them directly.
    for zoom, stats in d_zoom.items():
        gdf_qk = load_gdf_overture_qk(zoom=zoom)
        cell_areas = gdf_qk.area
        area_one_cell = cell_areas.max()
        area = cell_areas.sum()
        stats.update({
            # Covered area as a fraction of the country's area.
            'area': area / area_ukr,
            'area_one_cell': area_one_cell,
            # Number of full-cell equivalents multiplied by the per-zoom timing.
            't_estimated': area / area_one_cell * stats['time'],
        })
d_zoom

In [None]:
import ee
import geemap
from src.utils.gee import init_gee
from src.data.buildings.overture_quadkeys import load_gdf_overture_qk

init_gee()
# Kept after init_gee() as in the original ordering; presumably the constants
# module expects an initialised Earth Engine session — confirm.
from src.gee.constants import ASSETS_PATH

zoom = 8
gdf = load_gdf_overture_qk(zoom=zoom)

# Convert the GeoDataFrame to an Earth Engine collection, then queue the export
# of that collection as a table asset.
fc_grid = geemap.geopandas_to_ee(gdf)
export_task = ee.batch.Export.table.toAsset(
    collection=fc_grid,
    description=f"grid_quadkey_overture_zoom{zoom}",
    assetId=ASSETS_PATH + f"s1tsdd_Ukraine/quadkeys_grid_zoom{zoom}",
)
export_task.start()

In [None]:
import warnings

import geopandas as gpd
import pandas as pd
from shapely.geometry import box
from tqdm import tqdm

# PROCESSED_PATH and gpd are used in this cell; import them here so the notebook
# runs top-to-bottom on a fresh kernel.
from src.constants import PROCESSED_PATH
from src.data.buildings.microsoft import MICROSOFT_BUILDINGS_RAW_PATH

# One raw GeoJSON file per "original" quadkey; the file stem is used as the quadkey prefix.
original_qks = [fp.stem for fp in MICROSOFT_BUILDINGS_RAW_PATH.glob('*.geojson')]
# NOTE(review): relies on the loader's default zoom while the output file name says zoom 2 — confirm they agree.
gdf_grid = load_ukraine_quadkeys_grid()
gdf_grid['n_buildings'] = 0
gdf_grid['buildings_bounds'] = None
for original_qk in tqdm(original_qks):

    # Grid cells whose quadkey starts with this file's quadkey.
    gdf_grid_ = gdf_grid[gdf_grid.qk.str.startswith(original_qk)].copy()
    if gdf_grid_.empty:
        # No cell to update for this file; also avoids applying over an empty frame.
        continue

    gdf_buildings = gpd.read_file(MICROSOFT_BUILDINGS_RAW_PATH / f"{original_qk}.geojson")
    if gdf_buildings.empty:
        continue

    with warnings.catch_warnings():
        warnings.filterwarnings('ignore', category=UserWarning)
        # Centroids are computed once per file instead of once per grid row.
        centroids = gdf_buildings.centroid

        # Presumably area_in_ukraine is the fraction of each cell inside Ukraine (TODO confirm):
        # when not every selected cell is fully inside, pre-filter the buildings
        # to those whose centroid falls in one of the selected cells.
        if gdf_grid_.area_in_ukraine.sum() != len(gdf_grid_):
            in_selected_cells = centroids.intersects(gdf_grid_.unary_union)
            gdf_buildings = gdf_buildings[in_selected_cells]
            centroids = centroids[in_selected_cells]

    def buildings_within_geo(row):
        """Return the building count and the buildings' bounding box for one grid row."""
        with warnings.catch_warnings():
            warnings.filterwarnings('ignore', category=UserWarning)
            buildings = gdf_buildings[centroids.intersects(row.geometry)]

            if buildings.empty:
                return {'n_buildings': 0, 'buildings_bounds': None}
            return {
                'n_buildings': len(buildings),
                'buildings_bounds': box(*buildings.geometry.make_valid().total_bounds),
            }

    d_buildings = gdf_grid_.apply(buildings_within_geo, axis=1)
    df_buildings = pd.DataFrame(d_buildings.tolist(), index=gdf_grid_.index)
    gdf_grid.loc[gdf_grid_.index, ['n_buildings', 'buildings_bounds']] = df_buildings

# Keep only cells that contain buildings and shrink each cell to the bounding box of its buildings.
gdf_grid = gdf_grid[gdf_grid.n_buildings > 0].copy()
gdf_grid.geometry = gdf_grid.buildings_bounds
gdf_grid = gdf_grid.drop(columns=['buildings_bounds'])
gdf_grid.to_file(PROCESSED_PATH / 'grid_zoom2_with_buildings.geojson', driver='GeoJSON')
print('File saved.')

In [None]:
def create_grid_buildings(zoom=2):
    """Count Microsoft building footprints per quadkey grid cell and save the result.

    For every raw ``*.geojson`` file in ``MICROSOFT_BUILDINGS_RAW_PATH`` (the file
    stem is used as a quadkey prefix), buildings are assigned, by centroid, to the
    grid cells whose ``qk`` starts with that prefix. Cells with at least one
    building are kept, their geometry is replaced by the bounding box of their
    buildings, and the grid is written to
    ``PROCESSED_PATH / f'grid_zoom{zoom}_with_buildings.geojson'``.

    Takes ~1h, could use multiprocessing.

    Relies on names made available by other notebook cells (``gpd``, ``pd``,
    ``tqdm``, ``warnings``, ``box``, ``PROCESSED_PATH``, ...).

    Args:
        zoom: Forwarded to ``load_ukraine_quadkeys_grid`` and used in the output file name.

    Returns:
        None. The result is written to disk.
    """
    original_qks = [fp.stem for fp in MICROSOFT_BUILDINGS_RAW_PATH.glob('*.geojson')]
    gdf_grid = load_ukraine_quadkeys_grid(zoom=zoom)
    gdf_grid['n_buildings'] = 0
    gdf_grid['buildings_bounds'] = None
    for original_qk in tqdm(original_qks):

        # Grid cells whose quadkey starts with this file's quadkey.
        gdf_grid_ = gdf_grid[gdf_grid.qk.str.startswith(original_qk)].copy()
        if gdf_grid_.empty:
            # No cell to update for this file; also avoids applying over an empty frame.
            continue

        gdf_buildings = gpd.read_file(MICROSOFT_BUILDINGS_RAW_PATH / f"{original_qk}.geojson")
        if gdf_buildings.empty:
            continue

        with warnings.catch_warnings():
            warnings.filterwarnings('ignore', category=UserWarning)
            # Centroids are computed once per file instead of once per grid row.
            centroids = gdf_buildings.centroid

            # Presumably area_in_ukraine is the fraction of each cell inside Ukraine (TODO confirm):
            # when not every selected cell is fully inside, pre-filter the buildings
            # to those whose centroid falls in one of the selected cells.
            if gdf_grid_.area_in_ukraine.sum() != len(gdf_grid_):
                in_selected_cells = centroids.intersects(gdf_grid_.unary_union)
                gdf_buildings = gdf_buildings[in_selected_cells]
                centroids = centroids[in_selected_cells]

        def buildings_within_geo(row):
            """Return the building count and the buildings' bounding box for one grid row."""
            with warnings.catch_warnings():
                warnings.filterwarnings('ignore', category=UserWarning)
                buildings = gdf_buildings[centroids.intersects(row.geometry)]

                if buildings.empty:
                    return {'n_buildings': 0, 'buildings_bounds': None}
                return {
                    'n_buildings': len(buildings),
                    'buildings_bounds': box(*buildings.geometry.make_valid().total_bounds),
                }

        d_buildings = gdf_grid_.apply(buildings_within_geo, axis=1)
        df_buildings = pd.DataFrame(d_buildings.tolist(), index=gdf_grid_.index)
        gdf_grid.loc[gdf_grid_.index, ['n_buildings', 'buildings_bounds']] = df_buildings

    # Keep only cells that contain buildings and shrink each cell to the bounding box of its buildings.
    gdf_grid = gdf_grid[gdf_grid.n_buildings > 0].copy()
    gdf_grid.geometry = gdf_grid.buildings_bounds
    gdf_grid = gdf_grid.drop(columns=['buildings_bounds'])
    gdf_grid.to_file(PROCESSED_PATH / f'grid_zoom{zoom}_with_buildings.geojson', driver='GeoJSON')
    print('File saved.')

def load_grid_buildings(zoom=2):
    """Read the cached buildings grid for ``zoom``, generating the file first if it is missing.

    Args:
        zoom: Selects the cache file ``grid_zoom{zoom}_with_buildings.geojson``
            under ``PROCESSED_PATH`` and is forwarded to ``create_grid_buildings``.

    Returns:
        The result of ``gpd.read_file`` on the cache file.
    """
    cached_grid_fp = PROCESSED_PATH / f'grid_zoom{zoom}_with_buildings.geojson'
    if cached_grid_fp.exists():
        return gpd.read_file(cached_grid_fp)

    # Cache miss: build the file, then read it back.
    create_grid_buildings(zoom=zoom)
    return gpd.read_file(cached_grid_fp)

In [None]:
# Load the buildings grid at the default zoom (2); the loader builds the cache file first if it is missing.
grid_buildings = load_grid_buildings()
# Map the cells coloured by building count; vmax=50000 presumably caps the colour scale so dense cells do not dominate — confirm.
grid_buildings.explore('n_buildings', vmax=50000)

In [None]:
from src.utils.geometry import load_country_boundaries
import warnings
# UserWarnings raised inside this block are silenced (presumably the geographic-CRS area warning — confirm).
with warnings.catch_warnings():
    warnings.filterwarnings('ignore',category=UserWarning)
    ukraine = load_country_boundaries('Ukraine')
    # Number of grid polygons and their summed area as a percentage of the country's area.
    print(f'{len(grid_buildings)} polygons, {100*grid_buildings.area.sum()/ukraine.area:.2f}% of ukraine area')

In [None]:
import matplotlib.pyplot as plt

# Bind the figure to a real name: assigning to `_` in IPython shadows the
# last-output variable and stops it from being updated afterwards.
fig, ax = plt.subplots(figsize=(5, 4))
# Histogram of the per-cell building counts, 200 bins.
ax.hist(grid_buildings.n_buildings.values, bins=200)
ax.set(
    xlabel='Number of buildings',
    ylabel='Count',
    title='Distribution of number of buildings per grid cell',
)
plt.show()

# Precompute geojson for each new quadkey

In [None]:
# Reload the buildings grid and index it by quadkey ('qk'); the export loop below iterates over this index.
grid_buildings = load_grid_buildings().set_index('qk')
grid_buildings.head()

In [None]:
from src.constants import PROCESSED_PATH

folder = PROCESSED_PATH / 'quadkeys_grid'
# Make sure the output directory exists before any file is written into it.
folder.mkdir(parents=True, exist_ok=True)

# The raw file is re-read only when the parent quadkey changes between
# consecutive rows, so this is cheapest when rows are grouped by parent quadkey.
previous_original_qk = None
for qk, row in grid_buildings.iterrows():

    # NOTE(review): assumes grid quadkeys are exactly two levels deeper than the raw file quadkeys — confirm.
    original_qk = qk[:-2]
    if original_qk != previous_original_qk:
        gdf_buildings = gpd.read_file(MICROSOFT_BUILDINGS_RAW_PATH / f"{original_qk}.geojson")[['geometry']]
        # Turn the positional index into a 'building_id' column of the form "<original_qk>_<row number>".
        gdf_buildings.index.name = 'building_id'
        gdf_buildings.reset_index(inplace=True)
        gdf_buildings['building_id'] = gdf_buildings['building_id'].apply(lambda x: f"{original_qk}_{x}")
        with warnings.catch_warnings():
            warnings.filterwarnings("ignore", category=UserWarning)
            # Centroids depend only on the source file, so compute them once per file, not once per grid cell.
            building_centroids = gdf_buildings.centroid
        previous_original_qk = original_qk

    with warnings.catch_warnings():
        warnings.filterwarnings("ignore", category=UserWarning)
        gdf_buildings_qk = gdf_buildings[building_centroids.intersects(row.geometry)]

    # Only the geometry column is written; 'building_id' stays in memory.
    gdf_buildings_qk[["geometry"]].to_file(folder / f"{qk}.geojson", driver="GeoJSON")

In [None]:
# Inspect the buildings selected for the last quadkey processed by the loop above.
gdf_buildings_qk

In [None]:
# Name the index of the most recently loaded buildings frame and preview it.
# NOTE(review): the loop above already created a 'building_id' column via reset_index,
# so the index name and a column name now coincide — confirm this is intended.
gdf_buildings.index.name = 'building_id'
gdf_buildings.head()