In [1]:
from pathlib import Path
import geopandas as gpd
import building_zonals as bz

# Data folder; the GeoPackage read below is resolved relative to it.
BASE = Path(r'C:\Users\dkerr\Documents\GISRede\buildings\UK\London\data\building_heights_tiles').resolve()
# Read the 'buildings_uk' layer of the GeoPackage and preview its first row.
gdf = gpd.read_file(BASE / 'buildings.gpkg', layer='buildings_uk')
gdf.head(1)

Unnamed: 0,osm_id,name,type,tile_name,geometry
0,2956186,Laurence House,block,TQ37,"POLYGON ((537579.219 173589.264, 537579.375 17..."


In [2]:
# Every .tif file directly inside BASE/rasters.
RASTERS = [tif for tif in (BASE / 'rasters').iterdir() if tif.name.endswith('.tif')]
# The tile code is the third underscore-separated token of each raster file name.
TILES = [tif.name.split('_')[2] for tif in RASTERS]
TILES


['TQ36', 'TQ37', 'TQ38']

In [3]:
# Keep only the buildings whose tile has a raster. TILES is derived from the
# raster file names, so this stays in sync when rasters are added or removed
# instead of relying on a hand-maintained list of tile codes.
gdf_in_tiles = gdf[gdf.tile_name.isin(TILES)]
print(len(gdf_in_tiles))

219642


In [4]:
import pandas as pd
# Load the intermediate zonal statistics and list the available columns.
df_zonals = pd.read_csv(BASE / 'rasters/tmp/ZONALS.csv')
print(df_zonals.columns)
# Only the building id and the median height are needed from here on.
df_zonals = df_zonals.loc[:, ['osm_id', 'heights_med']]
print(len(df_zonals))
# Average the median height per building id.
df_zonals.groupby('osm_id').mean()

Index(['osm_id', 'heights_mean', 'heights_med', 'heights_min', 'heights_max',
       'tile_name'],
      dtype='object')
219542


Unnamed: 0_level_0,heights_med
osm_id,Unnamed: 1_level_1
2.131000e+03,1410.0
2.171000e+03,1111.0
5.533000e+03,348.0
1.873600e+04,881.0
1.873700e+04,934.0
...,...
1.116975e+09,740.5
1.116975e+09,747.5
1.116975e+09,728.5
1.116975e+09,733.0


In [11]:
# Load the combined building zonal statistics and show them in full.
df_final = pd.read_csv(BASE / 'BUILDING_ZONALS.csv')
display(df_final)
# Mean height per building id, using only rows that have a mean height.
display(
    df_final.loc[df_final['heights_mean'].notna(), ['osm_id', 'heights_mean']]
    .groupby('osm_id')
    .mean()
)

Unnamed: 0,osm_id,heights_mean,heights_min,heights_max,heights_med
0,1.140000e+02,,,,
1,1.480000e+02,,,,
2,4.490000e+02,,,,
3,1.089000e+03,,,,
4,1.256000e+03,,,,
...,...,...,...,...,...
9355179,1.116989e+09,,,,
9355180,1.116989e+09,,,,
9355181,1.116989e+09,,,,
9355182,1.116989e+09,,,,


Unnamed: 0_level_0,heights_mean
osm_id,Unnamed: 1_level_1
1.026363e+06,2820.0
1.439717e+06,0.0
2.143552e+06,3135.0
2.433106e+06,536.0
2.437402e+06,1243.0
...,...
1.046337e+09,861.0
1.046337e+09,784.0
1.046337e+09,840.0
1.068255e+09,1324.0


In [26]:
from typing import Union
def find_missing_buildings_tmp(
        gdf: gpd.GeoDataFrame,
        csv: Union[Path, str]) -> gpd.GeoDataFrame:
    """Returns gdf of buildings in gpkg missing in csv

    Only buildings whose ``tile_name`` occurs in the csv are considered;
    buildings in tiles that are absent from the csv are ignored rather than
    reported as missing.

    Args:
    gdf : GeoDataFrame of buildings with ``osm_id`` and ``tile_name`` columns
    csv : Path to csv with ``osm_id`` and ``tile_name`` columns

    Returns:
    gdf : Copy of the missing buildings with added ``area`` and ``centroid``
        columns, with ``centroid`` set as the active geometry. The input
        ``gdf`` is left unmodified.
    """
    df = pd.read_csv(csv)
    in_csv_tiles = gdf.tile_name.isin(df.tile_name.unique())
    in_csv_ids = gdf.osm_id.isin(df.osm_id)
    # Explicit copy: assigning new columns onto a boolean-mask selection of
    # the caller's frame would otherwise raise SettingWithCopyWarning.
    missing = gdf[in_csv_tiles & ~in_csv_ids].copy()
    missing['area'] = missing.area
    missing['centroid'] = missing.centroid
    return missing.set_geometry('centroid')

In [27]:
# Buildings that have no row in the intermediate zonal statistics csv.
gdf_missing = find_missing_buildings_tmp(gdf, BASE / 'rasters/tmp/ZONALS.csv')
# Spot-check a single record (positional row 41), then show the whole frame.
print(gdf_missing.iloc[41])
gdf_missing

osm_id                                               155363850
name                                         Curved Angel Cafe
type                                                      None
tile_name                                                 TQ38
geometry     POLYGON ((531529.4456662947 182192.05623275065...
area                                                 33.435195
centroid           POINT (531533.6296030761 182192.1451215923)
Name: 1007798, dtype: object


Unnamed: 0,osm_id,name,type,tile_name,geometry,area,centroid
1188,5986805,HSBC UK,office,TQ38,"POLYGON ((537657.389 180341.198, 537665.311 18...",3604.684921,POINT (537691.251 180363.094)
3071,23095299,John Orwell Sports Centre,,TQ38,"POLYGON ((534609.281 180150.507, 534614.112 18...",2121.369210,POINT (534628.000 180199.238)
6497,26183417,Southwark Cathedral,cathedral,TQ38,"POLYGON ((532641.994 180309.682, 532642.516 18...",1808.082765,POINT (532681.735 180306.746)
10531,4578642,,,TQ38,"POLYGON ((538708.425 188495.877, 538718.663 18...",4593.297470,POINT (538768.833 188469.269)
15865,31175906,,,TQ37,"POLYGON ((532438.337 179252.315, 532461.932 17...",270.723834,POINT (532452.932 179258.108)
...,...,...,...,...,...,...,...
8913239,1046336675,,,TQ38,"POLYGON ((531958.704 189714.021, 531964.307 18...",61.876826,POINT (531964.024 189716.765)
8913240,1046336676,,,TQ38,"POLYGON ((531953.664 189717.251, 531959.267 18...",61.959645,POINT (531958.985 189719.988)
8913241,1046336677,,,TQ38,"POLYGON ((531948.631 189720.469, 531954.234 18...",61.835501,POINT (531953.949 189723.211)
9078121,1068255283,9 Missenden,apartments,TQ37,"POLYGON ((532789.796 178037.174, 532794.901 17...",39.343576,POINT (532794.283 178036.584)


In [35]:
# Order the missing buildings from smallest to largest footprint area.
gdf_missing.sort_values('area')

Unnamed: 0,osm_id,name,type,tile_name,geometry,area,centroid
1478885,204122372,,piling,TQ38,"POLYGON ((532674.404 180602.150, 532674.410 18...",0.025945,POINT (532674.499 180602.151)
4636761,556482992,,,TQ37,"POLYGON ((535525.628 179027.260, 535526.386 17...",0.685687,POINT (535526.133 179026.962)
4636759,556482990,,,TQ37,"POLYGON ((535523.460 179034.335, 535524.273 17...",0.734581,POINT (535523.992 179034.043)
6486866,785294247,Bin store,,TQ37,"POLYGON ((532685.247 179766.992, 532685.262 17...",2.478093,POINT (532686.065 179767.755)
1549043,210409888,,,TQ38,"POLYGON ((537035.733 180780.006, 537036.926 18...",5.120126,POINT (537037.312 180780.345)
...,...,...,...,...,...,...,...
135862,1026363,Perronet House,,TQ37,"POLYGON ((531819.508 179109.928, 531821.005 17...",3027.499754,POINT (531857.114 179126.751)
1188,5986805,HSBC UK,office,TQ38,"POLYGON ((537657.389 180341.198, 537665.311 18...",3604.684921,POINT (537691.251 180363.094)
1054058,2143552,,,TQ38,"POLYGON ((532745.334 181549.656, 532747.448 18...",3876.041512,POINT (532791.307 181569.963)
10531,4578642,,,TQ38,"POLYGON ((538708.425 188495.877, 538718.663 18...",4593.297470,POINT (538768.833 188469.269)


In [34]:
# Index the intermediate zonal statistics by building id for direct lookup.
ZONALS = pd.read_csv(BASE / 'rasters/tmp/ZONALS.csv').set_index('osm_id')
# Show the statistics recorded for one specific building id.
ZONALS.loc[ZONALS.index == 392097978]

Unnamed: 0_level_0,heights_mean,heights_med,heights_min,heights_max,tile_name
osm_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
392097978.0,2282.410539,2779.0,,,TQ38
