In [1]:
# import packages and libraries

#import osmnx as ox
import geopandas as gpd
from pathlib import Path
import folium 


# init
@authors: [Alexandre Pereira Santos](alexandre.santos(at)lmu.de) & [Charlotta Mirbach](c.mirbach@lmu.de)

## data
- maximum flooded area (binarized)
- municipalities of Rio Grande do Sul (Brazil)

## tasks
- generate polygons for affected municipalities
- download OSM buildings for each affected municipality
- identify buildings within flooded area
- export to individual municipality buildings datasets


## functions

In [2]:
def osm_import_buildings(roi):
    """Download OSM building footprints for a region of interest and clip them to it.

    Parameters
    ----------
    roi : geopandas.GeoDataFrame
        Region of interest. It must have a CRS set, because it is re-projected
        to EPSG:4326 to build the query bounding box.

    Returns
    -------
    geopandas.GeoDataFrame
        Polygon/MultiPolygon OSM ways tagged as buildings, in the CRS of `roi`,
        clipped to `roi`, with the columns
        ``['osmid', 'name', 'source', 'geometry', 'building']``.

    Notes
    -----
    Requires ``osmnx`` to be available as ``ox``; the corresponding import in the
    imports cell is currently commented out and has to be enabled before calling
    this function.
    """
    # bounding box of the whole roi (all rows, not only the first one) in lon/lat
    west, south, east, north = roi.to_crs(epsg=4326).total_bounds

    # list key-value pairs for tags
    # Wiki url: https://wiki.openstreetmap.org/wiki/Key:building
    # Only 'building' is queried: OSMnx returns the union of all requested tags,
    # so also asking for 'source' would download every element that merely has a
    # 'source' tag. The 'source' attribute of buildings still comes back as a
    # column whenever it is present on the downloaded elements.
    tags = {'building': True}

    # select relevant columns
    col_list = ['osmid', 'name', 'source', 'geometry', 'building']

    # load buildings for bbox
    buildings = ox.geometries_from_bbox(north=north,
                                        south=south,
                                        west=west,
                                        east=east,
                                        tags=tags)

    buildings = buildings.reset_index()

    # optional attributes are absent when no downloaded element carries them;
    # create them empty so the column selection below cannot raise a KeyError
    for col in col_list:
        if col not in buildings.columns:
            buildings[col] = None

    # filter out the wrong geometries (not polygons) and element types (not 'way')
    is_polygon = buildings.geometry.type.isin(['Polygon', 'MultiPolygon'])
    is_way = buildings['element_type'] == 'way'
    buildings = buildings.loc[is_polygon & is_way, col_list]

    # convert crs to match roi
    buildings = buildings.to_crs(roi.crs)

    # clip geometries to roi
    return gpd.clip(buildings, mask=roi, keep_geom_type=True)


## preprocessing

### load input data

In [54]:
# input flooded area vector; its CRS is the reference CRS for all other layers
flood_path = Path('../data/external/')
flood_file = 'HYD_maximum_flood_extent_IPH_UFRGS_20240506_A.shp'
flood_gdf = gpd.read_file(flood_path / flood_file)

# import the municipality boundaries
mun_path = Path('../data/external/')
mun_file = 'LIM_RS_municipalites_2022_A.shp'
mun_gdf = gpd.read_file(mun_path / mun_file)
# drop the 'Lagoa dos Patos' record and align the layer with the flood CRS,
# since it is spatially joined with both the footprints and the flood polygons
mun_gdf = mun_gdf[mun_gdf['NM_MUN'] != 'Lagoa dos Patos'].to_crs(flood_gdf.crs)

# import building footprints
# NOTE(review): this path points into a personal Dropbox folder; adjust it to
# the local copy of the dataset before running
foot_path = Path('../../../Dropbox/x/PostDoc/02 colab and other/24 05 RS Floods/UFRGS/DADOS/Zenodo/V05/')
foot_file = 'insumos_googlebuildings_edificacoes_rhguaiba.gpkg'
foot_gdf = gpd.read_file(foot_path / foot_file, layer='rhguaiba_google_buildings')
foot_gdf = foot_gdf.to_crs(flood_gdf.crs)

# all three layers must report the same CRS here
print('Flood CRS:', flood_gdf.crs,', Municipality CRS:', mun_gdf.crs, 'Buildings CRS:', foot_gdf.crs)

Flood CRS: EPSG:32722 , Municipality CRS: EPSG:32722 Buildings CRS: EPSG:32722


In [6]:
# re-project the footprints to the flood layer's CRS (the loading cell already
# does this, so the coordinates stay unchanged when the CRS match)
foot_gdf = foot_gdf.to_crs(flood_gdf.crs)

### joining gdfs

In [40]:
# attach the municipality attributes to every footprint lying entirely within a
# municipality, then discard the columns that are not used afterwards
join_gdf = (
    foot_gdf
    .sjoin(mun_gdf, how='inner', predicate="within")
    .drop(columns=['confidence', 'index_right', 'SIGLA_UF', 'AREA_KM2'])
)

### check join consistency

In [41]:
# preview the first rows of the raw footprints, for comparison with the joined frame
foot_gdf.head()

Unnamed: 0,latitude,longitude,area_in_meters,confidence,full_plus_code,geometry
0,-28.848039,-51.572102,27.0563,0.6875,583C5C2H+Q5JG,"POLYGON ((444198.500 6808713.500, 444198.500 6..."
1,-30.026502,-51.051474,84.7454,0.7986,48XCXWFX+9CRX,"POLYGON ((495044.173 6678274.812, 495043.362 6..."
2,-28.805628,-51.295699,216.0186,0.898,583C5PV3+PPVW,"POLYGON ((471152.134 6813507.314, 471155.542 6..."
3,-29.346574,-51.385515,7.1515,0.7107,582CMJ37+9QH4,"POLYGON ((462577.930 6753555.721, 462575.757 6..."
4,-30.667396,-51.931613,109.664,0.8104,48XC83M9+29R3,"POLYGON ((410755.858 6606894.732, 410746.009 6..."


In [42]:
# preview the joined footprints: rows now carry the municipality columns from mun_gdf
join_gdf.head()

Unnamed: 0,latitude,longitude,area_in_meters,full_plus_code,geometry,CD_MUN,NM_MUN
0,-28.848039,-51.572102,27.0563,583C5C2H+Q5JG,"POLYGON ((444198.500 6808713.500, 444198.500 6...",4323309,Vila Flores
2870,-28.880362,-51.496078,16.7416,583C4G93+VH3G,"POLYGON ((451626.273 6805171.279, 451623.739 6...",4323309,Vila Flores
4275,-28.859079,-51.528961,18.3865,583C4FRC+9C7R,"POLYGON ((448410.267 6807509.061, 448411.112 6...",4323309,Vila Flores
6394,-28.839627,-51.545011,20.6623,583C5F63+4XXW,"POLYGON ((446835.882 6809657.594, 446835.618 6...",4323309,Vila Flores
7671,-28.860752,-51.548561,228.9957,583C4FQ2+MHWR,"POLYGON ((446503.439 6807306.307, 446502.770 6...",4323309,Vila Flores


In [43]:
# (rows, columns) of the joined frame vs. the raw footprints, to see how many rows the join dropped
print(join_gdf.shape, foot_gdf.shape)

(5847115, 7) (5857714, 6)


In [45]:
# number of distinct plus codes in the raw footprints
foot_len = len(foot_gdf['full_plus_code'].unique())

In [46]:
# a negative value means duplicated codes:
# full_plus_code is not unique, but repeats in exceptional cases
foot_len - len(foot_gdf)

-56

In [47]:
# there are no NA values in the full_plus_code in the joined df, though
join_gdf['full_plus_code'].isna().describe()

count     5847115
unique          1
top         False
freq      5847115
Name: full_plus_code, dtype: object

In [48]:
# number of distinct plus codes in the joined footprints
join_full_len = len(join_gdf['full_plus_code'].unique())

In [49]:
# full_plus_code repeats in the same way it did for the foot_gdf, minus 1
join_full_len - len(join_gdf)

-55

In [50]:
# footprints that did not fall within any municipality, in absolute and relative terms
n_missing = foot_gdf.shape[0] - join_gdf.shape[0]
pct_missing = n_missing / foot_gdf.shape[0] * 100
print('there are', n_missing, 'less observations in the joined df than in the footprints df',
      'this means', pct_missing, '%')
# less than half a percent seems fair enough

there are 10599 less observations in the joined df than in the footprints df this means 0.18094089264173704 %


### identifying buildings within the flooded area

In [69]:
# eliminating unnecessary columns in the flooded area gdf
# these columns are all filled with a single value, probably a residue from the previous analysis process
# reassigning with errors='ignore' keeps the cell re-runnable: a second run no
# longer fails with a KeyError because the columns are already gone
flood_gdf = flood_gdf.drop(
    columns=['CD_MUN', 'NM_MUN', 'SIGLA_UF', 'bacia', 'MUN_KM2', 'area_km2'],
    errors='ignore',
)
# constant flag that marks matched rows after a spatial join with this layer
flood_gdf['flooded'] = 1
flood_gdf


Unnamed: 0,fid,geometry,flooded
0,1.0,"POLYGON ((432485.932 6687734.730, 432400.328 6...",1
1,2.0,"POLYGON ((432400.328 6687309.661, 432485.932 6...",1
2,3.0,"POLYGON ((432203.416 6686331.757, 432096.190 6...",1
3,4.0,"POLYGON ((432096.190 6685799.174, 432203.416 6...",1
4,5.0,"POLYGON ((432080.152 6685719.510, 432082.067 6...",1
...,...,...,...
4714,4715.0,"POLYGON ((495814.422 6715449.995, 495814.407 6...",1
4715,4716.0,"POLYGON ((496109.812 6715649.104, 496109.798 6...",1
4716,4717.0,"POLYGON ((496613.887 6715569.711, 496613.881 6...",1
4717,4718.0,"POLYGON ((496648.698 6715629.445, 496662.436 6...",1


In [67]:
# list the columns of the flood layer
# NOTE(review): this cell sits after the column drop, but its stored output still
# lists the dropped columns, so it was presumably executed before the drop
flood_gdf.columns

Index(['fid', 'CD_MUN', 'NM_MUN', 'SIGLA_UF', 'bacia', 'MUN_KM2', 'area_km2',
       'geometry'],
      dtype='object')

In [None]:
# left spatial join: every building is kept, and the ones intersecting a flood
# polygon receive that polygon's attributes
flooded_join_gdf = gpd.sjoin(join_gdf, flood_gdf, how='left', predicate="intersects")


In [74]:
# extra rows added by the left join (a building is repeated once per matching flood polygon)
len(flooded_join_gdf) - len(join_gdf)

10

In [None]:
# preview the buildings together with the flood columns appended by the left join
flooded_join_gdf.head()

In [75]:
# quick visual check: draws every row of the joined frame in one colour
# NOTE(review): with a single fixed colour, flooded and non-flooded buildings look
# the same - confirm whether only rows with a flood match were meant to be shown
flooded_join_gdf.plot(color='red')

  flooded_join_gdf.plot('flooded', color='red')


### export individual shapefiles for the affected municipalities

In [51]:
# municipalities touched by the flood: left-join the flood polygons, keep only the
# matched rows, and merge the repeated matches into one row per municipality name
join_mun_gdf = mun_gdf.sjoin(flood_gdf, how='left')
has_flood = join_mun_gdf['index_right'].notna()
affected_mun = (
    join_mun_gdf.loc[has_flood]
    .dissolve(by='NM_MUN')
    .reset_index()
    .drop(columns=['index_right', 'SIGLA_UF'])
)
affected_mun

Unnamed: 0,NM_MUN,geometry,CD_MUN,AREA_KM2,Id
0,Agudo,"POLYGON ((280783.563 6703228.671, 280675.755 6...",4300109,534.624,2.0
1,Alvorada,"POLYGON ((492936.910 6675795.361, 492919.340 6...",4300604,71.700,2.0
2,Arroio do Meio,"POLYGON ((411847.248 6752613.429, 411975.881 6...",4301008,157.088,2.0
3,Arroio dos Ratos,"POLYGON ((421437.093 6668556.192, 421378.385 6...",4301107,425.791,2.0
4,Barra do Ribeiro,"POLYGON ((471898.641 6647458.786, 472204.596 6...",4301909,729.316,2.0
...,...,...,...,...,...
74,Venâncio Aires,"POLYGON ((384109.019 6717301.324, 383911.476 6...",4322608,772.588,2.0
75,Vera Cruz,"POLYGON ((350157.500 6699362.116, 350034.264 6...",4322707,309.621,2.0
76,Veranópolis,"POLYGON ((444228.397 6786584.884, 443912.133 6...",4322806,289.397,2.0
77,Vespasiano Corrêa,"POLYGON ((424131.410 6781322.209, 423765.510 6...",4322855,113.622,2.0


In [52]:
#setting the environment

AOI_path = Path('../data/processed/')
# create the output folder up front so that writing does not fail on a fresh checkout
AOI_path.mkdir(parents=True, exist_ok=True)
mun_file_list = []

# how many of the affected municipalities (in table order) are exported;
# set to None to export all of them
n_export = 9

# selecting the buildings of each affected municipality and saving them to a shapefile
# join_gdf already stores, for every building, the name of the municipality that
# contains it (NM_MUN from the 'within' join), so an attribute filter replaces the
# much slower geometric clip of the full footprint layer for every municipality
for mun_name in affected_mun['NM_MUN'].iloc[0:n_export]:
    mun_buildings = join_gdf[join_gdf['NM_MUN'] == mun_name]
    if mun_buildings.empty:
        # nothing to write for a municipality without any footprint
        continue
    # separate name, so the input file name `foot_file` is not overwritten
    out_file = 'URB_' + mun_name + '_affected_buildings_A.shp'
    mun_file_list.append(out_file)
    mun_buildings.to_file(AOI_path / out_file)

  mun_buildings.to_file(AOI_path / foot_file)
  mun_buildings.to_file(AOI_path / foot_file)
  mun_buildings.to_file(AOI_path / foot_file)


KeyboardInterrupt: 