## Assign target labels from the cadastre-level classification to the tessellation cells

In [3]:
import geopandas as gpd
import pandas as pd

In [4]:
regions_datadir = "/data/uscuni-eurofab/"
tessellations_dir = '/data/uscuni-eurofab/processed_data/tessellations/'
buildings_dir = '/data/uscuni-eurofab/processed_data/buildings/'

region_hulls = gpd.read_parquet(
        regions_datadir + "regions/" + "ms_ce_region_hulls.parquet"
    )
region_hulls.shape

cadastre_hulls = gpd.read_parquet('/data/uscuni-ulce/regions/cadastre_regions_hull.parquet')

v = 'v3'

In [5]:
def process_region_targets(region_id, region_hull):
    """Match cadastre regions to a specific MS region, and the assign each building in the MS region a label based on spatial intersection."""
    
    # read all buildings that intersect with this ms region
    cadastre_regions_to_read = cadastre_hulls[cadastre_hulls.intersects(region_hull.iloc[0])].index
    cluster_data = pd.concat(
        [
            gpd.read_parquet(f'/data/uscuni-ulce/processed_data/clusters/clusters_{rid}_{v}.pq', columns=['final_without_noise', 'geometry']) 
            for rid in cadastre_regions_to_read
        ], ignore_index=True
    )
    
    # read target tessellation
    region_tessellations = gpd.read_parquet(
            tessellations_dir + f"tessellation_{region_id}.parquet"
    )

    ## assign targets based on intersection between cadastre buildings and eurofab tessellations
    tess_idxs, blg_idxs = cluster_data.sindex.query(region_tessellations.geometry, predicate='intersects')
    target_clusters = cluster_data.iloc[blg_idxs, 0].groupby(tess_idxs).agg(lambda x: pd.Series.mode(x)[0])
    # go from tessellation ilocs to locs
    target_clusters.index = region_tessellations.index[target_clusters.index]
    target_clusters.reset_index().to_parquet(f'/data/uscuni-eurofab/processed_data/target_clusters/{region_id}_target.pq')

Run the assignment funciton for every region.

In [4]:
%%time
for region_id, region_hull in region_hulls.iterrows():
    print(region_id)
    process_region_targets(region_id, region_hull)

19
24
33
478
754
817
1049
1485
1677
2415
2513
2707
2785
2790
2820
3228
3307
3313
3357
3540
3661
3762
3806
4271
4285
4640
4763
5175
5189
5320
5429
5874
6337
6351
6477
6858
6881
7113
7381
7411
7640
7693
7728
7921
7924
8014
8087
8147
8440
8659
8927
8960
9560
9840
9887
10197
10283
10600
10673
10764
10875
11024
11178
11550
11623
11640
12080
12222
12247
12347
12401
12546
12614
12649
12695
12736
13285
13496
13497
14086
14327
14383
14605
14836
15151
15308
15362
15415
15540
15560
15646
16446
16582
16687
17219
17720
17763
17808
17857
17874
17951
18006
18143
18215
19325
19474
20008
20063
20356
20573
20597
20811
21128
22022
22398
22633
22770
23227
23621
23941
24079
24141
24683
24737
25065
25497
25588
25814
25964
26146
26642
26773
27374
27700
27783
27997
28059
28060
28237
28601
28751
28795
28961
29387
30259
30571
30662
30938
31101
31696
31807
32671
32890
33094
33150
33415
33718
33769
33803
34553
34902
36043
36064
36227
36327
36396
36457
37246
37360
37371
37637
37789
37937
38374
38429
38584
38606
38

## Explore assignment

In [6]:
region_id = 65806 # prague


from lonboard import SolidPolygonLayer, Map
from lonboard.basemap import CartoBasemap
from lonboard.colormap import apply_categorical_cmap
from palettable.colorbrewer.qualitative import Set3_12
from core.cluster_validation import get_color

In [7]:
region_buildings = gpd.read_parquet(buildings_dir + f'buildings_{region_id}.parquet')
target_clusters = pd.read_parquet(f'/data/uscuni-eurofab/processed_data/target_clusters/{region_id}_target.pq').set_index('index')
building_targets = target_clusters[target_clusters.index >= 0]
region_buildings['label'] = -1
region_buildings.loc[building_targets.index, 'label'] = building_targets.values

In [8]:
plotting = region_buildings.iloc[target_clusters.index]
plotting['geometry'] = region_buildings.simplify(1)

layer = SolidPolygonLayer.from_geopandas(
    gdf=plotting[["geometry", "label"]], get_fill_color=get_color(plotting['label'].values.astype(int)), opacity=0.15
)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super().__setitem__(key, value)


In [9]:
m = Map(layer, basemap_style=CartoBasemap.Positron)
m

Map(basemap_style=<CartoBasemap.Positron: 'https://basemaps.cartocdn.com/gl/positron-gl-style/style.json'>, la…

In [10]:
layer.get_fill_color = get_color(plotting['label'].values.astype(int))