## Assign target labels from the cadastre-level classification to the tessellation cells

In [1]:
import geopandas as gpd
import pandas as pd

In [5]:
regions_datadir = "/data/uscuni-eurofab-overture/"
tessellations_dir = '/data/uscuni-eurofab-overture/processed_data/tessellations/'
buildings_dir = '/data/uscuni-eurofab-overture/processed_data/buildings/'

region_hulls = gpd.read_parquet(
        regions_datadir + "regions/" + "ov_ce_region_hulls.parquet"
    )
region_hulls.shape

cadastre_hulls = gpd.read_parquet('/data/uscuni-ulce/regions/cadastre_regions_hull.parquet')

v = 'v3'

In [6]:
def process_region_targets(region_id, region_hull):
    """Match cadastre regions to a specific MS region, and the assign each building in the MS region a label based on spatial intersection."""
    
    # read all buildings that intersect with this ms region
    cadastre_regions_to_read = cadastre_hulls[cadastre_hulls.intersects(region_hull.iloc[0])].index
    cluster_data = pd.concat(
        [
            gpd.read_parquet(f'/data/uscuni-ulce/processed_data/clusters/clusters_{rid}_{v}.pq', columns=['final_without_noise', 'geometry']) 
            for rid in cadastre_regions_to_read
        ], ignore_index=True
    )
    
    # read target tessellation
    region_tessellations = gpd.read_parquet(
            tessellations_dir + f"tessellation_{region_id}.parquet"
    )

    ## assign targets based on intersection between cadastre buildings and eurofab tessellations
    tess_idxs, blg_idxs = cluster_data.sindex.query(region_tessellations.geometry, predicate='intersects')
    target_clusters = cluster_data.iloc[blg_idxs, 0].groupby(tess_idxs).agg(lambda x: pd.Series.mode(x)[0])
    # go from tessellation ilocs to locs
    target_clusters.index = region_tessellations.index[target_clusters.index]
    target_clusters.reset_index().to_parquet(f'/data/uscuni-eurofab-overture/processed_data/target_clusters/{region_id}_target.pq')

Run the assignment funciton for every region.

In [7]:
%%time
for region_id, region_hull in region_hulls.iterrows():
    print(region_id)
    process_region_targets(region_id, region_hull)

4
5
20
356
401
646
687
810
1014
1049
1054
1117
1321
1476
1598
1642
1827
1940
2110
2112
2268
2304
2485
2779
2846
2885
2997
3086
3147
3256
3335
3408
3415
3462
3631
3751
3758
3770
3888
3892
3907
4040
4066
4347
4356
4798
4833
5084
5388
5513
5832
6004
6037
6085
6090
6108
6574
6612
6738
6918
7098
7111
7125
7355
7607
7634
7674
7688
7823
7867
7962
8160
8213
8245
8272
8320
8360
8438
8854
9048
9147
9310
9353
9656
9678
9728
9754
9830
9871
9981
10078
10135
10196
10385
10446
10454
10574
10771
10846
10939
10957
10962
11055
11196
11282
11345
11633
11700
11785
11818
11855
11860
11944
12041
12214
12311
12363
12458
12493
12502
12511
12551
12590
12647
12815
13347
13348
13385
13475
13566
13633
13928
14064
14137
14242
14328
14404
14459
14496
14663
14759
14766
14946
15176
15241
15338
15351
15406
15416
15882
16414
16430
16439
17204
17373
17744
17780
17846
17892
17933
18116
18127
18230
19263
19442
19537
19811
19906
20034
20332
20490
20556
20796
20861
21115
21229
21365
21811
22026
22246
22417
22543
22552
23103

## Explore assignment

In [9]:
region_id = 65806 # prague
region_id = 66292 # prague


from lonboard import SolidPolygonLayer, Map
from lonboard.basemap import CartoBasemap
from lonboard.colormap import apply_categorical_cmap
from palettable.colorbrewer.qualitative import Set3_12
from core.cluster_validation import get_color

In [10]:
region_buildings = gpd.read_parquet(buildings_dir + f'buildings_{region_id}.parquet')
target_clusters = pd.read_parquet(f'/data/uscuni-eurofab-overture/processed_data/target_clusters/{region_id}_target.pq').set_index('index')
building_targets = target_clusters[target_clusters.index >= 0]
region_buildings['label'] = -1
region_buildings.loc[building_targets.index, 'label'] = building_targets.values

In [11]:
plotting = region_buildings.iloc[target_clusters.index]
plotting['geometry'] = region_buildings.simplify(1)

layer = SolidPolygonLayer.from_geopandas(
    gdf=plotting[["geometry", "label"]], get_fill_color=get_color(plotting['label'].values.astype(int)), opacity=0.15
)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super().__setitem__(key, value)


In [1]:
m = Map(layer, basemap_style=CartoBasemap.Positron)
m

In [13]:
layer.get_fill_color = get_color(plotting['label'].values.astype(int))