In [2]:
import geopandas as gpd
import pandas as pd

In [3]:
regions_datadir = "/data/uscuni-eurofab/"
tessellations_dir = '/data/uscuni-eurofab/processed_data/tessellations/'
buildings_dir = '/data/uscuni-eurofab/processed_data/buildings/'

region_hulls = gpd.read_parquet(
        regions_datadir + "regions/" + "ms_ce_region_hulls.parquet"
    )
region_hulls.shape

(474, 1)

In [4]:
cadastre_hulls = gpd.read_parquet('/data/uscuni-ulce/regions/cadastre_regions_hull.parquet')

In [5]:
def process_region_targets(region_id, region_hull):

    # read all buildings that intersect with this data
    cadastre_regions_to_read = cadastre_hulls[cadastre_hulls.intersects(region_hull.iloc[0])].index
    cluster_data = pd.concat(
        [
            gpd.read_parquet(f'/data/uscuni-ulce/processed_data/clusters/{rid}_clusters.pq', columns=['final_without_noise', 'geometry']) 
            for rid in cadastre_regions_to_read
        ], ignore_index=True
    )
    
    # read target tessellation
    region_tessellations = gpd.read_parquet(
            tessellations_dir + f"tessellation_{region_id}.parquet"
    )

    ## assign targets based on intersection between cadastre buildings and eurofab tessellations
    tess_idxs, blg_idxs = cluster_data.sindex.query(region_tessellations.geometry, predicate='intersects')
    target_clusters = cluster_data.iloc[blg_idxs, 0].groupby(tess_idxs).agg(lambda x: pd.Series.mode(x)[0])
    # go from tessellation ilocs to locs
    target_clusters.index = region_tessellations.index[target_clusters.index]
    target_clusters.reset_index().to_parquet(f'/data/uscuni-eurofab/processed_data/target_clusters/{region_id}_target.pq')

In [6]:
%%time
for region_id, region_hull in region_hulls.iterrows():
    print(region_id)
    process_region_targets(region_id, region_hull)

65806
CPU times: user 16.2 s, sys: 984 ms, total: 17.2 s
Wall time: 16.9 s


In [7]:
region_id = 65806

In [8]:
region_buildings = gpd.read_parquet(buildings_dir + f'buildings_{region_id}.parquet')
target_clusters = pd.read_parquet(f'/data/uscuni-eurofab/processed_data/target_clusters/{region_id}_target.pq').set_index('index')
building_targets = target_clusters[target_clusters.index >= 0]
region_buildings['label'] = -1
region_buildings.loc[building_targets.index, 'label'] = building_targets.values

In [9]:
from lonboard import SolidPolygonLayer, Map
from lonboard.basemap import CartoBasemap
from lonboard.colormap import apply_categorical_cmap
from palettable.colorbrewer.qualitative import Set3_12
from core.cluster_validation import get_color

In [10]:
plotting = region_buildings.iloc[target_clusters.index]
plotting['geometry'] = region_buildings.simplify(1)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super().__setitem__(key, value)


In [11]:
layer = SolidPolygonLayer.from_geopandas(
    gdf=plotting[["geometry", "label"]], get_fill_color=get_color(plotting['label'].values.astype(int)), opacity=0.15
)



In [None]:
m = Map(layer, basemap_style=CartoBasemap.Positron)
m