## Processing

In [1]:
import glob
import os

import geopandas as gpd
import pandas as pd

def get_processed_region_ids(directory):
    """Return the integer region ids encoded in the file names under ``directory``.

    Files are expected to be named ``<prefix>_<region_id>.<ext>``; the id is the
    text between the last underscore and the first following dot of the file name.

    Parameters
    ----------
    directory : str
        Path prefix that is globbed as ``directory + '*'``. Pass a directory
        with a trailing separator to list its contents, or a longer prefix
        to restrict the match to one family of files.

    Returns
    -------
    list[int]
        One id per matching entry, in the order returned by ``glob.glob``.
        Entries whose name does not end in an integer suffix are skipped.
    """
    region_ids = []
    for path in glob.glob(directory + '*'):
        # Parse only the file name: underscores or dots in parent directories
        # must not influence which piece is treated as the region id.
        suffix = os.path.basename(path).split('_')[-1].split('.')[0]
        try:
            region_ids.append(int(suffix))
        except ValueError:
            # Unrelated entry (notes, temp files, sub-directories): not a region output.
            continue
    return region_ids

In [2]:
# Root of the project data; the region hulls file is read from below this path.
regions_datadir = "/data/uscuni-eurofab/"

# Inputs read by the pipeline.
regions_buildings_dir = '/data/uscuni-eurofab/regions/buildings/'
overture_streets_dir = '/data/uscuni-eurofab/overture_streets/'

# Outputs written by (and passed between) the processing stages.
simplfied_buildings_dir = '/data/uscuni-eurofab/processed_data/simplified_buildings/'
buildings_dir = '/data/uscuni-eurofab/processed_data/buildings/'
streets_dir = '/data/uscuni-eurofab/processed_data/streets/'
enclosures_dir = '/data/uscuni-eurofab/processed_data/enclosures/'
tessellations_dir = '/data/uscuni-eurofab/processed_data/tessellations/'
graph_dir = '/data/uscuni-eurofab/processed_data/neigh_graphs/'
chars_dir = '/data/uscuni-eurofab/processed_data/chars/'

In [3]:
# Load the region hulls; the index of this frame supplies the region ids
# that every processing loop in this notebook iterates over.
region_hulls_path = regions_datadir + "regions/" + "ms_ce_region_hulls.parquet"
region_hulls = gpd.read_parquet(region_hulls_path)
region_hulls.shape

(474, 1)

## process buildings

In [4]:
from core.generate_buildings import read_region_buildings, process_region_buildings

In [5]:
def process_single_region_buildings(region_id):
    """Process the raw buildings of one region and store the result.

    Reads ``buildings_{region_id}.pq`` from ``regions_buildings_dir``, runs it
    through ``process_region_buildings`` and writes
    ``buildings_{region_id}.parquet`` into ``simplfied_buildings_dir``.

    Parameters
    ----------
    region_id
        Identifier used to build the input and output file names.
    """
    print('processing', region_id)
    source_path = regions_buildings_dir + f'buildings_{region_id}.pq'
    target_path = simplfied_buildings_dir + f"buildings_{region_id}.parquet"

    raw_buildings = gpd.read_parquet(source_path)
    # NOTE(review): the meaning of the positional `True` is defined by
    # core.generate_buildings.process_region_buildings — confirm there.
    processed_buildings = process_region_buildings(
        raw_buildings, True, simplification_tolerance=.1, merge_limit=25
    )
    processed_buildings.to_parquet(target_path)

In [None]:
%%capture cap
for region_id, _ in region_hulls.iterrows():
    process_single_region_buildings(region_id)

In [10]:
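# Manual step, left disabled: the buildings cell above writes to `simplfied_buildings_dir`,
# while the streets and elements cells below read from `buildings_dir`.
# This copy appears to bridge the two — confirm the resulting directory layout before re-enabling.
# !cp -r /data/uscuni-eurofab/processed_data/simplified_buildings/ /data/uscuni-eurofab/processed_data/buildings/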

## process streets

In [11]:
from core.generate_streets import process_region_streets

In [None]:
%%capture cap
for region_id, _ in region_hulls.iterrows():
    streets = process_region_streets(region_id, overture_streets_dir, buildings_dir)
    streets.to_parquet(streets_dir + f'streets_{region_id}.parquet')

1

## process elements

In [4]:
from core.generate_elements import process_region_elements, generate_enclosures_representative_points, generate_tess

In [5]:
# Region ids parsed from the files already present in `tessellations_dir`;
# the elements loop uses this list to skip regions that were processed in an earlier run.
processed_region_ids = get_processed_region_ids(tessellations_dir)


In [6]:
# Regions whose tessellation file is not on disk yet.
pending_region_ids = region_hulls.index[~region_hulls.index.isin(processed_region_ids)]

for region_id in pending_region_ids:
    enclosures, tesselations = process_region_elements(buildings_dir, streets_dir, region_id)

    # Enclosures are written first; the tessellation file is written last, and its
    # presence is what marks a region as processed on the next run.
    enclosures_path = enclosures_dir + f"enclosure_{region_id}.parquet"
    enclosures.to_parquet(enclosures_path)
    print("Processed enclosures")

    tessellation_path = tessellations_dir + f"tessellation_{region_id}.parquet"
    tesselations.to_parquet(tessellation_path)
    print("processed tesselations")

---- Processing region:  109005 2024-10-30 10:06:52.742627
Problem with topology,  TopologyException: side location conflict at 4949043.1038501924 3416357.1029251725. This can occur if the input geometry is invalid.
Processed enclosures
processed tesselations
---- Processing region:  109050 2024-10-30 10:07:26.743389
Processed enclosures
processed tesselations
---- Processing region:  109585 2024-10-30 10:07:47.451349
Retrying tesselation with less buildings, potentially changing building data.
Dropping 1 buildings due to tesselation problems
Processed enclosures
processed tesselations
---- Processing region:  109676 2024-10-30 10:09:08.122266
Processed enclosures
processed tesselations
---- Processing region:  109949 2024-10-30 10:09:14.117998
Processed enclosures
processed tesselations
---- Processing region:  110157 2024-10-30 10:09:41.488838




Processed enclosures
processed tesselations
---- Processing region:  110373 2024-10-30 10:10:16.750592
Processed enclosures
processed tesselations
---- Processing region:  111252 2024-10-30 10:10:30.123462
Processed enclosures
processed tesselations
---- Processing region:  111314 2024-10-30 10:10:38.858915
Processed enclosures
processed tesselations
---- Processing region:  111594 2024-10-30 10:16:44.142521
Processed enclosures
processed tesselations
---- Processing region:  111672 2024-10-30 10:16:52.813275
Processed enclosures
processed tesselations
---- Processing region:  111683 2024-10-30 10:17:09.975325
Processed enclosures
processed tesselations
---- Processing region:  111801 2024-10-30 10:17:14.989017
Processed enclosures
processed tesselations
---- Processing region:  112098 2024-10-30 10:17:37.984215
Processed enclosures
processed tesselations
---- Processing region:  112842 2024-10-30 10:17:42.138408
Processed enclosures
processed tesselations
---- Processing region:  1129



Processed enclosures
processed tesselations
---- Processing region:  125294 2024-10-30 10:31:33.755284
Processed enclosures
processed tesselations
---- Processing region:  125389 2024-10-30 10:32:02.672615
Processed enclosures
processed tesselations
---- Processing region:  125591 2024-10-30 10:32:22.993008
Processed enclosures
processed tesselations
---- Processing region:  125840 2024-10-30 10:32:32.609490
Processed enclosures
processed tesselations
---- Processing region:  126119 2024-10-30 10:32:47.296417
Retrying tesselation with less buildings, potentially changing building data.
Dropping 1 buildings due to tesselation problems
Processed enclosures
processed tesselations
---- Processing region:  126381 2024-10-30 10:36:41.841199
Processed enclosures
processed tesselations
---- Processing region:  126542 2024-10-30 10:37:24.054340
Processed enclosures
processed tesselations
---- Processing region:  126723 2024-10-30 10:38:20.290779
Processed enclosures
processed tesselations
---- 

## process graphs

In [4]:
from core.generate_ngraphs import process_region_graphs

In [None]:
# Run the graph generation for every region id in the hulls index.
for region_id in region_hulls.index:
    process_region_graphs(
        region_id, graph_dir, buildings_dir, streets_dir, enclosures_dir, tessellations_dir
    )

## process chars

In [None]:
from core.generate_chars import process_single_region_chars

In [None]:
# Run the character computation for every region id in the hulls index;
# results are written under `chars_dir` by `process_single_region_chars`.
for region_id in region_hulls.index:
    process_single_region_chars(
        region_id, graph_dir, buildings_dir, streets_dir, enclosures_dir, tessellations_dir, chars_dir
    )

## merge data

In [26]:
from core.utils import used_keys


def merge_region_chars(region_id):
    """Merge one region's per-element character tables onto its tessellation table.

    Reads the ``*_chars_{region_id}.parquet`` files for tessellations, buildings,
    enclosures, streets and nodes from ``chars_dir``, left-joins them onto the
    tessellation rows, keeps the columns listed in ``used_keys`` and writes the
    result to ``primary_chars_{region_id}.parquet`` in ``chars_dir``.

    Parameters
    ----------
    region_id
        Identifier used to build the input and output file names.

    Returns
    -------
    pandas.DataFrame
        The table that was written, indexed like the tessellation table.
    """
    tessellation = gpd.read_parquet(chars_dir + f"tessellations_chars_{region_id}.parquet")
    buildings = gpd.read_parquet(chars_dir + f"buildings_chars_{region_id}.parquet")
    enclosures = gpd.read_parquet(chars_dir + f"enclosures_chars_{region_id}.parquet")
    streets = gpd.read_parquet(chars_dir + f"streets_chars_{region_id}.parquet")
    nodes = gpd.read_parquet(chars_dir + f"nodes_chars_{region_id}.parquet")

    # Buildings are matched to tessellation rows by index. Their own nodeID / nID
    # columns are dropped so the later joins use the tessellation's link columns.
    merged = pd.merge(
        tessellation.drop(columns=["geometry"]),
        buildings.drop(columns=["nodeID", "geometry", 'nID']),
        right_index=True,
        left_index=True,
        how="left",
    )

    merged = merged.merge(
        enclosures.drop(columns="geometry"),
        right_on="eID",
        left_on="enclosure_index",
        how="left",
    )

    merged = merged.merge(streets.drop(columns="geometry"), on="nID", how="left")
    merged = merged.merge(nodes.drop(columns="geometry"), on="nodeID", how="left")

    # Drop the join keys and helper columns that are not characters themselves.
    merged = merged.drop(
        columns=[
            "nID",
            "eID",
            "nodeID",
            "mm_len",
            "cdsbool",
            "node_start",
            "node_end",
            "x",
            "y",
            "enclosure_index",
            # "id",
            # "osm_id",
        ]
    )
    # Column-based merges discard the left index, so restore the tessellation index.
    merged = merged.set_index(tessellation.index)

    primary = merged[list(used_keys.keys())]
    primary.to_parquet(chars_dir + f'primary_chars_{region_id}.parquet')
    return primary


# Merge every region explicitly. The previous version of this cell relied on
# `region_id` leaking from an earlier loop, so it processed only whichever
# region that loop happened to finish on.
for region_id in region_hulls.index:
    primary = merge_region_chars(region_id)