In [1]:
import os
# Input files are looked up in a "data" directory under the current working directory.
data_path = os.path.join(os.getcwd(), 'data')
# Echo the resolved path; the f-string "=" form prints both the name and its repr.
print(f"{data_path=}")

data_path='/app/notebooks/rapprochements/Marseille pôle rénovation urbaine/data'


In [2]:
import geopandas as gpd
import pandas as pd

# Lift pandas' display limits: None disables the cap on displayed columns and rows.
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

In [3]:
from shapely import to_geojson
import json
from batid.services.guess_bdg_new import Guesser, PartialRoofHandler

# File names that list_files() skips outright, regardless of their columns.
ignored_files = ['amest_amest_pdv_projets_logneuf_amp.gpkg'] # Do not match newly built housing

def row_to_input(df_row_raw):
    """Turn one row of the GeoDataFrame into an input record.

    The row is first copied into a plain dict. Its ``OBJECTID`` value becomes
    ``ext_id``; its ``geometry`` is serialized with ``to_geojson`` and parsed
    back with ``json.loads`` so that ``polygon`` holds a plain GeoJSON dict.
    """
    fields = dict(df_row_raw)
    ext_id = fields["OBJECTID"]
    polygon = json.loads(to_geojson(fields["geometry"]))
    return {"ext_id": ext_id, "polygon": polygon}

def run_rapprochement(
    gpkg_path,
    output_path,
    batch_size=50,
    multiple_bdgs_covered_enough_threshold=0.85,
    sole_bdg_intersecting_enough_threshold=0.85,
):
    """Run the Guesser on every feature of a GeoPackage and export the result.

    Steps, in order:
      1. read ``gpkg_path`` and reproject it to EPSG:4326;
      2. convert each row to an input record with ``row_to_input``;
      3. write a work file next to the source (``<gpkg_path>.guesses.json``);
      4. run the guesser with a single ``PartialRoofHandler``;
      5. print the guesser report and export to ``output_path`` with
         ``OBJECTID`` as the external id column name.

    Args:
        gpkg_path: path of the GeoPackage to process.
        output_path: path handed to ``guesser.to_csv``.
        batch_size: forwarded to ``Guesser(batch_size=...)``.
        multiple_bdgs_covered_enough_threshold: forwarded to ``PartialRoofHandler``.
        sole_bdg_intersecting_enough_threshold: forwarded to ``PartialRoofHandler``.
    """
    print(f"Running rapprochement for {gpkg_path}")
    df = gpd.read_file(gpkg_path)
    df = df.to_crs(epsg=4326)

    # DataFrame.apply(axis=1) on a frame with no rows gives back an empty
    # DataFrame; list() of that would yield column labels, not input records.
    if len(df) > 0:
        inputs = list(df.apply(row_to_input, axis=1))
    else:
        inputs = []

    guesses_path = f"{gpkg_path}.guesses.json"
    guesser = Guesser(batch_size=batch_size)
    # Replace the guesser's handlers so that only the partial-roof handler runs.
    guesser.handlers = [
        PartialRoofHandler(
            multiple_bdgs_covered_enough_threshold=multiple_bdgs_covered_enough_threshold,
            sole_bdg_intersecting_enough_threshold=sole_bdg_intersecting_enough_threshold,
        ),
    ]
    guesser.create_work_file(inputs, guesses_path)
    guesser.guess_work_file(guesses_path)
    guesser.report()
    guesser.to_csv(output_path, ext_id_col_name='OBJECTID')
    

def list_files(directory):
    """Return the full paths of the ``.gpkg`` files in ``directory`` to process.

    An entry is kept when it is not named in ``ignored_files``, ends with
    ``.gpkg`` and has an ``id_rnb`` column. Ignored files and files without
    that column are reported on stdout.
    """
    selected = []
    for filename in os.listdir(directory):
        if filename in ignored_files:
            print(f"File {filename} is ignored.")
            continue

        if not filename.endswith('.gpkg'):
            continue

        candidate = os.path.join(directory, filename)
        if 'id_rnb' not in list_columns(candidate):
            print(f"No id_rnb in {filename}, skipping")
            continue

        selected.append(candidate)
    return selected

def list_columns(filepath):
    """Return the columns of the geospatial file at ``filepath``.

    The file is loaded with ``gpd.read_file`` and the resulting frame's
    ``columns`` attribute is returned as is.
    """
    return gpd.read_file(filepath).columns

# Run the matching on every eligible GeoPackage found in data_path.
# Each result is exported next to its source file as "<file>.csv".
files = list_files(data_path)
print(f"Found {len(files)} files")
for file in files:
    csv_path = f"{file}.csv"
    run_rapprochement(file, output_path=csv_path)

File amest_amest_pdv_projets_logneuf_amp.gpkg is ignored.
No id_rnb in amest_amest_pdv_perimetres_projet_npnru_amp.gpkg, skipping
No id_rnb in amest_amest_pdv_projets_24_amenagement_amp.gpkg, skipping
Found 6 files
Running rapprochement for /app/notebooks/rapprochements/Marseille pôle rénovation urbaine/data/amest_amest_pdv_projets_habitat_public_amp.gpkg


  0%|          | 0/14 [00:00<?, ?it/s]

Partial roof with multiple polygons is not supported
Partial roof with multiple polygons is not supported
Partial roof with multiple polygons is not supported
Partial roof with multiple polygons is not supported
Partial roof with multiple polygons is not supported
Partial roof with multiple polygons is not supported
Partial roof with multiple polygons is not supported
Partial roof with multiple polygons is not supported
Partial roof with multiple polygons is not supported
Partial roof with multiple polygons is not supported
Partial roof with multiple polygons is not supported
Partial roof with multiple polygons is not supported
Partial roof with multiple polygons is not supported
Partial roof with multiple polygons is not supported
-- Report --
Number of rows: 684
Number of match: 650 (95.03%)

-- finished_steps --
Rows with finished_steps partial_roof: 684 (100.00%)
Rows with empty finished_steps: 0 (0.00%)

-- match_reasons : absolute --
match_reason
sole_bdg_intersects_roof_enough  

  0%|          | 0/1 [00:00<?, ?it/s]

Partial roof with multiple polygons is not supported
-- Report --
Number of rows: 16
Number of match: 13 (81.25%)

-- finished_steps --
Rows with finished_steps partial_roof: 16 (100.00%)
Rows with empty finished_steps: 0 (0.00%)

-- match_reasons : absolute --
match_reason
sole_bdg_intersects_roof_enough     10
many_bdgs_covered_enough_by_roof     2
isolated_bdg_intersects_roof         1
Name: count, dtype: int64

-- match_reasons : % --
match_reason
sole_bdg_intersects_roof_enough     62.50
many_bdgs_covered_enough_by_roof    12.50
isolated_bdg_intersects_roof         6.25
Name: count, dtype: float64

-- Inputs --
Running rapprochement for /app/notebooks/rapprochements/Marseille pôle rénovation urbaine/data/amest_pdv_projets_residentialisation_amp.gpkg


  0%|          | 0/8 [00:00<?, ?it/s]

Partial roof with multiple polygons is not supported
Partial roof with multiple polygons is not supported
Partial roof with multiple polygons is not supported
Partial roof with multiple polygons is not supported
Partial roof with multiple polygons is not supported
-- Report --
Number of rows: 378
Number of match: 370 (97.88%)

-- finished_steps --
Rows with finished_steps partial_roof: 378 (100.00%)
Rows with empty finished_steps: 0 (0.00%)

-- match_reasons : absolute --
match_reason
sole_bdg_intersects_roof_enough     307
many_bdgs_covered_enough_by_roof     62
isolated_bdg_intersects_roof          1
Name: count, dtype: int64

-- match_reasons : % --
match_reason
sole_bdg_intersects_roof_enough     81.216931
many_bdgs_covered_enough_by_roof    16.402116
isolated_bdg_intersects_roof         0.264550
Name: count, dtype: float64

-- Inputs --
Running rapprochement for /app/notebooks/rapprochements/Marseille pôle rénovation urbaine/data/amest_amest_pdv_projets_22_recyclage_coprodegr_am

  0%|          | 0/4 [00:00<?, ?it/s]

Partial roof with multiple polygons is not supported
Partial roof with multiple polygons is not supported
Partial roof with multiple polygons is not supported
Partial roof with multiple polygons is not supported
Partial roof with multiple polygons is not supported
Partial roof with multiple polygons is not supported
Partial roof with multiple polygons is not supported
-- Report --
Number of rows: 151
Number of match: 143 (94.70%)

-- finished_steps --
Rows with finished_steps partial_roof: 151 (100.00%)
Rows with empty finished_steps: 0 (0.00%)

-- match_reasons : absolute --
match_reason
sole_bdg_intersects_roof_enough     128
many_bdgs_covered_enough_by_roof     15
Name: count, dtype: int64

-- match_reasons : % --
match_reason
sole_bdg_intersects_roof_enough     84.768212
many_bdgs_covered_enough_by_roof     9.933775
Name: count, dtype: float64

-- Inputs --
Running rapprochement for /app/notebooks/rapprochements/Marseille pôle rénovation urbaine/data/amest_amest_pdv_projets_37_eq

  0%|          | 0/5 [00:00<?, ?it/s]

Partial roof with multiple polygons is not supported
Partial roof with multiple polygons is not supported
Partial roof with multiple polygons is not supported
Partial roof with multiple polygons is not supported
Partial roof with multiple polygons is not supported
Partial roof with multiple polygons is not supported
Partial roof with multiple polygons is not supported
Partial roof with multiple polygons is not supported
Partial roof with multiple polygons is not supported
Partial roof with multiple polygons is not supported
Partial roof with multiple polygons is not supported
Partial roof with multiple polygons is not supported
Partial roof with multiple polygons is not supported
Partial roof with multiple polygons is not supported
Partial roof with multiple polygons is not supported
Partial roof with multiple polygons is not supported
Partial roof with multiple polygons is not supported
Partial roof with multiple polygons is not supported
Partial roof with multiple polygons is not sup

  0%|          | 0/5 [00:00<?, ?it/s]

Partial roof with multiple polygons is not supported
Partial roof with multiple polygons is not supported
Partial roof with multiple polygons is not supported
-- Report --
Number of rows: 239
Number of match: 211 (88.28%)

-- finished_steps --
Rows with finished_steps partial_roof: 239 (100.00%)
Rows with empty finished_steps: 0 (0.00%)

-- match_reasons : absolute --
match_reason
sole_bdg_intersects_roof_enough     158
many_bdgs_covered_enough_by_roof     52
isolated_bdg_intersects_roof          1
Name: count, dtype: int64

-- match_reasons : % --
match_reason
sole_bdg_intersects_roof_enough     66.108787
many_bdgs_covered_enough_by_roof    21.757322
isolated_bdg_intersects_roof         0.418410
Name: count, dtype: float64

-- Inputs --


In [4]:
def merge_ids_from_csv_into_gpkg(
    gpkg_path,
    csv_path,
    output_path,
    join_key='OBJECTID',
    original_column='id_rnb',
    replaced_column='rnb_ids',
):
    """Overwrite a GeoPackage column with values looked up in a CSV.

    The GeoPackage at ``gpkg_path`` and the CSV at ``csv_path`` are joined on
    ``join_key`` (left join, so every GeoPackage row is kept). The CSV's
    ``replaced_column`` value is written into the GeoPackage's
    ``original_column``, and the result is saved to ``output_path`` with the
    GPKG driver. Rows without a CSV counterpart get a missing value.

    Args:
        gpkg_path: GeoPackage to read.
        csv_path: CSV holding ``join_key`` and ``replaced_column``.
        output_path: destination GeoPackage path.
        join_key: column present in both inputs and used for the join.
        original_column: GeoPackage column to overwrite.
        replaced_column: CSV column providing the new values.

    Raises:
        pandas.errors.MergeError: if the CSV holds several different values
            for the same ``join_key``.
    """
    df = gpd.read_file(gpkg_path)
    df_csv = pd.read_csv(csv_path)

    # Only the key and the value column are needed; exact duplicate rows are
    # harmless and dropped so they do not trip the uniqueness check below.
    matches = df_csv[[join_key, replaced_column]].drop_duplicates()

    # Joining only the key column from the left side rules out name clashes
    # with other GeoPackage columns. validate='many_to_one' guarantees one
    # output row per GeoPackage row, in the same order.
    merged = df[[join_key]].merge(
        matches, on=join_key, how='left', validate='many_to_one'
    )

    # Positional assignment: the merged frame is row-for-row aligned with df,
    # so this does not depend on the two frames sharing index labels.
    df[original_column] = merged[replaced_column].to_numpy()
    df.to_file(output_path, driver='GPKG')



# Write the ids from each "<file>.csv" back into the matching GeoPackage.
# NOTE: the output path is the source path, so every .gpkg is overwritten in place.
files = list_files(data_path)
for file in files:
    csv_path = f"{file}.csv"
    final_output_path = file
    merge_ids_from_csv_into_gpkg(file, csv_path, output_path=final_output_path)

File amest_amest_pdv_projets_logneuf_amp.gpkg is ignored.


[2025-09-16 13:48:50 +0000] [2605] [INFO] Created 684 records
[2025-09-16 13:48:51 +0000] [2605] [INFO] Created 16 records


No id_rnb in amest_amest_pdv_perimetres_projet_npnru_amp.gpkg, skipping
No id_rnb in amest_amest_pdv_projets_24_amenagement_amp.gpkg, skipping


[2025-09-16 13:48:51 +0000] [2605] [INFO] Created 381 records
[2025-09-16 13:48:51 +0000] [2605] [INFO] Created 151 records
[2025-09-16 13:48:51 +0000] [2605] [INFO] Created 206 records
[2025-09-16 13:48:51 +0000] [2605] [INFO] Created 239 records
