In [3]:
import geopandas as gpd
from pathlib import Path
from topojson import Topology
import pandas as pd
from shapely.geometry import Polygon

def _keep_polygonal(geometry):
    """Normalise one simplified geometry to a polygonal geometry.

    Polygons and MultiPolygons are returned unchanged. For a
    GeometryCollection, the largest Polygon member (by area) is returned.
    Anything else -- a missing geometry, another geometry type, or a
    collection without any Polygon member -- yields None.
    """
    if geometry is None:
        return None
    if geometry.geom_type in ('Polygon', 'MultiPolygon'):
        # MultiPolygons are deliberately kept whole (not reduced to one part).
        return geometry
    if geometry.geom_type == 'GeometryCollection':
        polygons = [part for part in geometry.geoms if isinstance(part, Polygon)]
        if polygons:
            return max(polygons, key=lambda part: part.area)
    return None


def _simplify_bureaux(dep_bv_gdf, t):
    """Return topologically simplified geometries, indexed by 'bv_id'.

    The input frame is left untouched: the projection, buffering and
    simplification are done on a copy so the caller can still fall back to the
    original geometries if anything here raises.
    """
    initial_crs = dep_bv_gdf.crs

    # Work in EPSG:2154 on a copy; a tiny buffer is applied after make_valid().
    # NOTE(review): presumably the buffer is there to clean up slivers/self-touching
    # rings before building the topology -- confirm.
    projected = dep_bv_gdf.copy()
    projected.geometry = projected.geometry.make_valid().to_crs(epsg=2154).buffer(0.00001)

    # Build the shared-arc topology and simplify it.
    # NOTE(review): `t` is applied to EPSG:2154 coordinates; confirm the default
    # of 0.001 is the intended tolerance in that unit.
    topo = Topology(data=projected.__geo_interface__, toposimplify=t).to_gdf()

    # Coordinates coming back are still in EPSG:2154: tag them, then re-project
    # to the CRS the input frame had.
    topo = topo.set_crs(epsg=2154).to_crs(initial_crs)

    # Keep only the geometry, keyed by polling-station id.
    topo = topo.set_index('bv_id', drop=True)
    topo = topo.drop(columns=[c for c in topo.columns if c != 'geometry'])
    topo['geometry'] = topo['geometry'].apply(_keep_polygonal)
    return topo


def process_dep(dep, bv_gdf, ci_gdf, bv_df, ci_df, election, t=0.001):
    """Export the geometries and results of one département, simplifying the
    polling-station geometries on the way.

    Parameters
    ----------
    dep : department code. Compared as-is against the 'Code département'
        column of `bv_df` / `ci_df`, and as ``str(dep)`` against
        ``ci_gdf['dep']`` and as a prefix of ``bv_gdf['bv_id']``.
    bv_gdf : GeoDataFrame with a 'bv_id' column (polling-station geometries).
    ci_gdf : GeoDataFrame with 'dep' and 'id_circo' columns.
    bv_df : DataFrame with 'Code département', 'Code commune', 'Code BV'.
    ci_df : DataFrame with 'Code département' and 'id_circo'.
    election : label used as a prefix for the two Excel file names.
    t : value passed to ``Topology(..., toposimplify=t)``.

    Side effects
    ------------
    Creates ``~/workspace/carto_legislatives_2024/départements/{dep}`` if
    needed and writes four files there: two GeoPackages (polling stations,
    constituencies) and two Excel workbooks (results per polling station and
    per constituency). Returns None.

    If the simplification step raises, the error is reported and the
    unsimplified polling-station geometries are written instead, in the same
    layout ('bv_id' index + geometry column), rather than failing later on an
    undefined variable.
    """
    print(f'Processing {dep}...')
    # NOTE(review): the prefix test matches every bv_id starting with str(dep);
    # for one-digit codes this may also match other départements depending on
    # how bv_id is zero-padded -- verify against the data.
    dep_bv_gdf = bv_gdf[bv_gdf['bv_id'].str.startswith(str(dep))].copy()
    dep_ci_gdf = ci_gdf[ci_gdf['dep'] == str(dep)].copy()
    dep_bv_df = bv_df[bv_df['Code département'] == dep].copy()
    dep_ci_df = ci_df[ci_df['Code département'] == dep].copy()
    print(f'Simplifying {dep}...')

    try:
        topo = _simplify_bureaux(dep_bv_gdf, t)
    except Exception as e:
        # Best effort: keep going with the raw geometries, but say so loudly.
        print(f'Simplification failed for {dep} ({e!r}); saving unsimplified geometries instead.')
        geom_col = dep_bv_gdf.geometry.name
        topo = dep_bv_gdf.set_index('bv_id', drop=True)
        topo = topo.drop(columns=[c for c in topo.columns if c != geom_col])

    # Build the string identifiers used on the exported tables.
    dep_bv_df['bv_id'] = dep_bv_df['Code commune'].astype(str) + '_' + dep_bv_df['Code BV'].astype(str)
    dep_ci_df['ci_id'] = dep_ci_df['id_circo'].astype(str)
    dep_ci_df = dep_ci_df.drop(columns=['id_circo'])
    dep_ci_gdf['ci_id'] = dep_ci_gdf['id_circo'].astype(str)
    dep_ci_gdf = dep_ci_gdf.drop(columns=['id_circo'])

    print(f'Saving {dep}...')
    output_dir = Path(f'~/workspace/carto_legislatives_2024//départements/{dep}').expanduser()
    output_dir.mkdir(parents=True, exist_ok=True)
    topo.to_file(output_dir / Path(f'bureaux_votes_{dep}.gpkg'), driver='GPKG')
    dep_ci_gdf.to_file(output_dir / Path(f'circonscriptions_{dep}.gpkg'), driver='GPKG')
    dep_bv_df.to_excel(output_dir / Path(f'{election}_bureaux_votes_{dep}.xlsx'), index=False)
    dep_ci_df.to_excel(output_dir / Path(f'{election}_circonscriptions_{dep}.xlsx'), index=False)

In [20]:
import geopandas as gpd
from pathlib import Path
from topojson import Topology
import pandas as pd

def process_simple_dep(dep, bv_gdf, ci_gdf, bv_df, ci_df, election, t=0.001):
    """Export the geometries and results of one département without any
    geometry simplification.

    The four inputs are filtered down to `dep`, string identifiers are added
    ('bv_id' on the polling-station results, 'ci_id' replacing 'id_circo' on
    both constituency tables), and the results are written as two GeoPackages
    and two Excel workbooks under
    ``~/workspace/carto_legislatives_2024/départements/{dep}``.

    `t` is accepted for signature parity with `process_dep` but is not used
    here. Returns None.
    """
    print(f'Processing {dep}...')
    dep_as_text = str(dep)

    # Phase 1: restrict every input to the requested département.
    bureaux_geo = bv_gdf.loc[bv_gdf['bv_id'].str.startswith(dep_as_text)].copy()
    circos_geo = ci_gdf.loc[ci_gdf['dep'] == dep_as_text].copy()
    bureaux_results = bv_df.loc[bv_df['Code département'] == dep].copy()
    circos_results = ci_df.loc[ci_df['Code département'] == dep].copy()

    # Phase 2: derive the string identifiers.
    commune_codes = bureaux_results['Code commune'].astype(str)
    bureau_codes = bureaux_results['Code BV'].astype(str)
    bureaux_results = bureaux_results.assign(bv_id=commune_codes + '_' + bureau_codes)

    circos_results = circos_results.assign(
        ci_id=circos_results['id_circo'].astype(str)
    ).drop(columns=['id_circo'])
    circos_geo = circos_geo.assign(
        ci_id=circos_geo['id_circo'].astype(str)
    ).drop(columns=['id_circo'])

    # Phase 3: write everything into the per-département folder.
    print(f'Saving {dep}...')
    output_dir = Path(f'~/workspace/carto_legislatives_2024//départements/{dep}').expanduser()
    output_dir.mkdir(parents=True, exist_ok=True)

    bureaux_geo.to_file(output_dir / f'bureaux_votes_{dep}.gpkg', driver='GPKG')
    circos_geo.to_file(output_dir / f'circonscriptions_{dep}.gpkg', driver='GPKG')
    bureaux_results.to_excel(output_dir / f'{election}_bureaux_votes_{dep}.xlsx', index=False)
    circos_results.to_excel(output_dir / f'{election}_circonscriptions_{dep}.xlsx', index=False)

In [2]:
# Election label: selects the results sub-folder read below.
election = 'euro24'
print('Loading big files...')

# Input folders: shared geometries, and the results of the chosen election.
geom_dir = Path('~/workspace/carto_legislatives_2024/pre-traité/geometries/').expanduser()
data_dir = Path('~/workspace/carto_legislatives_2024/pre-traité/').expanduser().joinpath(election)

# Geometries (GeoPackage) ...
bv_gdf = gpd.read_file(geom_dir.joinpath('bureaux_votes_reco.gpkg'))
ci_gdf = gpd.read_file(geom_dir.joinpath('circonscriptions.gpkg'))
# ... and tabular results (Excel), per polling station and per constituency.
bv_df = pd.read_excel(data_dir.joinpath('resultats_fr_par_bureau.xlsx'))
ci_df = pd.read_excel(data_dir.joinpath('resultats_fr_par_circonscription.xlsx'))

Loading big files...


In [4]:
# Export département 44 using the frames loaded above.
process_dep(
    dep=44,
    bv_gdf=bv_gdf,
    ci_gdf=ci_gdf,
    bv_df=bv_df,
    ci_df=ci_df,
    election=election,
)

Processing 44...
Simplifying 44...
Saving 44...
