In [1]:
# Enable IPython's autoreload extension in mode 2 so that edits made to the
# imported project modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

# Paramètres du pipeline

In [2]:
# Pipeline parameters: the only values meant to change from one run to the next.

# Department code, kept as a string so the leading zero is preserved.
code_departement: str = "093"

# Level name handed to the project logger in the setup cell.
logs_level: str = "WARNING"

# Imports & setup

In [3]:
import geopandas as gpd

from potentiel_solaire.attach_buildings_to_schools import attach_buildings_to_schools
from potentiel_solaire.constants import CRS, ALGORITHME_FOLDER, DATA_FOLDER
from potentiel_solaire.sources.bd_topo import extract_bd_topo, get_topo_zones_of_interest, \
    get_topo_buildings_of_interest
from potentiel_solaire.sources.bd_pci import extract_bd_pci
from potentiel_solaire.sources.bd_solar_irradiation import extract_bd_irradiation
from potentiel_solaire.sources.schools_establishments import extract_schools_establishments, \
    get_schools_establishments_of_interest
from potentiel_solaire.features.solar_potential import calculate_solar_potential
from potentiel_solaire.aggregate import aggregate_solar_potential_by
from potentiel_solaire.logger import get_logger

# Fetch the project logger and apply the level chosen in `logs_level`.
# NOTE(review): the import-time DEBUG lines captured below this cell were emitted
# before this level was applied — confirm whether that is intended.
logger = get_logger()
logger.setLevel(logs_level)

2025-03-08 20:17:33,860 - DEBUG - rasterio.session - /home/kelu/projets/13_potentiel_solaire/.venv/lib/python3.10/site-packages/rasterio/session.py - <module> - Could not import boto3, continuing with reduced functionality.
2025-03-08 20:17:33,867 - DEBUG - rasterio.env - /home/kelu/projets/13_potentiel_solaire/.venv/lib/python3.10/site-packages/rasterio/env.py - <module> - GDAL data found in package: path='/home/kelu/projets/13_potentiel_solaire/.venv/lib/python3.10/site-packages/rasterio/gdal_data'.
2025-03-08 20:17:33,870 - DEBUG - rasterio.env - /home/kelu/projets/13_potentiel_solaire/.venv/lib/python3.10/site-packages/rasterio/env.py - <module> - PROJ data found in package: path='/home/kelu/projets/13_potentiel_solaire/.venv/lib/python3.10/site-packages/rasterio/proj_data'.


# Extraction des données sources

### Etablissements scolaires

In [4]:
# Extract the schools directory and report the path returned by the helper.
schools_establishments_path = extract_schools_establishments()
print(f"Annuaire des établissements scolaires extrait ici: {schools_establishments_path}")

Annuaire des établissements scolaires extrait ici: /home/kelu/projets/13_potentiel_solaire/algorithme/data/fr-en-annuaire-education.geojson


### BD TOPO

In [5]:
# Extract the BD TOPO dataset for the selected department and report the
# path returned by the helper.
bd_topo_path = extract_bd_topo(code_departement=code_departement)
print(f"BD TOPO extraite ici: {bd_topo_path}")

BD TOPO extraite ici: /home/kelu/projets/13_potentiel_solaire/algorithme/data/BDTOPO_3-4_TOUSTHEMES_GPKG_LAMB93_D093_2024-12-15/BDTOPO/1_DONNEES_LIVRAISON_2024-12-00134/BDT_3-4_GPKG_LAMB93_D093-ED2024-12-15/BDT_3-4_GPKG_LAMB93_D093-ED2024-12-15.gpkg


### BD PCI

In [6]:
# Extract the BD PCI dataset for the selected department and report the
# path returned by the helper.
# NOTE(review): `bd_pci_path` is not used by any later cell in this notebook;
# see the TODO in the buildings cell about completing buildings from BD PCI.
bd_pci_path = extract_bd_pci(code_departement=code_departement)
print(f"BD PCI extraite ici: {bd_pci_path}")

BD PCI extraite ici: /home/kelu/projets/13_potentiel_solaire/algorithme/data/PARCELLAIRE-EXPRESS_1-1__SHP_LAMB93_D093_2024-10-01/PARCELLAIRE-EXPRESS/1_DONNEES_LIVRAISON_2024-11-00210/PEPCI_1-1_SHP_LAMB93_D093/BATIMENT.SHP


### BD IRRADIATION

In [7]:
# Extract the solar irradiation dataset and report the path returned by the
# helper. The helper takes no department argument.
bd_irradiation_path = extract_bd_irradiation()
print(f"BD irradiation extraite ici: {bd_irradiation_path}")

BD irradiation extraite ici: /home/kelu/projets/13_potentiel_solaire/algorithme/data/ENR_1-0_IRR-SOL_TIFF_WGS84G_FXX_2023-10-01/1_DONNEES_LIVRAISON/GlobalHorizontalIrradiation.tif


# Filtre des données sur le périmètre du calcul

### Etablissements scolaires


In [8]:
# Restrict the schools directory to the perimeter of this run: the selected
# department, the listed school types, public status and currently open.
school_types_of_interest = ["Ecole", "Lycée", "Collège"]

schools_establishments = get_schools_establishments_of_interest(
    schools_filepath=schools_establishments_path,
    code_departement=code_departement,
    types_etablissements=school_types_of_interest,
    statut_public_prive="Public",
    etat="OUVERT",
    crs=CRS,
)

# The school count is reused later as the denominator of the coverage check.
nb_schools = len(schools_establishments)
print(f"Nb d'établissements scolaires: {nb_schools}")

Nb d'établissements scolaires: 1130


### Zone d'intérêt géographique

In [9]:
# Build the geographic area of interest: the union of every commune that
# hosts at least one of the selected schools.
codes_commune = schools_establishments["code_commune"].unique()

# Read the "commune" layer from BD TOPO and reproject it to the project CRS.
all_communes = gpd.read_file(bd_topo_path, layer="commune").to_crs(CRS)

# Keep only the communes whose INSEE code matches a school's commune code.
hosts_a_school = all_communes["code_insee"].isin(codes_commune)
communes = all_communes.loc[hosts_a_school]

# Merge the remaining communes into one row and keep only its geometry column.
geom_of_interest = communes.dissolve()[["geometry"]]

### Zones d'éducation

In [10]:
# Select the BD TOPO zones matching the education category and natures
# below, limited to the area of interest.
education_categories = ["Science et enseignement"]
education_natures = ["Collège", "Lycée", "Enseignement primaire"]

educational_zones = get_topo_zones_of_interest(
    bd_topo_path=bd_topo_path,
    geom_of_interest=geom_of_interest,
    categories=education_categories,
    natures=education_natures,
    crs=CRS,
)

nb_educational_zones = len(educational_zones)
print("Nb de zones d'éducations: ", nb_educational_zones)

  return ogr_read_info(
  return ogr_read_info(
  return ogr_read_info(
  return ogr_read_info(
  crs = pyogrio.read_info(path_or_bytes).get("crs")


Nb de zones d'éducations:  1088


### Bâtiments

In [11]:
# Select the BD TOPO buildings located in the area of interest.
# TODO: add the missing buildings using BD PCI.
buildings = get_topo_buildings_of_interest(
    bd_topo_path=bd_topo_path,
    geom_of_interest=geom_of_interest,
    crs=CRS,
)

nb_buildings = len(buildings)
print("Nb de batiments: ", nb_buildings)

  return ogr_read_info(
  return ogr_read_info(
  return ogr_read_info(
  return ogr_read_info(
  crs = pyogrio.read_info(path_or_bytes).get("crs")


Nb de batiments:  351578


# Détermination des bâtiments scolaires

In [12]:
# Build the table of school buildings from the selected schools, the
# educational zones and the candidate buildings.
schools_buildings = attach_buildings_to_schools(
    schools_establishments=schools_establishments,
    educational_zones=educational_zones,
    buildings=buildings,
)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super().__setitem__(key, value)


# Calcul des attributs utiles pour le potentiel solaire

In [13]:
# Compute the solar potential attributes of every school building from the
# irradiation dataset.
# TODO: only v0 at this stage.
# v0 runs in about 1m30 on the author's laptop.
solar_potential_of_schools_buildings = calculate_solar_potential(
    schools_buildings=schools_buildings,
    bd_irradiation_path=bd_irradiation_path
)

# Dump des données pour analyses

In [14]:
# Dump the intermediate layers into a single GeoPackage for later analysis.

# "zone_autour_du_batiment" is converted to WKT text before the export
# (presumably because it is a second geometry column — TODO confirm).
# The conversion is guarded so the cell can be re-run: once converted, the
# column is a plain Series with no `.to_wkt()`, and an unguarded second run
# would raise AttributeError.
zone_around_building = solar_potential_of_schools_buildings["zone_autour_du_batiment"]
if hasattr(zone_around_building, "to_wkt"):
    solar_potential_of_schools_buildings["zone_autour_du_batiment"] = zone_around_building.to_wkt()

# Every layer goes to the same file, so the path is computed once, outside the loop.
output_gpkg = DATA_FOLDER / f"{code_departement}_pipeline_results.gpkg"

layers = ["schools_establishments", "educational_zones", "schools_buildings", "solar_potential_of_schools_buildings"]
gdfs = [schools_establishments, educational_zones, schools_buildings, solar_potential_of_schools_buildings]

# One GeoPackage layer per frame, named after the variable it comes from.
for layer, gdf in zip(layers, gdfs):
    gdf.to_file(output_gpkg, layer=layer, driver="GPKG")

# Checks sur la qualité des données & calculs

In [15]:
# Quality check: how many of the selected schools ended up with at least one
# building attached, in absolute terms and as a rounded percentage.
school_ids_with_buildings = schools_buildings["identifiant_de_l_etablissement"].unique()
nb_schools_with_buildings = len(school_ids_with_buildings)

coverage_percent = round(100 * nb_schools_with_buildings / nb_schools)

coverage_message = "Nb d'établissements scolaires avec des batiments: {} ({}%)"
print(coverage_message.format(nb_schools_with_buildings, coverage_percent))

Nb d'établissements scolaires avec des batiments: 1103 (98%)


# Agrégations

### Par établissement scolaire

In [None]:
# Aggregate the solar potential per school. The grouping keys identify the
# school, then its commune, department and region.
group_by_school = [
    "identifiant_de_l_etablissement",
    # "nom_etablissement",  # TODO: understand why grouping on this column breaks
    "type_etablissement",
    "libelle_nature",
    "code_commune",
    "nom_commune",
    "code_departement",
    "libelle_departement",
    "code_region",
    "libelle_region",
]

results_by_school = aggregate_solar_potential_by(
    schools_establishments=schools_establishments,
    solar_potential_of_schools_buildings=solar_potential_of_schools_buildings,
    group_by=group_by_school,
)

### Par commune

In [19]:
# Aggregate the solar potential per commune. The grouping keys identify the
# commune, then its department and region.
group_by_commune = [
    "code_commune",
    "nom_commune",
    "code_departement",
    "libelle_departement",
    "code_region",
    "libelle_region",
]

results_by_commune = aggregate_solar_potential_by(
    schools_establishments=schools_establishments,
    solar_potential_of_schools_buildings=solar_potential_of_schools_buildings,
    group_by=group_by_commune,
)
# TODO: the geometry of the commune is wrong

### Par département

In [20]:
# Aggregate the solar potential per department. The grouping keys identify
# the department, then its region.
group_by_departement = [
    "code_departement",
    "libelle_departement",
    "code_region",
    "libelle_region",
]

results_by_departement = aggregate_solar_potential_by(
    schools_establishments=schools_establishments,
    solar_potential_of_schools_buildings=solar_potential_of_schools_buildings,
    group_by=group_by_departement,
)
# TODO: the geometry of the department is wrong

### Par région

In [21]:
# Aggregation per region is not implemented yet.
# TODO: requires connecting to the bucket holding the existing results in order to aggregate
# TODO: or running the computation for every department of a region

# Sauvegarde des fichiers

In [22]:
# Write the aggregated results as GeoJSON files under results/D<code_departement>/,
# next to the algorithm folder.
output_folder = ALGORITHME_FOLDER.parent / "results" / f"D{code_departement}"
output_folder.mkdir(exist_ok=True, parents=True)

# TODO: check with the front-end whether the delivery format needs to change
exports = [
    (results_by_school, output_folder / f"D{code_departement}_ecoles.geojson"),
    (results_by_commune, output_folder / f"D{code_departement}_communes.geojson"),
    (results_by_departement, output_folder / f"D{code_departement}_departement.geojson"),
]

for results, destination in exports:
    results.to_file(destination, driver="GeoJSON")