## PREPARATION DES DONNEES POUR ANALYSE ET MODELISATION

In [24]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import sklearn as sk 
import datetime
import re


### Importation des données

In [25]:
DEPT_CODE = 69  # à changer facilement ici

# --- Chargement des fichiers ---
path_existants = f"../data/donnees_dpe_existants_{DEPT_CODE}.csv"
path_neufs = f"../data/donnees_dpe_neufs_{DEPT_CODE}.csv"

logements_existants = pd.read_csv(path_existants)
logements_neufs = pd.read_csv(path_neufs)

print(f"✅ Chargé DPE {DEPT_CODE} : {logements_existants.shape[0]} existants, {logements_neufs.shape[0]} neufs")

  logements_existants = pd.read_csv(path_existants)
  logements_neufs = pd.read_csv(path_neufs)


✅ Chargé DPE 69 : 430668 existants, 40277 neufs


In [26]:
# Ajouter de la colonne "Logement" dans chaque DF
logements_existants["Logement"]="Ancien"
logements_neufs["Logement"]="Neuf"

In [27]:
# Ajout année de construction
now=datetime.datetime.now()
logements_neufs["annee_construction"]=now.year
print (logements_neufs["annee_construction"].value_counts())

annee_construction
2025    40277
Name: count, dtype: int64


In [28]:
# Fusionner les deux DF
logements_merged=pd.concat([logements_neufs,logements_existants], join = 'inner')
print(logements_merged.columns)
print(len(logements_merged.columns))

Index(['numero_dpe', 'date_derniere_modification_dpe',
       'date_visite_diagnostiqueur', 'date_etablissement_dpe',
       'date_reception_dpe', 'date_fin_validite_dpe', 'numero_dpe_remplace',
       'numero_dpe_immeuble_associe', 'id_rnb', 'provenance_id_rnb',
       ...
       'surface_totale_capteurs_pv', 'nombre_module',
       'production_electricite_pv_kwhep_par_an',
       'electricite_pv_autoconsommee', '_geopoint', '_id', '_i', '_rand',
       'Logement', 'annee_construction'],
      dtype='object', length=209)
209


In [29]:
# Créer une colonne avec uniquement l'année de la Date de réception du DPE
logements_merged["annee_reception_DPE"]=logements_merged["date_reception_dpe"].str[:4]
print(logements_merged[["annee_reception_DPE","date_reception_dpe"]])

       annee_reception_DPE date_reception_dpe
0                     2021         2021-07-01
1                     2021         2021-07-05
2                     2021         2021-07-05
3                     2021         2021-07-05
4                     2021         2021-07-05
...                    ...                ...
430663                2025         2025-10-27
430664                2025         2025-10-27
430665                2025         2025-10-27
430666                2025         2025-10-27
430667                2025         2025-10-27

[470945 rows x 2 columns]


  logements_merged["annee_reception_DPE"]=logements_merged["date_reception_dpe"].str[:4]


In [30]:
# Créer une colonne periode_construction avec ces classes ci-dessous
def periode_construction(année):
    if année < 1960:
        return "Avant 1960"
    elif 1960 <= année <= 1970:
        return "1961 - 1970" 
    elif 1971 <= année <= 1980:
        return "1971 - 1980" 
    elif 1981 <= année <= 1990:
        return "1981 - 1990"
    elif 1991 <= année <= 2000:
        return "1991 - 2000"
    elif 2001 <= année <= 2010:
        return "2001 - 2010"
    elif pd.isna(année):
        return "Année inconnue"
    else:
        return "Après 2010"

logements_merged["periode_construction"]=logements_merged["annee_construction"].apply(periode_construction)
print(logements_merged[["periode_construction","annee_construction"]])

       periode_construction  annee_construction
0                Après 2010              2025.0
1                Après 2010              2025.0
2                Après 2010              2025.0
3                Après 2010              2025.0
4                Après 2010              2025.0
...                     ...                 ...
430663       Année inconnue                 NaN
430664          1961 - 1970              1962.0
430665          1961 - 1970              1967.0
430666          1961 - 1970              1962.0
430667          1981 - 1990              1986.0

[470945 rows x 2 columns]


  logements_merged["periode_construction"]=logements_merged["annee_construction"].apply(periode_construction)


In [31]:
pd.set_option('display.max_columns', None)  # Affiche toutes les colonnes
logements_merged.head(10)

Unnamed: 0,numero_dpe,date_derniere_modification_dpe,date_visite_diagnostiqueur,date_etablissement_dpe,date_reception_dpe,date_fin_validite_dpe,numero_dpe_remplace,numero_dpe_immeuble_associe,id_rnb,provenance_id_rnb,numero_rpls_logement,numero_immatriculation_copropriete,modele_dpe,version_dpe,methode_application_dpe,etiquette_dpe,etiquette_ges,classe_altitude,zone_climatique,type_batiment,type_installation_chauffage,type_installation_ecs,hauteur_sous_plafond,nombre_appartement,nombre_niveau_immeuble,nombre_niveau_logement,typologie_logement,appartement_non_visite,surface_habitable_immeuble,surface_habitable_logement,surface_tertiaire_immeuble,classe_inertie_batiment,adresse_ban,numero_voie_ban,nom_rue_ban,nom_commune_ban,code_postal_ban,code_insee_ban,code_departement_ban,code_region_ban,identifiant_ban,coordonnee_cartographique_x_ban,coordonnee_cartographique_y_ban,score_ban,statut_geocodage,adresse_brut,nom_commune_brut,code_postal_brut,numero_etage_appartement,position_logement_dans_immeuble,nom_residence,complement_adresse_batiment,complement_adresse_logement,indicateur_confort_ete,protection_solaire_exterieure,logement_traversant,presence_brasseur_air,inertie_lourde,isolation_toiture,deperditions_enveloppe,deperditions_ponts_thermiques,deperditions_murs,deperditions_planchers_hauts,deperditions_planchers_bas,deperditions_portes,deperditions_baies_vitrees,deperditions_renouvellement_air,qualite_isolation_enveloppe,qualite_isolation_murs,qualite_isolation_plancher_haut_comble_amenage,qualite_isolation_plancher_bas,qualite_isolation_menuiseries,ubat_w_par_m2_k,besoin_chauffage,besoin_ecs,besoin_refroidissement,conso_5_usages_ep,conso_5_usages_par_m2_ep,conso_chauffage_ep,conso_ecs_ep,conso_refroidissement_ep,conso_eclairage_ep,conso_auxiliaires_ep,conso_5_usages_ef,conso_5_usages_par_m2_ef,conso_chauffage_ef,conso_ecs_ef,conso_refroidissement_ef,conso_eclairage_ef,conso_auxiliaires_ef,emission_ges_5_usages,emission_ges_5_usages_par_m2,emission_ges_chauffage,emission_ges_ecs,emission_ges_refroidissement,emission_ges_eclairage,emission_ges_auxiliaires,type_energie_n1,conso_5_usages_ef_energie_n1,conso_chauffage_ef_energie_n1,conso_ecs_ef_energie_n1,cout_total_5_usages_energie_n1,cout_chauffage_energie_n1,cout_ecs_energie_n1,emission_ges_5_usages_energie_n1,emission_ges_chauffage_energie_n1,emission_ges_ecs_energie_n1,type_energie_n2,conso_5_usages_ef_energie_n2,conso_chauffage_ef_energie_n2,conso_ecs_ef_energie_n2,cout_total_5_usages_energie_n2,cout_chauffage_energie_n2,cout_ecs_energie_n2,emission_ges_5_usages_energie_n2,emission_ges_chauffage_energie_n2,emission_ges_ecs_energie_n2,type_energie_n3,conso_5_usages_ef_energie_n3,conso_chauffage_ef_energie_n3,conso_ecs_ef_energie_n3,cout_total_5_usages_energie_n3,cout_chauffage_energie_n3,cout_ecs_energie_n3,emission_ges_5_usages_energie_n3,emission_ges_chauffage_energie_n3,emission_ges_ecs_energie_n3,cout_total_5_usages,cout_chauffage,cout_ecs,cout_refroidissement,cout_eclairage,cout_auxiliaires,type_energie_principale_chauffage,type_generateur_chauffage_principal,type_installation_chauffage_n1,type_emetteur_installation_chauffage_n1,configuration_installation_chauffage_n1,description_installation_chauffage_n1,conso_chauffage_installation_chauffage_n1,surface_chauffee_installation_chauffage_n1,type_generateur_n1_installation_n1,type_energie_generateur_n1_installation_n1,usage_generateur_n1_installation_n1,conso_chauffage_generateur_n1_installation_n1,type_generateur_n2_installation_n1,type_energie_generateur_n2_installation_n1,usage_generateur_n2_installation_n1,conso_chauffage_generateur_n2_installation_n1,type_installation_chauffage_n2,type_emetteur_installation_chauffage_n2,configuration_installation_chauffage_n2,description_installation_chauffage_n2,conso_chauffage_installation_chauffage_n2,surface_chauffee_installation_chauffage_n2,type_generateur_n1_installation_n2,type_energie_generateur_n1_installation_n2,usage_generateur_n1_installation_n2,conso_chauffage_generateur_n1_installation_n2,type_generateur_n2_installation_n2,type_energie_generateur_n2_installation_n2,usage_generateur_n2_installation_n2,conso_chauffage_generateur_n2_installation_n2,type_energie_principale_ecs,type_generateur_chauffage_principal_ecs,besoin_ecs_batiment,besoin_ecs_logement,type_installation_ecs_n1,configuration_installation_ecs_n1,description_installation_ecs_n1,conso_ef_installation_ecs_n1,nombre_logements_desservis_par_installation_ecs_n1,surface_habitable_desservie_par_installation_ecs_n1,type_installation_solaire_n1,production_ecs_solaire_installation_n1,type_generateur_n1_ecs_n1,type_energie_generateur_n1_ecs_n1,usage_generateur_n1_ecs_n1,description_generateur_n1_ecs_n1,volume_stockage_generateur_n1_ecs_n1,cop_generateur_n1_ecs_n1,conso_ef_generateur_n1_ecs_n1,type_generateur_n2_ecs_n1,type_energie_generateur_n2_ecs_n1,usage_generateur_n2_ecs_n1,description_generateur_n2_ecs_n1,volume_stockage_generateur_n2_ecs_n1,cop_generateur_n2_ecs_n1,conso_ef_generateur_n2_ecs_n1,type_ventilation,surface_ventilee,ventilation_posterieure_2012,type_generateur_froid,description_generateur_froid,surface_climatisee,conso_refroidissement_annuel,categorie_enr,systeme_production_electricite_origine_renouvelable,presence_production_pv,surface_totale_capteurs_pv,nombre_module,production_electricite_pv_kwhep_par_an,electricite_pv_autoconsommee,_geopoint,_id,_i,_rand,Logement,annee_construction,annee_reception_DPE,periode_construction
0,2169N0340811D,2023-10-26,2021-06-30,2021-06-30,2021-07-01,2031-06-29,,,,,,,DPE NEUF logement : RT2012,1.0,dpe issu d'une étude thermique réglementaire R...,B,A,inférieur à 400m,H1c,maison,,,2.5,,,1.0,,,,101.6,,,La Picolette 69510 Yzeron,,,Yzeron,69510,69269,69.0,84.0,69269_dt014w,826468.95,6512440.36,0.85,adresse géocodée ban à l'adresse,La Picolette,THURINS,69510,,,,,,moyen,1.0,0.0,1.0,,1.0,,,,,,,,,très bonne,très bonne,,très bonne,insuffisante,0.35,,,,7036.0,69.2,4979.0,1587.3,0.0,456.8,12.9,3059.1,30.1,2164.8,690.2,0.0,198.6,5.6,256.9,2.5,181.8,58.0,0.0,16.7,0.4,Électricité,3331.6,2164.8,690.2,1057.8,687.4,219.1,279.9,181.8,58.0,,,,,,,,,,,,,,,,,,,,,971.3,687.4,219.1,0.0,63.1,1.8,Électricité,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Non affecté,,,,,,,,,,,,,,,,,,,,,,,,,,,,0,,,,,,,,,,0.0,,"45.6998199734346,4.625363966081911",c588bf16e67e78c176917d25fcad4c922cee68d012bf2a...,3059210462324,559683,Neuf,2025.0,2021,Après 2010
1,2169N0093666O,2023-11-08,2021-07-04,2021-07-04,2021-07-05,2031-07-03,,,,,,,DPE NEUF logement : RT2012,1.0,dpe issu d'une étude thermique réglementaire R...,C,C,,,maison,,,2.5,1.0,1.0,1.0,,0.0,92.5,92.5,,,Rue de Saint Marc 69440 Taluyers,,Rue de Saint Marc,Taluyers,69440,69241,69.0,84.0,69241_0326,834613.94,6505805.35,0.59,adresse géocodée ban à l'adresse,Rue de Saint Marc,Taluyers,69440,,,,,,,,,,,,,,,,,,,,très bonne,très bonne,,très bonne,très bonne,0.37,,,,6190.7,66.9,4024.1,2168.0,0.0,437.6,27.3,5433.5,58.7,3638.0,1522.0,0.0,190.2,11.9,1143.9,12.4,781.9,484.4,0.0,13.1,0.8,Gaz naturel,4851.0,3341.0,1510.0,700.0,482.0,218.0,0.0,0.0,0.0,Électricité,309.0,297.0,12.0,211.0,108.0,4.0,18.5,0.0,0.0,,,,,,,,,,,911.0,590.0,222.0,0.0,69.0,4.0,Gaz naturel,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Non affecté,,,,,,,,,,,,,,,,,,,,,,,,,,,,0,,,,,,,,,,0.0,,"45.63852895723026,4.728144023871749",bca0e8c30d0f6b5beeb5c502ae397a341b3d136a40edb6...,3056774622981,941151,Neuf,2025.0,2021,Après 2010
2,2169N0339060U,2023-11-08,2021-07-04,2021-07-04,2021-07-05,2031-07-03,,,,,,,DPE NEUF logement : RT2012,1.0,dpe issu d'une étude thermique réglementaire R...,C,C,,,maison,,,2.5,1.0,1.0,1.0,,0.0,92.5,92.5,,,Rue de Saint Marc 69440 Taluyers,,,Taluyers,69440,69241,69.0,84.0,69241_0326,834613.94,6505805.35,0.71,adresse non géocodée ban car aucune correspond...,128 Rue de Saint Marc,TALUYERS,69440,,,,,,,,,,,,,,,,,,,,très bonne,très bonne,,bonne,très bonne,0.36,,,,5480.0,59.2,3341.0,2129.1,0.0,159.3,10.0,5124.5,55.4,3341.0,1510.0,0.0,69.3,4.3,1119.6,12.0,758.4,483.3,0.0,4.8,0.3,Gaz naturel,4851.0,3341.0,1510.0,699.6,481.8,217.8,0.0,0.0,0.0,Électricité,73.6,0.0,0.0,202.1,0.0,0.0,5.1,0.0,0.0,,,,,,,,,,,799.2,481.8,217.8,0.0,190.2,11.9,Gaz naturel,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Non affecté,,,,,,,,,,,,,,,,,,,,,,,,,,,,0,,,,,,,,,,0.0,,"45.63852895723026,4.728144023871749",d7910d4e7a66fe48d3ccda3975e6682628b85ed2cb5b92...,3056865629379,558136,Neuf,2025.0,2021,Après 2010
3,2169N0093663L,2023-11-08,2021-07-04,2021-07-04,2021-07-05,2031-07-03,,,,,,,DPE NEUF logement : RT2012,1.0,dpe issu d'une étude thermique réglementaire R...,C,C,,,maison,,,2.5,1.0,1.0,1.0,,0.0,92.2,92.2,,,Rue de Saint Marc 69440 Taluyers,,Rue de Saint Marc,Taluyers,69440,69241,69.0,84.0,69241_0326,834613.94,6505805.35,0.59,adresse géocodée ban à l'adresse,Rue de Saint Marc,Taluyers,69440,,,,,,,,,,,,,,,,,,,,très bonne,très bonne,,très bonne,très bonne,0.37,,,,6285.5,68.2,4036.1,2284.8,0.0,437.6,27.3,5481.5,59.4,3650.0,1558.0,0.0,190.2,11.9,1148.9,12.5,784.6,487.7,0.0,13.1,0.8,Gaz naturel,4863.0,3353.0,1510.0,702.0,484.0,218.0,0.0,0.0,0.0,Électricité,345.0,297.0,48.0,224.0,108.0,17.0,18.5,0.0,0.0,,,,,,,,,,,926.0,592.0,235.0,0.0,69.0,4.0,Gaz naturel,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Non affecté,,,,,,,,,,,,,,,,,,,,,,,,,,,,0,,,,,,,,,,0.0,,"45.63852895723026,4.728144023871749",90c28a27cb8fba79c54cb4c59679471dadff7e09e7fcf8...,3057035710716,975618,Neuf,2025.0,2021,Après 2010
4,2169N0093667P,2023-11-08,2021-07-04,2021-07-04,2021-07-05,2031-07-03,,,,,,,DPE NEUF logement : RT2012,1.0,dpe issu d'une étude thermique réglementaire R...,B,B,,,maison,,,2.5,1.0,1.0,1.0,,0.0,108.7,108.7,,,Rue de Saint Marc 69440 Taluyers,,Rue de Saint Marc,Taluyers,69440,69241,69.0,84.0,69241_0326,834613.94,6505805.35,0.59,adresse géocodée ban à l'adresse,Rue de Saint Marc,Taluyers,69440,,,,,,,,,,,,,,,,,,,,très bonne,très bonne,,très bonne,très bonne,0.36,,,,6604.2,60.7,4236.1,2325.4,0.0,531.4,31.3,5897.6,54.3,3954.0,1631.0,0.0,231.0,13.6,1254.6,11.5,865.4,518.8,0.0,15.9,0.9,Gaz naturel,5354.0,3737.0,1617.0,749.0,523.0,226.0,0.0,0.0,0.0,Électricité,231.0,217.0,14.0,198.0,79.0,5.0,21.2,0.0,0.0,,,,,,,,,,,947.0,602.0,231.0,0.0,84.0,5.0,Gaz naturel,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Non affecté,,,,,,,,,,,,,,,,,,,,,,,,,,,,0,,,,,,,,,,0.0,,"45.63852895723026,4.728144023871749",895fc389cc4dc67abba918ae79d90e72540859aa37c009...,3057354515506,11476,Neuf,2025.0,2021,Après 2010
5,2169N0339058S,2023-11-08,2021-07-04,2021-07-04,2021-07-05,2031-07-03,,,,,,,DPE NEUF logement : RT2012,1.0,dpe issu d'une étude thermique réglementaire R...,C,C,,,maison,,,2.5,1.0,1.0,1.0,,0.0,92.2,92.2,,,Rue de Saint Marc 69440 Taluyers,,,Taluyers,69440,69241,69.0,84.0,69241_0326,834613.94,6505805.35,0.71,adresse non géocodée ban car aucune correspond...,128 Rue de Saint Marc,TALUYERS,69440,,,,,,,,,,,,,,,,,,,,très bonne,très bonne,,bonne,très bonne,0.36,,,,5361.0,58.1,3222.0,2129.1,0.0,159.3,10.0,5005.5,54.3,3222.0,1510.0,0.0,69.3,4.3,1092.6,11.8,731.4,483.3,0.0,4.8,0.3,Gaz naturel,4732.0,3222.0,1510.0,682.4,464.6,217.8,0.0,0.0,0.0,Électricité,73.6,0.0,0.0,202.1,0.0,0.0,5.1,0.0,0.0,,,,,,,,,,,782.0,464.6,217.8,0.0,190.2,11.9,Gaz naturel,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Non affecté,,,,,,,,,,,,,,,,,,,,,,,,,,,,0,,,,,,,,,,0.0,,"45.63852895723026,4.728144023871749",f63bb857d003c87a1d56b298cce7f94069d1ecbbd091c2...,3057647373500,470649,Neuf,2025.0,2021,Après 2010
6,2169N0339059T,2023-11-08,2021-07-04,2021-07-04,2021-07-05,2031-07-03,,,,,,,DPE NEUF logement : RT2012,1.0,dpe issu d'une étude thermique réglementaire R...,C,C,,,maison,,,2.5,1.0,1.0,1.0,,0.0,107.8,107.8,,,Rue de Saint Marc 69440 Taluyers,,,Taluyers,69440,69241,69.0,84.0,69241_0326,834613.94,6505805.35,0.71,adresse non géocodée ban car aucune correspond...,128 Rue de Saint Marc,TALUYERS,69440,,,,,,,,,,,,,,,,,,,,très bonne,très bonne,,bonne,très bonne,0.35,,,,5936.9,55.1,3601.0,2280.0,0.0,193.5,11.4,5530.6,51.3,3601.0,1617.0,0.0,84.1,4.9,1205.6,11.1,817.4,517.6,0.0,5.8,0.3,Gaz naturel,5218.0,3601.0,1617.0,738.7,509.8,228.9,0.0,0.0,0.0,Électricité,89.1,0.0,0.0,244.6,0.0,0.0,6.1,0.0,0.0,,,,,,,,,,,852.5,509.8,228.9,0.0,231.0,13.6,Gaz naturel,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Non affecté,,,,,,,,,,,,,,,,,,,,,,,,,,,,0,,,,,,,,,,0.0,,"45.63852895723026,4.728144023871749",4631a15d0b9251e2ad7f90f68513829a39c1233a728afb...,3057773004444,958937,Neuf,2025.0,2021,Après 2010
7,2169N0339073H,2023-11-08,2021-07-04,2021-07-04,2021-07-05,2031-07-03,,,,,,,DPE NEUF logement : RT2012,1.0,dpe issu d'une étude thermique réglementaire R...,C,C,,,maison,,,2.5,1.0,1.0,1.0,,0.0,73.3,73.3,,,Rue de Saint Marc 69440 Taluyers,,,Taluyers,69440,69241,69.0,84.0,69241_0326,834613.94,6505805.35,0.71,adresse non géocodée ban car aucune correspond...,128 Rue de Saint Marc,TALUYERS,69440,,,,,,,,,,,,,,,,,,,,très bonne,très bonne,,bonne,très bonne,0.36,,,,4564.6,62.2,2614.0,1965.5,0.0,129.7,16.2,4250.0,58.0,2614.0,1394.0,0.0,56.4,7.1,926.1,12.5,593.4,446.2,0.0,3.9,0.5,Gaz naturel,4008.0,2614.0,1394.0,578.0,377.0,201.0,0.0,0.0,0.0,Électricité,63.5,0.0,0.0,174.2,0.0,0.0,4.3,0.0,0.0,,,,,,,,,,,666.1,377.0,201.0,0.0,154.9,19.4,Gaz naturel,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Non affecté,,,,,,,,,,,,,,,,,,,,,,,,,,,,0,,,,,,,,,,0.0,,"45.63852895723026,4.728144023871749",655a0e3fc899e2df0c944474a8c1096736716a2ba46367...,3057896035030,527194,Neuf,2025.0,2021,Après 2010
8,2169N0093670S,2023-11-08,2021-07-04,2021-07-04,2021-07-05,2031-07-03,,,,,,,DPE NEUF logement : RT2012,1.0,dpe issu d'une étude thermique réglementaire R...,C,C,,,maison,,,2.5,1.0,1.0,1.0,,0.0,92.5,92.5,,,Rue de Saint Marc 69440 Taluyers,,Rue de Saint Marc,Taluyers,69440,69241,69.0,84.0,69241_0326,834613.94,6505805.35,0.59,adresse géocodée ban à l'adresse,Rue de Saint Marc,Taluyers,69440,,,,,,,,,,,,,,,,,,,,très bonne,très bonne,,très bonne,très bonne,0.36,,,,6027.6,65.2,3888.4,2168.0,0.0,437.6,0.0,5362.6,58.0,3579.0,1522.0,0.0,190.2,0.0,1138.5,12.3,777.2,484.4,0.0,13.1,0.0,Gaz naturel,4851.0,3341.0,1510.0,700.0,482.0,218.0,0.0,0.0,0.0,Électricité,250.0,238.0,12.0,186.0,87.0,4.0,17.7,0.0,0.0,,,,,,,,,,,886.0,569.0,222.0,0.0,69.0,0.0,Gaz naturel,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Non affecté,,,,,,,,,,,,,,,,,,,,,,,,,,,,0,,,,,,,,,,0.0,,"45.63852895723026,4.728144023871749",0af0ef2e9cb6a876cee239fd77a14f9f623f219cba67bf...,3058123776885,660312,Neuf,2025.0,2021,Après 2010
9,2169N0339066A,2023-11-08,2021-07-04,2021-07-04,2021-07-05,2031-07-03,,,,,,,DPE NEUF logement : RT2012,1.0,dpe issu d'une étude thermique réglementaire R...,C,C,,,maison,,,2.5,1.0,1.0,1.0,,0.0,92.5,92.5,,,Rue de Saint Marc 69440 Taluyers,,,Taluyers,69440,69241,69.0,84.0,69241_0326,834613.94,6505805.35,0.71,adresse non géocodée ban car aucune correspond...,128 Rue de Saint Marc,TALUYERS,69440,,,,,,,,,,,,,,,,,,,,très bonne,très bonne,,bonne,très bonne,0.36,,,,5393.6,58.2,3282.0,2129.1,0.0,159.3,0.0,5053.6,54.6,3282.0,1510.0,0.0,69.3,0.0,1105.5,12.0,745.0,483.3,0.0,4.8,0.0,Gaz naturel,4792.0,3282.0,1510.0,691.1,473.3,217.8,0.0,0.0,0.0,Électricité,69.3,0.0,0.0,190.2,0.0,0.0,4.8,0.0,0.0,,,,,,,,,,,786.3,473.3,217.8,0.0,190.2,0.0,Gaz naturel,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Non affecté,,,,,,,,,,,,,,,,,,,,,,,,,,,,0,,,,,,,,,,0.0,,"45.63852895723026,4.728144023871749",fa97eb201f113ff42e0e28a44baf3f0b9de0362ae317f9...,3058449157128,601513,Neuf,2025.0,2021,Après 2010


Gestion des valeurs manquantes

In [32]:
# Identifier et supprimer les colonnes entièrement vides
empty_cols = logements_merged.columns[logements_merged.isna().all()]
logements_merged = logements_merged.drop(columns=empty_cols)

# Supprimer colonnes inutiles
cols_to_drop = ["_geopoint", "_id", "_rand", "_i"]
existing_cols = [col for col in cols_to_drop if col in logements_merged.columns]
logements_merged = logements_merged.drop(columns=existing_cols)

print(f"✅ {len(empty_cols)} colonnes vides supprimées.")
print(f"✅ Colonnes inutiles supprimées : {existing_cols}")
print(f"📊 Nouvelle dimension du DF : {logements_merged.shape}")

✅ 7 colonnes vides supprimées.
✅ Colonnes inutiles supprimées : ['_geopoint', '_id', '_rand', '_i']
📊 Nouvelle dimension du DF : (470945, 200)


In [33]:
# Pourcentage de valeurs manquantes par colonne
missing_ratio = logements_merged.isna().mean().sort_values(ascending=False)
print(missing_ratio.head(100))  # affiche les 20 colonnes les plus vides

numero_rpls_logement                             0.999904
conso_chauffage_generateur_n2_installation_n2    0.998299
type_energie_generateur_n2_installation_n2       0.998161
usage_generateur_n2_installation_n2              0.998161
type_generateur_n2_installation_n2               0.998161
                                                   ...   
type_generateur_chauffage_principal_ecs          0.109238
conso_chauffage_generateur_n1_installation_n1    0.108314
conso_chauffage_installation_chauffage_n1        0.107320
usage_generateur_n1_installation_n1              0.107308
configuration_installation_chauffage_n1          0.107308
Length: 100, dtype: float64


In [34]:
print("📊 Taux moyen de valeurs manquantes avant nettoyage :")
print(f"{logements_merged.isna().mean().mean():.2%}")

# Sélection des colonnes à supprimer (plus de 80% de valeurs manquantes)
cols_to_drop = missing_ratio[missing_ratio > 0.5].index
print(f"📉 Colonnes à supprimer (>{50}% de valeurs manquantes) : {len(cols_to_drop)}")

# Suppression de ces colonnes
logements_merged = logements_merged.drop(columns=cols_to_drop)
print(f"✅ Colonnes supprimées : {len(cols_to_drop)}")
print(f"📊 Nouvelle dimension du DF : {logements_merged.shape}")

print("📊 Taux moyen de valeurs manquantes après nettoyage :")
print(f"{logements_merged.isna().mean().mean():.2%}")

📊 Taux moyen de valeurs manquantes avant nettoyage :
31.91%
📉 Colonnes à supprimer (>50% de valeurs manquantes) : 57
✅ Colonnes supprimées : 57
📊 Nouvelle dimension du DF : (470945, 143)
📊 Taux moyen de valeurs manquantes après nettoyage :
8.20%


In [35]:
# --- 💾 Sauvegarde du jeu de données nettoyé ---
output_path = f"../data/donnees_dpe_{DEPT_CODE}_clean.csv"
logements_merged.to_csv(output_path, index=False)

print("\n✅ Données nettoyées sauvegardées avec succès.")
print(f"📁 Fichier exporté : {output_path}")

# --- 🔍 Vérification rapide ---
print("\nAperçu final :")
display(logements_merged.head(3))

print("\nTypes de données :")
print(logements_merged.dtypes.head(10))

print("\nTaux de valeurs manquantes par colonne (top 10) :")
print(logements_merged.isna().mean().sort_values(ascending=False).head(10))



✅ Données nettoyées sauvegardées avec succès.
📁 Fichier exporté : ../data/donnees_dpe_69_clean.csv

Aperçu final :


Unnamed: 0,numero_dpe,date_derniere_modification_dpe,date_visite_diagnostiqueur,date_etablissement_dpe,date_reception_dpe,date_fin_validite_dpe,modele_dpe,version_dpe,methode_application_dpe,etiquette_dpe,etiquette_ges,classe_altitude,zone_climatique,type_batiment,type_installation_chauffage,type_installation_ecs,hauteur_sous_plafond,nombre_appartement,nombre_niveau_logement,surface_habitable_immeuble,surface_habitable_logement,classe_inertie_batiment,adresse_ban,numero_voie_ban,nom_rue_ban,nom_commune_ban,code_postal_ban,code_insee_ban,code_departement_ban,code_region_ban,identifiant_ban,coordonnee_cartographique_x_ban,coordonnee_cartographique_y_ban,score_ban,statut_geocodage,adresse_brut,nom_commune_brut,code_postal_brut,numero_etage_appartement,complement_adresse_batiment,complement_adresse_logement,indicateur_confort_ete,protection_solaire_exterieure,logement_traversant,presence_brasseur_air,inertie_lourde,isolation_toiture,deperditions_enveloppe,deperditions_ponts_thermiques,deperditions_murs,deperditions_planchers_hauts,deperditions_planchers_bas,deperditions_portes,deperditions_baies_vitrees,deperditions_renouvellement_air,qualite_isolation_enveloppe,qualite_isolation_murs,qualite_isolation_plancher_bas,qualite_isolation_menuiseries,ubat_w_par_m2_k,besoin_chauffage,besoin_ecs,besoin_refroidissement,conso_5_usages_ep,conso_5_usages_par_m2_ep,conso_chauffage_ep,conso_ecs_ep,conso_refroidissement_ep,conso_eclairage_ep,conso_auxiliaires_ep,conso_5_usages_ef,conso_5_usages_par_m2_ef,conso_chauffage_ef,conso_ecs_ef,conso_refroidissement_ef,conso_eclairage_ef,conso_auxiliaires_ef,emission_ges_5_usages,emission_ges_5_usages_par_m2,emission_ges_chauffage,emission_ges_ecs,emission_ges_refroidissement,emission_ges_eclairage,emission_ges_auxiliaires,type_energie_n1,conso_5_usages_ef_energie_n1,conso_chauffage_ef_energie_n1,conso_ecs_ef_energie_n1,cout_total_5_usages_energie_n1,cout_chauffage_energie_n1,cout_ecs_energie_n1,emission_ges_5_usages_energie_n1,emission_ges_chauffage_energie_n1,emission_ges_ecs_energie_n1,type_energie_n2,conso_5_usages_ef_energie_n2,conso_chauffage_ef_energie_n2,conso_ecs_ef_energie_n2,cout_total_5_usages_energie_n2,cout_chauffage_energie_n2,cout_ecs_energie_n2,emission_ges_5_usages_energie_n2,emission_ges_chauffage_energie_n2,emission_ges_ecs_energie_n2,cout_total_5_usages,cout_chauffage,cout_ecs,cout_refroidissement,cout_eclairage,cout_auxiliaires,type_energie_principale_chauffage,type_generateur_chauffage_principal,type_installation_chauffage_n1,type_emetteur_installation_chauffage_n1,configuration_installation_chauffage_n1,description_installation_chauffage_n1,conso_chauffage_installation_chauffage_n1,surface_chauffee_installation_chauffage_n1,type_generateur_n1_installation_n1,type_energie_generateur_n1_installation_n1,usage_generateur_n1_installation_n1,conso_chauffage_generateur_n1_installation_n1,type_energie_principale_ecs,type_generateur_chauffage_principal_ecs,type_installation_ecs_n1,configuration_installation_ecs_n1,description_installation_ecs_n1,conso_ef_installation_ecs_n1,nombre_logements_desservis_par_installation_ecs_n1,surface_habitable_desservie_par_installation_ecs_n1,type_installation_solaire_n1,type_generateur_n1_ecs_n1,type_energie_generateur_n1_ecs_n1,usage_generateur_n1_ecs_n1,description_generateur_n1_ecs_n1,volume_stockage_generateur_n1_ecs_n1,conso_ef_generateur_n1_ecs_n1,ventilation_posterieure_2012,production_electricite_pv_kwhep_par_an,Logement,annee_construction,annee_reception_DPE,periode_construction
0,2169N0340811D,2023-10-26,2021-06-30,2021-06-30,2021-07-01,2031-06-29,DPE NEUF logement : RT2012,1.0,dpe issu d'une étude thermique réglementaire R...,B,A,inférieur à 400m,H1c,maison,,,2.5,,1.0,,101.6,,La Picolette 69510 Yzeron,,,Yzeron,69510,69269,69.0,84.0,69269_dt014w,826468.95,6512440.36,0.85,adresse géocodée ban à l'adresse,La Picolette,THURINS,69510,,,,moyen,1.0,0.0,1.0,,1.0,,,,,,,,,très bonne,très bonne,très bonne,insuffisante,0.35,,,,7036.0,69.2,4979.0,1587.3,0.0,456.8,12.9,3059.1,30.1,2164.8,690.2,0.0,198.6,5.6,256.9,2.5,181.8,58.0,0.0,16.7,0.4,Électricité,3331.6,2164.8,690.2,1057.8,687.4,219.1,279.9,181.8,58.0,,,,,,,,,,,971.3,687.4,219.1,0.0,63.1,1.8,Électricité,,,,,,,,,,,,Non affecté,,,,,,,,,,,,,,,0,0.0,Neuf,2025.0,2021,Après 2010
1,2169N0093666O,2023-11-08,2021-07-04,2021-07-04,2021-07-05,2031-07-03,DPE NEUF logement : RT2012,1.0,dpe issu d'une étude thermique réglementaire R...,C,C,,,maison,,,2.5,1.0,1.0,92.5,92.5,,Rue de Saint Marc 69440 Taluyers,,Rue de Saint Marc,Taluyers,69440,69241,69.0,84.0,69241_0326,834613.94,6505805.35,0.59,adresse géocodée ban à l'adresse,Rue de Saint Marc,Taluyers,69440,,,,,,,,,,,,,,,,,,très bonne,très bonne,très bonne,très bonne,0.37,,,,6190.7,66.9,4024.1,2168.0,0.0,437.6,27.3,5433.5,58.7,3638.0,1522.0,0.0,190.2,11.9,1143.9,12.4,781.9,484.4,0.0,13.1,0.8,Gaz naturel,4851.0,3341.0,1510.0,700.0,482.0,218.0,0.0,0.0,0.0,Électricité,309.0,297.0,12.0,211.0,108.0,4.0,18.5,0.0,0.0,911.0,590.0,222.0,0.0,69.0,4.0,Gaz naturel,,,,,,,,,,,,Non affecté,,,,,,,,,,,,,,,0,0.0,Neuf,2025.0,2021,Après 2010
2,2169N0339060U,2023-11-08,2021-07-04,2021-07-04,2021-07-05,2031-07-03,DPE NEUF logement : RT2012,1.0,dpe issu d'une étude thermique réglementaire R...,C,C,,,maison,,,2.5,1.0,1.0,92.5,92.5,,Rue de Saint Marc 69440 Taluyers,,,Taluyers,69440,69241,69.0,84.0,69241_0326,834613.94,6505805.35,0.71,adresse non géocodée ban car aucune correspond...,128 Rue de Saint Marc,TALUYERS,69440,,,,,,,,,,,,,,,,,,très bonne,très bonne,bonne,très bonne,0.36,,,,5480.0,59.2,3341.0,2129.1,0.0,159.3,10.0,5124.5,55.4,3341.0,1510.0,0.0,69.3,4.3,1119.6,12.0,758.4,483.3,0.0,4.8,0.3,Gaz naturel,4851.0,3341.0,1510.0,699.6,481.8,217.8,0.0,0.0,0.0,Électricité,73.6,0.0,0.0,202.1,0.0,0.0,5.1,0.0,0.0,799.2,481.8,217.8,0.0,190.2,11.9,Gaz naturel,,,,,,,,,,,,Non affecté,,,,,,,,,,,,,,,0,0.0,Neuf,2025.0,2021,Après 2010



Types de données :
numero_dpe                         object
date_derniere_modification_dpe     object
date_visite_diagnostiqueur         object
date_etablissement_dpe             object
date_reception_dpe                 object
date_fin_validite_dpe              object
modele_dpe                         object
version_dpe                       float64
methode_application_dpe            object
etiquette_dpe                      object
dtype: object

Taux de valeurs manquantes par colonne (top 10) :
surface_habitable_immeuble        0.443022
inertie_lourde                    0.436110
isolation_toiture                 0.432099
presence_brasseur_air             0.428797
protection_solaire_exterieure     0.428797
logement_traversant               0.428797
indicateur_confort_ete            0.426942
annee_construction                0.390712
cout_chauffage_energie_n2         0.310480
cout_total_5_usages_energie_n2    0.310480
dtype: float64
