## Import of data on new housing (post-2021), with DPE (Energy Performance Diagnosis) and GES (Greenhouse Gas emissions) scores

### Library imports

In [41]:
import os
import requests
import pandas as pd
from dotenv import load_dotenv

In [42]:
# Let python-dotenv populate the process environment, then read the ADEME
# API key from it. API_KEY is None when the variable is not defined.
load_dotenv()
API_KEY = os.environ.get('ADEME_API_KEY')

### Connecting to ADEME's API (and testing the response's status code)

In [48]:
# Sample request against the ADEME "dpe02neuf" dataset: used to check that the
# endpoint and the API key work before running the full import below.
# No 'size' parameter is sent here, so the API returns its default page size.
url = 'https://data.ademe.fr/data-fair/api/v1/datasets/dpe02neuf/lines'
headers = {
    'accept': 'application/json',
    'x-apikey': API_KEY,
}
params = {
    # Fields to return for each record
    'select': '_geopoint,adresse_ban,numero_dpe,etiquette_dpe,etiquette_ges,conso_5_usages_par_m2_ef,emission_ges_5_usages_par_m2,categorie_enr,date_visite_diagnostiqueur',
    # Query-string filter on the department code
    'qs': 'code_departement_ban:75',
}
# requests never times out by default; an explicit timeout keeps this cell
# from hanging forever if the server stops responding.
response = requests.get(url, headers=headers, params=params, timeout=30)
# Last expression of the cell: displays the HTTP status code (200 means OK)
response.status_code

200

### Checking the data returned by the API

In [49]:
# Show the records of the sample response as a table
sample_rows = response.json()["results"]
pd.DataFrame(sample_rows)

Unnamed: 0,categorie_enr,adresse_ban,conso_5_usages_par_m2_ef,etiquette_ges,_geopoint,date_visite_diagnostiqueur,emission_ges_5_usages_par_m2,numero_dpe,etiquette_dpe,_score
0,Il existe plusieurs descriptifs ENR,8 Rue des Ardennes 75019 Paris,61.1,C,"48.88783098977528,2.3865770061142113",2022-11-17,12.3,2375N1958716O,C,
1,panneaux solaires photovoltaïques,122 Rue Damrémont 75018 Paris,58.8,C,"48.895067036589474,2.3375080036996607",2022-01-19,12.9,2275N0166221K,C,
2,,42 Avenue du Général Leclerc 75014 Paris,80.8,C,"48.83111701786987,2.3294309826496624",2022-07-17,17.2,2275N1629968J,C,
3,pompe à chaleur,16 Rue Germaine Krull 75013 Paris,33.0,A,"48.816886994730005,2.3589389686490447",2024-12-09,2.0,2575N0137735X,A,
4,,37 Rue Saint-Lambert 75015 Paris,96.0,C,"48.836651033793174,2.2930699414075817",2021-07-19,15.0,2175N0503783N,C,
5,,59b Rue de la Chapelle 75018 Paris,53.5,C,"48.89535897420391,2.3585060634025963",2022-09-07,19.7,2275N2041576L,C,
6,,182 Rue d'Aubervilliers 75019 Paris,80.3,C,"48.89502602383224,2.371699011130133",2022-06-08,17.5,2275N1346180L,C,
7,,182 Rue d'Aubervilliers 75019 Paris,73.1,C,"48.89502602383224,2.371699011130133",2022-06-08,15.9,2275N1346243W,C,
8,panneaux solaires thermiques,48 Avenue de Breteuil 75007 Paris,57.0,A,"48.84995400321463,2.311448012963403",2021-10-03,1.0,2175N0500056E,B,
9,,182 Rue d'Aubervilliers 75019 Paris,89.8,C,"48.89502602383224,2.371699011130133",2022-06-08,19.7,2275N1345935A,C,


### Main function importing all data from ADEME's database

In [52]:
def get_all_dpe_data(department='75', page_size=10000, timeout=60):
    """Download all DPE records of one department from ADEME's `dpe02neuf` dataset.

    The `lines` endpoint is queried page by page and the rows are accumulated
    in memory. Fetching is best-effort: on a non-200 status or a body that is
    not valid JSON, the problem is printed, the loop stops, and the rows
    collected so far are returned.

    Args:
        department: Value matched against `code_departement_ban` in the `qs`
            filter. Defaults to '75'.
        page_size: Number of rows requested per call (`size` parameter).
            Defaults to 10000.
        timeout: Timeout in seconds passed to `requests.get`, so a stalled
            connection raises instead of blocking forever.

    Returns:
        pandas.DataFrame: one row per fetched record, restricted to the
        requested columns, in the order of the `columns` list below. The
        frame is empty (0 rows, same columns) when nothing could be fetched.

    Raises:
        Any exception raised by `requests.get` itself (connection failure,
        timeout, ...) is not caught here and propagates to the caller.
    """
    columns = ['_geopoint', 'adresse_ban', 'numero_dpe', 'etiquette_dpe', 'etiquette_ges',
               'conso_5_usages_par_m2_ef', 'emission_ges_5_usages_par_m2', 'categorie_enr',
               'date_visite_diagnostiqueur']

    # Loop-invariant request settings, built once
    url = 'https://data.ademe.fr/data-fair/api/v1/datasets/dpe02neuf/lines'
    headers = {
        'accept': 'application/json',
        'x-apikey': API_KEY
    }
    params = {
        'size': page_size,
        'skip': 0,
        # Derived from `columns` so the requested fields and the DataFrame
        # columns cannot drift apart
        'select': ','.join(columns),
        'qs': f'code_departement_ban:{department}'
    }

    all_results = []
    offset = 0  # Number of rows fetched so far

    while True:
        response = requests.get(url, headers=headers, params=params, timeout=timeout)

        # Checking HTTP response code and displaying error if any
        if response.status_code != 200:
            print(f"HTTP Error: {response.status_code}")
            print(f"Response: {response.text}")
            break

        try:
            data = response.json()
        except ValueError:
            # ValueError is the common base class of json.JSONDecodeError and
            # of the decode error raised by requests, so no extra import is needed
            print(f"JSON decoding error on offset {offset}")
            print(f"Response: {response.text[:200]}...")  # Displaying the beginning of the response
            break

        batch = data.get("results", [])
        if not batch:  # No more results
            break

        all_results.extend(batch)
        offset += len(batch)
        print(f"{len(all_results)} recordings fetched...")

        # If the total number is known, display it
        total = data.get("total") or 0
        if total > 0:
            print(f"Progress: {len(all_results)}/{total}")

        # A short page means this was the last one
        if len(batch) < page_size:
            break
        # Everything announced by the API has been received
        if total > 0 and len(all_results) >= total:
            break

        # NOTE(review): data-fair is expected to return a `next` URL for deep
        # pagination and may reject large `skip` values - confirm against the
        # API documentation. The link is followed when present; otherwise we
        # fall back to offset-based paging.
        next_url = data.get("next")
        if next_url:
            # The link is assumed to already embed the query parameters
            url, params = next_url, None
        else:
            params['skip'] = offset

    print(f"\nFetching completed. Total: {len(all_results)} recordings")
    return pd.DataFrame(all_results, columns=columns)

# Run the full import and keep the result in `df`, which the following cells
# inspect and export; then print the (rows, columns) shape as a quick check.
df = get_all_dpe_data()
print(f"\nFinal DataFrame's shape: {df.shape}")

6096 recordings fetched...
Progress: 6096/6096

Fetching completed. Total: 6096 recordings

Final DataFrame's shape: (6096, 9)


### Checking imported data

In [54]:
# Summary of the imported table: column names, non-null counts and dtypes
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6096 entries, 0 to 6095
Data columns (total 9 columns):
 #   Column                        Non-Null Count  Dtype  
---  ------                        --------------  -----  
 0   _geopoint                     6096 non-null   object 
 1   adresse_ban                   6096 non-null   object 
 2   numero_dpe                    6096 non-null   object 
 3   etiquette_dpe                 6096 non-null   object 
 4   etiquette_ges                 6096 non-null   object 
 5   conso_5_usages_par_m2_ef      6096 non-null   float64
 6   emission_ges_5_usages_par_m2  6096 non-null   float64
 7   categorie_enr                 3212 non-null   object 
 8   date_visite_diagnostiqueur    6096 non-null   object 
dtypes: float64(2), object(7)
memory usage: 428.8+ KB


In [55]:
# Preview of the first rows of the imported table
df.head()

Unnamed: 0,_geopoint,adresse_ban,numero_dpe,etiquette_dpe,etiquette_ges,conso_5_usages_par_m2_ef,emission_ges_5_usages_par_m2,categorie_enr,date_visite_diagnostiqueur
0,"48.88783098977528,2.3865770061142113",8 Rue des Ardennes 75019 Paris,2375N1958716O,C,C,61.1,12.3,Il existe plusieurs descriptifs ENR,2022-11-17
1,"48.895067036589474,2.3375080036996607",122 Rue Damrémont 75018 Paris,2275N0166221K,C,C,58.8,12.9,panneaux solaires photovoltaïques,2022-01-19
2,"48.83111701786987,2.3294309826496624",42 Avenue du Général Leclerc 75014 Paris,2275N1629968J,C,C,80.8,17.2,,2022-07-17
3,"48.816886994730005,2.3589389686490447",16 Rue Germaine Krull 75013 Paris,2575N0137735X,A,A,33.0,2.0,pompe à chaleur,2024-12-09
4,"48.836651033793174,2.2930699414075817",37 Rue Saint-Lambert 75015 Paris,2175N0503783N,C,C,96.0,15.0,,2021-07-19


### Exporting data to a .csv

In [56]:
# Write the imported table to a CSV file in the current working directory.
# NOTE(review): with pandas' defaults the row index is written as a leading
# unnamed column; pass index=False if downstream readers do not expect it.
df.to_csv("dpe_logements_neufs_21_ademe.csv")