## Import of data related to vehicles circulating in Paris from 2014 to 2022 (based on their type, ecological index, fuel type).

### Library imports

In [1]:
import requests
import pandas as pd
import time

### Connecting to DiDo's API (and testing the response's status code)

In [20]:
code_commune = 75101

# Rows endpoint of the DiDo datafile queried throughout this notebook (database url).
url = 'https://data.statistiques.developpement-durable.gouv.fr/dido/api/v1/datafiles/37dd7056-6c4d-44e0-a720-32d4064f9a26/rows'

# Request only the first page (20 rows) of the "2023-05" millesime.
params = dict(millesime="2023-05", page=1, pageSize=20)

response = requests.get(url, params=params)
print(response.status_code)
# If the status is not 200, uncomment to inspect the error payload:
# print(response.json()["message"])  # the error message
# print(response.json()["errors"])  # all the types of errors

200


### Checking the data returned by the API

In [21]:
# Build a DataFrame from the "data" entry of the JSON response to preview it.
pd.DataFrame(response.json()["data"])

Unnamed: 0,CLASSE_VEHICULE,CATEGORIE_VEHICULE,CARBURANT,CRITAIR,PARC_2011,PARC_2012,PARC_2013,PARC_2014,PARC_2015,PARC_2016,PARC_2017,PARC_2018,PARC_2019,PARC_2020,PARC_2021,PARC_2022,COMMUNE_CODE,COMMUNE_LIBELLE
0,vp,Véhicule particulier,Diesel,Crit'Air 3,1469,1537,1573,1573,1652,1638,1563,1516,1410,1326,1230,1136,13110,Trets
1,vp,Véhicule particulier,Diesel,Crit'Air 4,1176,1161,1122,1090,1030,1001,927,834,778,641,551,463,13110,Trets
2,vp,Véhicule particulier,Diesel,Crit'Air 5,477,431,389,378,355,286,262,226,201,169,147,116,13110,Trets
3,vp,Véhicule particulier,Diesel,Non classé,828,703,607,527,457,377,300,256,189,129,96,73,13110,Trets
4,vp,Véhicule particulier,Diesel HNR,Crit'Air 2,0,0,1,6,7,7,8,6,7,9,14,24,13110,Trets
5,vp,Véhicule particulier,Hybride rechargeable,Crit'Air 1,3,4,3,3,4,6,8,9,16,14,25,49,13110,Trets
6,vp,Véhicule particulier,Electrique et hydrogène,Crit'Air E,0,0,0,3,5,7,12,13,18,24,68,146,13110,Trets
7,vp,Véhicule particulier,Essence,Crit'Air 1,44,141,222,297,405,525,684,925,1148,1372,1587,1732,13110,Trets
8,vp,Véhicule particulier,Essence,Crit'Air 2,477,488,483,488,463,460,457,464,448,449,442,424,13110,Trets
9,vp,Véhicule particulier,Essence,Crit'Air 3,946,898,838,821,791,771,690,656,616,525,439,422,13110,Trets


### Main function importing all needed data from DiDo's database
#### (segmenting API calls by Paris arrondissement and fetching each segment in batches, to keep every API request small)

In [None]:
def get_all_Paris_data(commune_codes=None, batch_size=100, pause=0.2):
    """Download the vehicle-fleet rows of the Paris arrondissements from DiDo.

    The rows are fetched one commune code at a time and page by page, then
    concatenated into a single DataFrame restricted to the 2014-2022 fleet
    columns. The result is also written to ``newapi_final.csv``, and an
    intermediate ``newapi_temp_<n>.csv`` is written each time another
    50000 rows have been collected.

    Args:
        commune_codes: Iterable of commune codes (as used in the
            ``COMMUNE_CODE`` column) to download. Defaults to the 20 Paris
            arrondissements, ``'75101'`` to ``'75120'``.
        batch_size: Number of rows requested per page.
        pause: Seconds to wait between two page requests of a segment.

    Returns:
        A pandas DataFrame with one row per API record and the columns
        listed in ``columns`` below (empty if nothing could be downloaded).

    A failed request (network error or non-200 status) is reported on
    stdout and ends the current segment; the remaining segments are still
    processed.
    """
    columns = ['CLASSE_VEHICULE', 'CATEGORIE_VEHICULE', 'CARBURANT', 'CRITAIR', 'PARC_2014', 'PARC_2015', 'PARC_2016', 'PARC_2017', 'PARC_2018', 'PARC_2019', 'PARC_2020', 'PARC_2021', 'PARC_2022', 'COMMUNE_CODE', 'COMMUNE_LIBELLE']

    # Segmentation: one segment per arrondissement. The yearly figures are
    # columns (PARC_YYYY) of each row, so no per-year segmentation is needed.
    if commune_codes is None:
        commune_codes = [str(code) for code in range(75101, 75121)]
    else:
        commune_codes = list(commune_codes)

    url = "https://data.statistiques.developpement-durable.gouv.fr/dido/api/v1/datafiles/37dd7056-6c4d-44e0-a720-32d4064f9a26/rows"
    checkpoint_step = 50000

    frames = []  # one DataFrame per downloaded page, concatenated once at the end
    total_rows = 0
    next_checkpoint = checkpoint_step
    total_segments = len(commune_codes)

    for segment_count, code in enumerate(commune_codes, start=1):
        print(f"Segment {segment_count}/{total_segments} : code {code}")
        page = 1
        while True:
            params = {
                "millesime": "2023-05",
                # Same pagination parameters as the request tested earlier
                # in this notebook.
                "page": page,
                "pageSize": batch_size,
                # Row filter on the COMMUNE_CODE column.
                # NOTE(review): assumes DiDo's `COLUMN=operator:value` filter
                # syntax — confirm against the DiDo API documentation.
                "COMMUNE_CODE": f"eq:{code}",
            }
            try:
                # The timeout keeps a stalled connection from hanging the run.
                response = requests.get(url, params=params, timeout=30)
            except requests.RequestException as exc:
                print(f"Erreur réseau : {exc}")
                break
            if response.status_code != 200:
                print(f"Erreur HTTP : {response.status_code}")
                print(response.text)
                break

            batch = response.json().get("data", [])
            if not batch:
                # An empty first page means the segment has no rows at all;
                # an empty later page only marks the end of the pagination.
                if page == 1:
                    print("Aucune donnée pour ce segment")
                break

            frames.append(pd.DataFrame(batch))
            total_rows += len(batch)
            print(f"Total enregistrements : {total_rows}")

            # Intermediate save each time another `checkpoint_step` rows
            # have been collected.
            if total_rows >= next_checkpoint:
                temp_filename = f"newapi_temp_{total_rows}.csv"
                partial_df = pd.concat(frames, ignore_index=True).reindex(columns=columns)
                partial_df.to_csv(temp_filename, index=False)
                print(f"Sauvegarde intermédiaire : {temp_filename}")
                while next_checkpoint <= total_rows:
                    next_checkpoint += checkpoint_step

            if len(batch) < batch_size:
                break  # a short page is the last page of the segment
            page += 1
            time.sleep(pause)  # avoid overloading the API

    if frames:
        # Keep only the columns of interest, in the declared order.
        final_df = pd.concat(frames, ignore_index=True).reindex(columns=columns)
    else:
        final_df = pd.DataFrame(columns=columns)

    final_df.to_csv("newapi_final.csv", index=False)
    print(f"Récupération terminée. Total : {len(final_df)} lignes")
    return final_df

# Run the function and keep the returned DataFrame
df = get_all_Paris_data()

### Checking imported data

In [None]:
# Print a summary of the imported DataFrame.
df.info()

In [None]:
# Show the first rows of the imported DataFrame.
df.head()

### Exporting data to a .csv

In [38]:
# Export without the positional index, consistent with the CSV files written
# inside get_all_Paris_data(); otherwise the file gets an extra unnamed column.
df.to_csv("parc_vehicules_au_niveau_communal_Statistiques_Developpement_Durable.csv", index=False)