## Parisian population evolution from 2014 to 2024

### Library imports

In [16]:
import pandas as pd
import json
import requests

### Testing a call to Melodi (Insee's API) and checking the response status code.

In [17]:
# Probe request: a single observation (SEX and AGE both set to the "_T"
# code) for GEO 2023-DEP-75 and the year 2025, used only to confirm that
# the endpoint answers.
url = "https://api.insee.fr/melodi/data/DS_ESTIMATION_POPULATION"
parameters = {"TIME_PERIOD" : "2025", "SEX" : "_T", "AGE" : "_T", "GEO" : "2023-DEP-75"}
# requests applies no timeout by default; bound the wait so this cell cannot
# hang forever if the server stops responding.
response = requests.get(url, params=parameters, timeout=30)
print(response.status_code)

200


### Checking the JSON structure.

In [18]:
# Pretty-print the decoded payload; ensure_ascii=False keeps accented
# characters readable instead of \u-escaped.
print(
    json.dumps(
        response.json(),
        ensure_ascii=False,
        indent=2,
    )
)

{
  "identifier": "DS_ESTIMATION_POPULATION",
  "title": {
    "fr": "Les estimations de population",
    "en": "Population estimate"
  },
  "publisher": {
    "id": "INSEE",
    "label": [
      {
        "lang": "fr",
        "content": "Institut national de la statistique et des etudes economiques (INSEE)"
      },
      {
        "lang": "en",
        "content": "National Institute of Statistics and Economic Studies"
      }
    ]
  },
  "observations": [
    {
      "dimensions": {
        "GEO": "2023-DEP-75",
        "SEX": "_T",
        "TIME_PERIOD": "2025",
        "EP_MEASURE": "POP",
        "AGE": "_T"
      },
      "attributes": {
        "OBS_STATUS_FR": "PROV"
      },
      "measures": {
        "OBS_VALUE_NIVEAU": {
          "value": 2048472.0
        }
      }
    }
  ],
  "paging": {}
}


### DataFrame creation based on the JSON structure returned by the API.

In [19]:
# Pull the list of observations out of the decoded API payload
observations = response.json()["observations"]

# Turn each nested observation into one flat record
flat_obs = []
for observation in observations:
    # Start from the dimensions, then layer the attributes on top
    # (on a key clash the attribute value wins)
    record = dict(observation.get("dimensions", {}))
    record.update(observation.get("attributes", {}))
    # A measure may be wrapped as {"value": ...}; unwrap it when it is,
    # otherwise keep the raw object untouched
    for measure_name, measure in observation.get("measures", {}).items():
        is_wrapped = isinstance(measure, dict) and "value" in measure
        record[measure_name] = measure["value"] if is_wrapped else measure
    flat_obs.append(record)

# Build the DataFrame from the flattened records
df = pd.DataFrame(flat_obs)
print(df)

           GEO SEX TIME_PERIOD EP_MEASURE AGE OBS_STATUS_FR  OBS_VALUE_NIVEAU
0  2023-DEP-75  _T        2025        POP  _T          PROV         2048472.0


In [20]:
# DataFrame.info() writes its report to stdout itself and returns None,
# so wrapping it in print() only appends a stray "None" line.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1 entries, 0 to 0
Data columns (total 7 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   GEO               1 non-null      object 
 1   SEX               1 non-null      object 
 2   TIME_PERIOD       1 non-null      object 
 3   EP_MEASURE        1 non-null      object 
 4   AGE               1 non-null      object 
 5   OBS_STATUS_FR     1 non-null      object 
 6   OBS_VALUE_NIVEAU  1 non-null      float64
dtypes: float64(1), object(6)
memory usage: 188.0+ bytes
None


### Repeating the process for more data (a 10-year period, 2014-2024; total population segmented by age)

In [22]:
url = "https://api.insee.fr/melodi/data/DS_ESTIMATION_POPULATION"
# One TIME_PERIOD value per year from 2014 to 2024 inclusive; requests encodes
# a list value as a repeated query parameter. AGE is deliberately not
# filtered so that every age code comes back.
parameters = {
    "TIME_PERIOD": [str(year) for year in range(2014, 2025)],
    "SEX": "_T",
    "GEO": "2023-DEP-75",
    "maxResult": 10000,
}

# requests applies no timeout by default; bound the wait so the cell cannot
# hang, and fail with a clear HTTPError on a 4xx/5xx answer instead of a
# confusing KeyError when "observations" is looked up below.
response = requests.get(url, params=parameters, timeout=30)
response.raise_for_status()

# Extracting the list of observations/values
observations = response.json()["observations"]

# Flattening each observation into a single-level record
flat_obs = []
for obs in observations:
    # Merging the dimension and attribute sub-dictionaries into one
    merged = {}
    merged.update(obs.get("dimensions", {}))
    merged.update(obs.get("attributes", {}))
    # A measure may be wrapped as {"value": ...}; keep only the value then
    for k, v in obs.get("measures", {}).items():
        if isinstance(v, dict) and "value" in v:
            merged[k] = v["value"]
        else:
            merged[k] = v
    flat_obs.append(merged)

# DataFrame creation from the flattened records, restricted to the three
# columns that are exported afterwards
df = pd.DataFrame(flat_obs)
df = df[["TIME_PERIOD", "AGE", "OBS_VALUE_NIVEAU"]]
print(df.head(50))

   TIME_PERIOD     AGE  OBS_VALUE_NIVEAU
0         2024  Y60T74          298997.0
1         2015  Y30T34          185264.0
2         2017  Y75T79           59830.0
3         2021  Y85T89           33891.0
4         2023  Y40T59          519723.0
5         2024  Y20T39          704291.0
6         2015  Y45T49          141784.0
7         2020  Y30T34          191128.0
8         2017  Y65T69          111590.0
9         2020  Y70T74           98368.0
10        2018   Y_LT5          106798.0
11        2022    Y5T9           87962.0
12        2019  Y60T64          112014.0
13        2016  Y55T59          123982.0
14        2016  Y85T89           34384.0
15        2020  Y_GE75          173278.0
16        2019    Y5T9           97316.0
17        2019  Y50T54          134473.0
18        2018  Y45T49          141500.0
19        2018  Y20T39          733438.0
20        2017  Y70T74           83308.0
21        2023  Y60T74          302952.0
22        2017    Y5T9          100552.0
23        2018  

### Saving the results to a CSV file

In [23]:
# Write the frame as a semicolon-delimited file, leaving out the row index
df.to_csv(
    "estimation-de-la-population_Insee.csv",
    index=False,
    sep=";",
)