In [68]:
#Mise en place de la dataframe Pandas (exécution environ 10s)

import pandas as pd
import numpy as np
import geopandas as gpd
import matplotlib.pyplot as plt
import requests
import io

json_url = "https://equipements.sports.gouv.fr/api/explore/v2.1/catalog/datasets/data-es/exports/json?lang=fr&timezone=Europe%2FParis&select=equip_nom%2C%20equip_service_date%2C%20equip_douche%2C%20equip_sanit%2C%20equip_travaux_date&where=reg_nom%20like%20%27%C3%8Ele-de-France%27"
req = requests.get(json_url)
json_data = req.json()
df_dirty = pd.DataFrame(json_data)
df_dirty['equip_service_date'] = pd.to_numeric(df_dirty['equip_service_date'], errors='coerce') # Conversion de variable en float
df_dirty['equip_travaux_date'] = pd.to_numeric(df_dirty['equip_travaux_date'], errors='coerce') # Conversion de variable en float
df = df.copy()
df['equip_service_date'] = df['equip_service_date'].fillna(0) # Remplacement des NaN par 0
df['equip_travaux_date'] = df['equip_travaux_date'].fillna(0) # Remplacement des NaN par 0

In [57]:
#Test
df.head(10)

Unnamed: 0,equip_nom,equip_service_date,equip_douche,equip_sanit,equip_travaux_date,derniere_renov
1,TERRAIN DE FOOTBALL SYNTHETIQUE,2009.0,True,True,0.0,2009.0
2,PISCINE,1975.0,,True,1995.0,1995.0
3,GRANDE SALLE,2000.0,True,True,0.0,2000.0
6,PLATEAU EPS,1994.0,False,False,0.0,1994.0
11,Salle polyvalente,1960.0,False,True,0.0,1960.0
13,COURT DE TENNIS COUVERT 1,1982.0,True,True,0.0,1982.0
14,TERRAIN DE FOOTBALL D'HONNEUR ENTRAINEMENT,1982.0,True,True,0.0,1982.0
15,TERRAIN DE FOOTBAL N°3,1993.0,True,True,0.0,1993.0
16,TERRAIN DE FOOTBALL (en stabilisé),1982.0,True,True,0.0,1982.0
17,COURT DE TENNIS COUVERT 2,1982.0,True,True,0.0,1982.0


In [None]:
# Conversion en bool des variables de qualité
df['equip_douche'] = (df['equip_douche'].astype(bool)).astype(int)
df['equip_sanit'] = (df['equip_sanit'].astype(bool)).astype(int)

In [56]:
# Variable représentant la dernière rénovation en date (equip_travaux_date vaut 0 s'il n'y a jamais eu de travaux)
df['derniere_renov'] = np.maximum(df['equip_travaux_date'], df['equip_service_date'])

In [58]:
# Min et max de la dernière rénovation
borne_min = df['derniere_renov'].min()
borne_max = df['derniere_renov'].max()

In [59]:
# Création des var. d'ancienneté
df['coef_recent'] = (df['derniere_renov'] - borne_min)/(borne_max - borne_min) # Coef. représentant l'ancienneté d'une infrastructure en comparaison aux autres de l'échantillon
df['récent'] = (df['derniere_renov']>=2000).astype(int)
df['moyen'] = ((df['derniere_renov']>=1980) & (df['derniere_renov']<2000)).astype(int)
df['ancien'] = ((df['derniere_renov'] >= borne_min) & (df['derniere_renov'] < 1980)).astype(int) # Années choisies arbitrairement ici,

In [61]:
#Test
df.iloc[1000]

equip_nom             MUR D'ESCALADE
equip_service_date            2013.0
equip_douche                       1
equip_sanit                        1
equip_travaux_date               0.0
derniere_renov                2013.0
coef_recent                 0.966387
récent                             1
moyen                              0
ancien                             0
Name: 1306, dtype: object

In [62]:
# Définition du coeff de vétusté
df['coef_vetuste'] = 0.6 * df['ancien'] + 0.3 * df['moyen'] + 0.2 * (1-df['equip_douche']) + 0.2 * (1-df['equip_sanit'])

In [63]:
#Test
df.head(10)

Unnamed: 0,equip_nom,equip_service_date,equip_douche,equip_sanit,equip_travaux_date,derniere_renov,coef_recent,récent,moyen,ancien,coef_vetuste
1,TERRAIN DE FOOTBALL SYNTHETIQUE,2009.0,1,1,0.0,2009.0,0.955182,1,0,0,0.0
2,PISCINE,1975.0,0,1,1995.0,1995.0,0.915966,0,1,0,0.5
3,GRANDE SALLE,2000.0,1,1,0.0,2000.0,0.929972,1,0,0,0.0
6,PLATEAU EPS,1994.0,1,1,0.0,1994.0,0.913165,0,1,0,0.3
11,Salle polyvalente,1960.0,1,1,0.0,1960.0,0.817927,0,0,1,0.6
13,COURT DE TENNIS COUVERT 1,1982.0,1,1,0.0,1982.0,0.879552,0,1,0,0.3
14,TERRAIN DE FOOTBALL D'HONNEUR ENTRAINEMENT,1982.0,1,1,0.0,1982.0,0.879552,0,1,0,0.3
15,TERRAIN DE FOOTBAL N°3,1993.0,1,1,0.0,1993.0,0.910364,0,1,0,0.3
16,TERRAIN DE FOOTBALL (en stabilisé),1982.0,1,1,0.0,1982.0,0.879552,0,1,0,0.3
17,COURT DE TENNIS COUVERT 2,1982.0,1,1,0.0,1982.0,0.879552,0,1,0,0.3


In [66]:
# Calcul moyenne et médiane d'ancienneté
moyenne = df['coef_recent'].mean()
mediane = df['coef_recent'].median()
[moyenne, mediane]
# On voit qu'en IDF, les infrastructures sont très récentes :)

[np.float64(0.9169621849945939), np.float64(0.9299719887955182)]

In [67]:
# Ne conserver que les anciennetés inférieures à un certain seuil
df_filtre = df[df['coef_recent'] < 0.7]
df_filtre.head(20)

Unnamed: 0,equip_nom,equip_service_date,equip_douche,equip_sanit,equip_travaux_date,derniere_renov,coef_recent,récent,moyen,ancien,coef_vetuste
1904,TERRAIN DE VOLLEY BALL N°2,1870.0,1,1,0.0,1870.0,0.565826,0,0,1,0.6
1976,PLATEAU MULTISPORTS,1898.0,1,1,0.0,1898.0,0.644258,0,0,1,0.6
2078,MANEGE,1906.0,1,1,0.0,1906.0,0.666667,0,0,1,0.6
2193,GYMNASE CHARCOT,1873.0,1,1,0.0,1873.0,0.57423,0,0,1,0.6
2232,SALLE DE CARDIOTRAINING,1913.0,1,1,0.0,1913.0,0.686275,0,0,1,0.6
4529,GYMNASE 1,1886.0,1,1,0.0,1886.0,0.610644,0,0,1,0.6
4570,SALLE DE JEU DE PAUME,1907.0,1,1,0.0,1907.0,0.669468,0,0,1,0.6
4593,PETIT MANEGE,1868.0,1,1,0.0,1868.0,0.560224,0,0,1,0.6
4619,TERRAIN DE POLO,1892.0,1,1,0.0,1892.0,0.627451,0,0,1,0.6
4670,TERRAIN MADRID,1894.0,1,1,0.0,1894.0,0.633053,0,0,1,0.6


In [21]:
# equip_nom, equip_service_date, equip_douche, equip_sanit, equip_travaux_date

In [26]:
# Lien json
# https://equipements.sports.gouv.fr/api/explore/v2.1/catalog/datasets/data-es/exports/json?lang=fr&timezone=Europe%2FParis&select=equip_nom%2C%20equip_service_date%2C%20equip_douche%2C%20equip_sanit%2C%20equip_travaux_date&where=reg_nom%20like%20%27%C3%8Ele-de-France%27

# Lien CSV
# https://equipements.sports.gouv.fr/api/explore/v2.1/catalog/datasets/data-es/exports/csv?lang=fr&timezone=Europe%2FParis&use_labels=true&delimiter=%3B&select=equip_nom%2C%20equip_service_date%2C%20equip_douche%2C%20equip_sanit%2C%20equip_travaux_date&where=reg_nom%20like%20%27%C3%8Ele-de-France%27