# Titre....

## Introduction ##

POI : Point Of Interest

## I. Collecte des données

Attention : le code peut être exécuté mais la partie concernant les POI est très longue à calculer (entre 30 minutes et 1 heure). C'est pourquoi tous les fichiers sont stockés dans le dépôt pour être utilisables directement.

### Importations

In [None]:
import os
import io
import zipfile
import logging
import requests
import pandas as pd
import geopandas as gpd
from datetime import datetime
from pathlib import Path

In [None]:
# Install osmnx from the notebook (IPython shell escape: only valid inside
# IPython/Jupyter, not in a plain Python script).
# NOTE(review): presumably the package behind the `ox` name used by
# extraire_poi_osm — confirm that it is imported as `ox` before that call.
! pip install osmnx

### Set up

In [None]:
# Paths (relative to the current working directory)
DATA_DIR = Path("Projet-data-science/data")
GTFS_DIR = DATA_DIR / "gtfs"
POI_DIR = DATA_DIR / "poi"

# Bounding box of Ile-de-France (latitude / longitude bounds)
coos_IDF = {
    "south": 48.0,
    "north": 49.2,
    "west": 1.4,
    "east": 3.6,
}

# POI: selection of the POI categories most useful for our analysis.
# Each entry maps a category name to the OSM tag filter used to query it:
# `True` accepts any value of the key, a string accepts one value and a list
# accepts any of the listed values.
# OSM keys and values are case-sensitive and lowercase by convention, so a
# capitalised key such as "Leisure" would match nothing.
POI_CATEGORIES = {
    "commerce": {"tags": {"shop": True}},
    "restaurants": {"tags": {"amenity": "restaurant"}},
    "bureaux": {"tags": {"office": True}},
    "administration": {"tags": {"amenity": ["townhall", "government"]}},
    "culture": {"tags": {"amenity": ["theatre", "museum", "cinema", "arts_centre"]}},
    "education": {"tags": {"amenity": "school"}},
    "sante": {"tags": {"amenity": "hospital"}},
    "logement": {"tags": {"building": ["residential", "apartments", "house", "detached", "semi-detached"]}},
    "monument": {"tags": {"historic": "monument", "tourism": "attraction"}},
    "sports": {"tags": {"leisure": ["sports_centre", "pitch", "stadium", "swimming_pool"]}},
    "commerce_proximite": {"tags": {"amenity": ["cafe", "bar", "fast_food"], "shop": ["bakery", "supermarket", "convenience"]}},
    "commerce_majeur": {"tags": {"shop": ["mall", "department_store"]}},
    "education2": {"tags": {"amenity": ["school", "kindergarten", "college", "university"]}},
    "sante2": {"tags": {"amenity": ["hospital", "clinic", "doctors"]}},
    "administration2": {"tags": {"amenity": ["townhall", "courthouse", "police", "post_office"]}},
    "culture_tourisme": {"tags": {"amenity": ["museum", "cinema", "theatre"], "tourism": ["attraction", "museum"], "historic": "monument"}},
    "bureaux2": {"tags": {"office": ["company", "corporate", "it", "administrative", "government"]}},
    "sports_loisirs": {"tags": {"leisure": ["sports_centre", "stadium", "pitch", "swimming_pool"]}},
}

### GTFS IDFM

In [None]:
def telecharger_gtfs_idfm():
    """Download the IDFM GTFS archive, extract its tables and load them.

    The zip archive is fetched over HTTP and held in memory; the selected
    GTFS text files it contains are extracted into ``GTFS_DIR``. Every
    selected file present in ``GTFS_DIR`` afterwards is then read with
    ``pandas.read_csv``.

    Returns:
        dict: file name without its extension (e.g. ``"stops"``) mapped to
        the corresponding ``pandas.DataFrame``.

    Raises:
        requests.HTTPError: if the server answers with an error status.
    """
    url = "https://eu.ftp.opendatasoft.com/stif/GTFS/IDFM-gtfs.zip"
    # Single source of truth for the tables we keep (each name listed once,
    # so no file is extracted or parsed twice).
    fichiers = (
        "agency.txt",
        "calendar_dates.txt",
        "stops.txt",
        "routes.txt",
        "trips.txt",
        "stop_times.txt",
        "calendar.txt",
        "transfers.txt",
    )

    r = requests.get(url, timeout=120)
    r.raise_for_status()

    with zipfile.ZipFile(io.BytesIO(r.content)) as z:
        # Built once: namelist() would otherwise be recomputed per file.
        disponibles = set(z.namelist())
        for f in fichiers:
            if f in disponibles:
                z.extract(f, GTFS_DIR)
                print(f"GTFS extrait : {f}")

    gtfs = {}
    for f in fichiers:
        path = GTFS_DIR / f
        # Files missing from the archive are skipped rather than failing.
        if path.exists():
            gtfs[f.split(".")[0]] = pd.read_csv(path)

    return gtfs

### POI OpenStreetMap

In [None]:
def extraire_poi_osm(categorie):
    """Query the POI of one category over the bounding box and save them.

    The tag filter of ``categorie`` is looked up in ``POI_CATEGORIES`` and
    queried over the ``coos_IDF`` bounding box. Every geometry is replaced
    by its centroid, then the result is written to ``POI_DIR`` both as
    ``poi_<categorie>.geojson`` and ``poi_<categorie>.parquet``.

    Args:
        categorie: key of ``POI_CATEGORIES`` selecting the tag filter.

    Returns:
        The GeoDataFrame returned by the query, with centroid geometries.

    Raises:
        KeyError: if ``categorie`` is not a key of ``POI_CATEGORIES``.
    """
    # Imported locally: osmnx is installed from a notebook cell and is not
    # part of the top-level imports, so `ox` would otherwise be undefined.
    import osmnx as ox

    tags = POI_CATEGORIES[categorie]["tags"]
    # NOTE(review): (west, south, east, north) is the order expected by
    # osmnx 2.x; older releases used a different order — confirm against the
    # installed version.
    bbox = (
        coos_IDF["west"],
        coos_IDF["south"],
        coos_IDF["east"],
        coos_IDF["north"],
    )

    gdf = ox.features_from_bbox(bbox, tags)

    # Simplification: every object (point, line, polygon) is reduced to its
    # centroid.
    if "geometry" in gdf.columns:
        gdf["geometry"] = gdf.geometry.centroid

    # The output directory may not exist yet on a fresh checkout.
    POI_DIR.mkdir(parents=True, exist_ok=True)

    # GeoJSON export
    geojson_path = POI_DIR / f"poi_{categorie}.geojson"
    gdf.to_file(geojson_path, driver="GeoJSON")

    # Parquet export: compact enough to be stored on GitHub and faster to
    # process afterwards.
    parquet_path = POI_DIR / f"poi_{categorie}.parquet"
    gdf.to_parquet(parquet_path, index=False)

    print(f"POI {categorie} exporté : {geojson_path.name} + {parquet_path.name}")

    return gdf


### Utilisation

In [None]:
if __name__ == "__main__":
    # GTFS: download, extract and load the tables.
    gtfs = telecharger_gtfs_idfm()
    # stop_times.txt is too heavy to be pushed as-is, so it is converted to
    # Parquet. The DataFrame already loaded by telecharger_gtfs_idfm is
    # reused (no second parse of the largest file) and the output goes next
    # to the extracted files in GTFS_DIR instead of a hard-coded path.
    gtfs["stop_times"].to_parquet(GTFS_DIR / "stop_times.parquet", compression="snappy")

    # POI: one query and export per category.
    for cat in POI_CATEGORIES:
        extraire_poi_osm(cat)

    print("=== Collecte terminée ===")


## II. Statistiques descriptives