Le but de ce code est de modifier comme il se doit le data set ytrain pour avoir des données par heures et dans un carré


In [1]:
import folium
import pandas as pd
from folium.plugins import TimestampedGeoJson
from shapely.geometry import Point
import numpy as np
from math import radians, sin, cos, sqrt, atan2
from branca.colormap import linear
import matplotlib.pyplot as plt
import geopandas as gpd
from shapely.geometry import box
from scipy.spatial import cKDTree

In [2]:
data = pd.read_csv("hourly_df_semaine2229.csv")
df_real = pd.DataFrame(data)
df_real['taux_dispo'] =  (df_real['Available'] + df_real['Offline']/3)/3
df_real = df_real[['timestamp','latitude','longitude','taux_occup','taux_dispo']]
df_real.head(2)

Unnamed: 0,timestamp,latitude,longitude,taux_occup,taux_dispo
0,2020-01-22 00:00:00+00:00,48.855667,2.354089,0.666667,0.333333
1,2020-01-22 00:00:00+00:00,48.86424,2.397724,0.666667,0.333333


In [3]:

data_traffic= pd.read_csv("traffic2229.csv")
df_traffic = pd.DataFrame(data_traffic)


df_traffic.head(5)


Unnamed: 0.1,Unnamed: 0,k,t_1h,lat,lon
0,0,0.28167,2020-01-22 01:00:00,48.820906,2.355258
1,1,0.14611,2020-01-22 02:00:00,48.820906,2.355258
2,2,0.10333,2020-01-22 03:00:00,48.820906,2.355258
3,3,0.02778,2020-01-22 04:00:00,48.820906,2.355258
4,4,0.11778,2020-01-22 05:00:00,48.820906,2.355258


In [4]:
# Fonction haversine
def haversine(lat1, lon1, lat2, lon2):
    R = 6371  # Rayon de la Terre en kilomètres
    dlat = radians(lat2 - lat1)
    dlon = radians(lon2 - lon1)
    a = sin(dlat / 2) ** 2 + cos(radians(lat1)) * cos(radians(lat2)) * sin(dlon / 2) ** 2
    c = 2 * atan2(sqrt(a), sqrt(1 - a))
    return R * c

In [5]:


# Définir les limites de la grille (basé sur Paris)
lat_min, lat_max = 48.81, 48.92
lon_min, lon_max = 2.255, 2.42
step = 0.005  # Taille d'un carré (grille de 0.005°)

# Créer les carrés de la grille
latitudes = np.arange(lat_min, lat_max, step)
longitudes = np.arange(lon_min, lon_max, step)

grille_data = []

In [6]:
# Charger le fichier GeoJSON des arrondissements de Paris
arrondissements_geojson = "arrondissements.geojson"  
arrondissements_gdf = gpd.read_file(arrondissements_geojson)  # Charger les arrondissements

In [7]:
# Coordonnées de Charenton-le-Pont et du Jardin d'Acclimatation
charenton_lat, charenton_lon = 48.8337, 2.4149
jardin_lat, jardin_lon = 48.8716, 2.2611



# Distance d'exclusion (en kilomètres)
exclusion_radius = 1  # 2 km autour de ces points

# Fonction pour vérifier si un carré est proche d'un des lieux d'exclusion
def is_near_exclusion(lat, lon):
    distance_to_charenton = haversine(lat, lon, charenton_lat, charenton_lon)
    distance_to_jardin = haversine(lat, lon, jardin_lat, jardin_lon)
    return distance_to_charenton < exclusion_radius or distance_to_jardin < exclusion_radius


In [8]:
def haversine_np(lat1, lon1, lat2, lon2):
    """
    Calcul vectorisé de la distance entre deux ensembles de points (lat1, lon1) et (lat2, lon2).
    :return: Tableau NumPy des distances.
    """
    R = 6371  # Rayon de la Terre en km
    lat1, lon1, lat2, lon2 = map(np.radians, [lat1, lon1, lat2, lon2])
    dlat = lat2 - lat1
    dlon = lon2 - lon1
    a = np.sin(dlat / 2) ** 2 + np.cos(lat1) * np.cos(lat2) * np.sin(dlon / 2) ** 2
    c = 2 * np.arcsin(np.sqrt(a))
    return R * c


In [23]:


def calculate_distances_multiple_metrics_optimized_aveck(time_series, latitudes, longitudes, df_stations, arrondissements_gdf, df_k, square_size=0.005):
    """
    Calcule les distances et les métriques pour les stations les plus proches, les 2 plus proches, et les 3 plus proches,
    en utilisant des opérations optimisées avec NumPy et Pandas.
    La fonction calcule aussi la moyenne de k pour chaque carré défini par lat, lon.
    
    :param time_series: Liste ou tableau des timestamps.
    :param latitudes: Liste des latitudes de la grille.
    :param longitudes: Liste des longitudes de la grille.
    :param df_stations: DataFrame contenant les données des stations (avec un 'timestamp').
    :param arrondissements_gdf: GeoDataFrame des arrondissements de Paris.
    :param df_k: DataFrame contenant les valeurs de `k` (avec 't_1h', 'lat', 'lon', 'k').
    :param square_size: Taille approximative du carré autour de chaque centre (en degrés, ajustable).
    :return: DataFrame contenant les distances et métriques pour chaque carré à chaque timestamp.
    """
    # Grille de points (centres des carrés)
    grid_lat = np.array([(latitudes[i] + latitudes[i + 1]) / 2 for i in range(len(latitudes) - 1)])
    grid_lon = np.array([(longitudes[j] + longitudes[j + 1]) / 2 for j in range(len(longitudes) - 1)])
    grid_centers = np.array(np.meshgrid(grid_lat, grid_lon)).T.reshape(-1, 2)  # Format (N, 2)
    
    # Filtrer les points de la grille en fonction de Paris
    grid_points = pd.DataFrame(grid_centers, columns=['lat', 'lon'])
    grid_points['geometry'] = grid_points.apply(lambda row: Point(row['lon'], row['lat']), axis=1)
    grid_points = grid_points[grid_points['geometry'].apply(lambda p: arrondissements_gdf.geometry.union_all().contains(p))]
    valid_grid_centers = grid_points[['lat', 'lon']].values  # Seulement les centres valides

    results = []

    for timestamp in time_series:
        # Stations disponibles pour l'instant donné
        df_stations_at_t = df_stations[df_stations['timestamp'] == timestamp].copy()
        df_stations_at_t = df_stations_at_t[df_stations_at_t['taux_dispo'] >= 1 / 3]

        if df_stations_at_t.empty:
            continue

        # Construire un arbre k-d pour rechercher efficacement les distances
        station_coords = df_stations_at_t[['latitude', 'longitude']].values
        kdtree = cKDTree(station_coords)

        # Trouver les 3 plus proches stations pour chaque point de la grille
        distances, indices = kdtree.query(valid_grid_centers, k=3)  # distances: (N, 3), indices: (N, 3)

        # Calcul des métriques et jointure avec le DataFrame contenant 'k'
        for i, (lat, lon) in enumerate(valid_grid_centers):
            closest_1 = distances[i, 0]
            closest_2 = distances[i, :2].mean()
            closest_3 = distances[i, :3].mean()

            taux_dispo_1 = df_stations_at_t.iloc[indices[i, 0]]['taux_dispo']
            taux_dispo_2 = df_stations_at_t.iloc[indices[i, :2]]['taux_dispo'].mean()
            taux_dispo_3 = df_stations_at_t.iloc[indices[i, :3]]['taux_dispo'].mean()

            # Filtrer les stations 'k' dans le carré (lat, lon)
            lat_min = lat - square_size / 2
            lat_max = lat + square_size / 2
            lon_min = lon - square_size / 2
            lon_max = lon + square_size / 2

            # Sélectionner les stations dans le carré
            stations_in_square = df_k[
                (df_k['lat'] >= lat_min) & (df_k['lat'] <= lat_max) &
                (df_k['lon'] >= lon_min) & (df_k['lon'] <= lon_max) &
                (df_k['t_1h'] == timestamp)
            ]

            if not stations_in_square.empty:
                moyenne_k = stations_in_square['k'].mean()  # Moyenne des valeurs de 'k' pour ce carré et timestamp
            else:
                moyenne_k = np.nan  # Si aucune station dans le carré, assigner NaN

            results.append({
                'timestamp': timestamp,
                'center_lat': lat,
                'center_lon': lon,
                'distance_to_closest_1': closest_1,
                'taux_dispo_closest_1': taux_dispo_1,
                'distance_to_closest_2': closest_2,
                'taux_dispo_closest_2': taux_dispo_2,
                'distance_to_closest_3': closest_3,
                'taux_dispo_closest_3': taux_dispo_3,
                'normalized_distance': closest_1 / distances[:, 0].max() if distances[:, 0].max() > 0 else 0,
                'moyenne_k': moyenne_k  # Ajouter la colonne 'moyenne_k'
            })

    return pd.DataFrame(results)



In [26]:


def calculate_distances_multiple_metrics_optimized_dispo(time_series, latitudes, longitudes, df_stations, arrondissements_gdf):
    """
    Calcule les distances et les métriques pour les stations les plus proches, les 2 plus proches, et les 3 plus proches,
    en utilisant des opérations optimisées avec NumPy et Pandas.
    
    :param time_series: Liste ou tableau des timestamps.
    :param latitudes: Liste des latitudes de la grille.
    :param longitudes: Liste des longitudes de la grille.
    :param df_stations: DataFrame contenant les données des stations (avec un 'timestamp').
    :param arrondissements_gdf: GeoDataFrame des arrondissements de Paris.
    :return: DataFrame contenant les distances et métriques pour chaque carré à chaque timestamp.
    """
    # Grille de points (centres des carrés)
    grid_lat = np.array([(latitudes[i] + latitudes[i + 1]) / 2 for i in range(len(latitudes) - 1)])
    grid_lon = np.array([(longitudes[j] + longitudes[j + 1]) / 2 for j in range(len(longitudes) - 1)])
    grid_centers = np.array(np.meshgrid(grid_lat, grid_lon)).T.reshape(-1, 2)  # Format (N, 2)
    
    # Filtrer les points de la grille en fonction de Paris
    grid_points = pd.DataFrame(grid_centers, columns=['lat', 'lon'])
    grid_points['geometry'] = grid_points.apply(lambda row: Point(row['lon'], row['lat']), axis=1)
    grid_points = grid_points[grid_points['geometry'].apply(lambda p: arrondissements_gdf.geometry.union_all().contains(p))]
    valid_grid_centers = grid_points[['lat', 'lon']].values  # Seulement les centres valides

    results = []

    for timestamp in time_series:
        # Stations disponibles pour l'instant donné
        df_stations_at_t = df_stations[df_stations['timestamp'] == timestamp].copy()
        df_stations_at_t = df_stations_at_t[df_stations_at_t['taux_dispo'] >= 1 / 3]

        if df_stations_at_t.empty:
            continue

        # Construire un arbre k-d pour rechercher efficacement les distances
        station_coords = df_stations_at_t[['latitude', 'longitude']].values
        kdtree = cKDTree(station_coords)

        # Trouver les 3 plus proches stations pour chaque point de la grille
        distances, indices = kdtree.query(valid_grid_centers, k=3)  # distances: (N, 3), indices: (N, 3)

        # Calcul des métriques
        for i, (lat, lon) in enumerate(valid_grid_centers):
            closest_1 = distances[i, 0]
            closest_2 = distances[i, 1]
            closest_3 = distances[i, 2]

            taux_dispo_1 = df_stations_at_t.iloc[indices[i, 0]]['taux_dispo']
            taux_dispo_2 = df_stations_at_t.iloc[indices[i, 1]]['taux_dispo']
            taux_dispo_3 = df_stations_at_t.iloc[indices[i, 2]]['taux_dispo']

           

            results.append({
                'timestamp': timestamp,
                'center_lat': lat,
                'center_lon': lon,
                'distance_to_closest_1': closest_1,
                'taux_dispo_closest_1': taux_dispo_1,
                'distance_to_closest_2': closest_2,
                'taux_dispo_closest_2': taux_dispo_2,
                'distance_to_closest_3': closest_3,
                'taux_dispo_closest_3': taux_dispo_3,
                'normalized_distance': closest_1 / distances[:, 0].max() if distances[:, 0].max() > 0 else 0
            })

    return pd.DataFrame(results)


In [52]:
def calculate_distances_multiple_metrics_optimized_occup(time_series, latitudes, longitudes, df_stations, arrondissements_gdf):
    """
    Calcule les distances et les métriques pour les stations les plus proches, les 2 plus proches, et les 3 plus proches,
    en utilisant des opérations optimisées avec NumPy et Pandas.
    
    :param time_series: Liste ou tableau des timestamps.
    :param latitudes: Liste des latitudes de la grille.
    :param longitudes: Liste des longitudes de la grille.
    :param df_stations: DataFrame contenant les données des stations (avec un 'timestamp').
    :param arrondissements_gdf: GeoDataFrame des arrondissements de Paris.
    :return: DataFrame contenant les distances et métriques pour chaque carré à chaque timestamp.
    """
    # Grille de points (centres des carrés)
    grid_lat = np.array([(latitudes[i] + latitudes[i + 1]) / 2 for i in range(len(latitudes) - 1)])
    grid_lon = np.array([(longitudes[j] + longitudes[j + 1]) / 2 for j in range(len(longitudes) - 1)])
    grid_centers = np.array(np.meshgrid(grid_lat, grid_lon)).T.reshape(-1, 2)  # Format (N, 2)
    
    # Filtrer les points de la grille en fonction de Paris
    grid_points = pd.DataFrame(grid_centers, columns=['lat', 'lon'])
    grid_points['geometry'] = grid_points.apply(lambda row: Point(row['lon'], row['lat']), axis=1)
    grid_points = grid_points[grid_points['geometry'].apply(lambda p: arrondissements_gdf.geometry.union_all().contains(p))]
    valid_grid_centers = grid_points[['lat', 'lon']].values  # Seulement les centres valides

    results = []

    for timestamp in time_series:
        # Stations disponibles pour l'instant donné
        df_stations_at_t = df_stations[df_stations['timestamp'] == timestamp].copy()

        if df_stations_at_t.empty:
            continue

        # Construire un arbre k-d pour rechercher efficacement les distances
        station_coords = df_stations_at_t[['latitude', 'longitude']].values
        kdtree = cKDTree(station_coords)

        # Trouver les 3 plus proches stations pour chaque point de la grille
        distances, indices = kdtree.query(valid_grid_centers, k=3)

        # Calcul des métriques
        for i, (lat, lon) in enumerate(valid_grid_centers):

            closest_1 = haversine(lat, lon, station_coords[indices[i, 0], 0], station_coords[indices[i, 0], 1])
            closest_2 = haversine(lat, lon, station_coords[indices[i, 1], 0], station_coords[indices[i, 1], 1])
            closest_3 = haversine(lat, lon, station_coords[indices[i, 2], 0], station_coords[indices[i, 2], 1])


            taux_occup_1 = df_stations_at_t.iloc[indices[i, 0]]['taux_occup']
            taux_occup_2 = df_stations_at_t.iloc[indices[i, 1]]['taux_occup']
            taux_occup_3 = df_stations_at_t.iloc[indices[i, 2]]['taux_occup']

           

            results.append({
                'timestamp': timestamp,
                'center_lat': lat,
                'center_lon': lon,
                'distance_to_closest_1': closest_1,
                'taux_occup_closest_1': taux_occup_1,
                'distance_to_closest_2': closest_2,
                'taux_occup_closest_2': taux_occup_2,
                'distance_to_closest_3': closest_3,
                'taux_occup_closest_3': taux_occup_3
            })

    return pd.DataFrame(results)


In [53]:
time_series = df_real['timestamp'].unique()

result_df_occup = calculate_distances_multiple_metrics_optimized_occup(
    time_series=time_series,
    latitudes=latitudes,
    longitudes=longitudes,
    df_stations=df_real,
    arrondissements_gdf=arrondissements_gdf
)

In [54]:
result_df_occup.head(20)

Unnamed: 0,timestamp,center_lat,center_lon,distance_to_closest_1,taux_occup_closest_1,distance_to_closest_2,taux_occup_closest_2,distance_to_closest_3,taux_occup_closest_3
0,2020-01-22 00:00:00+00:00,48.8175,2.3325,0.829681,0.666667,0.89572,0.333333,1.278762,0.333333
1,2020-01-22 00:00:00+00:00,48.8175,2.3375,0.767356,0.666667,0.58303,0.333333,1.089418,0.333333
2,2020-01-22 00:00:00+00:00,48.8175,2.3425,0.381517,0.333333,0.870231,0.666667,1.003205,0.333333
3,2020-01-22 00:00:00+00:00,48.8175,2.3475,0.468196,0.333333,1.045944,0.333333,1.092598,0.666667
4,2020-01-22 00:00:00+00:00,48.8175,2.3575,0.979265,0.166667,1.081931,0.333333,1.758763,0.333333
5,2020-01-22 00:00:00+00:00,48.8175,2.3625,0.735509,0.166667,1.712055,0.333333,1.431185,0.333333
6,2020-01-22 00:00:00+00:00,48.8175,2.3675,0.625296,0.166667,1.742692,0.333333,1.787184,0.333333
7,2020-01-22 00:00:00+00:00,48.8225,2.3175,1.012358,0.666667,1.114363,0.333333,1.615456,0.25
8,2020-01-22 00:00:00+00:00,48.8225,2.3225,0.962238,0.333333,1.010981,0.666667,1.071818,0.666667
9,2020-01-22 00:00:00+00:00,48.8225,2.3275,0.936985,0.333333,0.716461,0.666667,1.134564,0.666667


In [13]:
# Fonction pour vérifier si les points sont dans le même carré
def is_within_square(lat1, lon1, lat2, lon2, square_size=0.005):
    """
    Vérifie si les points (lat2, lon2) se trouvent dans le carré centré sur (lat1, lon1)
    de taille square_size.
    """
    return abs(lat1 - lat2) <= square_size / 2 and abs(lon1 - lon2) <= square_size / 2

# Ajouter une nouvelle colonne 'mean_k' à result_df
result_df['mean_k'] = np.nan

# Calculer la moyenne des k pour chaque carré et chaque timestamp
for index, row in result_df.iterrows():
    # Filtrer les lignes de df_traffic correspondant au même timestamp
    df_traffic_filtered = df_traffic[df_traffic['t_1h'] == row['timestamp']]

    # Filtrer les points dans le même carré
    df_traffic_filtered = df_traffic_filtered[df_traffic_filtered.apply(
        lambda x: is_within_square(row['center_lat'], row['center_lon'], x['lat'], x['lon']), axis=1
    )]

    # Calculer la moyenne de k si des points existent dans le carré
    if not df_traffic_filtered.empty:
        result_df.at[index, 'mean_k'] = df_traffic_filtered['k'].mean()

# Remplacer les NaN par 0 dans la colonne 'mean_k'
result_df['mean_k'] = result_df['mean_k'].fillna(0)

# Afficher le résultat final
print(result_df)


KeyboardInterrupt: 

In [49]:
result_df['mean_k'] = result_df['mean_k'].fillna(0)
result_df.head(2)

KeyError: 'mean_k'

On définit un taux d'occupation moyen par carré, selon le nombre de stations voisines comptabilisés. 

Le taux d'occupation moyen $occup_{moy,p}$ d'un carré à l'instant $t$ en comptant ses $p$ stations les plus proches est 
 $$ occup_{moy,p}(t)= \frac{1}{p} \sum_{i=1}^p occup_i(t)  \frac{distance_i}{l_{carré}} $$
où $distance_i$ est la distance du centre du carré à la $i$ème station la plus proche et $l_{carré}$ est la demie-largeur d'un carré. On calcule $l_{carré}$ ci-dessous.

In [55]:
l_carre = haversine (48.8225,2.3725,48.8225,2.375)
print('La demie-largeur d\'un carré est égale à environ {} m.'.format(int(l_carre*1000)))


La demie-largeur d'un carré est égale à environ 183 m.


In [56]:
result_df_occup['occup_moy_1']=result_df_occup['taux_occup_closest_1']*(result_df_occup['distance_to_closest_1']/l_carre)	

result_df_occup['occup_moy_2']=(1/2)*(result_df_occup['taux_occup_closest_1']*(result_df_occup['distance_to_closest_1']/l_carre) 
+ result_df_occup['taux_occup_closest_2']*(result_df_occup['distance_to_closest_2']/l_carre))	

result_df_occup['occup_moy_3']=(1/3)*(result_df_occup['taux_occup_closest_1']*(result_df_occup['distance_to_closest_1']/l_carre) 
+ result_df_occup['taux_occup_closest_2']*(result_df_occup['distance_to_closest_2']/l_carre)	
+result_df_occup['taux_occup_closest_3']*(result_df_occup['distance_to_closest_3']/l_carre))

result_df_occup.head(5)

Unnamed: 0,timestamp,center_lat,center_lon,distance_to_closest_1,taux_occup_closest_1,distance_to_closest_2,taux_occup_closest_2,distance_to_closest_3,taux_occup_closest_3,occup_moy_1,occup_moy_2,occup_moy_3
0,2020-01-22 00:00:00+00:00,48.8175,2.3325,0.829681,0.666667,0.89572,0.333333,1.278762,0.333333,3.022103,2.326714,2.327455
1,2020-01-22 00:00:00+00:00,48.8175,2.3375,0.767356,0.666667,0.58303,0.333333,1.089418,0.333333,2.795084,1.928462,1.947006
2,2020-01-22 00:00:00+00:00,48.8175,2.3425,0.381517,0.333333,0.870231,0.666667,1.003205,0.333333,0.694834,1.932319,1.897239
3,2020-01-22 00:00:00+00:00,48.8175,2.3475,0.468196,0.333333,1.045944,0.333333,1.092598,0.666667,0.852698,1.378808,2.245796
4,2020-01-22 00:00:00+00:00,48.8175,2.3575,0.979265,0.166667,1.081931,0.333333,1.758763,0.333333,0.89174,1.4311,2.021778


In [58]:
#Importation des données de trafic pour la semaine 22 janvier - 29 janvier 
df_trafic = pd.read_csv('moyennes_par_carre_et_t1h2229.csv')
df_trafic.rename(columns={'t_1h': 'timestamp'}, inplace=True)
df_trafic['timestamp'] = pd.to_datetime(df_trafic['timestamp'])

if df_trafic['timestamp'].dt.tz is None:
    df_trafic['timestamp'] = df_trafic['timestamp'].dt.tz_localize('UTC')

df_trafic.head(20)


Unnamed: 0,timestamp,carre_id,moyenne_k,somme_k,count_k,lati,long
0,2020-01-22 01:00:00+00:00,0_16,0.325,0.65,2,48.8125,2.3375
1,2020-01-22 01:00:00+00:00,0_20,0.0,0.0,2,48.8125,2.3575
2,2020-01-22 01:00:00+00:00,10_1,1.478148,8.86889,6,48.8625,2.2625
3,2020-01-22 01:00:00+00:00,10_10,1.640304,18.04334,11,48.8625,2.3075
4,2020-01-22 01:00:00+00:00,10_11,2.367567,49.7189,21,48.8625,2.3125
5,2020-01-22 01:00:00+00:00,10_12,2.206557,33.09835,15,48.8625,2.3175
6,2020-01-22 01:00:00+00:00,10_13,2.294744,29.83167,13,48.8625,2.3225
7,2020-01-22 01:00:00+00:00,10_14,1.412666,7.06333,5,48.8625,2.3275
8,2020-01-22 01:00:00+00:00,10_15,2.18628,21.8628,10,48.8625,2.3325
9,2020-01-22 01:00:00+00:00,10_16,1.849908,11.09945,6,48.8625,2.3375


In [61]:
result_df_occup['timestamp'] = pd.to_datetime(result_df_occup['timestamp'], errors='coerce')
result_df_occup['timestamp']=result_df_occup['timestamp']+ pd.Timedelta(hours=1)

df_joined = pd.merge(df_trafic, result_df_occup, left_on=['lati', 'long','timestamp'], right_on=['center_lat', 'center_lon','timestamp'], how='inner')
df_final=df_joined[['timestamp','carre_id','lati','long','moyenne_k','occup_moy_1','occup_moy_2','occup_moy_3']]
df_final.head(20)

Unnamed: 0,timestamp,carre_id,lati,long,moyenne_k,occup_moy_1,occup_moy_2,occup_moy_3
0,2020-01-22 02:00:00+00:00,10_10,48.8625,2.3075,0.713635,1.899358,2.172283,2.166594
1,2020-01-22 02:00:00+00:00,10_12,48.8625,2.3175,0.958076,1.331156,1.693438,1.711232
2,2020-01-22 02:00:00+00:00,10_13,48.8625,2.3225,1.130856,1.294264,1.764377,1.969675
3,2020-01-22 02:00:00+00:00,10_14,48.8625,2.3275,0.950666,1.089232,1.904581,2.711427
4,2020-01-22 02:00:00+00:00,10_15,48.8625,2.3325,1.148279,1.259086,1.866099,1.341779
5,2020-01-22 02:00:00+00:00,10_18,48.8625,2.3475,3.56861,0.0,0.132873,0.37679
6,2020-01-22 02:00:00+00:00,10_19,48.8625,2.3525,2.70889,0.0,1.399873,0.933249
7,2020-01-22 02:00:00+00:00,10_20,48.8625,2.3575,0.0,0.0,1.45649,0.970994
8,2020-01-22 02:00:00+00:00,10_23,48.8625,2.3725,0.995742,0.0,1.217158,0.811439
9,2020-01-22 02:00:00+00:00,10_24,48.8625,2.3775,1.368693,1.570103,0.785052,2.998575


In [62]:
df_final.to_csv('k_et_occup_carre.csv')

In [87]:
#result_df.to_csv("test2.csv")


In [None]:

# Convertir le DataFrame en GeoDataFrame avec une colonne 'geometry'
df_stations['geometry'] = df_stations.apply(lambda row: Point(row['longitude'], row['latitude']), axis=1)

# Créer un GeoDataFrame
gdf_stations = gpd.GeoDataFrame(df_stations, geometry='geometry')

# Créer un index spatial avec 'rtree' pour accélérer la recherche
gdf_stations = gdf_stations.set_index('geometry')

# Appliquer un index spatial pour rechercher les stations dans un carré
def filter_stations_in_square(gdf_stations, lat_min, lat_max, lon_min, lon_max):
    # Créer un polygone représentant les limites du carré
   
    square_polygon = box(lon_min, lat_min, lon_max, lat_max)
    
    # Rechercher les stations dans le carré en utilisant l'index spatial
    return gdf_stations[gdf_stations.geometry.within(square_polygon)]

# Exemple d'utilisation pour un carré
lat_min = 48.85
lat_max = 48.86
lon_min = 2.35
lon_max = 2.36

stations_in_square = filter_stations_in_square(gdf_stations, lat_min, lat_max, lon_min, lon_max)
print(stations_in_square)
