<a href="https://colab.research.google.com/github/cedric-bit/Geospatial-Data-Science-2023/blob/main/loadDATA509.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install osmium
!pip install geopandas
!pip install shapely
!pip install matplotlib




In [None]:
!pip install gtfs-realtime-bindings


In [None]:
!pip install osmnx

!pip install osmread






In [None]:
!pip install --upgrade protobuf



In [None]:
from google.colab import drive
drive.mount('/content/drive')
import os
from datetime import datetime
import osmnx as ox
from google.transit import gtfs_realtime_pb2
from google.protobuf import text_format
import pandas as pd
import os
import zipfile
import osmium
import datetime
import matplotlib.pyplot as plt
import folium
import geopandas as gpd
from shapely.geometry import Point
import matplotlib.pyplot as plt



# Path to the Google Drive folder that contains the GTFS-RT and GTFS files
shortcut_path = '/content/drive/MyDrive/Geospatial Data Science 2023'


def read_gtfs_static_file(file_path):
    """Load the core tables of a static GTFS archive.

    Args:
        file_path: Path to a GTFS ``.zip`` archive containing ``stops.txt``,
            ``stop_times.txt``, ``trips.txt`` and ``routes.txt``.

    Returns:
        A ``(stops_df, stop_times_df, trips_df, routes_df)`` tuple of pandas
        DataFrames, one per table, in that order.

    Raises:
        KeyError: If one of the four tables is missing from the archive.
    """
    table_names = ('stops.txt', 'stop_times.txt', 'trips.txt', 'routes.txt')
    tables = []

    with zipfile.ZipFile(file_path, 'r') as archive:
        for table_name in table_names:
            # Close every member stream explicitly instead of leaking it
            with archive.open(table_name) as stream:
                tables.append(pd.read_csv(stream))

    stops_df, stop_times_df, trips_df, routes_df = tables
    return stops_df, stop_times_df, trips_df, routes_df

def extract_trip_updates(feed_realtime):
    """Flatten the trip updates of a GTFS-realtime feed into plain dicts.

    Args:
        feed_realtime: Feed object whose ``entity`` items expose
            ``HasField('trip_update')`` and a ``trip_update`` attribute.

    Returns:
        A list with one dict per stop-time update, holding the keys
        ``trip_id``, ``route_id``, ``stop_id``, ``arrival_time``,
        ``departure_time`` and ``delay`` (the arrival delay).
    """
    records = []

    for feed_entity in feed_realtime.entity:
        # Entities without a trip_update contribute nothing
        if not feed_entity.HasField('trip_update'):
            continue

        update = feed_entity.trip_update
        current_trip_id = update.trip.trip_id
        current_route_id = update.trip.route_id

        records.extend(
            {
                'trip_id': current_trip_id,
                'route_id': current_route_id,
                'stop_id': stop_update.stop_id,
                'arrival_time': stop_update.arrival.time,
                'departure_time': stop_update.departure.time,
                'delay': stop_update.arrival.delay,
            }
            for stop_update in update.stop_time_update
        )

    return records

def process_gtfsrt_file(file_path):
    """Parse a GTFS-realtime protobuf file into a ``FeedMessage``.

    Args:
        file_path: Path to a binary GTFS-realtime file.

    Returns:
        The parsed ``gtfs_realtime_pb2.FeedMessage``, or ``None`` when the
        file cannot be read or decoded.
    """
    import logging

    feed = gtfs_realtime_pb2.FeedMessage()

    try:
        with open(file_path, 'rb') as f:
            content = f.read()
        feed.ParseFromString(content)
    except Exception as e:
        # Deliberately best-effort: a broken file must not stop the batch,
        # but the failure is reported instead of being swallowed silently.
        logging.getLogger(__name__).warning(
            "Skipping GTFS-realtime file %s: %s", file_path, e
        )
        return None

    return feed

# Paths of every static GTFS archive (.zip) and GTFS-realtime file (.gtfsrt)
gtfs_files = []
gtfsrt_files = []

# Walk the whole folder tree and sort each file into a list by its extension
for folder, _subfolders, filenames in os.walk(shortcut_path):
    for filename in filenames:
        full_path = os.path.join(folder, filename)
        if filename.endswith('.zip'):
            # Static GTFS archive
            gtfs_files.append(full_path)
        elif filename.endswith('.gtfsrt'):
            # GTFS-realtime file
            gtfsrt_files.append(full_path)




def merge_trip_updates_and_schedule(trip_updates, stop_times_df, trips_df):
    """Attach the scheduled arrival time to each realtime trip update.

    Args:
        trip_updates: List of dicts with at least the keys ``trip_id``,
            ``route_id``, ``stop_id``, ``arrival_time`` and ``delay``.
        stop_times_df: DataFrame with ``trip_id``, ``stop_id`` and
            ``arrival_time`` columns.
        trips_df: Unused; kept so existing callers keep working.

    Returns:
        A list of dicts with the keys ``trip_id``, ``route_id``, ``stop_id``,
        ``scheduled_arrival_time``, ``actual_arrival_time`` and ``delay``.
        Updates whose (trip_id, stop_id) pair is absent from ``stop_times_df``
        are dropped.
    """
    if not trip_updates:
        return []

    # Index the schedule once instead of scanning the whole table for every
    # update. setdefault keeps the first matching row, as the row-by-row
    # lookup it replaces did.
    scheduled_arrivals = {}
    schedule_rows = zip(
        stop_times_df['trip_id'].values,
        stop_times_df['stop_id'].values,
        stop_times_df['arrival_time'].values,
    )
    for trip_id, stop_id, arrival_time in schedule_rows:
        scheduled_arrivals.setdefault((trip_id, stop_id), arrival_time)

    trip_updates_with_schedule = []
    for update in trip_updates:
        key = (update['trip_id'], update['stop_id'])
        if key not in scheduled_arrivals:
            continue

        trip_updates_with_schedule.append({
            'trip_id': update['trip_id'],
            'route_id': update['route_id'],
            'stop_id': update['stop_id'],
            'scheduled_arrival_time': scheduled_arrivals[key],
            'actual_arrival_time': update['arrival_time'],
            'delay': update['delay'],
        })

    return trip_updates_with_schedule

def _gtfs_time_to_seconds(gtfs_time):
    """Convert a GTFS ``HH:MM:SS`` string to seconds since service-day start.

    Hours may be 24 or more (trips running past midnight), which
    ``datetime.strptime`` with ``%H`` would reject.
    """
    hours, minutes, seconds = (
        int(part) for part in str(gtfs_time).strip().split(':')
    )
    return hours * 3600 + minutes * 60 + seconds


def estimate_train_positions(trip_updates_with_schedule, stop_times_df, stops_df):
    """Estimate a position for each trip update by linear interpolation.

    For every update, the scheduled stops of the same trip that come just
    before and just after the update's scheduled arrival time are looked up,
    and the position is interpolated between their coordinates according to
    where ``scheduled arrival + delay`` falls between their arrival times.

    Args:
        trip_updates_with_schedule: List of dicts with the keys ``trip_id``,
            ``route_id``, ``stop_id``, ``scheduled_arrival_time`` (a GTFS
            ``HH:MM:SS`` string) and ``delay`` (seconds).
        stop_times_df: DataFrame with ``trip_id``, ``stop_id`` and
            ``arrival_time`` (GTFS ``HH:MM:SS`` strings) columns.
        stops_df: DataFrame with ``stop_id``, ``stop_lat`` and ``stop_lon``
            columns.

    Returns:
        A list of dicts with the keys ``trip_id``, ``route_id``, ``stop_id``,
        ``latitude`` and ``longitude``. Updates that have no earlier or no
        later scheduled stop in their trip, or whose neighbouring stops have
        no coordinates, are skipped.

    Raises:
        ValueError: If a time string is not of the form ``HH:MM:SS``.
    """
    train_positions = []
    if not trip_updates_with_schedule:
        return train_positions

    # First occurrence wins if a stop_id is listed more than once
    stop_coordinates = {}
    stop_rows = zip(stops_df['stop_id'], stops_df['stop_lat'], stops_df['stop_lon'])
    for known_stop_id, lat, lon in stop_rows:
        stop_coordinates.setdefault(known_stop_id, (lat, lon))

    # trip_id -> [(arrival_seconds, stop_id), ...] sorted by arrival time,
    # built lazily so each trip's schedule is parsed at most once
    schedules = {}

    for update in trip_updates_with_schedule:
        trip_id = update['trip_id']
        scheduled_seconds = _gtfs_time_to_seconds(update['scheduled_arrival_time'])
        actual_seconds = scheduled_seconds + update['delay']

        if trip_id not in schedules:
            trip_rows = stop_times_df[stop_times_df['trip_id'] == trip_id]
            timed_stops = [
                (_gtfs_time_to_seconds(arrival), scheduled_stop_id)
                for arrival, scheduled_stop_id in zip(
                    trip_rows['arrival_time'], trip_rows['stop_id']
                )
                if pd.notna(arrival)  # rows without an arrival time cannot be ordered
            ]
            timed_stops.sort(key=lambda entry: entry[0])
            schedules[trip_id] = timed_stops

        schedule = schedules[trip_id]
        earlier = [entry for entry in schedule if entry[0] < scheduled_seconds]
        later = [entry for entry in schedule if entry[0] > scheduled_seconds]
        if not earlier or not later:
            # First or last stop of the trip: nothing to interpolate between
            continue

        previous_seconds, previous_stop_id = earlier[-1]
        next_seconds, next_stop_id = later[0]
        if previous_stop_id not in stop_coordinates or next_stop_id not in stop_coordinates:
            continue

        previous_lat, previous_lon = stop_coordinates[previous_stop_id]
        next_lat, next_lon = stop_coordinates[next_stop_id]

        # previous < scheduled < next strictly, so the duration is never zero
        progress = (actual_seconds - previous_seconds) / (next_seconds - previous_seconds)
        # Keep the estimate on the segment between the two stops
        progress = min(max(progress, 0.0), 1.0)

        train_positions.append({
            'trip_id': trip_id,
            'route_id': update['route_id'],
            'stop_id': update['stop_id'],
            'latitude': float(previous_lat + (next_lat - previous_lat) * progress),
            'longitude': float(previous_lon + (next_lon - previous_lon) * progress),
        })

    return train_positions

# Load the static GTFS tables. Every iteration rebinds the same four names,
# so only the tables of the last archive in gtfs_files remain afterwards.
# NOTE(review): if gtfs_files is empty these names are never bound and the
# realtime loop below fails with NameError.
for gtfs_file in gtfs_files:
    stops_df, stop_times_df, trips_df, routes_df = read_gtfs_static_file(gtfs_file)
    # The DataFrames are available here for further analysis and processing

# Process every GTFS-realtime file; files that could not be parsed are skipped.
# train_positions is overwritten on each iteration, so only the positions from
# the last successfully parsed file are kept for the plotting code further down.
for gtfsrt_file in gtfsrt_files:
    feed_realtime = process_gtfsrt_file(gtfsrt_file)
    if feed_realtime is not None:
        trip_updates = extract_trip_updates(feed_realtime)
        trip_updates_with_schedule = merge_trip_updates_and_schedule(trip_updates, stop_times_df, trips_df)
        train_positions = estimate_train_positions(trip_updates_with_schedule, stop_times_df, stops_df)

# Path to the belgium-latest.osm.pbf OpenStreetMap extract
osm_file_path = '/content/drive/MyDrive/OSM/belgium-latest.osm.pbf'

# Predicate telling whether an OSM element is tagged as a railway
def is_railway(element):
    """Return True when ``element.tags`` contains a ``'railway'`` key."""
    element_tags = element.tags
    return 'railway' in element_tags

def extract_railways(osm_file_path):
    """Collect every railway-tagged way and relation of an OSM file.

    Args:
        osm_file_path: Path to an OSM file readable by ``osmread.parse_file``.

    Returns:
        A list of the ``Way`` and ``Relation`` entities for which
        ``is_railway`` is true, in file order.
    """
    # Imported locally: the file's top-level imports never bring osmread,
    # Way or Relation into scope, so the names were undefined at call time.
    from osmread import Relation, Way, parse_file

    railways = []
    src = parse_file(osm_file_path)
    try:
        for entity in src:
            if isinstance(entity, (Way, Relation)) and is_railway(entity):
                railways.append(entity)
    finally:
        # Release the parser even when iteration fails part-way through
        src.close()

    return railways

# Railway extraction itself is invoked further below, after the helper definitions


def railways_to_geodataframe(railways, nodes):
    """Convert railway ways into a GeoDataFrame of line geometries.

    Args:
        railways: Iterable of OSM entities; only ``Way`` instances are
            converted, anything else is ignored.
        nodes: Mapping from node id to a coordinate pair, used to resolve
            each way's node references. Presumably ``(lon, lat)`` tuples as
            built by ``extract_nodes`` -- verify against the caller.

    Returns:
        A GeoDataFrame in EPSG:4326 with ``id``, ``tags`` and ``geometry``
        columns, one row per way that has at least two resolvable nodes.
    """
    # Imported locally: neither Way nor LineString is imported at file level,
    # so the names were undefined at call time.
    from osmread import Way
    from shapely.geometry import LineString

    railway_data = []

    for railway in railways:
        if not isinstance(railway, Way):
            continue

        # Node references missing from the mapping are dropped silently
        coords = [nodes[node_id] for node_id in railway.nodes if node_id in nodes]
        if len(coords) < 2:
            # A line needs at least two points
            continue

        railway_data.append({
            'id': railway.id,
            'tags': railway.tags,
            'geometry': LineString(coords),
        })

    railways_gdf = gpd.GeoDataFrame(railway_data, crs="EPSG:4326")
    return railways_gdf

def train_positions_to_geodataframe(train_positions):
    """Turn estimated train positions into a point GeoDataFrame.

    Args:
        train_positions: Records convertible to a DataFrame that exposes
            ``longitude`` and ``latitude`` columns.

    Returns:
        A GeoDataFrame in EPSG:4326 whose geometry is built from the
        longitude/latitude columns. The column index is printed as a side
        effect.
    """
    positions_frame = pd.DataFrame(train_positions)
    print(positions_frame.columns)

    point_geometries = gpd.points_from_xy(
        positions_frame.longitude,
        positions_frame.latitude,
    )
    return gpd.GeoDataFrame(
        positions_frame,
        geometry=point_geometries,
        crs="EPSG:4326",
    )
# Load the OSM nodes needed to convert way node references into coordinates


def extract_nodes(osm_file_path):
    """Build a node-id to coordinate lookup from an OSM file.

    Args:
        osm_file_path: Path to an OSM file readable by ``osmread.parse_file``.

    Returns:
        A dict mapping each node id to its ``(lon, lat)`` tuple.
    """
    # Imported locally: osmread is never imported at file level, so the
    # name was undefined at call time.
    from osmread import Node, parse_file

    return {
        entity.id: (entity.lon, entity.lat)
        for entity in parse_file(osm_file_path)
        if isinstance(entity, Node)
    }


def stops_to_geodataframe(stops_df):
    """Turn the GTFS stops table into a point GeoDataFrame.

    Args:
        stops_df: DataFrame exposing ``stop_lon`` and ``stop_lat`` columns.

    Returns:
        A GeoDataFrame in EPSG:4326 whose geometry is built from the
        stop_lon/stop_lat columns.
    """
    stop_points = gpd.points_from_xy(stops_df.stop_lon, stops_df.stop_lat)
    return gpd.GeoDataFrame(stops_df, geometry=stop_points, crs="EPSG:4326")
# Build the GeoDataFrames for the map: OSM railways, GTFS stops and train positions.
# NOTE: the OSM file is parsed twice, once for the nodes and once for the railways.
nodes = extract_nodes(osm_file_path)
railways = extract_railways(osm_file_path)
railways_gdf = railways_to_geodataframe(railways, nodes)
stops_gdf = stops_to_geodataframe(stops_df)
train_positions_gdf = train_positions_to_geodataframe(train_positions)


# Step 6: draw the railways, the stops and the train positions on one map
import matplotlib.pyplot as plt

# All three layers share the same axes so they overlay each other
fig, ax = plt.subplots(figsize=(12, 12))
railways_gdf.plot(ax=ax, color='gray', linewidth=1)
stops_gdf.plot(ax=ax, color='red', markersize=10)
train_positions_gdf.plot(ax=ax, color='blue', markersize=20)
ax.set_title("Trains en mouvement sur les rails en Belgique")
plt.show()



Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
