In [3]:
import pandas as pd
import gtfs_kit as gk
import geopandas as gpd
import pyproj as pj
import matplotlib as mpl
import matplotlib.pyplot as plot
import folium
from folium import plugins

### Functions definition

In [5]:
def route_and_stop_visualization:
    ### We create a map with a view on the zone we are interested in
    map = folium.Map(location=[43.9941, 10.2301], tiles="OpenStreetMap", zoom_start=9)
    
    ### We Circle The Stops belonging to the route
    for point in stops.geometry:
        folium.CircleMarker((point.xy[1][0],point.xy[0][0]), color="blue", weight=0.5, opacity=1).add_to(map)
    
    ### The lists of shapes have duplicates because each shape describe the trip for both direction
    ### so for drawing purposes we want to select only one of the way. Then we draw the shapes
    shape_id_list_duplicates = shapes["shape_id"].to_list()
    shape_id_list = list(set(shape_id_list_duplicates))
    
    for shape_id in shape_id_list:
        shape_df_list = []
        shape_to_draw = shapes.loc[(shapes['shape_id'] == shape_id)]
        shape_to_draw.drop_duplicates(subset='shape_pt_sequence', keep="first",inplace=True)
        for point in shape_to_draw.geometry:
            shape_df_list.append((point.xy[1][0],point.xy[0][0]))
        folium.PolyLine(shape_df_list, color="red", weight=1.5, opacity=1).add_to(map)
    
    # We show the map
    map

SyntaxError: invalid syntax (603879272.py, line 1)

In [6]:
# Load the gtfs we generated with the various csv file created in the preprocessing
feed = gk.feed.read_feed('datasets/pisa_lucca_aulla_df.gtfs',dist_units="km")
routes = gk.routes.get_routes(feed)
stop_times = feed.get_stop_times()
trips = feed.get_trips()
stops = feed.get_stops()
shapes = feed.shapes
calendar_dates = feed.calendar_dates
calendar = feed.calendar

In [7]:
stops

Unnamed: 0,stop_id,stop_name,stop_lat,stop_lon,stop_code
0,S06404_1,Lucca,43.837397,10.506153,S06404
1,S06402_1,Ripafratta,43.821737,10.415844,S06402
2,S06400_1,S.Giuliano Terme,43.764855,10.436916,S06400
3,S06501_1,Pisa S.Rossore,43.723446,10.387754,S06501
4,S06500_1,Pisa C.Le,43.708231,10.398389,S06500
5,S06401_1,Rigoli,43.789761,10.41981,S06401
6,S06502_1,Navacchio,43.686018,10.487235,S06502
7,S06504_1,S.Frediano A S.,43.680915,10.516213,S06504
8,S06505_1,Cascina,43.674657,10.545433,S06505
9,S06506_1,Pontedera C.T.,43.662342,10.628906,S06506


### Initial Exploration

In [None]:
### Checking dimensions
print(f" Routes Dimension: {routes.shape}")
print('-' * 50)
print(f" Trips Dimension: {trips.shape}")
print('-' * 50)
print(f" Stops Dimension: {stops.shape}")
print('-' * 50)
print(f" Stop Times Dimension: {stop_times.shape}")
print('-' * 50)
print(f" Shapes Dimension: {shapes.shape}")
print('-' * 50)
print(f" Calenda Dates Dimension: {calendar_dates.shape}")
print('-' * 50)
print(f" Calend Dimension: {calendar.shape}")

In [None]:
### Look at the head of each table
print("Route Head of Dataframe")
print(routes.head())
print('-' * 50)
print("Trips Head of Dataframe")
print(trips.head())
print('-' * 50)
print("Stops Head of Dataframe")
print(stops.head())
print('-' * 50)
print("Stop Times Head of Dataframe")
print(stop_times.head())
print('-' * 50)
print("Shape Head of Dataframe")
print(shapes.head())
print('-' * 50)
print("Calendar Dates Head of Dataframe")
print(calendar_dates.head())
print('-' * 50)
print("Calendar Head of Dataframe")
print(calendar.head())

In [None]:
### Our dataframe contains geographical coordinates and we convert them 
### in GeoDataFrame for better visualization
shapes = gpd.GeoDataFrame(shapes, 
        geometry=gpd.points_from_xy(shapes.shape_pt_lon, shapes.shape_pt_lat)).set_crs(epsg=4326)

stops = gpd.GeoDataFrame(stops, 
        geometry=gpd.points_from_xy(stops.stop_lon, stops.stop_lat)).set_crs(epsg=4326)

In [None]:
### We plot the shape which will result as the route
rt_df = pd.merge(routes, trips, on=['route_id','route_id']).reset_index(drop=True)
rts_df = pd.merge(rt_df, shapes, on=['shape_id','shape_id']).reset_index(drop=True)
rts_df.plot.scatter(x="shape_pt_lon",y="shape_pt_lat")

In [None]:
### Previuos code show the route but knowing the route it was not correct so we check if 
### the data is not clear
map = folium.Map(location=[43.9941, 10.2301], tiles="OpenStreetMap", zoom_start=9)
map

In [None]:
shape_df_list = []
for point in shapes.geometry:
    shape_df_list.append((point.xy[1][0],point.xy[0][0]))
folium.PolyLine(shape_df_list, color="red", weight=0.5, opacity=1).add_to(map)
map

In [None]:
### There were inpurities and duplicates that cause problems in the dataset
### we also check if there are stops not in the route
for point in stops.geometry:
    folium.CircleMarker((point.xy[1][0],point.xy[0][0]), color="blue", weight=0.5, opacity=1).add_to(map)
map    

## Data cleaning

In [None]:
### We have to deal with stops not inside the predefined route Pisa-Lucca-Aulla
### and with duplicates in sequence of the shapes that will cause the problems of straight lines
### on the map

In [None]:
stops

In [None]:
stops_id_to_delete = ["S06725_1","S06506_1","S06505_1","S06504_1","S06502_1","S06351_1","S06350_1","S06040_1"]
for stops_to_delete in stops_id_to_delete:
    stops = stops[stops.stop_id != stops_to_delete]

In [None]:
trip_ids_to_delete = []
indexes_stops_to_delete = []
for ind in stop_times.index:
    for stops_to_delete in stops_id_to_delete:
        if stop_times["stop_id"][ind] == stops_to_delete:
            trip_ids_to_delete.append(stop_times["trip_id"][ind])
            indexes_stops_to_delete.append(ind)

### For safety reasons we delete rows from the dataframe after the complete for sequence
for ind_to_delete in indexes_stops_to_delete:
    stop_times = stop_times[stop_times.index != ind_to_delete]

In [None]:
shape_ids_to_delete = []

for ind in trips.index:
    for trip_to_delete in trip_ids_to_delete:
        if trips["trip_id"][ind] == trip_to_delete:
            shape_ids_to_delete.append(trips["shape_id"][ind])

### For safety reasons we delete rows from the dataframe after the complete for sequence
for trip_to_delete in trip_ids_to_delete:
    trips = trips[trips.trip_id != trip_to_delete]
    
## We do deletion for shapes too
for shape_to_delete in shape_ids_to_delete:
    shapes = shapes[shapes.shape_id != shape_to_delete]

In [None]:
### Previuos code show the route but knowing the route it was not correct so we check if 
### the data is not clear
map = folium.Map(location=[43.9941, 10.2301], tiles="OpenStreetMap", zoom_start=9)
map



In [None]:
shape_df_list = []
for point in shapes.geometry:
    shape_df_list.append((point.xy[1][0],point.xy[0][0]))
folium.PolyLine(shape_df_list, color="red", weight=0.5, opacity=1).add_to(map)
map

In [None]:
### There were inpurities and duplicates that cause problems in the dataset
### we also check if there are stops not in the route
for point in stops.geometry:
    folium.CircleMarker((point.xy[1][0],point.xy[0][0]), color="blue", weight=0.5, opacity=1).add_to(map)
map    

In [None]:
### We plot the shape which will result as the route
rt_df = pd.merge(routes, trips, on=['route_id','route_id']).reset_index(drop=True)
rts_df = pd.merge(rt_df, shapes, on=['shape_id','shape_id']).reset_index(drop=True)
rts_df.plot.scatter(x="shape_pt_lon",y="shape_pt_lat")

In [None]:


### Previuos code show the route but knowing the route it was not correct so we check if 
### the data is not clear
map = folium.Map(location=[43.9941, 10.2301], tiles="OpenStreetMap", zoom_start=9)

### There were inpurities and duplicates that cause problems in the dataset
### we also check if there are stops not in the route
for point in stops.geometry:
    folium.CircleMarker((point.xy[1][0],point.xy[0][0]), color="blue", weight=0.5, opacity=1).add_to(map) 

In [None]:
for shape_id in shape_id_list:
    shape_df_list = []
    shape_to_draw = shapes.loc[(shapes['shape_id'] == shape_id)]
    shape_to_draw.drop_duplicates(subset='shape_pt_sequence', keep="first",inplace=True)
    for point in shape_to_draw.geometry:
        shape_df_list.append((point.xy[1][0],point.xy[0][0]))
    folium.PolyLine(shape_df_list, color="red", weight=1.5, opacity=1).add_to(map)

In [None]:
map

In [None]:
def route_and_stop_visualization:
    ### We create a map with a view on the zone we are interested in
    map = folium.Map(location=[43.9941, 10.2301], tiles="OpenStreetMap", zoom_start=9)
    
    ### We Circle The Stops belonging to the route
    for point in stops.geometry:
        folium.CircleMarker((point.xy[1][0],point.xy[0][0]), color="blue", weight=0.5, opacity=1).add_to(map)
    
    ### The lists of shapes have duplicates because each shape describe the trip for both direction
    ### so for drawing purposes we want to select only one of the way. Then we draw the shapes
    shape_id_list_duplicates = shapes["shape_id"].to_list()
    shape_id_list = list(set(shape_id_list_duplicates))
    
    for shape_id in shape_id_list:
        shape_df_list = []
        shape_to_draw = shapes.loc[(shapes['shape_id'] == shape_id)]
        shape_to_draw.drop_duplicates(subset='shape_pt_sequence', keep="first",inplace=True)
        for point in shape_to_draw.geometry:
            shape_df_list.append((point.xy[1][0],point.xy[0][0]))
        folium.PolyLine(shape_df_list, color="red", weight=1.5, opacity=1).add_to(map)
    
    # We show the map
    map