In [6]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from shapely.geometry import LineString
from mpl_toolkits.mplot3d import Axes3D
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt # plotting
import numpy as np # linear algebra
import os # accessing directory structure
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)


# Load data
shapes = pd.read_csv('gtfs/shapes.txt')
trips = pd.read_csv('gtfs/trips.txt')
routes = pd.read_csv('gtfs/routes.txt')
stop_times = pd.read_csv('gtfs/stop_times.txt')
stops = pd.read_csv('gtfs/stops.txt')

# Convert 24h+ times
def convert_time(t):
    hours, mins, secs = map(int, t.split(':'))
    return f"{hours%24:02d}:{mins:02d}:{secs:02d}"

stop_times['arrival_time'] = stop_times['arrival_time'].apply(convert_time)
stop_times['hour'] = pd.to_datetime(stop_times['arrival_time'], format='%H:%M:%S').dt.hour

# Create route geometry# Create route geometry
def create_line(group):
    group = group.sort_values('shape_pt_sequence')
    return LineString(zip(group['shape_pt_lon'], group['shape_pt_lat']))

route_geoms = shapes.groupby('shape_id').apply(create_line).reset_index(name='geometry')

# Merge with trip data
route_data = trips.merge(routes, on='route_id')\
                 .merge(route_geoms, on='shape_id')

# Group by sortable columns and aggregate trip counts
route_data = route_data.groupby(['shape_id', 'route_short_name', 'direction_id']).agg({
    'geometry': 'first',  # Keep the first geometry for each group
    'trip_id': 'count'    # Count trips
}).reset_index()

route_data.rename(columns={'trip_id': 'trip_count'}, inplace=True)





In [32]:
# from sklearn.preprocessing import MinMaxScaler
# import plotly.colors

# # Normalize trip counts for color mapping
# scaler = MinMaxScaler()
# route_data['trip_norm'] = scaler.fit_transform(route_data[['trip_count']])

# # Create color gradient from yellow to red
# colors = plotly.colors.sample_colorscale('YlOrRd', route_data['trip_norm'].tolist())

# # Create base figure
# fig = go.Figure()

# # Add route paths with color intensity
# for idx, row in route_data.iterrows():
#     coords = list(row['geometry'].coords)
#     fig.add_trace(go.Scattermapbox(
#         lon=[c[0] for c in coords],
#         lat=[c[1] for c in coords],
#         mode='lines',
#         line=dict(width=2, color=colors[idx]),
#         name=f"{row['route_short_name']}",
#         hoverinfo='text',
#         hovertext=f"""
#         Route: {row['route_short_name']}<br>
#         Direction: {row['direction_id']}<br>
#         Total Trips: {row['trip_count']}
#         """
#     ))

# # Add stops
# fig.add_trace(go.Scattermapbox(
#     lat=stops['stop_lat'],
#     lon=stops['stop_lon'],
#     mode='markers',
#     marker=dict(size=4, color='rgba(200, 200, 200, 0.6)'),
#     hovertext=stops['stop_name'],
#     name='Bus Stops'
# ))

# # Add color bar using dummy trace
# fig.add_trace(go.Scattermapbox(
#     lat=[None],
#     lon=[None],
#     mode='markers',
#     marker=dict(
#         colorscale='YlOrRd',
#         cmin=route_data['trip_count'].min(),
#         cmax=route_data['trip_count'].max(),
#         colorbar=dict(
#             title='Daily Trips',
#             thickness=20,
#             tickvals=[
#                 route_data['trip_count'].min(),
#                 route_data['trip_count'].max()
#             ],
#             ticktext=['Low Traffic', 'High Traffic']
#         ),
#         showscale=True
#     ),
#     hoverinfo='none'
# ))

# # Final layout configuration
# fig.update_layout(
#     mapbox_style="carto-darkmatter",
#     margin={"r":0,"t":0,"l":0,"b":0},
#     legend=dict(
#         title='Bus Routes',
#         yanchor="top",
#         y=0.99,
#         xanchor="left",
#         x=0.01
#     ),
#     mapbox=dict(
#         center=dict(lat=50.06, lon=19.94),
#         zoom=11
#     )
# )

# fig.show()

In [33]:
# # Base map with all routes
# fig = px.line_mapbox(lat=[50.06], lon=[19.94], zoom=11, height=800)
# fig.update_traces(line_width=0)  # Hide dummy trace

# # Add route paths with traffic intensity
# for idx, row in route_data.iterrows():
#     coords = list(row['geometry'].coords)
#     fig.add_trace(go.Scattermapbox(
#         lon=[c[0] for c in coords],
#         lat=[c[1] for c in coords],
#         mode='lines',
#         line=dict(width=2 + row['trip_count']/5, 
#                  color='red' if row['direction_id'] == 0 else 'blue'),
#         name=f"{row['route_short_name']} - {'Dir1' if row['direction_id'] else 'Dir0'}",
#         hoverinfo='text',
#         hovertext=f"Route {row['route_short_name']}<br>Trips: {row['trip_count']}"
#     ))

# # Add stops
# fig.add_trace(go.Scattermapbox(
#     lat=stops['stop_lat'],
#     lon=stops['stop_lon'],
#     mode='markers',
#     marker=dict(size=4, color='grey'),
#     hovertext=stops['stop_name'],
#     name='Stops'
# ))

# fig.update_layout(
#     mapbox_style="carto-darkmatter",
#     margin={"r":0,"t":0,"l":0,"b":0},
#     legend=dict(yanchor="top", y=0.99, xanchor="left", x=0.01)
# )

# fig.show()

In [10]:
# Hourly traffic per route
hourly_traffic = stop_times.merge(trips, on='trip_id')\
                           .groupby(['shape_id', 'hour'])\
                           .size().reset_index(name='trips')

# Create animation frames
frames = []
for hour in sorted(hourly_traffic['hour'].unique()):
    frame_data = hourly_traffic[hourly_traffic['hour'] == hour]
    
    # Map traffic data to route_data and fill NaN with 0
    traffic_counts = route_data['shape_id'].map(frame_data.set_index('shape_id')['trips']).fillna(0)
    
    frame = go.Frame(data=[
        go.Scattermapbox(
            lon=[c[0] for c in geom.coords],
            lat=[c[1] for c in geom.coords],
            line=dict(width=2 + count / 5),  # Use traffic count for line width
            mode='lines'
        ) for geom, count in zip(route_data['geometry'], traffic_counts)
    ], name=str(hour))
    
    frames.append(frame)

# Create animated figure
fig = go.Figure(
    data=[],
    frames=frames,
    layout=go.Layout(
        mapbox=dict(
            style="carto-darkmatter",
            zoom=11,
            center=dict(lat=0, lon=0)  # Adjust center coordinates as needed
        ),
        updatemenus=[dict(
            type="buttons",
            buttons=[dict(label="Play",
                          method="animate",
                          args=[None])]
        )]
    )
)

fig.show()


*scattermapbox* is deprecated! Use *scattermap* instead. Learn more at: https://plotly.com/python/mapbox-to-maplibre/


*scattermapbox* is deprecated! Use *scattermap* instead. Learn more at: https://plotly.com/python/mapbox-to-maplibre/


*scattermapbox* is deprecated! Use *scattermap* instead. Learn more at: https://plotly.com/python/mapbox-to-maplibre/


*scattermapbox* is deprecated! Use *scattermap* instead. Learn more at: https://plotly.com/python/mapbox-to-maplibre/


*scattermapbox* is deprecated! Use *scattermap* instead. Learn more at: https://plotly.com/python/mapbox-to-maplibre/


*scattermapbox* is deprecated! Use *scattermap* instead. Learn more at: https://plotly.com/python/mapbox-to-maplibre/


*scattermapbox* is deprecated! Use *scattermap* instead. Learn more at: https://plotly.com/python/mapbox-to-maplibre/


*scattermapbox* is deprecated! Use *scattermap* instead. Learn more at: https://plotly.com/python/mapbox-to-maplibre/


*scattermapbox* is deprecated! Use *sca

KeyboardInterrupt: 

In [7]:
from sklearn.preprocessing import MinMaxScaler
import numpy as np
import plotly.graph_objects as go
import plotly.colors

# Initialize scaler for color mapping
scaler = MinMaxScaler()
scaler.fit(np.array([0, route_data['trip_count'].max()]).reshape(-1, 1))  # Fit scaler with range [0, max trips]

# Create base figure with initial data
fig = go.Figure(
    data=[
        go.Scattermapbox(
            lon=[c[0] for c in route_data.iloc[0]['geometry'].coords],
            lat=[c[1] for c in route_data.iloc[0]['geometry'].coords],
            mode='lines',
            line=dict(width=2, color='yellow'),
            name='Temp'
        )
    ],
    frames=[],
    layout=go.Layout(
        mapbox=dict(
            style="carto-darkmatter",
            zoom=11,
            center=dict(lat=50.06, lon=19.94)  # Kraków coordinates
        )
    )
)

# Create animation frames with proper color mapping
frames = []
for hour in sorted(hourly_traffic['hour'].unique()):
    frame_data = hourly_traffic[hourly_traffic['hour'] == hour]
    
    # Merge with route data and calculate colors
    merged = route_data.merge(frame_data, on='shape_id', how='left')
    merged['trips'] = merged['trips'].fillna(0).astype(float)  # Ensure numeric values
    merged['color_intensity'] = scaler.transform(merged[['trips']])
    colors = plotly.colors.sample_colorscale('YlOrRd', merged['color_intensity'].flatten())

    frame = go.Frame(
        data=[
            go.Scattermapbox(
                lon=[c[0] for c in row['geometry'].coords],
                lat=[c[1] for c in row['geometry'].coords],
                mode='lines',
                line=dict(width=2, color=colors[idx]),
                name=f"{row['route_short_name']}"
            ) for idx, row in merged.iterrows()
        ],
        name=str(hour)
    )
    frames.append(frame)

# Update figure with proper configuration
fig.frames = frames
fig.update_layout(
    margin={"r": 0, "t": 0, "l": 0, "b": 0},
    updatemenus=[dict(
        type="buttons",
        buttons=[dict(label="▶️ Play",
                      method="animate",
                      args=[None, {"frame": {"duration": 500}}])]
    )],
    showlegend=False
)

# Add color bar
fig.add_trace(go.Scattermapbox(
    lat=[None],
    lon=[None],
    mode='markers',
    marker=dict(
        colorscale='YlOrRd',
        cmin=0,
        cmax=route_data['trip_count'].max(),
        colorbar=dict(title='Hourly Trips'),
    ),
    showscale=True
))

fig.show()


*scattermapbox* is deprecated! Use *scattermap* instead. Learn more at: https://plotly.com/python/mapbox-to-maplibre/



NameError: name 'hourly_traffic' is not defined

In [8]:
from dash import Dash, html, Input, Output, callback, dcc
import dash_leaflet as dl

app = Dash(__name__)

app.layout = html.Div([
    dl.Map(children=[
        dl.TileLayer(),
        dl.LayerGroup(id="route-layer")
    ], style={'width': '100%', 'height': '90vh'}),
    html.Div([
        html.Label("Disable Routes:"),
        dcc.Dropdown(
            id='route-selector',
            options=[{'label': f"Route {r}", 'value': r} 
                    for r in routes['route_short_name'].unique()],
            multi=True
        )
    ])
])

@callback(
    Output('route-layer', 'children'),
    Input('route-selector', 'value')
)
def update_routes(disabled_routes):
    active_routes = route_data[~route_data['route_short_name'].isin(disabled_routes)]
    
    return [
        dl.Polyline(
            positions=[(lat, lon) for lon, lat in route.coords],
            color='red' if dir_id == 0 else 'blue',
            weight=2 + count/5
        ) for route, dir_id, count in zip(
            active_routes['geometry'],
            active_routes['direction_id'],
            active_routes['trip_count']
        )
    ]

app.run(mode='inline')

[2025-04-06 13:48:50,237] ERROR in app: Exception on /_dash-update-component [POST]
Traceback (most recent call last):
  File "/Users/howyoudoing/anaconda3/envs/datascience/lib/python3.10/site-packages/flask/app.py", line 1473, in wsgi_app
    response = self.full_dispatch_request()
  File "/Users/howyoudoing/anaconda3/envs/datascience/lib/python3.10/site-packages/flask/app.py", line 882, in full_dispatch_request
    rv = self.handle_user_exception(e)
  File "/Users/howyoudoing/anaconda3/envs/datascience/lib/python3.10/site-packages/flask/app.py", line 880, in full_dispatch_request
    rv = self.dispatch_request()
  File "/Users/howyoudoing/anaconda3/envs/datascience/lib/python3.10/site-packages/flask/app.py", line 865, in dispatch_request
    return self.ensure_sync(self.view_functions[rule.endpoint])(**view_args)  # type: ignore[no-any-return]
  File "/Users/howyoudoing/anaconda3/envs/datascience/lib/python3.10/site-packages/dash/dash.py", line 1405, in dispatch
    ctx.run(
  File "

In [9]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split

# Prepare data
hourly_features = hourly_traffic.merge(
    routes[['route_id', 'route_type']], left_on='shape_id', right_on='route_id'
)
X = hourly_features[['hour', 'route_type']]
y = hourly_features['trips']

# Train model
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
model = RandomForestRegressor(n_estimators=100)
model.fit(X_train, y_train)

# Predict function
def predict_traffic(route_type, hour):
    return model.predict([[hour, route_type]])[0]

NameError: name 'hourly_traffic' is not defined

1. Graph Construction with Centrality Calculation


In [None]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import networkx as nx

# Create a graph from stop connections
G = nx.DiGraph()
for _, row in stop_times.iterrows():
    G.add_edge(row['stop_id'], row['next_stop_id'])

# Calculate eigenvector centrality
centrality = nx.eigenvector_centrality_numpy(G)

# Add centrality to stops DataFrame
stops['centrality'] = stops['stop_id'].map(centrality)

# Ensure centrality values are normalized (optional)
stops['centrality'] = stops['centrality'] / stops['centrality'].max()
# Create base map
fig = make_subplots(rows=1, cols=1, specs=[[{'type': 'scattermapbox'}]])
fig.update_layout(mapbox_style="carto-darkmatter", height=800, margin={"r":0,"t":0,"l":0,"b":0})

# Add route geometries from shapes.txt
for shape_id, group in shapes.groupby('shape_id'):
    fig.add_trace(go.Scattermapbox(
        lon=group['shape_pt_lon'],
        lat=group['shape_pt_lat'],
        mode='lines',
        line=dict(width=1, color='rgba(100, 100, 100, 0.4)'),
        hoverinfo='none',
        showlegend=False
    ))

# Add bus stops with centrality coloring
fig.add_trace(go.Scattermapbox(
    lon=stops['stop_lon'],
    lat=stops['stop_lat'],
    mode='markers',
    marker=dict(
        size=8,
        color=stops['centrality'],
        colorscale='YlOrRd',
        showscale=True,
        colorbar=dict(title='Eigenvector Centrality')
    ),
    text=stops.apply(lambda x: f"{x['stop_name']}<br>Centrality: {x['centrality']:.4f}", axis=1),
    hoverinfo='text',
    name='Bus Stops'
))

# Add animated buses (simulated)
# Add animated buses (simulated)
bus_positions = trips.merge(stop_times, on='trip_id')\
                        .merge(stops, on='stop_id')  # Merge with stops_df to get stop_lon and stop_lat
bus_positions = bus_positions.sample(50)  # Randomly sample 50 bus positions

fig.add_trace(go.Scattermapbox(
    lon=bus_positions['stop_lon'],
    lat=bus_positions['stop_lat'],
    mode='markers+text',
    marker=dict(size=12, color='#00FF00', symbol='bus'),
    text='🚌',
    textposition='middle center',
    name='Active Buses'
))
fig.add_trace(go.Scattermapbox(
    lon=bus_positions['stop_lon'],
    lat=bus_positions['stop_lat'],
    mode='markers+text',
    marker=dict(size=12, color='#00FF00', symbol='bus'),
    text='🚌',
    textposition='middle center',
    name='Active Buses'
))

fig.update_layout(
    mapbox=dict(
        zoom=11,
        center=dict(lat=50.06, lon=19.94)
    )
)

fig.show()

KeyError: 'next_stop_id'

In [None]:
# Top 10 central nodes
top_nodes = stops.nlargest(10, 'centrality')[['stop_id', 'stop_name', 'centrality']]

# Route betweenness analysis
edge_betweenness = nx.edge_betweenness_centrality(G, weight='weight')
busiest_routes = pd.Series(edge_betweenness).nlargest(5)

print("Most Important Stops:")
print(top_nodes)
print("\nBusiest Route Segments:")
print(busiest_routes)

Most Important Stops:
               stop_id             stop_name  centrality
647     stop_432_61003      Rondo Matecznego    0.379496
661     stop_439_62001          Kamieńskiego    0.357972
2320  stop_2108_333803    Rondo Grunwaldzkie    0.314712
1714  stop_1200_280801              Ludwinów    0.262912
659     stop_438_61901  Kamieńskiego Wiadukt    0.255800
363     stop_240_32601          Konopnickiej    0.236389
1248   stop_892_125501               Bonarka    0.222272
681     stop_447_63009           Bieżanowska    0.221429
670     stop_444_62601                Makowa    0.184729
358     stop_237_31904               Jubilat    0.169533

Busiest Route Segments:
stop_118_15101    stop_3160_384002    0.141113
stop_2260_344101  stop_1362_303804    0.138197
stop_1215_286101  stop_1355_301202    0.132401
stop_1355_301202  stop_126_16101      0.130871
stop_126_16101    stop_118_15101      0.130866
dtype: float64
