In [13]:
import pandas as pd
import numpy as np
import plotly.graph_objects as go
from keplergl import KeplerGl
from scipy.interpolate import griddata
import pandas as pd
from keplergl import KeplerGl

In [14]:
# Read the train-ride results table from the working directory.
# NOTE(review): the preview shows a leading 'Unnamed: 0' column — presumably
# an index that was written out with the CSV; confirm, and consider
# `index_col=0` if nothing downstream relies on that column.
rides_csv_path = './DBtrainrides_final_result.csv'
train_ride_df = pd.read_csv(rides_csv_path)

# Preview the first rows to sanity-check columns and values
train_ride_df.head()

Unnamed: 0,ID_Base,ID_Timestamp,stop_number,IBNR,long,lat,arrival_plan,departure_plan,arrival_delay_m,transformed_info_message,prev_arrival_delay_m,prev_departure_delay_m,weighted_avg_prev_delay,max_station_number,station_progress
0,-1001326572688500578,2407082041,2,8011118.0,13.375988,52.509379,2024-07-08 20:44:00,2024-07-08 20:45:00,0.0,No message,0.0,0.0,0.0,7,0.285714
1,-1001326572688500578,2407082041,3,8011160.0,9.095851,48.849792,,,,No message,0.0,0.0,0.0,7,0.428571
2,-1001326572688500578,2407082041,4,8011167.0,13.299437,52.530276,2024-07-08 20:55:00,2024-07-08 20:56:00,0.0,No message,0.0,0.0,0.0,7,0.571429
3,-1001326572688500578,2407082041,5,8010404.0,13.196898,52.534648,2024-07-08 21:00:00,2024-07-08 21:03:00,2.0,No message,0.0,0.0,0.0,7,0.714286
4,-1001326572688500578,2407082041,6,8080040.0,13.128917,52.549396,2024-07-08 21:06:00,2024-07-08 21:07:00,1.0,No message,2.0,0.0,0.666667,7,0.857143


In [15]:
# Build the dataset used by every plot below: one row per stop that has
# coordinates and a recorded, non-zero arrival delay.
delay_columns = ['long', 'lat', 'arrival_delay_m']

filtered_df = (
    train_ride_df[delay_columns]
    # Rows missing a coordinate or a delay cannot be placed on a map
    .dropna()
    # Keep only stops that actually deviated from schedule. Because zeros are
    # removed here, no later zero -> 0.1 substitution is needed (the previous
    # replace(0, 0.1) could never match anything).
    .loc[lambda df: df['arrival_delay_m'] != 0]
    # Independent copy, so later column assignments never hit a view of
    # train_ride_df (avoids SettingWithCopyWarning)
    .copy()
)

# Save filtered data to a CSV for easier import into Kepler
filtered_df.to_csv('./filtered_data.csv', index=False)

In [16]:
# Contour plot of arrival delay interpolated over longitude/latitude.

# Per-observation coordinate and delay columns
x = filtered_df['long']
y = filtered_df['lat']
z = filtered_df['arrival_delay_m']

# The same coordinate occurs once per recorded stop, each time with a
# different delay. Scattered-data interpolation triangulates distinct points
# only, so feeding duplicates in would make the surface depend on whichever
# observation happens to be kept. Average to one value per coordinate first.
station_delay = (
    filtered_df
    .groupby(['long', 'lat'], as_index=False)['arrival_delay_m']
    .mean()
)

# Regular grid spanning the bounding box of the observations
grid_resolution = 300  # points per axis; higher = finer contours, slower
x_grid, y_grid = np.meshgrid(
    np.linspace(x.min(), x.max(), grid_resolution),
    np.linspace(y.min(), y.max(), grid_resolution),
)

# Interpolate the per-coordinate mean delay onto the grid.
# Grid nodes outside the convex hull of the input points come back as NaN.
z_grid = griddata(
    (station_delay['long'].to_numpy(), station_delay['lat'].to_numpy()),
    station_delay['arrival_delay_m'].to_numpy(),
    (x_grid, y_grid),
    method='cubic',
)

# Clip the z values for better readability: cubic interpolation can overshoot
# the data, and a few extreme delays would otherwise dominate the colour scale.
z_min, z_max = 0, 60  # Displayed range of arrival delays (minutes)
z_grid = np.clip(z_grid, z_min, z_max)

# Create the contour plot using Plotly
fig = go.Figure()

fig.add_trace(go.Contour(
    z=z_grid,
    x=x_grid[0],       # grid longitudes (first row of the meshgrid)
    y=y_grid[:, 0],    # grid latitudes (first column of the meshgrid)
    colorscale='Turbo',
    colorbar=dict(title="Arrival Delay (minutes)", tickfont=dict(size=12)),
    contours=dict(
        start=z_min,
        end=z_max,
        size=5  # Contour interval
    ),
    line_smoothing=1.0  # Smooth contour lines
))

# Enhance layout and labels.
# Axis title fonts use the nested title=dict(text=..., font=...) form; the
# older `titlefont` shorthand is deprecated and rejected by recent Plotly
# releases.
fig.update_layout(
    title=dict(
        text='Arrival Delays Overlayed on Map',
        font=dict(size=20, family='Arial Bold'),
        x=0.5
    ),
    xaxis=dict(title=dict(text='Longitude', font=dict(size=16))),
    yaxis=dict(title=dict(text='Latitude', font=dict(size=16))),
    template='plotly_white',  # Light background for readability
)

# Show the plot
fig.show()

In [17]:
# Plain Kepler.gl view of the filtered delay points, with no custom layer
# configuration applied.
raw_dataset_name = 'Raw Arrival Delays'
raw_map_file = 'raw_arrival_delays_map.html'

map_ = KeplerGl(height=800)
map_.add_data(data=filtered_df, name=raw_dataset_name)

# Write a standalone HTML copy so the map can be opened outside the notebook
map_.save_to_html(file_name=raw_map_file)

# Last expression of the cell: renders the map widget inline
map_

User Guide: https://docs.kepler.gl/docs/keplergl-jupyter
Map saved to raw_arrival_delays_map.html!


KeplerGl(data={'Raw Arrival Delays': {'index': [3, 4, 5, 9, 10, 11, 15, 16, 17, 26, 40, 41, 45, 49, 53, 57, 61…

In [18]:
# Kepler.gl map of arrival delays with a heatmap layer and a cluster layer.

# Single source of truth for the dataset id: it must be identical in
# add_data(), in every layer's "dataId", and in the tooltip mapping.
dataset_name = 'Arrival Delays Data'

# Fields we would like in the tooltip. Only columns that actually exist in
# filtered_df are kept, so the config never references a field the dataset
# does not carry.
wanted_tooltip_fields = ('ID_Base', 'stop_number', 'arrival_delay_m')
tooltip_fields = [
    {"name": column, "format": None}
    for column in wanted_tooltip_fields
    if column in filtered_df.columns
]

# Initialize Kepler map
map_ = KeplerGl(height=800)

# Add the filtered data
map_.add_data(data=filtered_df, name=dataset_name)

# Define the configuration with Heatmap and Cluster layers
config = {
    "version": "v1",
    "config": {
        "visState": {
            "layers": [
                {
                    "id": "heatmap_layer",
                    "type": "heatmap",
                    "config": {
                        "dataId": dataset_name,
                        "label": "Arrival Delays Heatmap",
                        "color": [255, 203, 153],
                        "columns": {"lat": "lat", "lng": "long"},
                        "isVisible": True,
                        "visConfig": {
                            "opacity": 0.8,
                            "intensity": 1,
                            "radius": 20,
                            "threshold": 0.05,
                            "colorRange": {
                                "name": "Global Warming",
                                "type": "sequential",
                                "category": "Uber",
                                "colors": [
                                    "#5A1846",
                                    "#900C3F",
                                    "#C70039",
                                    "#E3611C",
                                    "#F1920E",
                                    "#FFC300"
                                ]
                            },
                            "coverage": 1
                        }
                    },
                    "visualChannels": {
                        # Weight each point by its delay in minutes
                        "weightField": {"name": "arrival_delay_m", "type": "real"},
                        "weightScale": "linear"
                    }
                },
                {
                    "id": "cluster_layer",
                    "type": "cluster",
                    "config": {
                        "dataId": dataset_name,
                        "label": "Arrival Delays Clusters",
                        "color": [18, 147, 154],
                        "columns": {"lat": "lat", "lng": "long"},
                        "isVisible": True,
                        "visConfig": {
                            "opacity": 0.8,
                            "clusterRadius": 40,
                            "colorRange": {
                                "name": "ColorBrewer RdYlBu-6",
                                "type": "diverging",
                                "category": "ColorBrewer",
                                "colors": [
                                    "#D73027",
                                    "#FC8D59",
                                    "#FEE090",
                                    "#E0F3F8",
                                    "#91BFDB",
                                    "#4575B4"
                                ]
                            }
                        }
                    },
                    "visualChannels": {
                        "colorField": {"name": "arrival_delay_m", "type": "real"},
                        "colorScale": "quantile",
                        "sizeField": {"name": "arrival_delay_m", "type": "real"},
                        "sizeScale": "linear"
                    }
                }
            ],
            "interactionConfig": {
                "tooltip": {
                    "fieldsToShow": {
                        dataset_name: tooltip_fields
                    },
                    "enabled": True
                }
            }
        },
        "mapState": {
            # Centre the initial view on the mean coordinate of the data
            "latitude": filtered_df['lat'].mean(),
            "longitude": filtered_df['long'].mean(),
            "zoom": 6,
            "bearing": 0,
            "pitch": 0
        },
        "mapStyle": {
            "styleType": "dark",
            "topLayerGroups": {},
            "visibleLayerGroups": {
                "label": True,
                "road": True,
                "border": False,
                "building": True,
                "water": True,
                "land": True,
                "3d building": False
            }
        }
    }
}

# Assign the configuration to the map
map_.config = config

# Save the map to an HTML file
map_.save_to_html(file_name='arrival_delays_map.html')

# Display the map (uncomment if using a Jupyter notebook)
# map_

User Guide: https://docs.kepler.gl/docs/keplergl-jupyter
Map saved to arrival_delays_map.html!
