In [1]:
#pip3 install geopandas
#pip3 install folium

import folium
import geopandas as gpd
import matplotlib.colors as mcolors
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

In [28]:
# Silence every warning for the rest of the session.
# NOTE(review): this is a blanket filter, so deprecation and pandas
# copy/view warnings raised by later cells are hidden as well.
import warnings

warnings.filterwarnings(action="ignore")

# 1. Load CSVs
#    ee       -> station entry/exit trip counts
#    stations -> station entrance locations (provides Train_Station, LAT, LONG)
entries_exits_csv = "TrainStationEntriesExits/train-station-entries-exits-data-may-2025.csv"
station_entrances_csv = "TrainStationEntranceLocations/stationentrances2020_v4.csv"

ee = pd.read_csv(entries_exits_csv)
stations = pd.read_csv(station_entrances_csv)


In [29]:
# Show the raw header, then normalise two column names: the source file has a
# trailing space in 'Station_Type ' and no underscore in 'MonthYear'.
print(ee.columns)
column_renames = {
    'MonthYear': 'Month_Year',
    'Station_Type ': 'Station_Type',
}
ee = ee.rename(columns=column_renames)
print(ee.columns)

Index(['MonthYear', 'Station', 'Station_Type ', 'Entry_Exit', 'Trip'], dtype='object')
Index(['Month_Year', 'Station', 'Station_Type', 'Entry_Exit', 'Trip'], dtype='object')


In [30]:
# 2. Clean and filter
# Stations excluded from the analysis.
# NOTE(review): presumably stations that are no longer served - confirm.
stations_to_ignore = ["Rosehill", "Camellia", "Rydalmere", "Dundas", "Telopea", "Carlingford"]

# Built as one chain of .loc/.assign so each step returns a fresh frame;
# the previous version assigned a column onto a filtered slice, which is the
# pattern pandas reports as SettingWithCopyWarning.
is_rail_station = ee["Station_Type"].isin(["train", "Metro Shared"])
ee = (
    ee.loc[is_rail_station]
    # "Central Station" -> "Central", to match the naming in the locations file
    .assign(Train_Station=lambda df: df["Station"].str.replace(" Station", "", regex=False).str.strip())
    .loc[lambda df: ~df["Train_Station"].isin(stations_to_ignore)]
    # Counts below 50 are published as the text "Less than 50"; treat them as 50
    .assign(TripNumber=lambda df: df["Trip"].replace("Less than 50", 50).astype(float))
)

# 3. Keep one location per station (first row wins), restricted to the
#    stations that actually appear in the entries/exits data
stations = stations.drop_duplicates(subset="Train_Station")
stations = stations[stations["Train_Station"].isin(ee["Train_Station"])]

# 4. Merge coordinates
# Drop any LAT/LONG left over from a previous run of this cell first: merging
# twice would otherwise create LAT_x/LAT_y columns and break the later
# ee_map["LONG"] lookup. how="left" keeps stations with no location match
# (their LAT/LONG become NaN); validate= asserts one location row per station.
ee = (
    ee.drop(columns=["LAT", "LONG"], errors="ignore")
    .merge(
        stations[["Train_Station", "LAT", "LONG"]],
        on="Train_Station",
        how="left",
        validate="many_to_one",
    )
)

# 5. Filter to Entry + May-25
is_may_entry = (ee["Entry_Exit"] == "Entry") & (ee["Month_Year"] == "May-25")
ee_map = ee[is_may_entry].copy()
# Log scale so the marker colour is not dominated by the busiest stations
ee_map["log10Trips"] = np.log10(ee_map["TripNumber"])

# 6. Convert to GeoDataFrame (x = longitude, y = latitude)
station_geometry = gpd.points_from_xy(ee_map["LONG"], ee_map["LAT"])
gdf_points = gpd.GeoDataFrame(ee_map, geometry=station_geometry, crs="EPSG:4326")

# Number of station rows that will be considered for the map
print(len(gdf_points))

# 7. Read Sydney train routes shapefile and reproject it to EPSG:4326,
#    the same CRS used for the station points
routes_shapefile = "SydneyTrainRoutes/sydneytrains/SydneyTrains.shp"
train_routes = gpd.read_file(routes_shapefile).to_crs("EPSG:4326")

# 8. Base map with Folium (interactive); location is [lat, lon]
map_centre = [-33.87, 151.1]
m = folium.Map(
    location=map_centre,
    tiles='cartodbpositron',
    zoom_start=11,
)

# 9. Add train routes
def _route_latlon_paths(geom):
    """Yield one list of (lat, lon) pairs for every line contained in ``geom``.

    Handles LineString directly and walks the parts of MultiLineString /
    GeometryCollection geometries, which were previously skipped. Missing or
    empty geometries and any other geometry type yield nothing. Coordinates
    are read by index so an optional third (Z) value does not break unpacking.
    """
    if geom is None or geom.is_empty:
        return
    kind = geom.geom_type  # `.type` is deprecated in Shapely 2
    if kind == "LineString":
        # Shapely stores (x, y) = (lon, lat); folium expects (lat, lon)
        yield [(pt[1], pt[0]) for pt in geom.coords]
    elif kind in ("MultiLineString", "GeometryCollection"):
        for part in geom.geoms:
            yield from _route_latlon_paths(part)


for route_geom in train_routes.geometry:
    for path in _route_latlon_paths(route_geom):
        folium.PolyLine(path, color="red", weight=2).add_to(m)

# Diagnostic: missing values per column. NaN LAT/LONG means the station name
# found no match in the station-locations table during the left merge.
print(gdf_points[["Train_Station", "LAT", "LONG"]].isna().sum())

# 10. Add stations with color-coded markers
# (plt and mcolors come from the notebook's import cell.)
# Remove any rows with missing coordinates, and name the stations that are
# dropped so they do not disappear from the map silently.
gdf_points_clean = gdf_points.dropna(subset=['LAT', 'LONG'])
unmapped = gdf_points.loc[gdf_points[["LAT", "LONG"]].isna().any(axis=1), "Train_Station"]
if not unmapped.empty:
    print("Stations skipped (no coordinates):", ", ".join(sorted(unmapped.dropna().astype(str).unique())))

if gdf_points_clean.empty:
    print("Warning: No valid station coordinates found!")
else:
    # Loop-invariant: compute the colour-scale maximum once, not per station.
    max_log_trips = gdf_points_clean["log10Trips"].max()
    # Guard against a zero/NaN maximum so the division below stays finite.
    color_scale = max_log_trips if max_log_trips > 0 else 1.0

    for _, row in gdf_points_clean.iterrows():
        # Normalize to the busiest station and convert to a viridis hex color
        norm_value = row["log10Trips"] / color_scale
        hex_color = mcolors.to_hex(plt.cm.viridis(norm_value))

        folium.CircleMarker(
            location=[row["LAT"], row["LONG"]],
            radius=8,  # Made slightly larger for better visibility
            popup=f"{row['Train_Station']}<br>Trips: {int(row['TripNumber'])}<br>Log10 Trips: {row['log10Trips']:.2f}",
            color=hex_color,
            fill=True,
            fill_color=hex_color,
            fill_opacity=0.8,
            weight=2
        ).add_to(m)

# 11. Write the map to a standalone HTML file, then leave the map object as
#     the cell's last expression so the notebook renders it inline
map_html_path = "train_station_map.html"
m.save(map_html_path)
m

291
Train_Station     0
LAT              11
LONG             11
dtype: int64
