In [None]:
import multiprocessing
import psutil
import sys

import geopandas as gpd
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from IPython.display import display

from railrailrail.network.stage import Stage
from railrailrail.railgraph import RailGraph

from analysis.analysis_helpers import (
    get_stage_journeys,
    make_stage_journeys_dataframe,
    contains_point,
    get_station_agg_stats,
    get_planning_area_to_region_df,
)

# Show DataFrames in full: set every pandas display limit below to None.
for display_option in (
    "display.max_rows",
    "display.max_columns",
    "display.width",
    "display.max_colwidth",
):
    pd.set_option(display_option, None)

# Warn on stderr when less than this many gigabytes (counted as 1e9 bytes each)
# of memory are currently available.
minimum_available_ram_GB = 3
available_ram_bytes = psutil.virtual_memory().available
if available_ram_bytes < minimum_available_ram_GB * 1e9:
    low_memory_warning = (
        "Warning: Less than %dGB RAM available. This program may crash with an Out-Of-Memory error."
        % minimum_available_ram_GB
    )
    print(low_memory_warning, file=sys.stderr)


def _planning_area_name(description: str) -> str:
    """Return the stripped text between the first ``<td>`` and the next ``</td>``.

    If ``<td>`` is absent the search starts at the beginning of the string; if
    ``</td>`` is absent the text runs to the end of the string.
    """
    after_opening_tag = description.split("<td>", 1)[-1]
    cell_text = after_opening_tag.split("</td>", 1)[0]
    return cell_text.strip()


# Planning-area boundaries; the area name is parsed out of each feature's
# "Description" field and stored in a new "Planning Area" column.
planning_areas = gpd.read_file("MasterPlan2019PlanningAreaBoundaryNoSea.geojson")
planning_areas["Planning Area"] = planning_areas["Description"].apply(
    _planning_area_name
)

planning_area_to_region_df = get_planning_area_to_region_df()


stages = list(Stage.stages)
journeys_by_stage: dict[str, dict[str, pd.DataFrame | RailGraph]] = dict()
station_agg_stats_by_stage: dict[str, pd.DataFrame] = dict()

# (name, geometry) pairs in planning_areas row order. Built once here so the
# per-station lookup below does not re-run `planning_areas.iterrows()` for
# every station of every stage.
planning_area_shapes = list(
    zip(planning_areas["Planning Area"], planning_areas["geometry"])
)


def _find_planning_area(latitude, longitude):
    """Return the name of the first planning area containing the point, else None.

    Areas are tested in `planning_areas` row order using `contains_point`.
    """
    return next(
        (
            area_name
            for area_name, area_geometry in planning_area_shapes
            if contains_point(area_geometry, latitude, longitude)
        ),
        None,
    )


# The "fork" start method is requested explicitly; see
# https://bugs.python.org/issue33725 and https://stackoverflow.com/a/72605750
with multiprocessing.get_context("fork").Pool(None) as pool:
    # Results arrive in completion order, not in `stages` order.
    for stage, journeys, rail_graph in pool.imap_unordered(get_stage_journeys, stages):
        stage_journeys_df = make_stage_journeys_dataframe(stage, journeys)
        journeys_by_stage[stage] = {
            "df": stage_journeys_df,
            "rail_graph": rail_graph,
        }
        stage_station_stats = get_station_agg_stats(
            df=stage_journeys_df,
            rail_graph=rail_graph,
        )
        # Stations within interchanges have different geographic coordinates.
        # This causes them to have different average circuity. For example, see BP1/NS4/JS1 Choa Chu Kang.

        # Tag each station with the planning area that contains it (None when
        # no planning area contains the station's coordinates).
        stage_station_stats["planning_area"] = [
            _find_planning_area(latitude, longitude)
            for latitude, longitude in zip(
                stage_station_stats["latitude"], stage_station_stats["longitude"]
            )
        ]
        station_agg_stats_by_stage[stage] = stage_station_stats

In [None]:
def plot_planning_area_agg_stats_scatter(stage: str, show=False):
    """Scatter-plot mean circuity against mean shortest-path distance per planning area.

    The station statistics of ``stage`` are grouped by ``planning_area``, their
    numeric columns are averaged, the result is sorted by
    ``path_distance_mean`` and left-joined with ``planning_area_to_region_df``
    so each point can be coloured by region.

    Args:
        stage: Key into ``station_agg_stats_by_stage`` and ``Stage.stages_info``.
        show: When true, display the figure before returning.

    Returns:
        A ``(planning_area_agg_stats, fig)`` tuple: the aggregated DataFrame
        that was plotted and the Plotly figure.
    """
    stage_description, stage_timestamp = Stage.stages_info[stage]
    # Formatted outside the f-string below: reusing double quotes inside a
    # double-quoted f-string is only valid syntax from Python 3.12 onwards.
    stage_date = stage_timestamp.strftime("%d %B %Y")

    # groupby does not modify its input, so no defensive copy is needed.
    planning_area_agg_stats = (
        station_agg_stats_by_stage[stage]
        .groupby(by="planning_area")
        .mean(numeric_only=True)
        .reset_index(0)
        .sort_values(by=["path_distance_mean"])
        .merge(planning_area_to_region_df, on="planning_area", how="left")
    )
    planning_area_agg_stats["size"] = 10  # Chart dot size

    fig = px.scatter(
        planning_area_agg_stats,
        title=(
            "Station Mean Circuity against Station Mean Shortest Path Distance (Averaged By Planning Area)"
            "<br />"
            f"{stage_description} @ {stage_date}"
        ),
        x="path_distance_mean",
        y="circuity_mean",
        # Pad the x axis by 5% and the y axis by 0.1 on each side.
        range_x=(
            int(planning_area_agg_stats["path_distance_mean"].min() * 0.95),
            int(planning_area_agg_stats["path_distance_mean"].max() * 1.05),
        ),
        range_y=(
            planning_area_agg_stats["circuity_mean"].min() - 0.1,
            planning_area_agg_stats["circuity_mean"].max() + 0.1,
        ),
        labels={
            "path_distance_mean": "Mean Shortest Path Distance",
            "circuity_mean": "Mean Circuity",
            "planning_area": "Planning Area",
            "region": "Region",
        },
        size="size",
        color="region",
        color_discrete_map={
            "CENTRAL": "#ffa15a",
            "WEST": "#636efa",
            "EAST": "#00cc96",
            "NORTH-EAST": "fuchsia",
            "NORTH": "#ef553b",
        },
        text="planning_area",
        hover_name="planning_area",
        hover_data={"size": False},  # Hide the constant dot size in tooltips
        # trendline="lowess",  # Locally WEighted Scatterplot Smoothing (LOWESS)
        # trendline_scope="overall",
        log_x=False,
        log_y=False,
        size_max=10,
        width=1200,
        height=900,
        template="plotly_dark",
    )
    fig.update_layout(
        title={"x": 0.5},
        xaxis=dict(
            tickformat="",
            ticksuffix="m",
            tickfont=dict(color="white", family="Arial Black"),
        ),
        yaxis=dict(
            tickfont=dict(color="white", family="Arial Black"),
        ),
    )
    fig.update_traces(
        textposition="top center", textfont=dict(size=13, color="white", family="Arial")
    )

    if show:
        fig.show()
    return planning_area_agg_stats, fig


# Print an HTML fragment (not a full page, Plotly.js referenced via "cdn") of
# the planning-area scatter plot for each selected stage, each followed by a
# blank line.
for stage in ("nel", "dtl_2", "tel_4", "cg_tel_c"):
    planning_area_agg_stats, fig = plot_planning_area_agg_stats_scatter(stage)
    scatter_html = fig.to_html(full_html=False, include_plotlyjs="cdn")
    print(scatter_html, end="\n\n")

In [None]:
# For each listed stage, print the percentage of rows in its journeys DataFrame
# that have a non-zero total cost and a circuity of exactly 1.0.
adjacent_journey_reports = (
    ("phase_1_2", "%.1f%% of journeys as of Phase 1b are between adjacent stations."),
    ("tel_4", "%.1f%% of journeys as of TEL4 are between adjacent stations."),
)
for report_stage, report_template in adjacent_journey_reports:
    df = journeys_by_stage[report_stage]["df"]
    adjacent_journeys = df.query("total_cost != 0 and circuity == 1.0")
    print(report_template % (len(adjacent_journeys) / len(df) * 100,))

In [None]:
def plot_agg_stats_map(
    stage: str,
    color: str,
    hover_data: list[str],
    show: bool = False,
    min_path_distance: float = 8000,
):
    """Plot the stations of one stage on a dark map, coloured by an aggregate statistic.

    Args:
        stage: Key into ``station_agg_stats_by_stage`` and ``Stage.stages_info``.
        color: Column used for the marker colour. A name containing
            ``"circuity"`` selects the colour range ``(1, max circuity)``; any
            other name is treated as a path-distance column and uses
            ``(min_path_distance, max path distance)``. Each maximum is taken
            over both the ``*_mean`` and ``*_median`` column.
        hover_data: Columns shown in the hover tooltip.
        show: When true, display the figure before returning.
        min_path_distance: Lower bound of the colour range for path-distance
            colouring. The default keeps the previously hard-coded value.

    Returns:
        A ``(station_agg_stats, fig)`` tuple: the DataFrame that was plotted
        (a copy of the stage's statistics with display columns added) and the
        Plotly figure.
    """
    stage_description, stage_timestamp = Stage.stages_info[stage]

    # Work on a copy so the shared per-stage statistics are not modified.
    station_agg_stats = station_agg_stats_by_stage[stage].copy()
    # Vectorised join of the "start" and "station_name" columns.
    station_agg_stats["full_name"] = (
        station_agg_stats["start"] + " " + station_agg_stats["station_name"]
    )
    station_agg_stats["stage_description"] = stage_description
    station_agg_stats["stage_timestamp"] = stage_timestamp.strftime("%b-%y")

    colour_by_circuity = "circuity" in color
    if colour_by_circuity:
        range_color = (
            1,
            max(
                station_agg_stats["circuity_mean"].max(),
                station_agg_stats["circuity_median"].max(),
            ),
        )
    else:
        range_color = (
            min_path_distance,
            max(
                station_agg_stats["path_distance_mean"].max(),
                station_agg_stats["path_distance_median"].max(),
            ),
        )

    station_agg_stats["marker_size"] = 1  # Constant, so all markers are equal in size
    fig = px.scatter_mapbox(
        station_agg_stats,
        lat="latitude",
        lon="longitude",
        hover_name="full_name",
        hover_data=hover_data,
        color=color,
        color_continuous_scale=px.colors.sequential.Jet,
        range_color=range_color,
        mapbox_style="carto-darkmatter",
        template="plotly_dark",
        zoom=10,
        height=550,
        width=1200,
        size="marker_size",
        size_max=8,
        opacity=1,
        center={"lat": 1.330270, "lon": 103.851959},
    )
    # NOTE(review): this figure is built without `animation_frame`, so the
    # `updatemenus` removal and the slider settings below look like leftovers
    # from the animated variant. They are kept so the layout is unchanged;
    # confirm before removing.
    fig["layout"].pop("updatemenus")
    fig.update_layout(
        sliders=[
            {
                "currentvalue": {
                    "font": {"size": 20},
                    "prefix": None,
                    "visible": True,
                    "xanchor": "center",
                },
                "transition": {"duration": 0, "easing": "linear"},
                "pad": {"t": 50, "l": 5},
            }
        ],
        margin={"r": 0, "t": 0, "l": 0, "b": 0},
    )

    # Label the colour bar with the statistic actually plotted, rather than
    # always saying "Mean".
    statistic_label = "Median" if "median" in color else "Mean"
    if colour_by_circuity:
        fig.update_coloraxes(colorbar_title=f"{statistic_label} Circuity")
    else:
        fig.update_coloraxes(
            colorbar_ticksuffix="m", colorbar_title=f"{statistic_label} Path Distance"
        )
    fig.update_coloraxes(showscale=True)

    if show:
        fig.show()

    return station_agg_stats, fig

# Show the station map for the "teck_lee" stage, coloured by the
# "path_distance_mean" column, then print it as an HTML fragment.
teck_lee_map_options = {
    "color": "path_distance_mean",
    "hover_data": ["circuity_mean", "path_distance_mean"],
    "show": True,
}
station_agg_stats, fig = plot_agg_stats_map("teck_lee", **teck_lee_map_options)
teck_lee_map_html = fig.to_html(
    full_html=False, include_plotlyjs="cdn", auto_play=False
)
print(teck_lee_map_html)

In [None]:
def plot_agg_stats_grid(
    color: str,
    hover_data: list[str],
    show: bool = False,
    initial_frame: str = "Aug 2024",
    min_path_distance: float = 8000,
):
    """Plot all stages as an animated longitude/latitude scatter with a stage slider.

    The station statistics of every stage in ``stages`` are stacked into one
    DataFrame and animated by their ``stage_timestamp`` label (formatted as
    ``"%b %Y"``).

    Args:
        color: Column used for the marker colour. A name containing
            ``"circuity"`` selects the colour range ``(1, max circuity)``; any
            other name is treated as a path-distance column and uses
            ``(min_path_distance, max path distance)``. Each maximum is taken
            over both the ``*_mean`` and ``*_median`` column, across all stages.
        hover_data: Columns shown in the hover tooltip.
        show: When true, display the figure before returning.
        initial_frame: Name of the animation frame shown first. The default
            keeps the previously hard-coded value.
        min_path_distance: Lower bound of the colour range for path-distance
            colouring. The default keeps the previously hard-coded value.

    Returns:
        A ``(station_agg_stats, fig)`` tuple: the stacked DataFrame that was
        plotted and the Plotly figure.

    Raises:
        ValueError: If no animation frame is named ``initial_frame``.
    """
    # Collect the per-stage frames and concatenate once, instead of growing a
    # DataFrame with a concat per iteration.
    stage_frames = []
    for stage in stages:
        stage_description, stage_timestamp = Stage.stages_info[stage]
        stage_df = station_agg_stats_by_stage[stage].copy()
        # Vectorised join of the "start" and "station_name" columns.
        stage_df["full_name"] = stage_df["start"] + " " + stage_df["station_name"]
        stage_df["stage_description"] = stage_description
        stage_df["stage_timestamp"] = stage_timestamp.strftime("%b %Y")
        stage_frames.append(stage_df)
    station_agg_stats = pd.concat(stage_frames, axis=0)

    colour_by_circuity = "circuity" in color
    if colour_by_circuity:
        range_color = (
            1,
            max(
                station_agg_stats["circuity_mean"].max(),
                station_agg_stats["circuity_median"].max(),
            ),
        )
    else:
        range_color = (
            min_path_distance,
            max(
                station_agg_stats["path_distance_mean"].max(),
                station_agg_stats["path_distance_median"].max(),
            ),
        )

    station_agg_stats["marker_size"] = 1  # Constant, so all markers are equal in size
    fig = px.scatter(
        station_agg_stats,
        y="latitude",
        x="longitude",
        range_y=[1.25, 1.46],
        range_x=[103.625, 104.02],
        animation_frame="stage_timestamp",
        hover_name="full_name",
        hover_data=hover_data,
        color=color,
        color_continuous_scale=px.colors.sequential.Jet,
        range_color=range_color,
        template="plotly_dark",
        height=550,
        width=1200,
        size="marker_size",
        size_max=8,
        opacity=1,
    )
    # Drop the `updatemenus` layout entry and restyle the slider.
    fig["layout"].pop("updatemenus")
    fig.update_layout(
        sliders=[
            {
                "currentvalue": {
                    "font": {"size": 20},
                    "prefix": None,
                    "visible": True,
                    "xanchor": "center",
                },
                "transition": {"duration": 0, "easing": "linear"},
                "pad": {"t": 50, "l": 5},
            }
        ],
        margin={"r": 0, "t": 0, "l": 0, "b": 0},
    )

    # Label the colour bar with the statistic actually plotted, rather than
    # always saying "Mean".
    statistic_label = "Median" if "median" in color else "Mean"
    if colour_by_circuity:
        fig.update_coloraxes(colorbar_title=f"{statistic_label} Circuity")
    else:
        fig.update_coloraxes(
            colorbar_ticksuffix="m", colorbar_title=f"{statistic_label} Path Distance"
        )
    fig.update_coloraxes(showscale=True)

    # Change initial frame: point the slider at it and rebuild the figure with
    # that frame's traces as the initially displayed data.
    frame_names = [frame["name"] for frame in fig.frames]
    if initial_frame not in frame_names:
        raise ValueError(
            f"No animation frame named {initial_frame!r}; available frames: {frame_names}"
        )
    frame_num = frame_names.index(initial_frame)
    fig.layout["sliders"][0]["active"] = frame_num
    fig = go.Figure(
        data=fig["frames"][frame_num]["data"],
        frames=fig["frames"],
        layout=fig.layout,
    )

    if show:
        fig.show()

    return station_agg_stats, fig


# Build the animated station grid once per colouring column, showing each
# figure and printing it as an HTML fragment.
grid_hover_columns = ("circuity_mean", "path_distance_mean")
for color in ("path_distance_mean", "circuity_mean"):
    station_agg_stats, fig = plot_agg_stats_grid(
        color=color,
        hover_data=list(grid_hover_columns),  # fresh list for every call
        show=True,
    )
    grid_html = fig.to_html(full_html=False, include_plotlyjs="cdn", auto_play=False)
    print(grid_html)

# Before adding the printed HTML to a webpage, remove these two tags from it:
#   <script type="text/javascript">window.PlotlyConfig = {MathJaxConfig: 'local'};</script>
#   <script charset="utf-8" src="https://cdn.plot.ly/plotly-2.34.0.min.js"></script>

In [None]:
# For every stage, print the mean, maximum and standard deviation of
# "total_cost" (each truncated to an int) over journeys with non-zero cost.
for stage, stage_entry in journeys_by_stage.items():
    stage_df = stage_entry["df"].query("total_cost != 0")
    stage_total_costs = stage_df["total_cost"]
    stage_mean_total_cost, stage_max_total_cost, stage_total_cost_std_dev = (
        int(statistic)
        for statistic in (
            stage_total_costs.mean(),
            stage_total_costs.max(),
            stage_total_costs.std(),
        )
    )
    print(stage, stage_mean_total_cost, stage_max_total_cost, stage_total_cost_std_dev)