<a href="https://colab.research.google.com/github/emilyberlinghoff/BikeShare/blob/main/flow.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
from collections import defaultdict

def add_running_inflow_outflow_flow(input_csv: str, output_csv: str) -> None:
    """
    Add running per-station inflow/outflow/net-flow columns to a trip CSV.

    The input CSV must contain the columns 'Start Time', 'Start Station Id'
    and 'End Station Id'. Rows are sorted by 'Start Time' and then processed
    one trip at a time while two running tallies are kept per station id:

      - outflow: number of trips seen so far that started at the station
      - inflow:  number of trips seen so far that ended at the station

    Both tallies are updated for the current trip *before* its values are
    recorded, so each row already includes itself.

    Four columns are added, holding the tallies as of that row/trip:
      1) 'Total outflow of start station'
      2) 'Total inflow of end station'
      3) 'Total flow of start station' = inflow - outflow for the start station
      4) 'Total flow of end station'   = inflow - outflow for the end station

    Args:
        input_csv: Path of the trip CSV to read.
        output_csv: Path the sorted, augmented CSV is written to
            (without the DataFrame index).

    Raises:
        KeyError: If one of the required columns is missing.
    """

    df = pd.read_csv(input_csv)

    # 'Start Time' is sorted exactly as read_csv loaded it (no datetime
    # parsing happens here), so text timestamps are ordered lexicographically.
    # mergesort is a stable algorithm: trips that share a Start Time keep
    # their original file order, which makes the running tallies
    # deterministic (the default quicksort gives no such guarantee).
    df = df.sort_values(by="Start Time", kind="mergesort")

    # Running number of departures / arrivals seen so far, keyed by station id
    station_outflow = defaultdict(int)
    station_inflow = defaultdict(int)

    # Per-row values for the four new columns, in sorted row order
    total_outflow_of_start_station_list = []
    total_inflow_of_end_station_list = []
    total_flow_of_start_station_list = []
    total_flow_of_end_station_list = []

    # Only the two id columns are needed, so walk them directly rather than
    # materialising a full Series per row with iterrows().
    # NOTE(review): a missing station id (NaN) never compares equal to itself,
    # so such rows would not share a tally - confirm the input has none.
    for start_id, end_id in zip(df["Start Station Id"], df["End Station Id"]):
        # This trip departs the start station and arrives at the end station
        station_outflow[start_id] += 1
        station_inflow[end_id] += 1

        total_outflow_of_start_station_list.append(station_outflow[start_id])
        total_inflow_of_end_station_list.append(station_inflow[end_id])

        # The total flow for a station is (inflow - outflow)
        total_flow_of_start_station_list.append(
            station_inflow[start_id] - station_outflow[start_id]
        )
        total_flow_of_end_station_list.append(
            station_inflow[end_id] - station_outflow[end_id]
        )

    # Attach the new columns
    df["Total outflow of start station"] = total_outflow_of_start_station_list
    df["Total inflow of end station"] = total_inflow_of_end_station_list
    df["Total flow of start station"] = total_flow_of_start_station_list
    df["Total flow of end station"] = total_flow_of_end_station_list

    # Write out to a new CSV
    df.to_csv(output_csv, index=False)


if __name__ == "__main__":
    # Script entry point: read the January 2024 ridership export, write the
    # version with running flow columns, then report where it was saved.
    input_file, output_file = "Bike share ridership 2024-01.csv", "Flow 2024-01.csv"
    add_running_inflow_outflow_flow(input_csv=input_file, output_csv=output_file)
    print(
        f"Saved updated file with per-row inflow/outflow/flow columns to {output_file}"
    )

Saved updated file with per-row inflow/outflow/flow columns to Flow 2024-01.csv
