In [2]:
using Plots, Gurobi, CSV, DataFrames

In [4]:
"""
    load_repo_data(repo_path::String)

Recursively walks through the specified repository path, identifies all `.csv` and `.txt` files
(even in subfolders), and parses them into a Dictionary of DataFrames.

# Arguments
- `repo_path::String`: The local path to the cloned repository.

# Returns
- `Dict{String, DataFrame}`: A dictionary where keys are unique filenames and values are DataFrames.
"""
function load_repo_data(repo_path::String)
    # Dictionary to store the parsed data
    data_store = Dict{String, DataFrame}()
    
    # CSV options (assume headers exist)
    csv_options = (header=true, stringtype=String)

    if !isdir(repo_path)
        @error "Directory not found: $repo_path"
        return data_store
    end

    @info "Recursively scanning directory: $repo_path"
    
    files_found = 0
    
    # walkdir allows us to search subdirectories (e.g., /data, /src)
    for (root, dirs, files) in walkdir(repo_path)
        for file in files
            # Check for valid extensions
            if endswith(lowercase(file), ".csv") || endswith(lowercase(file), ".txt")
                
                files_found += 1
                full_path = joinpath(root, file)
                dataset_name = splitext(file)[1]
                
                # Handle duplicate filenames in different folders by appending parent folder name
                if haskey(data_store, dataset_name)
                    parent_folder = basename(root)
                    dataset_name = "$(parent_folder)_$(dataset_name)"
                end

                try
                    @info "Parsing: $full_path"
                    df = CSV.read(full_path, DataFrame; csv_options...)
                    data_store[dataset_name] = df
                catch e
                    # Only warn, don't crash, if a file is malformed
                    @warn "Skipping $file: Unable to parse as CSV table."
                end
            end
        end
    end

    if files_found == 0
        @warn "No CSV or TXT files were found in $repo_path or its subdirectories."
        @info "Current working directory contains: $(readdir(repo_path))"
    else
        @info "Successfully loaded $(length(data_store)) datasets."
    end

    return data_store
end
"""
    summarize_data(data::Dict{String, DataFrame})

Prints a brief summary of the loaded datasets.
"""
function summarize_data(data::Dict{String, DataFrame})
    println("\n--- Data Summary ---")
    for (name, df) in data
        println("Dataset: '$name'")
        println("  Shape: $(nrow(df)) rows × $(ncol(df)) columns")
        println("  Cols:  $(join(names(df), ", "))")
        println("--------------------")
    end
end

repo_path = "." 

# 2. Load the data

subway_data = load_repo_data(repo_path)

# 3. Print summary
summarize_data(subway_data)



[36m[1m[ [22m[39m[36m[1mInfo: [22m[39mRecursively scanning directory: .
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39mParsing: .\datasets\MTA_Subway_Hourly_Ridership__Oct_21_2024_Evening.csv
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39mParsing: .\datasets\MTA_Subway_Hourly_Ridership__Oct_21_2024_Morning.csv
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39mParsing: .\datasets\MTA_Subway_Stations_20251204.csv
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39mParsing: .\datasets\agency.txt
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39mParsing: .\datasets\calendar.txt
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39mParsing: .\datasets\calendar_dates.txt
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39mParsing: .\datasets\linecapacity.csv
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39mParsing: .\datasets\linelength.csv
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39mParsing: .\datasets\routes.txt
[36m[1m[ [22m[39m[36m[1mInfo: [22m[39mParsing: .\datasets\shapes.txt
[36m[1m[ [22m[39m[3


--- Data Summary ---
Dataset: 'routes'
  Shape: 29 rows × 10 columns
  Cols:  route_id, agency_id, route_short_name, route_long_name, route_desc, route_type, route_url, route_color, route_text_color, route_sort_order
--------------------
Dataset: 'nodes_with_ridership'
  Shape: 475 rows × 9 columns
  Cols:  node_idx, stop_id, stop_name, stop_lon, stop_lat, station_complex_id, ridership_morning, ridership_evening, net_ridership
--------------------
Dataset: 'stops'
  Shape: 1488 rows × 6 columns
  Cols:  stop_id, stop_name, stop_lat, stop_lon, location_type, parent_station
--------------------
Dataset: 'trips'
  Shape: 20304 rows × 6 columns
  Cols:  route_id, trip_id, service_id, trip_headsign, direction_id, shape_id
--------------------
Dataset: 'MTA_Subway_Aggregated_Ridership_Oct_21_2024_Evening'
  Shape: 424 rows × 2 columns
  Cols:  station_complex_id, ridership
--------------------
Dataset: 'agency'
  Shape: 1 rows × 6 columns
  Cols:  agency_id, agency_name, agency_url, agency_

In [None]:
function build_subway_model(
    V,
    E_track,
    E_transfer,
    L,
    L_ij;
    s,
    t,
    t_tr,
    C_train,   # <= NOW: Dict{Line,Float64}, not scalar
    Δ,
    τ,
    energy,
    T_max,
    β,
    γ,
    λ,
    shared_track_constraint::Bool = true,
)

    model = Model(Gurobi.Optimizer)

    # -------------------------
    # Index sets for variables
    # -------------------------
    track_triplets = Tuple{eltype(V),eltype(V),eltype(L)}[]
    for (i, j) in E_track
        lines_ij = get(L_ij, (i, j), Vector{eltype(L)}())
        for ℓ in lines_ij
            push!(track_triplets, (i, j, ℓ))
        end
    end

    # -------------------------
    # Decision variables
    # -------------------------
    @variable(model, x[track_triplets] >= 0)
    @variable(model, y[E_transfer] >= 0)
    @variable(model, f[L] >= 0)
    @variable(model, overflow[track_triplets] >= 0)

    # -------------------------
    # (1) Flow conservation
    # -------------------------
    @constraint(model, [i in V], begin
        out_track = sum(
            x[(i, j, ℓ)]
            for (ii, j, ℓ) in track_triplets
            if ii == i
        )
        out_transfer = sum(
            y[(i, j)]
            for (u, j) in E_transfer
            if u == i
        )
        in_track = sum(
            x[(k, i, ℓ)]
            for (k, jj, ℓ) in track_triplets
            if jj == i
        )
        in_transfer = sum(
            y[(k, i)]
            for (k, v) in E_transfer
            if v == i
        )

        out_track + out_transfer - in_track - in_transfer == s[i]
    end)

    # -------------------------
    # (2) Capacity on track edges
    # -------------------------
    @constraint(model, [triplet in track_triplets], begin
        (i, j, ℓ) = triplet
        # CHANGED: C_train[ℓ] instead of scalar C_train
        x[triplet] - overflow[triplet] <= C_train[ℓ] * f[ℓ] * Δ
    end)

    # -------------------------
    # (3) Optional shared physical track constraints
    # -------------------------
    if shared_track_constraint
        @constraint(model, [e in E_track], begin
            i, j = e
            lines_ij = get(L_ij, (i, j), Vector{eltype(L)}())
            if isempty(lines_ij)
                0.0 <= 0.0
            else
                # CHANGED: use C_train[ℓ] for each line
                sum(x[(i, j, ℓ)] - overflow[(i, j, ℓ)] for ℓ in lines_ij) <=
                sum(C_train[ℓ] * f[ℓ] * Δ for ℓ in lines_ij)
            end
        end)
    end

    # -------------------------
    # (4) Fleet / energy limit
    # -------------------------
    @constraint(model,
        sum(f[ℓ] * τ[ℓ] for ℓ in L) <= T_max
    )

    # -------------------------
    # Objective
    # -------------------------
    passenger_time_expr =
        sum(t[(i, j, ℓ)] * x[(i, j, ℓ)] for (i, j, ℓ) in track_triplets) +
        sum(t_tr[(i, j)] * y[(i, j)] for (i, j) in E_transfer)

    overflow_expr = β * sum(overflow[triplet] for triplet in track_triplets)
    energy_expr   = γ * sum(energy[ℓ] * f[ℓ] for ℓ in L)

    @objective(model, Min,
        (1 - λ) * (passenger_time_expr + overflow_expr) +
        λ * energy_expr
    )

    return model
end


In [6]:
using CSV
using DataFrames

nodes_df      = CSV.read("generated_graphs\\nodes_with_balanced_integer_net_ridership.csv", DataFrame)
routes_df     = CSV.read("generated_graphs\\routes.csv", DataFrame)
edges_df      = CSV.read("generated_graphs\\edges_by_route.csv", DataFrame)
transfers_df  = CSV.read("generated_graphs\\transfer_edges.csv", DataFrame)


Row,transfer_edge_id,from_stop_id,to_stop_id,from_idx,to_idx,transfer_type,min_transfer_time,cost
Unnamed: 0_level_1,Int64,String3,String3,Int64,Int64,Int64,Int64,Int64
1,0,112,A09,9,183,2,180,1
2,1,125,A24,22,196,2,180,1
3,2,127,725,24,174,2,180,1
4,3,127,902,24,177,2,180,1
5,4,127,A27,24,198,2,300,1
6,5,127,R16,24,444,2,180,1
7,6,132,D19,29,263,2,300,1
8,7,132,L02,29,379,2,180,1
9,8,222,415,57,105,2,180,1
10,9,228,A36,62,205,2,180,1


In [7]:
const Station = Int
const Line    = Int

Int64

In [10]:
V = sort(unique(Station.(nodes_df.node_idx)))      # stations
L = sort(unique(Line.(routes_df.route_idx)))       # lines
println(size(V))
println(size(L))

(475,)
(26,)


In [12]:
E_track = unique([(Station(row.from_idx), Station(row.to_idx)) 
                  for row in eachrow(edges_df)])
print(size(E_track))

(556,)

In [17]:
L_ij = Dict{Tuple{Station,Station}, Vector{Line}}()

for row in eachrow(edges_df)
    i = Station(row.from_idx)
    j = Station(row.to_idx)
    ℓ = Line(row.route_idx)
    key = (i, j)
    if haskey(L_ij, key)
        push!(L_ij[key], ℓ)
    else
        L_ij[key] = [ℓ]
    end
end

# Remove duplicates within each vector, in case the same route appears multiple times
for lines in values(L_ij)
    unique!(lines)
end
L_ij


Dict{Tuple{Int64, Int64}, Vector{Int64}} with 556 entries:
  (126, 127) => [5]
  (359, 360) => [9, 23]
  (55, 56)   => [1, 4]
  (117, 118) => [5, 6]
  (213, 214) => [9, 11]
  (7, 8)     => [0]
  (223, 224) => [9]
  (14, 15)   => [0]
  (397, 398) => [18]
  (433, 434) => [20, 24]
  (178, 179) => [9]
  (267, 268) => [10, 21]
  (368, 370) => [17]
  (121, 122) => [5, 6]
  (436, 437) => [20, 24]
  (473, 474) => [23]
  (151, 152) => [3, 5, 6]
  (26, 27)   => [0, 1]
  (154, 155) => [7, 8]
  (308, 309) => [14, 15]
  (336, 337) => [16]
  (171, 172) => [7, 8]
  (345, 346) => [16]
  (261, 262) => [14, 15, 19]
  (444, 445) => [20, 24, 21, 22]
  ⋮          => ⋮

In [21]:
E_transfer = unique([(Station(row.from_idx), Station(row.to_idx))
                     for row in eachrow(transfers_df)])
println(size(E_transfer))

(150,)


In [24]:
t_tr = Dict((i, j) => 1.0 for (i, j) in E_transfer)

Dict{Tuple{Int64, Int64}, Float64} with 150 entries:
  (416, 429) => 1.0
  (63, 418)  => 1.0
  (472, 212) => 1.0
  (429, 152) => 1.0
  (172, 144) => 1.0
  (24, 177)  => 1.0
  (212, 472) => 1.0
  (416, 451) => 1.0
  (198, 174) => 1.0
  (453, 205) => 1.0
  (453, 62)  => 1.0
  (429, 451) => 1.0
  (29, 263)  => 1.0
  (335, 294) => 1.0
  (456, 66)  => 1.0
  (177, 444) => 1.0
  (422, 238) => 1.0
  (169, 335) => 1.0
  (198, 177) => 1.0
  (173, 260) => 1.0
  (176, 172) => 1.0
  (73, 474)  => 1.0
  (107, 63)  => 1.0
  (379, 263) => 1.0
  (148, 448) => 1.0
  ⋮          => ⋮

In [25]:
t = Dict{Tuple{Station,Station,Line}, Float64}()

for row in eachrow(edges_df)
    i = Station(row.from_idx)
    j = Station(row.to_idx)
    ℓ = Line(row.route_idx)
    key = (i, j, ℓ)
    
    # If every segment has the same unit cost:
    t[key] = 1.0

    # Later you can replace that with actual in-train travel time per edge
    # t[key] = real_travel_time_in_hours
end


In [29]:
s = Dict{Station, Float64}()

for row in eachrow(nodes_df)
    i = Station(row.node_idx)
    s[i] = Float64(row.balanced_net_ridership_int)
end

# Optionally, ensure every station in V has an entry, even if 0:
for i in V
    if !haskey(s, i)
        s[i] = 0.0
    end
end


In [36]:
const Line = Int  # same as you used elsewhere

linecap_df = CSV.read("datasets\\linecapacity.csv", DataFrame)

C_train = Dict{Line, Float64}()

for row in eachrow(linecap_df)
    ℓ = Line(row.route_idx)  # line index used in the model
    # capacity per train on that line:
    C_train[ℓ] = Float64(row.total_rush_hour_capacity)
end

In [38]:
Δ = 4

4

In [39]:
model = build_subway_model(
    V,
    E_track,
    E_transfer,
    L,
    L_ij;
    s      = s,
    t      = t,
    t_tr   = t_tr,
    C_train = C_train,
    Δ       = Δ,
    τ       = τ,
    energy  = energy,
    T_max   = T_max,
    β       = β,
    γ       = γ,
    λ       = λ,
)

optimize!(model)

LoadError: UndefVarError: `τ` not defined in `Main`
Suggestion: check for spelling errors or missing imports.