In [3]:
using Plots, Gurobi, CSV, DataFrames

In [18]:
"""
    load_repo_data(repo_path::String)

Scans the top level of the specified repository path (subdirectories are not searched),
identifies all `.csv` and `.txt` files, and parses them into a Dictionary of DataFrames.

# Arguments
- `repo_path::String`: The local path to the cloned repository.

# Returns
- `Dict{String, DataFrame}`: A dictionary where keys are filenames (without extensions)
  and values are the parsed DataFrames.
"""
function load_repo_data(repo_path::AbstractString)
    # Accepts any AbstractString path (String, SubString, ...). Only the top level of
    # `repo_path` is scanned; subdirectories are skipped. Returns a
    # Dict{String, DataFrame} keyed by filename without its extension. The dict is
    # empty when `repo_path` is not a directory or when no file could be parsed.
    data_store = Dict{String, DataFrame}()

    # Options forwarded to CSV.read for every file; adjust here if certain files
    # have no headers, etc.
    csv_options = (header=true, stringtype=String)

    if !isdir(repo_path)
        @error "Directory not found: $repo_path"
        return data_store
    end

    @info "Scanning directory: $repo_path"

    for file in readdir(repo_path)
        file_path = joinpath(repo_path, file)

        # Skip directories
        isdir(file_path) && continue

        # Case-insensitive extension check; lowercase once for both comparisons.
        lowered = lowercase(file)
        (endswith(lowered, ".csv") || endswith(lowered, ".txt")) || continue

        dataset_name = splitext(file)[1]

        try
            @info "Parsing file: $file"

            # No delimiter is passed, so detection is left to CSV.jl.
            df = CSV.read(file_path, DataFrame; csv_options...)

            # Files that differ only by extension (e.g. `stops.csv` and `stops.txt`)
            # share a key; the later one still wins, but no longer silently.
            if haskey(data_store, dataset_name)
                @warn "Dataset name '$dataset_name' is already loaded; $file replaces the earlier table."
            end
            data_store[dataset_name] = df

            @info "Successfully loaded $dataset_name with $(nrow(df)) rows."
        catch e
            # A user interrupt (Ctrl-C) must abort the load rather than be reported
            # as an unparseable file and skipped.
            e isa InterruptException && rethrow()
            @warn "Failed to parse $file. It might not be a standard CSV/Table." exception=e
        end
    end

    if isempty(data_store)
        @warn "No CSV or TXT files were found or parsed in $repo_path."
    end

    return data_store
end

"""
    summarize_data(data::Dict{String, DataFrame})

Prints a brief summary of the loaded datasets.
"""
function summarize_data(data::Dict{String, DataFrame})
    # Print a header, then one four-line entry per dataset: its name, its
    # row/column counts, its comma-separated column names, and a separator rule.
    separator = "--------------------"

    println("\n--- Data Summary ---")
    foreach(data) do entry
        label, table = entry
        row_count = nrow(table)
        col_count = ncol(table)
        column_list = join(names(table), ", ")

        println("Dataset: '$label'")
        println("  Shape: $row_count rows × $col_count columns")
        println("  Cols:  $column_list")
        println(separator)
    end
    return nothing
end

# 1. Choose the directory to scan ("." is the current working directory)
repo_path = "." 

# 2. Load the data

subway_data = load_repo_data(repo_path)

# 3. Print summary
summarize_data(subway_data)



[36m[1m[ [22m[39m[36m[1mInfo: [22m[39mScanning directory: .
[33m[1m└ [22m[39m[90m@ Main In[18]:64[39m



--- Data Summary ---
