In [146]:
# import Pkg; Pkg.add("JLD2")
using DataFrames, CSV, Dates, JLD2, Distances

In [147]:
# Load the station metadata table. Later cells read its station_id, capacity,
# lon and lat columns.
station_information = CSV.read("../../data/stations/station_information.csv", DataFrame);

## Trips and stations_match 

In [148]:
# Load the 2022-10 trip records. Later cells read the "start station id",
# "end station id", starttime and stoptime columns.
trips = CSV.read("../../data/trips/202210-bluebikes-tripdata.csv", DataFrame);

In [149]:
# Give every distinct station_id a dense index 1..nb_stations.
station_ids = unique(station_information.station_id);
nb_stations = length(station_ids);
stations_match = Dict(station_ids .=> 1:nb_stations);

# Drop trips whose origin, then whose destination, is not a known station.
trips = trips[in.(trips[!, "start station id"], Ref(Set(station_ids))), :];
trips = trips[in.(trips[!, "end station id"], Ref(Set(station_ids))), :];

# Parse the two timestamp columns into DateTime values.
for col in (:starttime, :stoptime)
    trips[!, col] = DateTime.(trips[!, col], "yyyy-mm-dd HH:MM:SS.ssss")
end

# Day-of-month and hour-of-day for both ends of every trip.
trips.startday = Dates.day.(trips.starttime);
trips.starthour = Dates.hour.(trips.starttime);
trips.stopday = Dates.day.(trips.stoptime);
trips.stophour = Dates.hour.(trips.stoptime);

# Count trips per (origin, destination, hour slot). The slot of a trip is
# 24 * (startday - 1) + starthour + 1, so slot 1 is hour 0 of day 1.
nb_days = maximum(trips.startday);
trips_matrix = zeros(Int, nb_stations, nb_stations, 24 * nb_days);
for trip in eachrow(trips)
    origin = stations_match[trip["start station id"]]
    destination = stations_match[trip["end station id"]]
    hour_slot = 24 * (trip.startday - 1) + trip.starthour + 1
    trips_matrix[origin, destination, hour_slot] += 1
end

In [150]:
# Store the hourly trip counts under the key "trips_matrix"; the positional
# `true` is jldsave's compression flag.
jldsave(
    "../../data/parameters/global/202210-trips-matrix.jld2",
    true;
    trips_matrix=trips_matrix,
)

In [151]:
# Read the "trips_matrix" entry back from the JLD2 file written above.
trips_matrix = load("../../data/parameters/global/202210-trips-matrix.jld2", "trips_matrix");

# Capacity

In [152]:
# Two-column copy (station_id, capacity) of the station table, and the bare
# capacity column taken from it.
capacity = station_information[:, [:station_id, :capacity]];
C = capacity[!, :capacity];

In [153]:
# Write the capacity vector as a single CSV column without a header row.
CSV.write("../../data/parameters/global/capacity.csv", Tables.table(C); header=false)

"../../data/parameters/global/capacity.csv"

# Initial state (midnight)

In [154]:
"""
    create_y0(file, station_information)

Return the number of available bikes per station, in the row order of
`station_information`.

The status table read from `file` is left-joined onto `station_information` by
`station_id`. Stations without a status row, or with a missing count, get `0`.
If `file` lists a station more than once, the join yields one entry per match.

# Arguments
- `file`: path of a CSV with `station_id` and `num_bikes_available` columns.
- `station_information`: `DataFrame` with a `station_id` column; it is not modified.

# Returns
The `num_bikes_available` values of the joined table, with missing values replaced by `0`.
"""
function create_y0(file, station_information)
    station_status = CSV.read(file, DataFrame)
    # `leftjoin` does not promise to keep the left table's row order, but callers
    # index the result by station row. Tag each row with its position (on a copy,
    # so the caller's table is untouched) and sort back after the join.
    stations = copy(station_information)
    stations[!, :__row__] = collect(1:size(stations, 1))
    joined = leftjoin(stations, station_status; on=:station_id, makeunique=true)
    sort!(joined, :__row__)
    # Stations absent from the status file count as having no bikes.
    return coalesce.(joined.num_bikes_available, 0)
end

create_y0 (generic function with 1 method)

In [155]:
# Build one initial-state vector per station-status snapshot file.
# NOTE(review): the file names look like MMDDYYYY_HHMM timestamps — confirm.
y0_00, y0_58, y0_55 = map(
    file -> create_y0(file, station_information),
    (
        "../../data/stations/station_status_12062022_0000.csv",
        "../../data/stations/station_status_12052022_0058.csv",
        "../../data/stations/station_status_12042022_2355.csv",
    ),
);

In [156]:
# Write each initial-state vector as a single CSV column without a header row.
CSV.write("../../data/parameters/global/y0_00.csv", Tables.table(y0_00); header=false)
CSV.write("../../data/parameters/global/y0_58.csv", Tables.table(y0_58); header=false)
CSV.write("../../data/parameters/global/y0_55.csv", Tables.table(y0_55); header=false)

"../../data/parameters/global/y0_55.csv"

# Demand

In [157]:
"""
    estimate_demand(trips_matrix, capacity)

Inflate observed trip counts for station-hours whose traffic exceeded capacity.

`trips_matrix[i, j, k]` is the number of trips from station `i` to station `j`
in hour slot `k`, and `capacity[i]` is compared with station `i`'s hourly
totals. The input is not modified; an adjusted copy is returned.

For every hour slot `k`:
- if the total leaving station `i` exceeds `capacity[i]`, each `demand[i, j, k]`
  grows by `ceil(t - t * capacity[i] / total)` with `t = trips_matrix[i, j, k]`;
- if the total arriving at station `j` exceeds `capacity[j]`, each
  `demand[i, j, k]` grows by `ceil(t - t * capacity[j] / total)`.

Both corrections are computed from the original counts, so a pair whose origin
and destination are both saturated receives both increments.
"""
function estimate_demand(trips_matrix, capacity)
    nb_stations, _, nb_hours = size(trips_matrix)
    demand = copy(trips_matrix)
    for k in 1:nb_hours
        # outgoing demand
        for i in 1:nb_stations
            total_outgoing = sum(@view trips_matrix[i, :, k])
            total_outgoing > capacity[i] || continue
            for j in 1:nb_stations
                observed = trips_matrix[i, j, k]
                demand[i, j, k] += ceil(observed - observed * capacity[i] / total_outgoing)
            end
        end

        # incoming demand
        for j in 1:nb_stations
            total_incoming = sum(@view trips_matrix[:, j, k])
            total_incoming > capacity[j] || continue
            for i in 1:nb_stations
                observed = trips_matrix[i, j, k]
                demand[i, j, k] += ceil(observed - observed * capacity[j] / total_incoming)
            end
        end
    end
    return demand
end

estimate_demand (generic function with 1 method)

In [158]:
# Estimate demand from the observed trips and the per-station capacities.
demand = estimate_demand(trips_matrix, capacity.capacity);

In [159]:
# Store the demand array under the key "demand" (compression enabled).
jldsave("../../data/parameters/global/202210-demand.jld2", true; demand=demand)

In [160]:
# Read the "demand" entry back from the JLD2 file written above.
demand = load("../../data/parameters/global/202210-demand.jld2", "demand");

In [161]:
# Day n of the month occupies hour slots 24(n - 1) + 1 through 24n.
# Monday 3 October
d1 = demand[:, :, (2 * 24 + 1):(3 * 24)];
# Tuesday 4 October
d2 = demand[:, :, (3 * 24 + 1):(4 * 24)];
# Wednesday 5 October
d3 = demand[:, :, (4 * 24 + 1):(5 * 24)];

In [162]:
# Total demand over all station pairs and hours of day 1.
sum(d1)

14314

# Order stations by demand

In [163]:
"""
    sort_stations(demand)

Rank station indices from the busiest to the quietest.

The score of station `s` is the sum of `demand[s, :, :]` (everything leaving it)
plus the sum of `demand[:, s, :]` (everything arriving), over the whole third
dimension. Returns the permutation of station indices that puts the scores in
decreasing order.
"""
function sort_stations(demand)
    station_count, _, _ = size(demand)
    activity = Float64[
        sum(demand[s, :, :]) + sum(demand[:, s, :]) for s in 1:station_count
    ]
    return sortperm(activity; rev=true)
end

sort_stations (generic function with 1 method)

In [166]:
# Inverse of stations_match: maps a row index of the station information file
# back to its station_id. Built in one expression so the dictionary gets
# concrete key/value types instead of Dict{Any,Any}.
stations_match_inv = Dict(index => station_id for (station_id, index) in stations_match)

In [172]:
# Station row indices ranked by their day-1 demand, busiest first.
stations_sorted = sort_stations(d1);

In [173]:
# Translate the ranked row indices back into station ids, keeping the ranking order.
stations_id_sorted = Int[stations_match_inv[row] for row in stations_sorted];

In [174]:
# Save the ranked station ids as a headerless CSV column for the Python maps.
CSV.write(
    "../../data/parameters/stations_id_sorted.csv",
    Tables.table(stations_id_sorted);
    header=false,
)

"../../data/parameters/stations_id_sorted.csv"

In [175]:
# Save the station_id => row index mapping (stations_match) to JLD2 for Julia.
jldsave("../../data/parameters/stations_match.jld2", true; stations_match=stations_match)

# Distances

In [176]:
# Pairwise haversine distance between stations from their (lon, lat)
# coordinates, divided by 1000 to go to km.
n_stations = size(station_information, 1)
D = zeros(n_stations, n_stations)
for j in 1:n_stations, i in 1:n_stations
    from = [station_information[i, :lon], station_information[i, :lat]]
    to = [station_information[j, :lon], station_information[j, :lat]]
    D[i, j] = haversine(from, to) / 1000
end

In [177]:
# Store the distance matrix under the key "D" (compression enabled).
jldsave("../../data/parameters/global/distances.jld2", true; D=D)

# Feasibility

In [178]:
# X[i, j] is 1 when two distinct stations are closer than `threshold`
# (same unit as D) and 0 otherwise; the diagonal stays 0.
threshold = 0.7
X = zeros(n_stations, n_stations)
for i in 1:n_stations, j in 1:n_stations
    i == j && continue
    X[i, j] = D[i, j] < threshold
end

In [179]:
# Store the feasibility matrix under the key "X" (compression enabled).
jldsave("../../data/parameters/global/X.jld2", true; X=X)

In [180]:
"""
    make_all_trips_feasible(X)

Return a new matrix with the shape and element type of `X` in which every
entry is `1`, i.e. every station pair is marked feasible.

`X` itself is left untouched. The result is built on a copy, so the caller's
feasibility matrix is not overwritten.
"""
function make_all_trips_feasible(X)
    # `X_new = X` would only alias the argument; copy before filling.
    X_new = copy(X)
    fill!(X_new, 1)
    return X_new
end

make_all_trips_feasible (generic function with 1 method)

In [181]:
# X_feas: feasibility matrix in which every station pair is allowed.
X_feas=make_all_trips_feasible(X);

In [182]:
# Store the all-feasible matrix. The value written is X_feas (not X); the
# dataset keeps its existing key "X" so readers of this file are unaffected.
jldsave("../../data/parameters/global/X_feasible.jld2", true; X=X_feas)

# Problem size reduction and sink

In [186]:
"""
    update_demand(d, d_selected, nb, selected_stations, not_selected_stations)

Build the demand array of the reduced problem: `nb` kept stations plus one sink
station (index `nb + 1`) that aggregates every non-selected station.

# Arguments
- `d`: full demand array indexed `[origin, destination, hour]`.
- `d_selected`: demand restricted to the kept stations, in kept order.
- `nb`: number of kept stations.
- `selected_stations`: indices into `d` of the kept stations.
- `not_selected_stations`: indices into `d` of the stations merged into the sink.

# Returns
A `Float64` array of size `(nb + 1, nb + 1, size(d, 3))` where
- `[1:nb, 1:nb, :]` is `d_selected`,
- `[nb + 1, i, t]` is the demand from all non-selected stations to kept station `i`,
- `[i, nb + 1, t]` is the demand from kept station `i` to all non-selected stations,
- `[nb + 1, nb + 1, t]` is the demand among non-selected stations.
"""
function update_demand(d, d_selected, nb, selected_stations, not_selected_stations)
    nb_hours = size(d, 3)
    sink = nb + 1
    d_new = zeros(sink, sink, nb_hours)
    d_new[1:nb, 1:nb, :] = d_selected
    for t in 1:nb_hours
        # Loop over all `nb` kept stations, not a fixed count.
        for i in 1:nb
            station = selected_stations[i]
            d_new[sink, i, t] = sum(@view d[not_selected_stations, station, t])
            d_new[i, sink, t] = sum(@view d[station, not_selected_stations, t])
        end
        d_new[sink, sink, t] = sum(@view d[not_selected_stations, not_selected_stations, t])
    end
    return d_new
end

update_demand (generic function with 4 methods)

In [187]:
"""
    update_distances(D_selected, nb)

Extend the `nb`-by-`nb` distance matrix `D_selected` with one sink station.

The result is `(nb + 1)`-by-`(nb + 1)`: the top-left block is `D_selected`,
every distance between a kept station and the sink is `1000` (standing in for
"infinitely far"), and the sink-to-sink entry is `0`.
"""
function update_distances(D_selected, nb)
    sink = nb + 1
    D_new = zeros(sink, sink)
    D_new[1:nb, 1:nb] = D_selected
    # Large constant used as an infinite distance to and from the sink.
    D_new[sink, 1:nb] .= 1000
    D_new[1:nb, sink] .= 1000
    return D_new
end

update_distances (generic function with 2 methods)

In [188]:
"""
    update_feasibility(X_selected, nb)

Extend the `nb`-by-`nb` feasibility matrix `X_selected` with one sink station.

The result is `(nb + 1)`-by-`(nb + 1)`: the top-left block is `X_selected` with
its diagonal forced to `1`, and every entry in the sink row and column
(including sink-to-sink) is `0`.
"""
function update_feasibility(X_selected, nb)
    X_new = zeros(nb + 1, nb + 1)
    X_new[1:nb, 1:nb] = X_selected
    # Each kept station is marked feasible with itself.
    foreach(1:nb) do k
        X_new[k, k] = 1
    end
    return X_new
end

update_feasibility (generic function with 2 methods)

In [197]:
"""
    reduce_problem_and_create_sink(d, C, y0, D, X, nb, stations_sorted)

Restrict the problem data to the first `nb` stations of `stations_sorted` and
append one sink station that stands for all remaining stations.

# Arguments
- `d`: demand array indexed `[origin, destination, hour]`.
- `C`: per-station capacities.
- `y0`: per-station initial bike counts.
- `D`: station-to-station distance matrix.
- `X`: station-to-station feasibility matrix.
- `nb`: number of stations to keep.
- `stations_sorted`: station indices in priority order.

# Returns
`(d_new, C_new, y0_new, D_new, X_new)`, each with `nb + 1` stations. The sink's
capacity and initial state are the sums over the dropped stations; demand,
distances and feasibility come from `update_demand`, `update_distances` and
`update_feasibility`.
"""
function reduce_problem_and_create_sink(d, C, y0, D, X, nb, stations_sorted)
    kept = stations_sorted[1:nb]
    dropped = stations_sorted[(nb + 1):end]

    # Demand: kept-station block plus aggregated flows to and from the sink.
    d_new = update_demand(d, d[kept, kept, :], nb, kept, dropped)

    # Capacity and initial state: kept rows followed by the total of the dropped ones.
    C_new = vcat(C[kept, :], sum(C[k, :] for k in dropped))
    y0_new = vcat(y0[kept, :], sum(y0[k, :] for k in dropped))

    # Distances and feasibility: kept-station blocks extended with the sink.
    D_new = update_distances(D[kept, kept], nb)
    X_new = update_feasibility(X[kept, kept], nb)

    return d_new, C_new, y0_new, D_new, X_new
end

reduce_problem_and_create_sink (generic function with 1 method)

# Inputs construction - Reduced problem

### Keep top 30

Day 1

In [198]:
# Day 1: keep the 30 highest-ranked stations (per stations_sorted) plus one
# sink, using demand d1 and initial state y0_00. All five outputs are kept.
d_30_1, C_30, y0_00_30, D_30, X_30 = reduce_problem_and_create_sink(d1,C,y0_00,D,X,30,stations_sorted);

Day 2

In [199]:
# Day 2: same reduction with demand d2, initial state y0_58 and the X_feas
# matrix; capacity and distance outputs are discarded.
d_30_2, _, y0_58_30, _, Xfeas_30 = reduce_problem_and_create_sink(d2,C,y0_58,D,X_feas,30,stations_sorted);

Day 3

In [200]:
# Day 3: same reduction with demand d3 and initial state y0_55; only the demand
# and initial-state outputs are kept.
d_30_3, _, y0_55_30, _, _ = reduce_problem_and_create_sink(d3,C,y0_55,D,X,30,stations_sorted);

In [193]:
# Store the three reduced daily demand arrays, each under its variable name.
jldsave("../../data/parameters/top30/d_1.jld2", true; d_30_1=d_30_1)
jldsave("../../data/parameters/top30/d_2.jld2", true; d_30_2=d_30_2)
jldsave("../../data/parameters/top30/d_3.jld2", true; d_30_3=d_30_3)


In [201]:
# Capacity and initial states go to headerless CSV files.
CSV.write("../../data/parameters/top30/C.csv", Tables.table(C_30); header=false)
CSV.write("../../data/parameters/top30/y0_00.csv", Tables.table(y0_00_30); header=false)
CSV.write("../../data/parameters/top30/y0_55.csv", Tables.table(y0_55_30); header=false)
CSV.write("../../data/parameters/top30/y0_58.csv", Tables.table(y0_58_30); header=false)
# Feasibility and distance matrices go to JLD2, each under its variable name.
jldsave("../../data/parameters/top30/X.jld2", true; X_30=X_30)
jldsave("../../data/parameters/top30/X_feasible.jld2", true; Xfeas_30=Xfeas_30)
jldsave("../../data/parameters/top30/D.jld2", true; D_30=D_30)

### Keep top 50

Day 1

In [202]:
# Day 1: keep the 50 highest-ranked stations (per stations_sorted) plus one
# sink, using demand d1 and initial state y0_00. All five outputs are kept.
d_50_1, C_50, y0_00_50, D_50, X_50 = reduce_problem_and_create_sink(d1,C,y0_00,D,X,50,stations_sorted);

Day 2

In [203]:
# Day 2: same reduction with demand d2, initial state y0_55 and the X_feas
# matrix; capacity and distance outputs are discarded.
# NOTE(review): the top-30 section pairs day 2 with y0_58 and day 3 with y0_55,
# the reverse of this section — confirm which pairing is intended.
d_50_2, _, y0_55_50, _, Xfeas_50 = reduce_problem_and_create_sink(d2,C,y0_55,D,X_feas,50,stations_sorted);

Day 3

In [204]:
# Day 3: same reduction with demand d3 and initial state y0_58; only the demand
# and initial-state outputs are kept.
d_50_3, _, y0_58_50, _, _ = reduce_problem_and_create_sink(d3,C,y0_58,D,X,50,stations_sorted);

In [205]:
# Store the three reduced daily demand arrays, each under its variable name.
jldsave("../../data/parameters/top50/d_1.jld2", true; d_50_1=d_50_1)
jldsave("../../data/parameters/top50/d_2.jld2", true; d_50_2=d_50_2)
jldsave("../../data/parameters/top50/d_3.jld2", true; d_50_3=d_50_3)

In [206]:
# Capacity and initial states go to headerless CSV files.
CSV.write("../../data/parameters/top50/C.csv", Tables.table(C_50); header=false)
CSV.write("../../data/parameters/top50/y0_00.csv", Tables.table(y0_00_50); header=false)
CSV.write("../../data/parameters/top50/y0_55.csv", Tables.table(y0_55_50); header=false)
CSV.write("../../data/parameters/top50/y0_58.csv", Tables.table(y0_58_50); header=false)
# Feasibility and distance matrices go to JLD2, each under its variable name.
jldsave("../../data/parameters/top50/X.jld2", true; X_50=X_50)
jldsave("../../data/parameters/top50/X_feasible.jld2", true; Xfeas_50=Xfeas_50)
jldsave("../../data/parameters/top50/D.jld2", true; D_50=D_50)