In [1]:
import numpy as np
import pandas as pd
from collections import deque
import random

# Softmax by group (each state s in row_idx)
from scipy.special import softmax

from scipy.sparse import coo_matrix, csr_matrix

from scipy.sparse import lil_matrix
from scipy.optimize import minimize
import matplotlib.pyplot as plt
import time

In [95]:

    
def extract_state_action_pairs(outgoing_dict):
    """Flatten a ``{state: [action, ...]}`` mapping into two parallel arrays.

    Parameters
    ----------
    outgoing_dict : mapping
        Maps each state to a sized iterable of the actions available from it.

    Returns
    -------
    (row_idx, col_idx) : tuple of np.ndarray
        ``row_idx[k]`` is the state and ``col_idx[k]`` the action of the k-th
        pair, in the iteration order of ``outgoing_dict``. States with no
        actions contribute no pairs. When there are no pairs at all, both
        arrays are empty *integer* arrays so they remain usable as indices.
    """
    row_idx, col_idx = [], []
    for s, actions in outgoing_dict.items():
        row_idx.extend([s] * len(actions))
        col_idx.extend(actions)

    if not row_idx:
        # np.array([]) defaults to float64, which numpy refuses to use as an
        # index array; hand back integer-typed empties instead.
        return np.empty(0, dtype=np.intp), np.empty(0, dtype=np.intp)

    return np.array(row_idx), np.array(col_idx)


def compute_softmax_policy_by_action_features(action_features, theta, n_links, row_idx, col_idx):
    """Build a sparse softmax policy over the allowed (state, action) pairs.

    The score of pair ``k`` is ``sum_i theta[i] * action_features[i][col_idx[k]]``.
    Within every state (equal values of ``row_idx``) the scores are turned into
    probabilities with a max-shifted softmax.

    Parameters
    ----------
    action_features : sequence of array-like
        One feature vector per weight, indexed by action id.
    theta : sequence of numbers
        Weight applied to each feature vector; ``theta[i]`` pairs with
        ``action_features[i]``.
    n_links : int
        Size of the square output matrix.
    row_idx, col_idx : array-like of int
        Parallel arrays of state ids and action ids, one entry per allowed pair.

    Returns
    -------
    scipy.sparse.csr_matrix
        ``(n_links, n_links)`` matrix whose entry ``[s, a]`` is the probability
        of choosing action ``a`` in state ``s``. States without any pair have
        an all-zero row.
    """
    row_idx = np.asarray(row_idx)
    col_idx = np.asarray(col_idx)

    # No pairs at all: nothing to normalise, return an all-zero policy.
    if row_idx.size == 0:
        return coo_matrix((n_links, n_links), dtype=float).tocsr()

    # Accumulate scores in float64. With integer theta/features the original
    # integer score array made the probability buffer integer too, so every
    # probability was truncated to 0 when stored.
    vals = np.zeros(col_idx.shape[0], dtype=float)
    for i, f in enumerate(action_features):
        vals += theta[i] * np.asarray(f)[col_idx]

    # Bring the pairs of each state next to each other.
    order = np.argsort(row_idx, kind="stable")
    rows = row_idx[order]
    cols = col_idx[order]
    scores = vals[order]

    # starts[g] is the first position of group g; sizes[g] is its length.
    starts = np.flatnonzero(np.concatenate(([True], rows[1:] != rows[:-1])))
    sizes = np.diff(np.append(starts, rows.shape[0]))

    # Grouped, numerically stable softmax: subtract each group's max first.
    group_max = np.maximum.reduceat(scores, starts)
    exp_scores = np.exp(scores - np.repeat(group_max, sizes))
    group_sum = np.add.reduceat(exp_scores, starts)
    pi_data = exp_scores / np.repeat(group_sum, sizes)

    return coo_matrix((pi_data, (rows, cols)), shape=(n_links, n_links)).tocsr()


def sample_policy(state, pi):
    """Draw one action for ``state`` from the sparse policy matrix ``pi``.

    The stored column indices of row ``state`` are the candidate actions and
    the stored values are used as their probabilities.
    """
    policy_row = pi.getrow(state)
    return np.random.choice(policy_row.indices, p=policy_row.data)

def sample_destination(state):
    """Draw a destination link index for a trip that starts at ``state``.

    Row ``state`` of the module-level ``dest`` array is used as the probability
    vector over ``range(n_links)``.
    """
    destination_probs = dest[state]
    return np.random.choice(n_links, p=destination_probs)

def initialize_fleet():
    """Create the initial list of ``n_vehicles`` vehicle-state dicts.

    Each vehicle starts empty on a link drawn from the module-level
    ``init_dist_new`` distribution (passing no ``p`` to the draw would give a
    uniform start instead), with its timer set to that link's ``tau`` entry.
    """
    def _new_vehicle():
        # One RNG draw per vehicle, in creation order.
        start_link = np.random.choice(n_links, p=init_dist_new)
        return {
            'status': 'empty',
            'location': start_link,
            'time_left': tau[start_link],
            'residual': 0,
            'destination': None,
            'matched': False,
        }

    return [_new_vehicle() for _ in range(n_vehicles)]

def smooth_zero_entries(mat, eps=1e-8):
    """Replace zeros in each non-zero-sum row by ``eps`` while keeping the row sum.

    For every row whose sum is non-zero and that contains at least one zero,
    the zeros are set to ``eps`` and the strictly positive entries are rescaled
    so that they sum to ``row_sum - num_zeros * eps``. Rows that sum to zero,
    rows without zeros, and negative entries are left untouched.

    Parameters
    ----------
    mat : array-like, 2-D
        Input matrix; it is never modified.
    eps : float, optional
        Value written into the zero entries.

    Returns
    -------
    np.ndarray
        A smoothed copy of ``mat``. Floating-point input keeps its dtype; any
        other dtype is promoted to float64 so that ``eps`` and the rescaled
        values are not truncated.
    """
    mat = np.array(mat)  # np.array copies, so the caller's data stays intact
    if not np.issubdtype(mat.dtype, np.floating):
        # An integer buffer would silently round eps (and the rescaled
        # probabilities) down to 0.
        mat = mat.astype(float)
    n, d = mat.shape

    for i in range(n):
        row = mat[i]  # view into ``mat``
        row_sum = np.sum(row)

        if row_sum == 0:
            continue  # skip zero rows

        # Both masks are taken before the row is edited, so they describe the
        # original entries.
        zero_mask = (row == 0)
        nonzero_mask = (row > 0)

        num_zeros = np.sum(zero_mask)
        if num_zeros == 0:
            continue  # no change needed

        mat[i, zero_mask] = eps

        # Positive entries are still untouched at this point.
        nonzero_sum = np.sum(row[nonzero_mask])
        adjusted_total = row_sum - num_zeros * eps

        if nonzero_sum > 0:
            mat[i, nonzero_mask] = row[nonzero_mask] * (adjusted_total / nonzero_sum)

    return mat


# Main simulation function
def run_simulation(sim_hours=2):
    """Simulate the vehicle fleet tick by tick and report the reward collected.

    The function relies on module-level state: ``snapshot_window``,
    ``n_links``, ``ticks_per_hour``, ``tick_duration``, ``c_sec``,
    ``R_matrix``, ``pi``, ``tau``, ``lambda_vec``, ``alpha`` and
    ``n_vehicles``, plus the helpers ``initialize_fleet``,
    ``sample_destination`` and ``sample_policy``. Randomness comes from both
    the ``random`` module (matching draw) and ``np.random`` (inside the
    helpers); no seed is set here.

    Per tick, every vehicle's ``time_left`` is reduced by ``tick_duration``.
    While the timer is <= 0:

    * an empty vehicle is matched with probability ``m_s[location]``; on a
      match it becomes occupied, a destination ``j`` is sampled, its location
      is set to ``j`` straight away, its timer becomes ``c_sec[s, j]`` minus
      the overshoot and ``R_matrix[s, j]`` is added to the total reward;
      otherwise it moves to a link sampled from ``pi``;
    * an occupied vehicle becomes empty and moves to a link sampled from
      ``pi`` at its current location (the trip destination).

    After all vehicles are processed, the number of empty vehicles on each
    link is appended to that link's snapshot deque. Every ``snapshot_window``
    ticks the matching probabilities ``m_s`` are recomputed from the mean of
    those snapshots.

    NOTE(review): ``m_s`` starts at 1 for every link and is first updated
    after ``snapshot_window`` ticks, so until then every empty vehicle that
    finishes a link is matched, including on links with
    ``lambda_vec[s] == 0`` — confirm this warm-up behaviour is intended.

    Parameters
    ----------
    sim_hours : number, optional
        Simulated duration in hours; the tick count is
        ``int(sim_hours * ticks_per_hour)``. A value of 0 raises
        ``ZeroDivisionError`` in the final average.

    Returns
    -------
    avg_reward : float
        ``total_reward / sim_hours / n_vehicles``.
    total_matched : int
        Number of matches that occurred.
    z_snapshots : list of collections.deque
        One deque per link holding at most the last ``snapshot_window``
        per-tick empty-vehicle counts.
    """
    fleet = initialize_fleet()
    # One bounded history of empty-vehicle counts per link.
    z_snapshots = [deque(maxlen=snapshot_window) for _ in range(n_links)]
    # Matching probability per link; starts at 1 everywhere.
    m_s = np.ones(n_links)
    total_reward = 0
    total_matched = 0

    n_ticks = int(sim_hours * ticks_per_hour)
    #print('n_ticks:',n_ticks)

    for tick in range(n_ticks):
        #print('tick',tick)
        # Empty vehicles per link at the end of this tick.
        empty_count = np.zeros(n_links)  

        # Go over each vehicle and update it.
        for car in fleet:
            car['time_left'] -= tick_duration

            # Handle overshoot if the vehicle finishes its link/trip within this tick;
            # the leftover time is carried into the next link or trip.
            while car['time_left'] <= 0:
                overshoot = -car['time_left']

                if car['status'] == 'empty':
                    s = car['location']
                    # Matched or not: Bernoulli draw with probability m_s[s].
                    if random.random() < m_s[s]:
                        car['status'] = 'occupied'
                        j = sample_destination(s)  
                        car['destination'] = j
                        # Location jumps to the destination at trip start.
                        car['location'] = j
                        car['time_left'] = c_sec[s, j] - overshoot  
                        total_reward += R_matrix[s, j]
                        total_matched += 1
                    else:
                        next_link = sample_policy(s,pi)  ### next link
                        car['location'] = next_link
                        car['time_left'] = tau[next_link] - overshoot
                else:
                    # Finish trip, switch to empty, and pick the next link from
                    # the policy row of the current location.
                    car['status'] = 'empty'
                    next_link = sample_policy(car['location'],pi)
                    car['location'] = next_link
                    car['time_left'] = tau[next_link] - overshoot
                    car['destination'] = None

            # Only vehicles that are empty after the update are counted.
            if car['status'] == 'empty':
                empty_count[car['location']] += 1

        # Record the snapshot.
        for s in range(n_links):
            z_snapshots[s].append(empty_count[s])

        # Update matching probability (m_s) every snapshot_window ticks
        # (5 minutes with tick_duration = 20 s and snapshot_window = 15).
        if (tick + 1) % snapshot_window == 0:
            for s in range(n_links):
                # Mean empty-vehicle count over the stored snapshots.
                z_avg = np.mean(z_snapshots[s]) if z_snapshots[s] else 1.0 
     
                if lambda_vec[s] == 0:
                    # No arrivals on this link: never matched.
                    m_s[s] = 0.0
                elif z_avg == 0:
                    # Arrivals but no empty vehicles observed: always matched.
                    m_s[s] = 1.0
                else:
                    m_s[s] = 1 - np.exp(-alpha * lambda_vec[s] * tau[s] / 3600 / z_avg)  
                # lambda: per hour per link  tau: second
            #print('m:',m_s)
                

    # Reward per simulated hour per vehicle.
    avg_reward = total_reward / sim_hours / n_vehicles
    return avg_reward, total_matched, z_snapshots


3194

In [129]:
# --- Simulation configuration and input data (module-level globals read by the functions above) ---

# Fleet size used by initialize_fleet() and the per-vehicle average in run_simulation().
n_vehicles = 5000

# Scale factor inside the exponent of the matching probability in run_simulation().
alpha =1

unit = 1
# Link-to-link travel time; the file name and the comment below indicate hours.
c = np.load('shortest_link_to_link_start_of_j_time_hour.npy')
c_sec = c * 3600  # same matrix in seconds; used for occupied-trip timers
c = c * unit
c_time = c ### hour

# Per-link traversal time.
tau = np.load('tau.npy') ### sec


# NOTE: unpickling can execute arbitrary code; only load this file from a trusted source.
outgoing_links = pd.read_pickle('outgoing_dict.pkl')
# Link-to-link fare; used as the reward collected on a match.
v = np.load('fare_link_to_link_start_of_j.npy')
R_matrix = v


# Destination probabilities: row s is the distribution sampled by sample_destination(s).
dest = np.load('destination_prob_link_to_link.npy')

# NOTE(review): hard-coded sizes; n_links is reassigned from len(profit_vec) in the
# next cell — presumably both equal dest.shape[0], confirm.
n = 29644
n_links = 29644

tick_duration = 20  # seconds
ticks_per_hour = int(3600 / tick_duration)
snapshot_window = 15  # number of ticks in 5 minutes

row_sums = dest.sum(axis=1)
zero_row_mask = (row_sums == 0)

# Rows with no destination mass become uniform, then remaining zeros are
# replaced by a tiny eps with each row sum preserved.
dest[zero_row_mask, :] = 1/n
dest = smooth_zero_entries(dest, eps=1e-8)

# NOTE(review): this file is tagged 730_930 while the fare/cost files below are
# tagged mid_morning — confirm they describe the same period.
lambda_vec = np.load('passenger_arrival_rate_link_730_930_with_zero_new.npy')  ### per link
#lambda_vec_param = np.load('passenger_arrival_rate_link_730_930_with_zero.npy')  ### per km

# In-place division: modifies the loaded array itself.
lambda_vec /= unit

#fare_vec = np.load('f_evaluation_M10000_mid_morning_Optimal.npy')
#cost_vec = np.load('c_evaluation_M10000_mid_morning_Optimal.npy')

fare_vec = np.load('f_evaluation_mid_morning_Competition_Free.npy')
cost_vec = np.load('c_evaluation_mid_morning_Competition_Free.npy')
# Per-link profit; the single action feature fed to the softmax policy.
profit_vec = fare_vec - cost_vec

beta = 30   # unit cost  per hour


features = [profit_vec]

# Observed starting distribution, normalised to sum to 1 for np.random.choice.
init_dist = np.load('inital_distribution.npy')
init_dist_new = init_dist / np.sum(init_dist)

#init_dist_new = np.ones(n) / n  #### note: uniform alternative
#np.sum(init_dist_new)

# Links that receive a self-loop below — presumably links without any outgoing
# link, so that their policy row is not empty; TODO confirm.
zero_row_indices = np.array([   16,   264,  7557, 10264, 10471, 10500, 12531, 16000, 16313,
       18341, 19267, 23546, 23548, 23720, 23844, 23881, 23883, 24477,
       24563, 24644, 24790, 24858, 25343, 25599, 25652, 25672, 25693,
       25701, 25718, 25737, 25739, 25740, 25742, 25769, 25770, 25771,
       25772, 25776, 26677, 26691])

# Mutates the lists inside outgoing_links in place.
for i in zero_row_indices:
    outgoing_links[i].append(i) 

In [130]:
# Size the policy from the feature vector and flatten the adjacency dict into
# parallel (state, action) index arrays.
n_links = len(profit_vec)
row_idx, col_idx = extract_state_action_pairs(outgoing_links)

# Weight applied to each action feature in the softmax score.
# Values tried earlier: 0.0, 0.038625, 0.069880, 0.038397.
theta_vec = [1.12]

# Sparse policy matrix read by run_simulation() through the global ``pi``.
pi = compute_softmax_policy_by_action_features(
    action_features=features,
    theta=theta_vec,
    n_links=n_links,
    row_idx=row_idx,
    col_idx=col_idx,
)

In [133]:
# Repeat the stochastic simulation and report the running mean profit.
avg_P_array, matched_array, z_array = [], [], []
t1 = time.time()
for i in range(10):
    avg_R, matched, z = run_simulation(sim_hours=12)

    # NOTE(review): the factor 2 applied to beta is not explained in this
    # notebook — confirm the intended cost term.
    avg_P = avg_R - beta*2

    avg_P_array.append(avg_P)
    matched_array.append(matched)
    z_array.append(z)
    t2 = time.time()  # timestamp at the end of the latest run

    print(f'Average profit up to ite {i+1}: {np.mean(avg_P_array)}')



Average profit up to ite 1: 53.844506501134234
Average profit up to ite 2: 52.9401758397897
Average profit up to ite 3: 52.634499673159915
Average profit up to ite 4: 52.40084059054453
Average profit up to ite 5: 52.35619927493731
Average profit up to ite 6: 52.45881214614443
Average profit up to ite 7: 52.51316336296626
Average profit up to ite 8: 52.50989465233484
Average profit up to ite 9: 52.593454543039535
Average profit up to ite 10: 52.64011802328268
