In [1]:
import os
import gc
import warnings
import numpy as np
import pandas as pd
from pandarallel import pandarallel
from itertools import product
from tqdm import tqdm

from simulation.wrapper import wrapper
from simulation.dataloader.load_snapshots import get_snapshots
from simulation.solver.matching import (
    greedy_sequential_matching,
    composite_sequential_matching,
    k_hungarian_matching,
    k_hungarian_capacity_exhaust,
    k_hungarian_m_capacity_exhaust
)
# from simulation.solver.matching.random_process import k_hungarian_capacity_exhaust_random, k_hungarian_m_capacity_exhaust_random

In [2]:
# Notebook-wide runtime configuration.
SEED = 123
N_WORKERS = 9

# Seed NumPy's legacy global random state.
np.random.seed(SEED)
# Suppress all warnings for the rest of the session.
warnings.filterwarnings(action='ignore')
# Configure pandarallel: fixed worker count, progress bars switched off.
pandarallel.initialize(nb_workers=N_WORKERS, progress_bar=False)

INFO: Pandarallel will run on 9 workers.
INFO: Pandarallel will use standard multiprocessing data transfer (pipe) to transfer data between the main process and workers.


In [3]:
# Parameter grid for the simulation sweep. Each list is one factor of the
# Cartesian product built below.
ms = [*range(1, 11)]   # 1..10, stored in column 'm'
cs = [*range(1, 11)]   # 1..10, stored in column 'capacity'
ps = [*range(1, 6)]    # 1..5,  stored in column 'n_possible'

drs = [15.0]                              # stored in column 'dr'
fracs = [0.0, 0.2, 0.4, 0.6, 0.8, 0.95]   # stored in column 'dropout'

# Matching strategies under comparison, stored in column 'func'.
funcs = [
    greedy_sequential_matching,
    composite_sequential_matching,
    k_hungarian_matching,
    k_hungarian_capacity_exhaust,
    k_hungarian_m_capacity_exhaust,
]

# Column labels, listed in the same order as the factors given to product().
columns = ['m', 'capacity', 'dr', 'dropout', 'n_possible', 'func']

# Every combination of the factors above, one tuple per configuration.
comb = list(product(ms, cs, drs, fracs, ps, funcs))

# One DataFrame row per configuration, keyed by the column labels.
comb_df = pd.DataFrame([dict(zip(columns, combo)) for combo in comb])

In [4]:
# Number of configurations (rows) in the parameter grid.
comb_df.shape[0]

15000

In [5]:
# Run the full parameter sweep for every city/horizon snapshot set and write
# one result file per set.
types = ['riyadh_10min', 'riyadh_5min', 'jeddah_5min', 'jeddah_10min']

# Distance written into user/driver pairs that have no value in the pivot.
MISSING_DISTANCE = 9999.0
OUTPUT_DIR = 'sim_res'

# Create the output folder before any computation so that a missing directory
# cannot throw away a finished sweep at write time.
os.makedirs(OUTPUT_DIR, exist_ok=True)

# Resolve and validate every snapshot list up front: a missing or empty input
# folder then fails immediately instead of after the earlier cities have run.
snapshot_files = {}
for CITY in types:
    DIR = f'data/snapshots_{CITY}'
    # os.listdir returns entries in arbitrary order, so sort for a reproducible
    # processing order; match on the suffix so names that merely contain '.pq'
    # somewhere in the middle are not picked up.
    snapshot_files[CITY] = sorted(x for x in os.listdir(DIR) if x.endswith('.pq'))
    if not snapshot_files[CITY]:
        raise FileNotFoundError(f'No .pq snapshot files found in {DIR}')

for CITY in types:
    DIR = f'data/snapshots_{CITY}'
    filenames = snapshot_files[CITY]

    # Accumulates one entry per (snapshot, configuration) pair.
    results = []

    for snapshot in tqdm(filenames, f'Processing {CITY} ...'):
        data = pd.read_parquet(os.path.join(DIR, snapshot))

        # Rows are userids, columns are driver_ids, cells hold 'distance'
        # (pivot_table's default aggregation applies to duplicate pairs).
        # copy=True guarantees a writable array: the frame's underlying array
        # can be read-only when pandas copy-on-write is active, which would
        # make the in-place fill below raise.
        matrix = pd.pivot_table(
            data, values='distance', index='userid', columns='driver_id'
        ).to_numpy(copy=True)
        matrix[np.isnan(matrix)] = MISSING_DISTANCE

        # Evaluate every configuration in comb_df against this snapshot.
        # NOTE(review): the NumPy seed is set once in the parent process; if
        # `wrapper` draws from NumPy's global RNG, worker processes may start
        # from identical state - confirm how randomness is handled there.
        results.extend(
            comb_df.parallel_apply(
                lambda row: wrapper(
                    snapshot=snapshot,
                    matrix=matrix,
                    m=int(row['m']),
                    capacity=int(row['capacity']),
                    n_possible=int(row['n_possible']),
                    dr=row['dr'],
                    dropout=row['dropout'],
                    matching_fn=row['func']
                ),
                axis=1
            ).tolist()
        )

        # Release the per-snapshot frame and matrix before loading the next one.
        del data
        del matrix
        gc.collect()

    df = pd.DataFrame(results)
    df['datetime'] = pd.to_datetime(df['datetime'])

    # CITY has the form '<city>_<horizon>'.
    name_parts = CITY.split('_')
    df['city'] = name_parts[0]
    df['horizon'] = name_parts[1]

    df = df.sort_values(by=['datetime', 'dr', 'm', 'capacity'])
    df.to_parquet(f'{OUTPUT_DIR}/simulation_results_{CITY}_v4.0.pq')

Processing riyadh_10min ...: 100%|██████████| 100/100 [1:00:52<00:00, 36.52s/it]
Processing riyadh_5min ...: 100%|█████████████| 100/100 [27:52<00:00, 16.73s/it]
Processing jeddah_5min ...: 100%|█████████████| 100/100 [13:28<00:00,  8.08s/it]
Processing jeddah_10min ...: 100%|████████████| 100/100 [23:15<00:00, 13.96s/it]


In [6]:
# Spot-check ten random rows. `df` is whatever the last loop iteration left
# behind, i.e. only the results for the final entry of `types`.
df.sample(n=10)

Unnamed: 0,datetime,num_clients,num_captains,matching_fn,m,capacity,n_possible,dr,driver_frac,num_clients_with_no_reach,num_captains_with_no_requests,mean_distance_to_client,num_clients_with_no_asks,num_clients_with_no_handshake_options,city,horizon
835904,2023-02-25 14:55:00,245,81,k_hungarian_m_capacity_exhaust,8,3,1,15.0,0.2,102,0,1.523335,182,182,jeddah,10min
295928,2023-02-25 04:40:00,90,7,k_hungarian_capacity_exhaust,8,3,1,15.0,0.05,72,0,3.930195,84,84,jeddah,10min
566034,2023-02-25 05:28:00,136,61,k_hungarian_m_capacity_exhaust,8,4,2,15.0,0.4,30,2,2.413547,64,85,jeddah,10min
390660,2023-02-25 22:56:00,59,157,greedy_sequential_matching,1,5,3,15.0,0.6,0,112,0.859597,2,14,jeddah,10min
491924,2023-02-25 02:56:00,35,78,k_hungarian_m_capacity_exhaust,8,10,5,15.0,0.6,0,8,3.7971,0,0,jeddah,10min
1107469,2023-02-25 11:21:00,201,208,k_hungarian_m_capacity_exhaust,9,4,4,15.0,1.0,0,2,4.491908,0,17,jeddah,10min
1185437,2023-02-25 03:41:00,64,8,k_hungarian_matching,1,3,3,15.0,0.05,56,0,2.032179,56,56,jeddah,10min
88070,2023-02-25 05:37:00,129,141,greedy_sequential_matching,9,8,5,15.0,1.0,17,10,4.359957,24,37,jeddah,10min
237052,2023-02-25 22:34:00,67,175,k_hungarian_matching,9,1,1,15.0,0.6,0,6,3.870899,0,0,jeddah,10min
710663,2023-02-25 14:49:00,206,80,k_hungarian_capacity_exhaust,4,8,3,15.0,0.2,28,1,2.361224,83,134,jeddah,10min
