In [2]:
%load_ext autoreload

In [4]:
import pandas as pd
from pathlib import Path
from typing import List, Dict
import darpinstances.instance
from darpinstances.db import db

16:46:28 [INFO] Connecting to ssh server


Successfully read db config from: ['C:\\Workspaces\\AIC\\Ridesharing_DARP_instances/config.ini']


16:46:29 [INFO] Connected (version 2.0, client OpenSSH_7.9p1)
16:46:29 [INFO] Auth banner: b'Debian GNU/Linux \n _ _          __      _                   _\n(_) |_ ___   / _| ___| |  _____   ___   _| |_   ___ ____\n| | __/ __| | |_ / _ \\ | / __\\ \\ / / | | | __| / __|_  /\n| | |_\\__ \\_|  _|  __/ || (__ \\ V /| |_| | |_ | (__ / /\n|_|\\__|___(_)_|  \\___|_(_)___| \\_/  \\__,_|\\__(_)___/___|\n\nWelcome to our ultra super duper cool server.\n\n'
16:46:29 [INFO] Authentication (publickey) successful!
16:46:29 [INFO] SSH tunnel established from ('127.0.0.1', 1113) to its.fel.cvut.cz/5432
16:46:29 [INFO] Starting _psycopg2 connection
16:46:31 [INFO] Starting sql_alchemy connection


In [8]:
# Root directory holding one sub-directory per area.
# Alternative locations used on other machines are kept below for reference.
# darp_dir_path = Path(r"C:/Google Drive/AIC Experiment Data/DARP")
# itsc_dir_path = Path(r"D:/Google Drive/AIC Experiment Data/DARP/ITSC_instance_paper/Instances")
itsc_dir_path = Path(r"C:/Google Drive/AIC Experiment Data/DARP/ITSC_instance_paper/Instances")

# Area sub-directories to summarise, in the order they appear in the final table.
area_dirs = ['NYC', 'Manhattan', 'Chicago', 'DC']

In [30]:
def get_data_for_dir(area_dir: str) -> Dict:
    """
    Collect the summary statistics of one area for the overview table.

    Reads ``nodes.csv`` and ``edges.csv`` from ``<itsc_dir_path>/<area_dir>/map``,
    loads the first ``config.yaml`` found anywhere below the area directory and
    runs two database queries: a count of the demand in the area and the size
    of the area geometry.

    :param area_dir: Name of the area sub-directory inside ``itsc_dir_path``.
    :return: Dictionary with the keys ``'area'`` (the ``area_dir`` argument),
        ``'node_count'`` and ``'edge_count'`` (row counts of the two CSV files),
        ``'Requests per hour'`` (result of the demand count query) and
        ``'Area [km2]'`` (result of the area query divided by 1 000 000).
    :raises FileNotFoundError: If no ``config.yaml`` exists below the area directory.
    """
    area_dir_path = itsc_dir_path / area_dir
    map_dir = area_dir_path / 'map'

    nodes = pd.read_csv(map_dir / 'nodes.csv')
    edges = pd.read_csv(map_dir / 'edges.csv')

    area_data = {
        'area': area_dir,
        'node_count': len(nodes),
        'edge_count': len(edges)
    }

    # The first config.yaml found below the area directory is used; only its
    # demand datasets and area id are read.
    # NOTE(review): presumably every instance config of one area shares these
    # two values - confirm.
    config_path = next(area_dir_path.rglob('config.yaml'), None)
    if config_path is None:
        # Without the default, next() would surface as a bare StopIteration.
        raise FileNotFoundError(f"No config.yaml found under {area_dir_path}")
    config = darpinstances.instance.load_instance_config(str(config_path))
    demand_sets = config['demand']['dataset']
    area_id = config['area_id']

    # request count
    # Counts demand rows of the configured datasets with origin_time between
    # 18:00:00 and 18:59:59 on 2022-04-05 whose origin node lies within the
    # area geometry. The dataset ids and the area id are interpolated straight
    # from the config values.
    sql = f"""
    SELECT count(1) FROM demand
    JOIN trip_locations
        on demand.id = trip_locations.request_id
        AND dataset IN ({','.join((str(s) for s in demand_sets))})
        AND origin_time BETWEEN '2022-04-05 18:00:00' AND '2022-04-05 18:59:59'
    JOIN nodes origin_nodes
        ON origin_nodes.id = trip_locations.origin
    JOIN areas
        ON areas.id = {area_id}
        AND st_within(origin_nodes.geom, areas.geom)
    """
    req_count = db.execute_count_query(sql)
    area_data['Requests per hour'] = req_count

    # area
    # The geometry is transformed to SRID 32618 before st_area is applied.
    # NOTE(review): the same SRID is used for every area, including Chicago -
    # confirm this projection is accurate enough there.
    sql = f"""
    SELECT st_area(st_transform(geom, 32618)) AS area
        FROM areas
        WHERE id = {area_id}
    """
    # The count-query helper is reused here to fetch the single area value.
    area = db.execute_count_query(sql)
    # Presumably converts square metres to square kilometres, matching the key name.
    area_data['Area [km2]'] = area / 1000000

    return area_data

In [33]:
# Gather one statistics record per area, then build the summary table in one
# step. The table is created by a chained rename, so `area_data` is only ever
# bound to the finished DataFrame and nothing is mutated in place.
area_data_list = [get_data_for_dir(area_dir) for area_dir in area_dirs]
area_data = pd.DataFrame(area_data_list).rename(
    columns={
        'area': 'Area',
        'node_count': 'Node Count',
        'edge_count': 'Edge Count'
    }
)
area_data

17:09:07 [INFO] Loading instance config from C:\Google Drive\AIC Experiment Data\DARP\ITSC_instance_paper\Instances\NYC\instances\start_18-00\duration_005_min\max_delay_03_min\config.yaml
17:09:07 [INFO] Loading instance config from C:\Google Drive\AIC Experiment Data\DARP\ITSC_instance_paper\Instances\Manhattan\instances\start_18-00\duration_005_min\max_delay_03_min\config.yaml
17:09:08 [INFO] Loading instance config from C:\Google Drive\AIC Experiment Data\DARP\ITSC_instance_paper\Instances\Chicago\instances\start_18-00\duration_005_min\max_delay_03_min\config.yaml
17:09:08 [INFO] Loading instance config from C:\Google Drive\AIC Experiment Data\DARP\ITSC_instance_paper\Instances\DC\instances\start_18-00\duration_005_min\max_delay_03_min\config.yaml


Unnamed: 0,Area,Node Count,Edge Count,Requests per hour,Area [km2]
0,NYC,113411,281278,40294,1508.106916
1,Manhattan,6382,13455,23310,87.344297
2,Chicago,152653,413830,1138,1004.421323
3,DC,33230,84788,660,181.038363


In [34]:
# Demand density: the hourly request count divided by the area size.
area_data['Req./hour and km2'] = area_data['Requests per hour'].div(area_data['Area [km2]'])
area_data

Unnamed: 0,Area,Node Count,Edge Count,Requests per hour,Area [km2],Req./hour and km2
0,NYC,113411,281278,40294,1508.106916,26.718265
1,Manhattan,6382,13455,23310,87.344297,266.874894
2,Chicago,152653,413830,1138,1004.421323,1.132991
3,DC,33230,84788,660,181.038363,3.645636


In [39]:
# Build the LaTeX table: numbers rounded to whole values, LaTeX-special
# characters escaped, the index and the raw request count column hidden.
s = (
    area_data.style
    .format(escape='latex', precision=0)
    .hide(axis='index')
    .hide(['Requests per hour'], axis='columns')
)
# print() keeps the line breaks of the generated LaTeX source readable.
print(s.to_latex())

\begin{tabular}{lrrrr}
Area & Node Count & Edge Count & Area [km2] & Req./hour and km2 \\
NYC & 113411 & 281278 & 1508 & 27 \\
Manhattan & 6382 & 13455 & 87 & 267 \\
Chicago & 152653 & 413830 & 1004 & 1 \\
DC & 33230 & 84788 & 181 & 4 \\
\end{tabular}

