In [1]:
import sys
import pathlib
import os

def find_root(path, root_name="amazon-routing-challenge"):
    """Walk up from ``path`` until a directory named ``root_name`` is found.

    Args:
        path: Starting location (``str`` or ``os.PathLike``).
        root_name: Directory name to look for; defaults to the repository
            folder name.

    Returns:
        ``path`` itself when its last component already matches, otherwise the
        matching ancestor as a ``str``.

    Raises:
        FileNotFoundError: If neither ``path`` nor any ancestor is named
            ``root_name``. The earlier recursive version kept recursing on the
            filesystem root and ended in ``RecursionError``.
    """
    current = path
    while os.path.split(current)[-1] != root_name:
        parent = os.path.split(current)[0]
        # os.path.split("/") returns ("/", ""): an empty or unchanged parent
        # means there are no more ancestors to inspect.
        if not parent or parent == os.fspath(current):
            raise FileNotFoundError(
                f"No directory named {root_name!r} found at or above {os.fspath(path)!r}"
            )
        current = parent
    return current


# Repository root, located by walking up from the current working directory.
ROOT = find_root(pathlib.Path().absolute())

# sys.path entries must be plain strings (other types are ignored on import),
# and find_root may hand back the pathlib.Path it was given. Guard against
# appending the same entry again when the cell is re-run.
if str(ROOT) not in sys.path:
    sys.path.append(str(ROOT))


In [2]:

import json
import pandas as pd
import plotly.graph_objects as go
from dotenv import load_dotenv

# Return value is discarded. Presumably this loads a local .env file that
# supplies MAPBOX_KEY and ORS_KEY, which later cells read from os.environ.
_ = load_dotenv()


In [3]:
# Folder containing the JSON inputs that the cells below load.
_DATA_SUBDIRS = ("data", "almrrc2021-data-training", "model_build_inputs")
data_path = os.path.join(ROOT, *_DATA_SUBDIRS)


## Open the Package Data

In [4]:
# Parse package_data.json into a dict.
_package_file = os.path.join(data_path, "package_data.json")
with open(_package_file, "r") as f:
    package_data = json.load(f)

## Create the Route DataFrame


In [5]:
# Parse route_data.json into a dict keyed by route ID.
_route_file = os.path.join(data_path, "route_data.json")
with open(_route_file, "r") as f:
    route_data = json.load(f)


In [6]:
# Number of top-level entries in route_data (one per route ID).
len(route_data)

6112

In [7]:
# Flatten route_data into one row per (route, stop). Route-level fields are
# repeated on every stop row and the stop's own fields are merged in last.
route_df = pd.DataFrame.from_records(
    {
        "stop_id": stop_id,
        "route_id": route_id,
        "station_code": route_info["station_code"],
        "departure_datetime": route_info["date_YYYY_MM_DD"]
        + " "
        + route_info["departure_time_utc"],
        "executor_capacity_cm3": route_info["executor_capacity_cm3"],
        "route_score": route_info["route_score"],
        **stop_info,
    }
    for route_id, route_info in route_data.items()
    for stop_id, stop_info in route_info["stops"].items()
)


## Open the Sequence Data

In [8]:
# Parse actual_sequences.json into a dict.
_sequence_file = os.path.join(data_path, "actual_sequences.json")
with open(_sequence_file, "r") as f:
    actual_sequences = json.load(f)

In [9]:
# actual_sequences["RouteID_00143bdd-0a6b-49ec-bb35-36593d303e77"]['actual']

## Investigate the Stations


In [10]:
# Distinct station codes present in the stop-level route table.
route_df["station_code"].unique()


array(['DLA3', 'DSE4', 'DSE5', 'DLA9', 'DLA7', 'DCH4', 'DBO2', 'DBO3',
       'DLA8', 'DLA5', 'DCH3', 'DCH1', 'DAU1', 'DCH2', 'DLA4', 'DSE2',
       'DBO1'], dtype=object)

## Number of Routes Missing Zone ID


All papers (except one) handle this by replacing each missing Zone ID with the nearest zone ID by Euclidean distance.


In [11]:
# A route is "bad" if any of its rows has a non-station stop without a zone,
# a missing coordinate, or a route score other than "High".
_zone_missing = route_df["zone_id"].isna() & (route_df["type"] != "Station")
_coord_missing = route_df["lat"].isna() | route_df["lng"].isna()
_not_high_score = route_df["route_score"] != "High"

bad_routes = route_df.loc[
    _zone_missing | _coord_missing | _not_high_score, "route_id"
].unique()
len(bad_routes)


4484

In [12]:
# Keep only rows of routes that were not flagged as bad. The copy lets later
# cells add columns without touching route_df.
_is_bad_route = route_df["route_id"].isin(bad_routes)
good_route_df = route_df[~_is_bad_route].copy()
len(good_route_df["route_id"].unique())

1628

In [13]:
# For each exact (lat, lng), count how many distinct routes stop there.
_dup_stops = good_route_df.groupby(["lat", "lng"]).agg(
    route_id=("route_id", "nunique")
)
_dup_stops.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,route_id
lat,lng,Unnamed: 2_level_1
30.129045,-97.967559,1
30.129676,-97.96375,1
30.130242,-97.956682,1
30.130253,-97.956702,1
30.131237,-97.970785,1


In [14]:
# Locations that are visited by more than one route.
_dup_stops.loc[(_dup_stops.route_id > 1)]

Unnamed: 0_level_0,Unnamed: 1_level_0,route_id
lat,lng,Unnamed: 2_level_1
30.136697,-97.960858,2
30.152007,-97.950174,2
30.152088,-97.938182,2
30.152178,-97.935603,2
30.152222,-97.943326,2
...,...,...
47.952122,-122.232850,2
47.954786,-122.229638,2
47.956335,-122.233446,2
47.956545,-122.235289,2


### Calculate the Datetime Specifics

In [15]:
# Parse the "<date> <time>" strings into timestamps, then show the covered span.
good_route_df["departure_datetime"] = pd.to_datetime(
    good_route_df["departure_datetime"]
)
(
    good_route_df["departure_datetime"].min(),
    good_route_df["departure_datetime"].max(),
)

(Timestamp('2018-07-19 14:53:32'), Timestamp('2018-08-26 16:14:30'))

### Plotting Where the Stations Are


In [16]:
# One marker per station code, placed at the mean coordinates of that
# station's rows. The map itself is centred on the mean of all station rows.
_filtered_df = good_route_df[good_route_df["type"] == "Station"]

fig = go.Figure()
for station_code in _filtered_df["station_code"].unique():
    _df = _filtered_df[_filtered_df["station_code"] == station_code]
    _station_trace = go.Scattermapbox(
        name=station_code,
        lat=[_df["lat"].mean()],
        lon=[_df["lng"].mean()],
        mode="markers",
        marker={"size": 10},
        text=_df["station_code"],
        textfont={"family": "sans serif", "size": 22},
    )
    fig.add_trace(_station_trace)

_map_center = go.layout.mapbox.Center(
    lat=_filtered_df["lat"].mean(), lon=_filtered_df["lng"].mean()
)
# update_layout returns the figure, so ending the cell with it renders the map.
fig.update_layout(
    height=600,
    width=1000,
    hovermode="closest",
    mapbox=go.layout.Mapbox(
        accesstoken=os.environ["MAPBOX_KEY"],
        style="mapbox://styles/max-schrader/ck8t1cmmc02wk1it9rv28iyte",
        bearing=0,
        center=_map_center,
        pitch=0,
        zoom=2,
    ),
)


### Creating a Route ID to Station ID Dictionary

In [17]:
# route_id -> stop_id of that route's "Station" row.
_station_rows = good_route_df.loc[
    good_route_df["type"] == "Station", ["route_id", "stop_id"]
]
station_route_id = dict(zip(_station_rows["route_id"], _station_rows["stop_id"]))

## Investigating Duplicate Dropoffs


In [18]:
# Mark every row whose exact (lat, lng) occurs more than once in the table.
# NOTE(review): the next cell repeats this same assignment, so this cell looks
# redundant and could be removed.
_same_location = good_route_df.duplicated(subset=["lat", "lng"], keep=False)
good_route_df["duplicates"] = _same_location


In [19]:
# Mark every row whose exact (lat, lng) occurs more than once. Station rows
# are deliberately not excluded here.
good_route_df["duplicates"] = good_route_df.duplicated(subset=["lat", "lng"], keep=False)

# One row per duplicated location, with the array of route IDs that stop there.
lat_lon_dup = (
    good_route_df.loc[good_route_df["duplicates"]]
    .groupby(["lat", "lng"])["route_id"]
    .unique()
    .reset_index()
)
# lat_lon_dup.iloc[10].route_id

In [20]:
# Store each location's route IDs as a tuple instead of an array.
lat_lon_dup["route_id"] = lat_lon_dup["route_id"].apply(tuple)

In [21]:
# One row per duplicated (lat, lng) with the tuple of route IDs stopping there.
lat_lon_dup

Unnamed: 0,lat,lng,route_id
0,30.136697,-97.960858,"(RouteID_5ab24a12-62bf-4b91-9b49-3be9c3509b7b,..."
1,30.145343,-97.804156,"(RouteID_5d3893c7-c410-486c-8f51-42f70942c376,)"
2,30.152007,-97.950174,"(RouteID_5ab24a12-62bf-4b91-9b49-3be9c3509b7b,..."
3,30.152088,-97.938182,"(RouteID_59f3a83d-63b5-42fe-a18e-9eb145ef1ca1,..."
4,30.152178,-97.935603,"(RouteID_59f3a83d-63b5-42fe-a18e-9eb145ef1ca1,..."
...,...,...,...
16533,47.952122,-122.232850,"(RouteID_09d597c9-ae5c-4fe8-8379-710bf99f3184,..."
16534,47.954786,-122.229638,"(RouteID_09d597c9-ae5c-4fe8-8379-710bf99f3184,..."
16535,47.956335,-122.233446,"(RouteID_09d597c9-ae5c-4fe8-8379-710bf99f3184,..."
16536,47.956545,-122.235289,"(RouteID_09d597c9-ae5c-4fe8-8379-710bf99f3184,..."


In [22]:
# route_id -> list of (lat, lng) locations recorded for that route.
lat_lon_map = {}


def create_map(row):
    """Record ``row``'s location under every route listed in ``row.route_id``.

    ``row`` must expose ``lat``, ``lng`` and an iterable ``route_id``. The
    module-level ``lat_lon_map`` is updated in place and nothing is returned.
    """
    location = tuple(row[["lat", "lng"]])
    for rid in row.route_id:
        lat_lon_map.setdefault(rid, []).append(location)

# Feed every duplicated location through create_map. Only the side effect on
# lat_lon_map matters, so the apply result is discarded.
_ = lat_lon_dup.apply(create_map, axis=1)

# Drop repeated locations within each route.
lat_lon_map.update(
    {route_id: list(set(locations)) for route_id, locations in lat_lon_map.items()}
)

### Create a DataFrame with the matched pairs

In [24]:
import itertools

In [29]:
# For every route, list each ordered pair of *distinct* duplicated locations
# on that route as (route_id, from_location, to_location).
# This is quadratic in the number of locations per route, which is acceptable
# for a dataset of this size.
match_pairs = [
    (r_1, loc_from, loc_to)
    for r_1, locations in lat_lon_map.items()
    for loc_from, loc_to in itertools.product(locations, locations)
    if loc_from != loc_to
]

In [82]:
# NOTE(review): `_tmp_df` is first assigned in a later cell, so on a fresh
# kernel (Restart & Run All) this cell raises NameError; it should be moved
# below the cell that builds `_tmp_df`.
_tmp_df.head()

Unnamed: 0,route_id,from,to
0,RouteID_5ab24a12-62bf-4b91-9b49-3be9c3509b7b,"(30.156748, -97.947387)","(30.154643, -97.935299)"
1,RouteID_5ab24a12-62bf-4b91-9b49-3be9c3509b7b,"(30.156748, -97.947387)","(30.156254, -97.948389)"
2,RouteID_5ab24a12-62bf-4b91-9b49-3be9c3509b7b,"(30.156748, -97.947387)","(30.178509, -97.953482)"
3,RouteID_5ab24a12-62bf-4b91-9b49-3be9c3509b7b,"(30.156748, -97.947387)","(30.152222, -97.943326)"
4,RouteID_5ab24a12-62bf-4b91-9b49-3be9c3509b7b,"(30.156748, -97.947387)","(30.445236, -97.709418)"


In [75]:
# Tabulate the (route, from, to) triples and count, for each location pair,
# how many distinct routes contain it.
_tmp_df = pd.DataFrame(match_pairs, columns=["route_id", "from", "to"])
_loc_pairs = _tmp_df.groupby(["from", "to"])["route_id"].nunique().reset_index()

# Only pairs shared by at least two routes are of interest.
_loc_pairs = _loc_pairs[_loc_pairs["route_id"] > 1]
_loc_pairs.head()

Unnamed: 0,from,to,route_id
0,"(30.136697, -97.960858)","(30.152007, -97.950174)",2
3,"(30.136697, -97.960858)","(30.152222, -97.943326)",2
8,"(30.136697, -97.960858)","(30.156254, -97.948389)",2
10,"(30.136697, -97.960858)","(30.158755, -97.939515)",2
11,"(30.136697, -97.960858)","(30.160057, -97.94169)",2


In [77]:
# (from, to) pairs shared by more than one route. The order has to match the
# ('from', 'to') tuples that `_tmp_df` rows are compared against when
# filtering; it was previously built as ('to', 'from').
okay_tuples = tuple(zip(_loc_pairs["from"], _loc_pairs["to"]))

In [83]:
# Keep only rows whose (from, to) pair is in okay_tuples.
_pair_per_row = pd.Series(
    list(zip(_tmp_df["from"], _tmp_df["to"])), index=_tmp_df.index
)
_tmp_df = _tmp_df[_pair_per_row.isin(okay_tuples)].reset_index(drop=True)

In [None]:
# TODO: the travel-time column for `_tmp_df` was never filled in. This cell
# held a dangling assignment (`_tmp_df['tt'] =` with no right-hand side),
# which is a SyntaxError and stops "Run All" here.

In [22]:
# For every unordered pair of routes, find the duplicated locations they share
# and record each pair of shared locations as (route_1, route_2, from, to).
match_pairs = []

# itertools.combinations visits each unordered route pair exactly once, in the
# same order as the previous nested loops, so no `covered_pairs` bookkeeping
# or `r_1 != r_2` check is needed.
for r_1, r_2 in itertools.combinations(lat_lon_map, 2):
    if match_locations := set(lat_lon_map[r_1]).intersection(lat_lon_map[r_2]):
        # combinations() emits each location pair once; which of the two ends
        # up as "from" follows the set's iteration order.
        match_pairs.extend(
            (r_1, r_2, l1, l2) for l1, l2 in itertools.combinations(match_locations, 2)
        )

In [23]:
# Empty comparison table: one row per (route_1, route_2, from, to) match and,
# for each of the two routes, placeholder columns that later cells fill in.
tuple_index = pd.MultiIndex.from_tuples(
    match_pairs, names=["route_1", "route_2", "from", "to"]
)
column_index = pd.MultiIndex.from_product(
    [["r1", "r2"], ["to_id", "from_id", "tt", "is_station", "date"]]
)

# Turn the four index levels into ordinary columns.
tt_df = pd.DataFrame(index=tuple_index, columns=column_index).reset_index()
# tt_df

#### Add Information to the DF

In [24]:
# build a map of id -> lat, lon -> stop_id
# route_id -> {(lat, lng): stop_id}, built from the raw route data.
lat_lon_2_id = {
    route_key: {
        (stop["lat"], stop["lng"]): stop_key
        for stop_key, stop in route_entry["stops"].items()
    }
    for route_key, route_entry in route_data.items()
}

# route_id -> departure datetime (first value found per route).
route_id_2_departure_datetime = (
    good_route_df.groupby("route_id")["departure_datetime"].first().to_dict()
)

In [25]:
# find the matching stop id for each route
def to_stop_id(row):
    """Look up the stop ID for a ``(route_id, (lat, lng))`` pair.

    ``row`` is indexed positionally: element 0 is the route ID and element 1
    the location. Returns ``None`` when the route has no stop at that location.
    """
    route_key, location = row[0], row[1]
    stops_by_location = lat_lon_2_id[route_key]
    return stops_by_location.get(location)


# Resolve the "from" and "to" locations to each route's own stop IDs.
for _route_col, _side in (("route_1", "r1"), ("route_2", "r2")):
    for _end in ("from", "to"):
        tt_df[(_side, _end + "_id")] = tt_df[[(_route_col, ""), (_end, "")]].apply(
            to_stop_id, axis=1, raw=True
        )

#### Apply Travel Time

In [26]:

# Parse travel_times.json. Opening it in a `with` block closes the file
# handle, which the previous bare open(...).read() never did.
with open(os.path.join(data_path, "travel_times.json"), "r") as f:
    tt_json = json.load(f)


In [27]:

# remove ids that are note in route_location_df
# Drop travel-time tables for routes that do not appear in tt_df.
keep_routes = set(tt_df[("route_1", "")]).union(tt_df[("route_2", "")])
tt_json = {
    route_key: table for route_key, table in tt_json.items() if route_key in keep_routes
}

In [28]:
def add_travel_time(row):
    """Return the travel time from the "from" stop to the "to" stop.

    ``row`` is indexed positionally as ``(route_id, to_stop_id, from_stop_id)``;
    the value is read from ``tt_json[route_id][from_stop_id][to_stop_id]``.
    """
    route_key, to_stop, from_stop = row[0], row[1], row[2]
    return tt_json[route_key][from_stop][to_stop]


# Fill in each route's own travel time for the matched from -> to pair.
for _route_col, _side in (("route_1", "r1"), ("route_2", "r2")):
    _lookup_cols = [(_route_col, ""), (_side, "to_id"), (_side, "from_id")]
    tt_df[(_side, "tt")] = (
        tt_df[_lookup_cols].apply(add_travel_time, axis=1, raw=True).values
    )

#### Add Start Datetime 

In [29]:
# Attach each route's departure datetime.
for _route_col, _side in (("route_1", "r1"), ("route_2", "r2")):
    tt_df[(_side, "date")] = tt_df[(_route_col, "")].apply(
        lambda route_key: route_id_2_departure_datetime[route_key]
    )

#### Add if Station

In [30]:
def _starts_at_station(pair):
    """Return True when the stop in ``pair[1]`` is the station of route ``pair[0]``."""
    route_key, stop_key = pair[0], pair[1]
    return stop_key == station_route_id[route_key]


# Flag rows whose "from" stop is the route's station.
for _route_col, _side in (("route_1", "r1"), ("route_2", "r2")):
    tt_df[(_side, "is_station")] = tt_df[[(_route_col, ""), (_side, "from_id")]].apply(
        _starts_at_station, axis=1, raw=True
    )

In [31]:
# Sanity check: count the rows where the two routes disagree on whether the
# "from" stop is a station. A result of 0 means they always agree.
sum(tt_df[('r1', 'is_station')] != tt_df[('r2', 'is_station')])

0

In [32]:
# Absolute gap between the two routes' travel times for the same location
# pair, shown with the largest gaps first.
_tt_first = tt_df[("r1", "tt")]
_tt_second = tt_df[("r2", "tt")]
tt_df["tt_diff"] = _tt_first.sub(_tt_second).abs()
tt_df.sort_values(by=("tt_diff", ""), ascending=False)

Unnamed: 0_level_0,route_1,route_2,from,to,r1,r1,r1,r1,r1,r2,r2,r2,r2,r2,tt_diff
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,to_id,from_id,tt,is_station,date,to_id,from_id,tt,is_station,date,Unnamed: 15_level_1
477,RouteID_5ab24a12-62bf-4b91-9b49-3be9c3509b7b,RouteID_80e69c6f-0817-46d9-ae76-926d2afc6fa7,"(30.161271, -97.9378)","(30.185226, -97.948886)",YX,PT,509.0,False,2018-07-21 15:30:00,CY,HZ,56.9,False,2018-08-01 14:12:20,452.1
916,RouteID_59f3a83d-63b5-42fe-a18e-9eb145ef1ca1,RouteID_80e69c6f-0817-46d9-ae76-926d2afc6fa7,"(30.152505, -97.936925)","(30.161271, -97.9378)",BP,VB,286.6,False,2018-07-22 14:32:00,HZ,DC,721.2,False,2018-08-01 14:12:20,434.6
467,RouteID_5ab24a12-62bf-4b91-9b49-3be9c3509b7b,RouteID_80e69c6f-0817-46d9-ae76-926d2afc6fa7,"(30.161199, -97.941299)","(30.161271, -97.9378)",PT,RS,173.1,False,2018-07-21 15:30:00,HZ,YG,545.4,False,2018-08-01 14:12:20,372.3
161684,RouteID_2ddf94c5-f531-48b6-9d65-d8cc514285b2,RouteID_c383fc71-1659-4a1e-8421-c8a440d9f0f3,"(47.687389, -122.292834)","(47.687208, -122.294943)",BN,EG,404.3,False,2018-08-24 15:36:43,IM,LI,65.7,False,2018-08-17 15:30:28,338.6
14815,RouteID_1102698e-232b-46cb-b52f-ec3f97a11dbe,RouteID_5461ac15-4445-49d2-997e-29ceb4d32335,"(33.634959, -117.754555)","(33.629099, -117.755589)",ZO,FZ,148.3,False,2018-07-20 14:32:08,VF,ZV,458.7,False,2018-08-14 15:00:00,310.4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
61826,RouteID_caad9503-c10a-42c4-b27e-7838c236fafe,RouteID_cbc69890-8d9d-45b2-927a-75ffb82ae25b,"(34.001752, -118.438659)","(34.008297, -118.433829)",BL,ZD,268.9,False,2018-08-02 16:21:35,II,AZ,268.9,False,2018-08-05 15:20:05,0.0
61827,RouteID_caad9503-c10a-42c4-b27e-7838c236fafe,RouteID_cbc69890-8d9d-45b2-927a-75ffb82ae25b,"(34.020099, -118.432625)","(34.011189, -118.437637)",TA,ZA,282.3,False,2018-08-02 16:21:35,QO,JG,282.3,False,2018-08-05 15:20:05,0.0
61828,RouteID_caad9503-c10a-42c4-b27e-7838c236fafe,RouteID_cbc69890-8d9d-45b2-927a-75ffb82ae25b,"(34.020099, -118.432625)","(34.008297, -118.433829)",BL,ZA,351.2,False,2018-08-02 16:21:35,II,JG,351.2,False,2018-08-05 15:20:05,0.0
61829,RouteID_caad9503-c10a-42c4-b27e-7838c236fafe,RouteID_cbc69890-8d9d-45b2-927a-75ffb82ae25b,"(34.011189, -118.437637)","(34.008297, -118.433829)",BL,TA,96.3,False,2018-08-02 16:21:35,II,QO,96.3,False,2018-08-05 15:20:05,0.0


In [33]:
# Per (from, to) location pair: mean travel time on each side, mean absolute
# difference, and the number of distinct routes seen as route_1 / route_2.
# The aggregation spec adds a third column level (e.g. ('tt_diff', '', 'mean')).
info_df = tt_df.groupby([("from", ""), ("to", "")]).agg({('r1', 'tt'): ('mean',), ('r2', 'tt'): ('mean',), ('tt_diff', ''): 'mean', ('route_1', ''): 'nunique', ('route_2', ''): 'nunique'})
info_df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,r1,r2,tt_diff,route_1,route_2
Unnamed: 0_level_1,Unnamed: 1_level_1,tt,tt,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Unnamed: 0_level_2,Unnamed: 1_level_2,mean,mean,mean,nunique,nunique
"(from, )","(to, )",Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3
"(30.136697, -97.960858)","(30.152007, -97.950174)",770.3,770.3,0.0,1,1
"(30.136697, -97.960858)","(30.158755, -97.939515)",707.8,707.8,0.0,1,1
"(30.136697, -97.960858)","(30.160057, -97.94169)",721.9,721.9,0.0,1,1
"(30.136697, -97.960858)","(30.160904, -97.939523)",767.9,767.9,0.0,1,1
"(30.136697, -97.960858)","(30.163421, -97.944896)",627.5,627.5,0.0,1,1


In [34]:
# Fraction of location pairs whose mean travel-time difference is non-zero.
len(info_df.loc[info_df[('tt_diff', '', 'mean')] > 0]) / len(info_df)

0.13410526848909204

In [67]:
# Dates of the two routes from the row with the largest tt_diff in the saved
# output above.
route_data["RouteID_80e69c6f-0817-46d9-ae76-926d2afc6fa7"]['date_YYYY_MM_DD'], route_data["RouteID_5ab24a12-62bf-4b91-9b49-3be9c3509b7b"]['date_YYYY_MM_DD']

('2018-08-01', '2018-07-21')

In [68]:
# Packages recorded for stop 'HZ' of this route. The trailing comma makes the
# expression a 1-tuple.
# NOTE(review): the saved output below contains two dicts, so it was
# presumably produced by an earlier version of this cell.
package_data["RouteID_80e69c6f-0817-46d9-ae76-926d2afc6fa7"]['HZ'],

# (route_data["RouteID_80e69c6f-0817-46d9-ae76-926d2afc6fa7"]['stops']['HZ'], route_data["RouteID_80e69c6f-0817-46d9-ae76-926d2afc6fa7"]['stops']['CY']), \
#     (route_data["RouteID_5ab24a12-62bf-4b91-9b49-3be9c3509b7b"]['stops']['PT'], route_data["RouteID_5ab24a12-62bf-4b91-9b49-3be9c3509b7b"]['stops']['YX'])

({'PackageID_5f9292f4-3792-4207-9146-691db8270505': {'scan_status': 'DELIVERED',
   'time_window': {'start_time_utc': nan, 'end_time_utc': nan},
   'planned_service_time_seconds': 25.0,
   'dimensions': {'depth_cm': 27.0, 'height_cm': 6.0, 'width_cm': 24.5}},
  'PackageID_e897bba2-6851-490a-9b2b-13f73e1999e3': {'scan_status': 'DELIVERY_ATTEMPTED',
   'time_window': {'start_time_utc': nan, 'end_time_utc': nan},
   'planned_service_time_seconds': 25.0,
   'dimensions': {'depth_cm': 29.0, 'height_cm': 6.6, 'width_cm': 16.3}}},
 {'PackageID_51c115d3-3b0f-4954-b420-166e60db672b': {'scan_status': 'DELIVERED',
   'time_window': {'start_time_utc': nan, 'end_time_utc': nan},
   'planned_service_time_seconds': 45.0,
   'dimensions': {'depth_cm': 33.8, 'height_cm': 9.4, 'width_cm': 18.5}}})

In [42]:
# Raw travel times for the same location pair as recorded by each of the two
# routes (stop IDs differ per route: HZ -> CY versus PT -> YX).
tt_json['RouteID_80e69c6f-0817-46d9-ae76-926d2afc6fa7']['HZ']['CY'], tt_json['RouteID_5ab24a12-62bf-4b91-9b49-3be9c3509b7b']['PT']['YX']

(56.9, 509.0)

In [36]:
import openrouteservice as ors

# openrouteservice client; the API key is read from the ORS_KEY environment
# variable (a KeyError is raised here if it is not set).
ors_client = ors.Client(key=os.environ["ORS_KEY"])


def get_ors_travel_time(locations, destination_indexes):
    """Query the openrouteservice matrix endpoint for durations and distances.

    Args:
        locations: Coordinate pairs to route between. Callers in this notebook
            pass (lng, lat) by reversing their (lat, lng) tuples.
        destination_indexes: Indexes into ``locations`` to use as destinations.

    Returns:
        The response of ``ors_client.distance_matrix`` for the "driving-hgv"
        profile. The saved outputs show a dict with 'durations' and
        'distances' keys.
    """
    matrix_options = {
        "destinations": destination_indexes,
        "profile": "driving-hgv",
        "metrics": ["duration", "distance"],
    }
    return ors_client.distance_matrix(locations, **matrix_options)


In [37]:
# _r[("to", "")][::-1], _r[("from", "")][::-1]
# https://www.google.com/maps/dir/30.185226,+-97.948886/30.161271,+-97.9378/@30.1844003,-97.954117,3824m/data=!3m1!1e3!4m14!4m13!1m5!1m1!1s0x0:0x58774e0e18241f40!2m2!1d-97.948886!2d30.185226!1m5!1m1!1s0x0:0xc8a5fe56814b73e4!2m2!1d-97.9378!2d30.161271!3e0

In [38]:

def make_fig(trace):
    """Build a 1000x600 Mapbox figure containing ``trace``.

    The map is centred on the mean latitude/longitude of all rows in
    ``route_df`` and uses the access token from the MAPBOX_KEY environment
    variable. Returns the new ``go.Figure``.
    """
    map_center = go.layout.mapbox.Center(
        lat=route_df["lat"].mean(), lon=route_df["lng"].mean()
    )
    mapbox_settings = go.layout.Mapbox(
        accesstoken=os.environ["MAPBOX_KEY"],
        style="mapbox://styles/max-schrader/cl6lhvrfw001516pkh3s6iv7l",
        bearing=0,
        center=map_center,
        pitch=0,
        zoom=5,
    )

    figure = go.Figure()
    figure.update_layout(
        height=600,
        width=1000,
        hovermode="closest",
        mapbox=mapbox_settings,
    )
    figure.add_trace(trace)
    return figure


# Inspect the location pairs whose travel time differs most between routes.
# Sort once instead of on every iteration; head(5) also copes with tables
# that have fewer than five rows.
_top_diffs = tt_df.sort_values(('tt_diff', ''), ascending=False).head(5)

for j in range(len(_top_diffs)):
    _r = _top_diffs.iloc[j]
    _route_stops = [
        (_r[('route_1', '')], (_r[('r1', 'to_id')], _r[('r1', 'from_id')])),
        (_r[('route_2', '')], (_r[('r2', 'to_id')], _r[('r2', 'from_id')])),
    ]
    for _position, (r_id, r_stops) in enumerate(_route_stops):
        _df = route_df.loc[(route_df.route_id == r_id) & (route_df.stop_id.isin(r_stops))]
        if _position == 0:
            # Both routes share the same two locations, so the ORS query and
            # the map are produced once, using the first route's stops.
            # NOTE(review): with locations [to, from] and destinations=[1],
            # durations[0] appears to be the to -> from direction; confirm
            # this is the direction intended for comparison.
            res = get_ors_travel_time([_r[("to", "")][::-1], _r[("from", "")][::-1]], [1])
            print("ORS Estimated TT", res['durations'][0])
            print("ORS Estimated Distance", res['distances'][0])
            fig = make_fig(
                go.Scattermapbox(
                    name=r_id,
                    lat=_df["lat"],
                    lon=_df["lng"],
                    mode="markers+text",
                    marker=dict(
                        size=10,
                    ),
                    # Label each point with its own stop ID. The rows of _df
                    # follow route_df order, not the (to, from) order of
                    # r_stops, so text=r_stops could swap the labels.
                    text=_df["stop_id"],
                    textfont=dict(
                        family="sans serif",
                        size=22,
                        color="white"
                    ),
                )
            )
        # Route, its departure time, then the travel time in both directions
        # (to -> from, then from -> to).
        print(r_id, _df.departure_datetime.unique()[0], tt_json[r_id][r_stops[0]][r_stops[1]], tt_json[r_id][r_stops[1]][r_stops[0]])
    fig.show()

    print("----------------------------------------------------\n\n")



ORS Estimated TT [861.55]
ORS Estimated Distance [4030.56]
RouteID_5ab24a12-62bf-4b91-9b49-3be9c3509b7b 2018-07-21 15:30:00 535.9 509.0
RouteID_80e69c6f-0817-46d9-ae76-926d2afc6fa7 2018-08-01 14:12:20 57.7 56.9


----------------------------------------------------


ORS Estimated TT [418.58]
ORS Estimated Distance [1824.59]
RouteID_59f3a83d-63b5-42fe-a18e-9eb145ef1ca1 2018-07-22 14:32:00 288.0 286.6
RouteID_80e69c6f-0817-46d9-ae76-926d2afc6fa7 2018-08-01 14:12:20 296.3 721.2


----------------------------------------------------


ORS Estimated TT [213.57]
ORS Estimated Distance [889.89]
RouteID_5ab24a12-62bf-4b91-9b49-3be9c3509b7b 2018-07-21 15:30:00 149.3 173.1
RouteID_80e69c6f-0817-46d9-ae76-926d2afc6fa7 2018-08-01 14:12:20 145.4 545.4


----------------------------------------------------


ORS Estimated TT [42.89]
ORS Estimated Distance [178.7]
RouteID_2ddf94c5-f531-48b6-9d65-d8cc514285b2 2018-08-24 15:36:43 409.3 404.3
RouteID_c383fc71-1659-4a1e-8421-c8a440d9f0f3 2018-08-17 15:30:28 52.5 65.7


----------------------------------------------------


ORS Estimated TT [150.05]
ORS Estimated Distance [1863.69]
RouteID_1102698e-232b-46cb-b52f-ec3f97a11dbe 2018-07-20 14:32:08 155.5 148.3
RouteID_5461ac15-4445-49d2-997e-29ceb4d32335 2018-08-14 15:00:00 210.4 458.7


----------------------------------------------------




In [39]:
# For rows whose "from" stop is a station: per location pair, the max, count,
# standard deviation and mean of tt_diff, most frequent pairs first.
tt_df.loc[tt_df[('r1', 'is_station')]].groupby([("from", ""), ("to", "")])['tt_diff'].agg(('max', 'count', 'std', 'mean')).sort_values(('count'), ascending=False)

Unnamed: 0_level_0,Unnamed: 1_level_0,max,count,std,mean
"(from, )","(to, )",Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
"(47.464945, -122.231073)","(47.517331, -122.367252)",0.0,28,0.000000,0.000000
"(47.937344, -122.244952)","(47.681563, -122.317502)",36.0,28,17.904509,14.142857
"(47.937344, -122.244952)","(47.675995, -122.298414)",2.2,21,1.018402,0.628571
"(47.937344, -122.244952)","(47.671002, -122.292604)",0.0,15,0.000000,0.000000
"(47.464945, -122.231073)","(47.51147, -122.374978)",6.9,15,3.366855,2.300000
...,...,...,...,...,...
"(33.965477, -117.653303)","(33.894791, -117.505634)",0.0,1,,0.000000
"(33.965477, -117.653303)","(33.894764, -117.867973)",0.0,1,,0.000000
"(33.965477, -117.653303)","(33.894702, -117.877765)",0.0,1,,0.000000
"(33.965477, -117.653303)","(33.894697, -117.551099)",2.7,1,,2.700000


In [40]:
# Distinct route_1 departure datetimes for one specific (from, to) pair taken
# from the table above.
tt_df.loc[(tt_df[("from", "")] == (47.464945, -122.231073)) & (tt_df[("to", "")] == (47.517331, -122.367252)), ('r1', 'date')].unique()

array(['2018-08-19T13:39:27.000000000', '2018-08-17T13:37:03.000000000',
       '2018-08-25T14:14:04.000000000', '2018-08-16T13:57:20.000000000',
       '2018-08-09T15:04:55.000000000', '2018-08-12T13:38:17.000000000',
       '2018-08-20T13:53:16.000000000'], dtype='datetime64[ns]')

In [41]:
# ORS estimate for the same pair. The tuples are reversed to (lng, lat), and
# destinations=[1] makes the second location the destination.
get_ors_travel_time([(47.517331, -122.367252)[::-1], (47.464945, -122.231073)[::-1]], [1])

{'durations': [[1479.18], [0.0]],
 'distances': [[16799.52], [0.0]],
 'destinations': [{'location': [-122.23167, 47.464705],
   'snapped_distance': 52.22}],
 'sources': [{'location': [-122.367296, 47.517332], 'snapped_distance': 3.29},
  {'location': [-122.23167, 47.464705], 'snapped_distance': 52.22}],
 'metadata': {'attribution': 'openrouteservice.org | OpenStreetMap contributors',
  'service': 'matrix',
  'timestamp': 1660671815050,
  'query': {'locations': [[-122.367252, 47.517331], [-122.231073, 47.464945]],
   'profile': 'driving-hgv',
   'responseType': 'json',
   'metricsStrings': ['DISTANCE', 'DURATION'],
   'destinations': ['1'],
   'metrics': ['duration', 'distance']},
  'engine': {'version': '6.7.0',
   'build_date': '2022-02-18T19:37:41Z',
   'graph_date': '2022-08-01T21:12:42Z'}}}