Import packages

In [25]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import plotly.express as px
import seaborn as sns
import io as io

Mount Google Drive to access the data files

In [26]:
# Mount Google Drive into the Colab filesystem so files on it can be read
# through ordinary paths.
from google.colab import drive

mount_point = '/content/drive'
drive.mount(mount_point)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


Read Data

In [27]:
# Load the orders table from the CSV file on the mounted Drive.
path = "/content/drive/MyDrive/orders.csv"
orders = pd.read_csv(filepath_or_buffer=path)

In [28]:
# Load the order-stage log from the CSV file on the mounted Drive.
# NOTE: this reassigns `path`, which the orders load also used.
path = "/content/drive/MyDrive/order_stages.csv"
order_stages = pd.read_csv(filepath_or_buffer=path)

In [14]:
# Preview the first five rows of orders (head() defaults to n=5).
orders.head()

Unnamed: 0,task_id,order_id,customer_id,restaurant_id,city,restaurant_address_lng,restaurant_address_lat,delivery_address_lng,delivery_address_lat,order_state,order_promised_delivery,restaurant_finished_preparation
0,1,1,15116,721,Bucharest,26.074823,44.41509,26.014888,44.428852,delivered,2021-10-15T03:31:11.000Z,2021-10-15T03:08:44.000Z
1,1,2,12572,1445,Bucharest,26.054794,44.454021,26.093656,44.434069,delivered,2021-10-15T03:46:07.000Z,2021-10-15T03:24:22.000Z
2,1,3,34814,891,Bucharest,26.049776,44.42441,26.020117,44.411906,delivered,2021-10-15T04:00:30.000Z,2021-10-15T03:45:39.000Z
3,1,4,17168,368,Bucharest,26.143047,44.430457,26.12689,44.41753,delivered,2021-10-15T04:15:53.000Z,2021-10-15T04:08:40.000Z
4,1,5,35129,721,Bucharest,26.074823,44.41509,26.030227,44.433221,delivered,2021-10-15T04:44:42.000Z,2021-10-15T04:28:17.000Z


In [15]:
# Preview the first five rows of order_stages (head() defaults to n=5).
order_stages.head()

Unnamed: 0,log_id,city,order_id,courier_id,vehicle_type,order_stage,order_stage_start,courier_location_lng_at_start,courier_location_lat_at_start,estimated_travel_time_in_seconds_to_restaurant,estimated_travel_time_in_seconds_to_eater,distance_courier_to_restaurant_address,distance_courier_to_customer_address,updated_expected_delivery_time,task_id
0,1,Bucharest,1,677,motorbike,order_proposed_to_courier,2021-10-15 03:04:02.000,26.082872,44.416126,,,649.953324,,2021-10-15 03:27:30.000,1
1,2,Bucharest,1,677,motorbike,courier_accepts_order,2021-10-15 03:04:12.000,26.082872,44.416126,218.0,,649.953324,,2021-10-15 03:27:10.000,1
2,3,Bucharest,1,677,motorbike,courier_arrived_at_restaurant,2021-10-15 03:08:51.000,26.0746,44.414964,,,22.596096,,2021-10-15 03:26:36.000,1
3,4,Bucharest,1,677,motorbike,courier_picked_up_order,2021-10-15 03:08:54.000,26.074581,44.414966,,756.0,23.668382,4988.52796,2021-10-15 03:24:39.000,1
4,5,Bucharest,2,1970,motorbike,order_proposed_to_courier,2021-10-15 03:24:02.000,26.04089,44.479828,,,3076.13108,,2021-10-15 03:46:36.000,1


In [10]:
# Percentage of missing (null) values in each column of orders.
# sep='' stops print() from inserting its default single space after the
# label's trailing newline, which otherwise indents the first row of the
# Series and misaligns it with the rows below.
print('missing (null) values\n', orders.isnull().mean() * 100, sep='')

missing (null) values
 task_id                            0.000000
order_id                           0.000000
customer_id                        0.000000
restaurant_id                      0.000000
city                               0.000000
restaurant_address_lng             0.000000
restaurant_address_lat             0.000000
delivery_address_lng               0.000000
delivery_address_lat               0.000000
order_state                        0.000000
order_promised_delivery            0.000000
restaurant_finished_preparation    0.089961
dtype: float64


In [11]:
# Percentage of missing (null) values in each column of order_stages.
# sep='' stops print() from inserting its default single space after the
# label's trailing newline, which otherwise indents the first row of the
# Series and misaligns it with the rows below.
print('missing (null) values\n', order_stages.isnull().mean() * 100, sep='')

missing (null) values
 log_id                                             0.000000
city                                               0.000000
order_id                                           0.000000
courier_id                                         0.000000
vehicle_type                                       0.000000
order_stage                                        0.000000
order_stage_start                                  0.000000
courier_location_lng_at_start                      0.020003
courier_location_lat_at_start                      0.020003
estimated_travel_time_in_seconds_to_restaurant    83.879180
estimated_travel_time_in_seconds_to_eater         83.354939
distance_courier_to_restaurant_address            33.322596
distance_courier_to_customer_address              50.040916
updated_expected_delivery_time                    16.828468
task_id                                            0.000000
dtype: float64


In [34]:
# Distinct (task_id, city, order_state) combinations that occur in orders.
orders_key_cols = ['task_id', 'city', 'order_state']
orders_unique = orders.loc[:, orders_key_cols].drop_duplicates()
orders_unique

Unnamed: 0,task_id,city,order_state
0,1,Bucharest,delivered
77,1,Bucharest,failed


In [35]:
# Distinct (order_stage, city, vehicle_type, task_id) combinations that occur
# in the stage log.
stage_key_cols = ['order_stage', 'city', 'vehicle_type', 'task_id']
order_stage_unique = order_stages.loc[:, stage_key_cols].drop_duplicates()
order_stage_unique

Unnamed: 0,order_stage,city,vehicle_type,task_id
0,order_proposed_to_courier,Bucharest,motorbike,1
1,courier_accepts_order,Bucharest,motorbike,1
2,courier_arrived_at_restaurant,Bucharest,motorbike,1
3,courier_picked_up_order,Bucharest,motorbike,1
10,courier_arrived_at_customer,Bucharest,motorbike,1
13,courier_delivered_order,Bucharest,motorbike,1
20,order_proposed_to_courier,Bucharest,car,1
21,courier_accepts_order,Bucharest,car,1
30,courier_arrived_at_restaurant,Bucharest,car,1
36,courier_arrived_at_restaurant,Bucharest,bicycle,1


In [37]:
# Number of non-null order_id entries in orders.
# count() skips nulls but does not de-duplicate.
total_orders = orders.loc[:, 'order_id'].count()
total_orders

65584

In [32]:
# Number of distinct order_id values in the stage log (nulls are not counted).
unique_orders = order_stages.loc[:, 'order_id'].nunique(dropna=True)
unique_orders

64205


In [33]:
# Number of distinct courier_id values in the stage log (nulls are not counted).
unique_couriers = order_stages.loc[:, 'courier_id'].nunique(dropna=True)
unique_couriers

3039


In [40]:
# Inner join: keep only stage-log rows whose order_id also exists in orders,
# then count how many distinct orders appear in both tables.
inner_join_df = order_stages.merge(orders, how='inner', on='order_id')
overlap_innerjoin = inner_join_df['order_id'].nunique()
overlap_innerjoin

64205

In [45]:
# Left join: keep every row of orders and attach the matching stage-log rows
# (orders without any stage rows get NaN in the stage columns).
#
# Columns present in both tables (other than the join key) would come back
# from merge() suffixed with _x/_y. Instead of stripping those suffixes
# afterwards -- which would also drop or rename any genuine column whose name
# happens to end in "_y" or "_x" -- the shared columns are removed from
# order_stages up front, so the orders copy is kept under its original name.
shared_cols = [
    col for col in order_stages.columns
    if col in orders.columns and col != 'order_id'
]
left_join_df = pd.merge(
    orders,
    order_stages.drop(columns=shared_cols),
    on='order_id',
    how='left',
)
left_join_df

Unnamed: 0,task_id,order_id,customer_id,restaurant_id,city,restaurant_address_lng,restaurant_address_lat,delivery_address_lng,delivery_address_lat,order_state,...,vehicle_type,order_stage,order_stage_start,courier_location_lng_at_start,courier_location_lat_at_start,estimated_travel_time_in_seconds_to_restaurant,estimated_travel_time_in_seconds_to_eater,distance_courier_to_restaurant_address,distance_courier_to_customer_address,updated_expected_delivery_time
0,1,1,15116,721,Bucharest,26.074823,44.415090,26.014888,44.428852,delivered,...,motorbike,order_proposed_to_courier,2021-10-15 03:04:02.000,26.082872,44.416126,,,649.953324,,2021-10-15 03:27:30.000
1,1,1,15116,721,Bucharest,26.074823,44.415090,26.014888,44.428852,delivered,...,motorbike,courier_accepts_order,2021-10-15 03:04:12.000,26.082872,44.416126,218.0,,649.953324,,2021-10-15 03:27:10.000
2,1,1,15116,721,Bucharest,26.074823,44.415090,26.014888,44.428852,delivered,...,motorbike,courier_arrived_at_restaurant,2021-10-15 03:08:51.000,26.074600,44.414964,,,22.596096,,2021-10-15 03:26:36.000
3,1,1,15116,721,Bucharest,26.074823,44.415090,26.014888,44.428852,delivered,...,motorbike,courier_picked_up_order,2021-10-15 03:08:54.000,26.074581,44.414966,,756.0,23.668382,4988.527960,2021-10-15 03:24:39.000
4,1,1,15116,721,Bucharest,26.074823,44.415090,26.014888,44.428852,delivered,...,motorbike,courier_arrived_at_customer,2021-10-15 03:38:01.000,26.015199,44.428529,,,,43.611625,2021-10-15 03:38:01.000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
386311,1,65584,1288,1211,Bucharest,26.139071,44.380931,26.144956,44.370394,delivered,...,car,courier_accepts_order,2021-11-19 03:25:44.000,26.107957,44.393815,485.0,,2859.131456,,2021-11-19 03:46:19.000
386312,1,65584,1288,1211,Bucharest,26.139071,44.380931,26.144956,44.370394,delivered,...,car,courier_arrived_at_restaurant,2021-11-19 03:33:18.000,26.138979,44.380971,,,8.562911,,2021-11-19 03:43:21.000
386313,1,65584,1288,1211,Bucharest,26.139071,44.380931,26.144956,44.370394,delivered,...,car,courier_picked_up_order,2021-11-19 03:37:32.000,26.138885,44.381084,,260.0,22.550264,1283.600670,2021-11-19 03:45:35.000
386314,1,65584,1288,1211,Bucharest,26.139071,44.380931,26.144956,44.370394,delivered,...,car,courier_arrived_at_customer,2021-11-19 03:41:25.000,26.143944,44.370897,,,,98.029569,2021-11-19 03:41:25.000
