In [1]:
#Import libraries
from pyhive import presto
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import warnings
import h3
from datetime import datetime
from functools import reduce
import json
warnings.filterwarnings('ignore')
pd.set_option('display.max_columns', 500)

In [2]:
#Define presto credentials
presto_host = 'presto-gateway.serving.data.plectrum.dev'
presto_port = '80'
username = 'aditya.bhattar@rapido.bike'

#Create connection to presto host
connection = presto.connect(presto_host,presto_port,username = username)

In [3]:
#Load the hexes in zone and holdout
hexes = pd.read_excel('auto_segments_bangalore.xlsx', sheet_name = 'hsr_upper')
hexes = hexes.rename(columns = {'uid':'pickup_location_hex_8', 'segment':'Segment'})
hyderabad_new_zone = hexes['pickup_location_hex_8'].unique().tolist()
hexes.head()

Unnamed: 0,pickup_location_hex_8,Segment,Label
0,88618925c3fffff,S1,Zone
1,88618925cbfffff,S1,Zone
2,88618924bdfffff,S1,Holdout
3,88618925c7fffff,S1,Zone
4,88618925c1fffff,S1,Zone


In [4]:
print(hyderabad_new_zone)

['88618925c3fffff', '88618925cbfffff', '88618924bdfffff', '88618925c7fffff', '88618925c1fffff', '8861892513fffff', '8860145a0dfffff', '886014586dfffff', '8861892cc9fffff', '8861892ec9fffff', '8861892c19fffff', '886189263dfffff', '8861892cddfffff', '8860145ac9fffff', '88618920adfffff', '8860145841fffff', '8861892c1dfffff', '8860145a25fffff', '88618925ddfffff', '8860145b15fffff', '8861892631fffff', '8861892c55fffff', '8861892607fffff', '88601451e5fffff', '88601459a9fffff', '8860145933fffff', '8861892623fffff', '886014586bfffff', '886014c939fffff', '8861892615fffff', '8861892cd9fffff', '886189268dfffff', '8860145137fffff', '8860145149fffff', '8860145833fffff', '8860145ad3fffff', '8861892693fffff', '8860145927fffff', '8861893483fffff', '88601451adfffff', '88601696e3fffff', '8861892f05fffff', '88618925e9fffff', '8861892561fffff', '8860145323fffff', '88618925e1fffff', '8861892cd3fffff', '88618921d7fffff', '88601458c9fffff', '8861892469fffff', '8861892519fffff', '8861892e15fffff', '8861892c81

In [5]:
#Query data from order_logs_snapshot

q = """
select order_id, accept_to_pickup_distance, city_name, hhmmss, hour, service_obj_service_name, time_bucket, weekday, yyyymmdd,
order_status, cancel_reason, eta, map_riders, pickup_location_hex_8
from orders.order_logs_snapshot
where yyyymmdd >= '20230420'
and yyyymmdd <= '20230501'
and city_name in ('Bangalore')
and service_obj_service_name = 'Auto'
and pickup_location_hex_8 in ('88618925c3fffff', '88618925cbfffff', '88618924bdfffff', '88618925c7fffff', '88618925c1fffff', '8861892513fffff', '8860145a0dfffff',
 '886014586dfffff', '8861892cc9fffff', '8861892ec9fffff', '8861892c19fffff', '886189263dfffff', '8861892cddfffff', '8860145ac9fffff', '88618920adfffff',
   '8860145841fffff', '8861892c1dfffff', '8860145a25fffff', '88618925ddfffff', '8860145b15fffff', '8861892631fffff', '8861892c55fffff', '8861892607fffff',
     '88601451e5fffff', '88601459a9fffff', '8860145933fffff', '8861892623fffff', '886014586bfffff', '886014c939fffff', '8861892615fffff', '8861892cd9fffff',
       '886189268dfffff', '8860145137fffff', '8860145149fffff', '8860145833fffff', '8860145ad3fffff', '8861892693fffff', '8860145927fffff', '8861893483fffff',
         '88601451adfffff', '88601696e3fffff', '8861892f05fffff', '88618925e9fffff', '8861892561fffff', '8860145323fffff', '88618925e1fffff', '8861892cd3fffff',
           '88618921d7fffff', '88601458c9fffff', '8861892469fffff', '8861892519fffff', '8861892e15fffff', '8861892c81fffff', '88618924d3fffff', '8860145911fffff',
             '8861892e3bfffff', '88618925e3fffff', '886189241dfffff', '8861892687fffff', '8861892517fffff', '88618920d1fffff', '8861892511fffff', '88618925edfffff',
               '8861892f51fffff', '88618924d1fffff', '8861892017fffff', '886189275dfffff', '8861892cdbfffff', '8861892f55fffff', '8861892711fffff',
                 '8861892c23fffff', '8861892703fffff', '88618920c7fffff', '8860169651fffff', '8860144b69fffff', '8861892abbfffff', '8861892f5bfffff',
                   '886014583bfffff', '8860169741fffff', '88618926e3fffff', '8861892011fffff', '886189359dfffff', '8861892035fffff', '8861892d85fffff',
                     '8861892515fffff', '8861892507fffff', '886189250bfffff', '8861892c27fffff', '8860145ab1fffff', '88618926d5fffff', '8861892113fffff',
                       '886016975bfffff', '8861892695fffff', '8861892ab3fffff', '886016906bfffff', '8860145ab7fffff', '88618925cdfffff', '8861892599fffff',
                         '886189251bfffff', '886189258bfffff', '8861892581fffff', '88618924c5fffff', '88618925c5fffff', '8861892587fffff', '8861892557fffff',
                           '88618925d1fffff', '8860145a45fffff', '8861892589fffff', '88618925d7fffff', '8861892427fffff', '88618925d3fffff', '8861892663fffff',
                             '88618925ebfffff', '8861892487fffff', '88618925d5fffff', '886189258dfffff', '88618925c9fffff', '8861892425fffff', '886189259dfffff',
                               '8861892493fffff', '8861892435fffff', '886016966bfffff', '8860145ac5fffff', '8860145901fffff', '88618935bbfffff', '8860145ad1fffff',
                                 '8860145b25fffff', '8861892ddbfffff', '886189266bfffff', '8861892db9fffff', '8861892a9bfffff', '8860145825fffff', '8860145a03fffff',
                                   '8860145ae5fffff', '8860145325fffff', '88601696d5fffff', '886014c935fffff', '8860145349fffff', '886016968dfffff',
                                     '8860145331fffff', '8860169685fffff', '8861892ecbfffff', '8861892e9bfffff', '886189256dfffff', '88618920a5fffff',
                                       '8861892ed7fffff', '88618925b3fffff', '8861892c9dfffff', '8861892e55fffff', '8861892585fffff', '8861892533fffff',
                                         '8861892583fffff', '8861892c95fffff', '8861892095fffff', '886014594dfffff', '88618926c5fffff', '886014c903fffff',
                                           '8861892085fffff', '8860169649fffff', '886016965dfffff', '88618921c9fffff', '88618920c1fffff', '8861892709fffff',
                                             '8861892187fffff', '88618921a1fffff', '88618922a7fffff', '886189251dfffff', '8861892c21fffff', '886189229dfffff',
                                               '8860144b51fffff', '886014c9adfffff', '8860144a65fffff', '8861892287fffff')
and hour in ('17', '18', '19', '20', '21')
"""

#Load data into pandas table
df_copy = pd.read_sql(q, connection)

In [6]:
#Create a copy of the df retrived from presto
df = df_copy.copy()
df = df[df['pickup_location_hex_8'].isin(hyderabad_new_zone)]
df.head()

Unnamed: 0,order_id,accept_to_pickup_distance,city_name,hhmmss,hour,service_obj_service_name,time_bucket,weekday,yyyymmdd,order_status,cancel_reason,eta,map_riders,pickup_location_hex_8
0,6447be25013d8c668b830e2e,1.516,Bangalore,171853,17,Auto,17:00,2,20230425,dropped,,2.0,"[""63a03a98d0fa7118c29e4357"",""64291d8dd47342944...",88618924c5fffff
1,6447bf4dfc8ef91704ed2cbc,,Bangalore,172349,17,Auto,17:00,2,20230425,expired,,2.0,"[""6329bd0be21bd914ffcd2ac9"",""60fc2464fe2f6088a...",88618920a5fffff
2,6447bfc41ce20052b0d347ac,1.565,Bangalore,172548,17,Auto,17:00,2,20230425,dropped,,7.0,"[""61c01ae1f3de6fbf5f51b699""]",8861892709fffff
3,6447bfda8dece675cd1646e7,,Bangalore,172610,17,Auto,17:00,2,20230425,expired,,3.0,"[""5c1cab053f491d0a6e601ee8"",""6102a5626cd492a28...",88618925c5fffff
4,6447c03cdf41c0543f1c6b85,,Bangalore,172748,17,Auto,17:00,2,20230425,customerCancelled,order cancelled before rider accepted,5.0,"[""6113a7778dddcbaaa36c49c0"",""62ce79e47fc185b34...",886189258bfffff


In [7]:
#View the dataset
df_captain = df.copy()
df_captain = df_captain[df_captain['pickup_location_hex_8'].isin(hyderabad_new_zone)]
df_captain.head()

Unnamed: 0,order_id,accept_to_pickup_distance,city_name,hhmmss,hour,service_obj_service_name,time_bucket,weekday,yyyymmdd,order_status,cancel_reason,eta,map_riders,pickup_location_hex_8
0,6447be25013d8c668b830e2e,1.516,Bangalore,171853,17,Auto,17:00,2,20230425,dropped,,2.0,"[""63a03a98d0fa7118c29e4357"",""64291d8dd47342944...",88618924c5fffff
1,6447bf4dfc8ef91704ed2cbc,,Bangalore,172349,17,Auto,17:00,2,20230425,expired,,2.0,"[""6329bd0be21bd914ffcd2ac9"",""60fc2464fe2f6088a...",88618920a5fffff
2,6447bfc41ce20052b0d347ac,1.565,Bangalore,172548,17,Auto,17:00,2,20230425,dropped,,7.0,"[""61c01ae1f3de6fbf5f51b699""]",8861892709fffff
3,6447bfda8dece675cd1646e7,,Bangalore,172610,17,Auto,17:00,2,20230425,expired,,3.0,"[""5c1cab053f491d0a6e601ee8"",""6102a5626cd492a28...",88618925c5fffff
4,6447c03cdf41c0543f1c6b85,,Bangalore,172748,17,Auto,17:00,2,20230425,customerCancelled,order cancelled before rider accepted,5.0,"[""6113a7778dddcbaaa36c49c0"",""62ce79e47fc185b34...",886189258bfffff


In [8]:
#Filter only on the hexes needed
df_filter_level1 = df
df_filter_level1['count'] = 1
df_filter_level1['date'] = df_filter_level1['yyyymmdd'].astype(str).apply(lambda x: x[6:])

In [9]:
#Create a new column for cancel_reason to identify OCARA
df_filter_level1['new_cancel_reason'] = np.where(df_filter_level1['order_status'] == 'customerCancelled',
 np.where(df_filter_level1['cancel_reason'] == 'order cancelled before rider accepted', 'COBRA',
 np.where(df_filter_level1['cancel_reason'] == 'Order cancelled before rider was mapped', 'COBRM', 'OCARA')), 'no_cancellation')

In [10]:
#Check if the order is expiry_mapped or expiry_unmapped
df_filter_level1['expiry_type'] = np.where(df_filter_level1['order_status'] == 'expired',
 np.where(df_filter_level1['map_riders'] == '[]', 'expiry_unmapped', 'expiry_mapped'), 'no_expiry')
df_filter_level1['final_order_status'] = np.where(df_filter_level1['order_status'] == 'customerCancelled', df_filter_level1['new_cancel_reason'],
np.where(df_filter_level1['order_status'] == 'expired', df_filter_level1['expiry_type'], df_filter_level1['order_status']))

In [11]:
#Add segment and label to the datasets
df_filter_level1 = df_filter_level1.merge(hexes, on = 'pickup_location_hex_8', how = 'left')
df_captain = df_captain.merge(hexes, on = 'pickup_location_hex_8', how = 'left')
df_filter_level1.head()

Unnamed: 0,order_id,accept_to_pickup_distance,city_name,hhmmss,hour,service_obj_service_name,time_bucket,weekday,yyyymmdd,order_status,cancel_reason,eta,map_riders,pickup_location_hex_8,count,date,new_cancel_reason,expiry_type,final_order_status,Segment,Label
0,6447be25013d8c668b830e2e,1.516,Bangalore,171853,17,Auto,17:00,2,20230425,dropped,,2.0,"[""63a03a98d0fa7118c29e4357"",""64291d8dd47342944...",88618924c5fffff,1,25,no_cancellation,no_expiry,dropped,S3,Holdout
1,6447bf4dfc8ef91704ed2cbc,,Bangalore,172349,17,Auto,17:00,2,20230425,expired,,2.0,"[""6329bd0be21bd914ffcd2ac9"",""60fc2464fe2f6088a...",88618920a5fffff,1,25,no_cancellation,expiry_mapped,expiry_mapped,S4,Holdout
2,6447bfc41ce20052b0d347ac,1.565,Bangalore,172548,17,Auto,17:00,2,20230425,dropped,,7.0,"[""61c01ae1f3de6fbf5f51b699""]",8861892709fffff,1,25,no_cancellation,no_expiry,dropped,S4,Holdout
3,6447bfda8dece675cd1646e7,,Bangalore,172610,17,Auto,17:00,2,20230425,expired,,3.0,"[""5c1cab053f491d0a6e601ee8"",""6102a5626cd492a28...",88618925c5fffff,1,25,no_cancellation,expiry_mapped,expiry_mapped,S3,Zone
4,6447c03cdf41c0543f1c6b85,,Bangalore,172748,17,Auto,17:00,2,20230425,customerCancelled,order cancelled before rider accepted,5.0,"[""6113a7778dddcbaaa36c49c0"",""62ce79e47fc185b34...",886189258bfffff,1,25,COBRA,no_expiry,COBRA,S3,Zone


In [12]:
#Metrics_pivot
metrics_pivot = pd.pivot_table(df_filter_level1, index = ['yyyymmdd', 'hour', 'Segment', 'Label'], columns = 'final_order_status',
 values = 'count', aggfunc = 'sum', fill_value=0).reset_index().rename_axis(None, axis=1)
metrics_pivot['total'] = metrics_pivot[['COBRA', 'COBRM', 'OCARA', 'aborted', 'dropped', 'expiry_mapped', 'expiry_unmapped']].sum(axis = 1)
metrics_pivot['OCARA%'] = round(metrics_pivot['OCARA']/metrics_pivot['total'], 4)
metrics_pivot['COBRA%'] = round(metrics_pivot['COBRA']/metrics_pivot['total'], 4)
metrics_pivot['COBRM%'] = round(metrics_pivot['COBRM']/metrics_pivot['total'], 4)
metrics_pivot['G2N'] = round(metrics_pivot['dropped']/metrics_pivot['total'], 4)
metrics_pivot['expiry_mapped%'] = round(metrics_pivot['expiry_mapped']/metrics_pivot['total'], 4)
if 'expiry_unmapped' in metrics_pivot.columns:
    metrics_pivot['Stockout%'] = round(metrics_pivot['expiry_unmapped']/metrics_pivot['total'], 4)
else:
    metrics_pivot['Stockout%'] = 0
metrics_df = metrics_pivot[['yyyymmdd', 'hour', 'Segment', 'Label', 'OCARA%', 'COBRA%', 'COBRM%', 'G2N', 'expiry_mapped%', 'Stockout%', 'total']]
metrics_df.head()

Unnamed: 0,yyyymmdd,hour,Segment,Label,OCARA%,COBRA%,COBRM%,G2N,expiry_mapped%,Stockout%,total
0,20230420,17,S1,Holdout,0.1141,0.2886,0.0034,0.4262,0.151,0.0134,298
1,20230420,17,S1,Zone,0.0861,0.3795,0.0,0.2971,0.2372,0.0,801
2,20230420,17,S2,Holdout,0.105,0.3094,0.0,0.3149,0.2597,0.011,181
3,20230420,17,S2,Zone,0.0966,0.4034,0.0,0.2159,0.2841,0.0,176
4,20230420,17,S3,Holdout,0.1258,0.2742,0.0,0.3888,0.2045,0.0045,445


In [13]:
#Get the average ETA and FM
fm_eta = df_filter_level1.groupby(['yyyymmdd', 'hour', 'Segment', 'Label'])[['accept_to_pickup_distance', 'eta']].median().reset_index()
fm_eta = fm_eta.rename(columns = {'accept_to_pickup_distance':'FM'})
metrics_df = metrics_df.merge(fm_eta, on = ['yyyymmdd', 'hour', 'Segment', 'Label'], how = 'left')
metrics_df.head()

Unnamed: 0,yyyymmdd,hour,Segment,Label,OCARA%,COBRA%,COBRM%,G2N,expiry_mapped%,Stockout%,total,FM,eta
0,20230420,17,S1,Holdout,0.1141,0.2886,0.0034,0.4262,0.151,0.0134,298,0.6955,2.0
1,20230420,17,S1,Zone,0.0861,0.3795,0.0,0.2971,0.2372,0.0,801,0.783,3.0
2,20230420,17,S2,Holdout,0.105,0.3094,0.0,0.3149,0.2597,0.011,181,0.856,2.0
3,20230420,17,S2,Zone,0.0966,0.4034,0.0,0.2159,0.2841,0.0,176,0.833,2.0
4,20230420,17,S3,Holdout,0.1258,0.2742,0.0,0.3888,0.2045,0.0045,445,0.872,3.0


In [14]:
metrics_df.to_csv('fm_upper.csv', index = False)

In [47]:
#Get the number of captains by cluster
#Make columns for second and minute and date
df_captain['second'] = df_captain['hhmmss'].astype(str).apply(lambda x: x[4:])
df_captain['minute'] = df_captain['hhmmss'].astype(str).apply(lambda x: x[2:4])
df_captain['date'] = df_captain['yyyymmdd'].astype(str).apply(lambda x: x[6:])
df_captain['map_riders'] = df_captain['map_riders'].apply(lambda x: json.loads(x))
df_captain = df_captain[df_captain['map_riders'].apply(lambda x: len(x)) > 0]
df_captain['15_sec_batch'] = pd.cut(df_captain['second'].astype(int), bins = [-1, 15, 30, 45, 61], labels = ['0-15', '15-30', '30-45', '45-60'])

#Get the riders mapped in 20_sec_windows
df_captains_20_sec = df_captain.groupby(['yyyymmdd', 'hour', 'Segment', 'Label', 'minute', '15_sec_batch'])['map_riders'].apply(list).reset_index()

#Get the number of captains in a single list
map_riders = []
df_captains_20_sec = df_captains_20_sec.dropna()
for x in df_captains_20_sec['map_riders'].values:
    riders = [item for sublist in x for item in sublist]
    map_riders.append(riders)

unique_captains = []
for x in map_riders:
    unique_captains_list = []
    for y in x:
        if y in unique_captains_list:
            continue
        else:
            unique_captains_list.append(y)
    unique_captains.append(unique_captains_list)

df_captains_20_sec['total_captains'] = unique_captains

#Get the number of captains in each batch
df_captains_20_sec['number_of_captains'] = df_captains_20_sec['total_captains'].apply(lambda x: len(x))

#Get the average number of captains coming in each 20_second_window
df_20_sec_average = df_captains_20_sec.groupby(['yyyymmdd', 'hour', 'Segment', 'Label'])['number_of_captains'].median().reset_index()

#Merge the hourly captains and 20_sec_captains with the metrics table
metrics_df = metrics_df.merge(df_20_sec_average, on = ['yyyymmdd', 'hour', 'Segment', 'Label'], how = 'left')
metrics_df.head()

Unnamed: 0,yyyymmdd,hour,Segment,Label,OCARA%,COBRA%,COBRM%,G2N,expiry_mapped%,Stockout%,total,FM,eta,number_of_captains
0,20230420,17,S1,Holdout,0.1141,0.2886,0.0034,0.4262,0.151,0.0134,298,0.813923,2.912752,43.0
1,20230420,17,S1,Zone,0.0861,0.3795,0.0,0.2971,0.2372,0.0,801,11.541598,2.700375,84.0
2,20230420,17,S2,Holdout,0.105,0.3094,0.0,0.3149,0.2597,0.011,181,23.619686,2.729282,20.0
3,20230420,17,S2,Zone,0.0966,0.4034,0.0,0.2159,0.2841,0.0,176,0.85497,2.823864,24.0
4,20230420,17,S3,Holdout,0.1258,0.2742,0.0,0.3888,0.2045,0.0045,445,0.993022,3.125843,47.0


In [17]:
#Query data from order_logs_immutable for calculating TTC

q = """
select order_id, accept_to_pickup_distance, captain_location_hex_8, captain_location_latitude,
captain_location_longitude, city_name, customer_location_hex_8, drop_location_hex_8, drop_location_latitude, drop_location_longitude, customer_location_latitude, 
customer_location_longitude, hhmmss, hour, quarter_hour, service_obj_service_name, time_bucket, weekday, yyyymmdd,
order_status, cancel_reason, updated_epoch, event_type, pickup_location_hex_8
from orders.order_logs_immutable
where yyyymmdd >= '20230420'
and yyyymmdd <= '20230501'
and city_name in ('Bangalore')
and service_obj_service_name = 'Auto'
and event_type in ('customer_cancelled', 'order_requested', 'accepted')
and pickup_location_hex_8 in ('88618925c3fffff', '88618925cbfffff', '88618924bdfffff', '88618925c7fffff', '88618925c1fffff', '8861892513fffff', '8860145a0dfffff',
 '886014586dfffff', '8861892cc9fffff', '8861892ec9fffff', '8861892c19fffff', '886189263dfffff', '8861892cddfffff', '8860145ac9fffff', '88618920adfffff',
   '8860145841fffff', '8861892c1dfffff', '8860145a25fffff', '88618925ddfffff', '8860145b15fffff', '8861892631fffff', '8861892c55fffff', '8861892607fffff',
     '88601451e5fffff', '88601459a9fffff', '8860145933fffff', '8861892623fffff', '886014586bfffff', '886014c939fffff', '8861892615fffff', '8861892cd9fffff',
       '886189268dfffff', '8860145137fffff', '8860145149fffff', '8860145833fffff', '8860145ad3fffff', '8861892693fffff', '8860145927fffff', '8861893483fffff',
         '88601451adfffff', '88601696e3fffff', '8861892f05fffff', '88618925e9fffff', '8861892561fffff', '8860145323fffff', '88618925e1fffff', '8861892cd3fffff',
           '88618921d7fffff', '88601458c9fffff', '8861892469fffff', '8861892519fffff', '8861892e15fffff', '8861892c81fffff', '88618924d3fffff', '8860145911fffff',
             '8861892e3bfffff', '88618925e3fffff', '886189241dfffff', '8861892687fffff', '8861892517fffff', '88618920d1fffff', '8861892511fffff', '88618925edfffff',
               '8861892f51fffff', '88618924d1fffff', '8861892017fffff', '886189275dfffff', '8861892cdbfffff', '8861892f55fffff', '8861892711fffff',
                 '8861892c23fffff', '8861892703fffff', '88618920c7fffff', '8860169651fffff', '8860144b69fffff', '8861892abbfffff', '8861892f5bfffff',
                   '886014583bfffff', '8860169741fffff', '88618926e3fffff', '8861892011fffff', '886189359dfffff', '8861892035fffff', '8861892d85fffff',
                     '8861892515fffff', '8861892507fffff', '886189250bfffff', '8861892c27fffff', '8860145ab1fffff', '88618926d5fffff', '8861892113fffff',
                       '886016975bfffff', '8861892695fffff', '8861892ab3fffff', '886016906bfffff', '8860145ab7fffff', '88618925cdfffff', '8861892599fffff',
                         '886189251bfffff', '886189258bfffff', '8861892581fffff', '88618924c5fffff', '88618925c5fffff', '8861892587fffff', '8861892557fffff',
                           '88618925d1fffff', '8860145a45fffff', '8861892589fffff', '88618925d7fffff', '8861892427fffff', '88618925d3fffff', '8861892663fffff',
                             '88618925ebfffff', '8861892487fffff', '88618925d5fffff', '886189258dfffff', '88618925c9fffff', '8861892425fffff', '886189259dfffff',
                               '8861892493fffff', '8861892435fffff', '886016966bfffff', '8860145ac5fffff', '8860145901fffff', '88618935bbfffff', '8860145ad1fffff',
                                 '8860145b25fffff', '8861892ddbfffff', '886189266bfffff', '8861892db9fffff', '8861892a9bfffff', '8860145825fffff', '8860145a03fffff',
                                   '8860145ae5fffff', '8860145325fffff', '88601696d5fffff', '886014c935fffff', '8860145349fffff', '886016968dfffff',
                                     '8860145331fffff', '8860169685fffff', '8861892ecbfffff', '8861892e9bfffff', '886189256dfffff', '88618920a5fffff',
                                       '8861892ed7fffff', '88618925b3fffff', '8861892c9dfffff', '8861892e55fffff', '8861892585fffff', '8861892533fffff',
                                         '8861892583fffff', '8861892c95fffff', '8861892095fffff', '886014594dfffff', '88618926c5fffff', '886014c903fffff',
                                           '8861892085fffff', '8860169649fffff', '886016965dfffff', '88618921c9fffff', '88618920c1fffff', '8861892709fffff',
                                             '8861892187fffff', '88618921a1fffff', '88618922a7fffff', '886189251dfffff', '8861892c21fffff', '886189229dfffff',
                                               '8860144b51fffff', '886014c9adfffff', '8860144a65fffff', '8861892287fffff')
and hour in ('17', '18', '19', '20', '21')
"""

#Load data into pandas table
df_ttc_copy = pd.read_sql(q, connection)

In [48]:
#Load and view dataset
df_ttc = df_ttc_copy.copy()
df_ttc = df_ttc[df_ttc['pickup_location_hex_8'].isin(hyderabad_new_zone)]
df_ttc['timestamp'] = df_ttc['updated_epoch'].apply(lambda x: datetime.fromtimestamp(x/1000))
df_ttc = df_ttc.sort_values(by = 'timestamp')
df_ttc.head()

Unnamed: 0,order_id,accept_to_pickup_distance,captain_location_hex_8,captain_location_latitude,captain_location_longitude,city_name,customer_location_hex_8,drop_location_hex_8,drop_location_latitude,drop_location_longitude,customer_location_latitude,customer_location_longitude,hhmmss,hour,quarter_hour,service_obj_service_name,time_bucket,weekday,yyyymmdd,order_status,cancel_reason,updated_epoch,event_type,pickup_location_hex_8,timestamp
591412,644122381c6d14472f498524,,,,,Bangalore,88618925d3fffff,8861892589fffff,12.930696,77.603659,12.91821,77.591995,170000,17,1700,Auto,17:00,4,20230420,requested,,1681990200725,order_requested,88618925d3fffff,2023-04-20 17:00:00.725
600080,6441223c246f7347ed4ee994,,,,,Bangalore,886189251bfffff,8861892591fffff,12.929675,77.580175,12.925279,77.633805,170004,17,1700,Auto,17:00,4,20230420,requested,,1681990204112,order_requested,886189251bfffff,2023-04-20 17:00:04.112
620343,6441223c9e36a256b02295d1,,,,,Bangalore,8861892583fffff,886189251bfffff,12.921909,77.636881,12.938639,77.595291,170004,17,1700,Auto,17:00,4,20230420,requested,,1681990204254,order_requested,8861892583fffff,2023-04-20 17:00:04.254
600057,6441223d67e55167dd9ebca6,,,,,Bangalore,88618925c5fffff,8861892e9dfffff,12.9858,77.622551,12.935035,77.623222,170005,17,1700,Auto,17:00,4,20230420,requested,,1681990205546,order_requested,88618925c5fffff,2023-04-20 17:00:05.546
536958,6441223f2937c94eff8091aa,,,,,Bangalore,8861892589fffff,88618925d5fffff,12.92674,77.605897,12.932643,77.610463,170007,17,1700,Auto,17:00,4,20230420,requested,,1681990207217,order_requested,8861892589fffff,2023-04-20 17:00:07.217


In [49]:
#Create a column to identify OCARA, COBRA and COBRM
df_ttc['new_cancel_reason'] = np.where(df_ttc['event_type'] == 'customer_cancelled',
 np.where(df_ttc['cancel_reason'] == 'order cancelled before rider accepted', 'COBRA',
 np.where(df_ttc['cancel_reason'] == 'Order cancelled before rider was mapped', 'COBRM', 'OCARA')), 'no_cancellation')
df_ttc.head()

Unnamed: 0,order_id,accept_to_pickup_distance,captain_location_hex_8,captain_location_latitude,captain_location_longitude,city_name,customer_location_hex_8,drop_location_hex_8,drop_location_latitude,drop_location_longitude,customer_location_latitude,customer_location_longitude,hhmmss,hour,quarter_hour,service_obj_service_name,time_bucket,weekday,yyyymmdd,order_status,cancel_reason,updated_epoch,event_type,pickup_location_hex_8,timestamp,new_cancel_reason
591412,644122381c6d14472f498524,,,,,Bangalore,88618925d3fffff,8861892589fffff,12.930696,77.603659,12.91821,77.591995,170000,17,1700,Auto,17:00,4,20230420,requested,,1681990200725,order_requested,88618925d3fffff,2023-04-20 17:00:00.725,no_cancellation
600080,6441223c246f7347ed4ee994,,,,,Bangalore,886189251bfffff,8861892591fffff,12.929675,77.580175,12.925279,77.633805,170004,17,1700,Auto,17:00,4,20230420,requested,,1681990204112,order_requested,886189251bfffff,2023-04-20 17:00:04.112,no_cancellation
620343,6441223c9e36a256b02295d1,,,,,Bangalore,8861892583fffff,886189251bfffff,12.921909,77.636881,12.938639,77.595291,170004,17,1700,Auto,17:00,4,20230420,requested,,1681990204254,order_requested,8861892583fffff,2023-04-20 17:00:04.254,no_cancellation
600057,6441223d67e55167dd9ebca6,,,,,Bangalore,88618925c5fffff,8861892e9dfffff,12.9858,77.622551,12.935035,77.623222,170005,17,1700,Auto,17:00,4,20230420,requested,,1681990205546,order_requested,88618925c5fffff,2023-04-20 17:00:05.546,no_cancellation
536958,6441223f2937c94eff8091aa,,,,,Bangalore,8861892589fffff,88618925d5fffff,12.92674,77.605897,12.932643,77.610463,170007,17,1700,Auto,17:00,4,20230420,requested,,1681990207217,order_requested,8861892589fffff,2023-04-20 17:00:07.217,no_cancellation


In [50]:
#Filter the df only zone1
df_ttc_filter = df_ttc
df_ttc_filter = df_ttc_filter.merge(hexes, on = 'pickup_location_hex_8', how = 'left')
df_ttc_filter.head()

Unnamed: 0,order_id,accept_to_pickup_distance,captain_location_hex_8,captain_location_latitude,captain_location_longitude,city_name,customer_location_hex_8,drop_location_hex_8,drop_location_latitude,drop_location_longitude,customer_location_latitude,customer_location_longitude,hhmmss,hour,quarter_hour,service_obj_service_name,time_bucket,weekday,yyyymmdd,order_status,cancel_reason,updated_epoch,event_type,pickup_location_hex_8,timestamp,new_cancel_reason,Segment,Label
0,644122381c6d14472f498524,,,,,Bangalore,88618925d3fffff,8861892589fffff,12.930696,77.603659,12.91821,77.591995,170000,17,1700,Auto,17:00,4,20230420,requested,,1681990200725,order_requested,88618925d3fffff,2023-04-20 17:00:00.725,no_cancellation,S3,Zone
1,6441223c246f7347ed4ee994,,,,,Bangalore,886189251bfffff,8861892591fffff,12.929675,77.580175,12.925279,77.633805,170004,17,1700,Auto,17:00,4,20230420,requested,,1681990204112,order_requested,886189251bfffff,2023-04-20 17:00:04.112,no_cancellation,S3,Zone
2,6441223c9e36a256b02295d1,,,,,Bangalore,8861892583fffff,886189251bfffff,12.921909,77.636881,12.938639,77.595291,170004,17,1700,Auto,17:00,4,20230420,requested,,1681990204254,order_requested,8861892583fffff,2023-04-20 17:00:04.254,no_cancellation,S4,Zone
3,6441223d67e55167dd9ebca6,,,,,Bangalore,88618925c5fffff,8861892e9dfffff,12.9858,77.622551,12.935035,77.623222,170005,17,1700,Auto,17:00,4,20230420,requested,,1681990205546,order_requested,88618925c5fffff,2023-04-20 17:00:05.546,no_cancellation,S3,Zone
4,6441223f2937c94eff8091aa,,,,,Bangalore,8861892589fffff,88618925d5fffff,12.92674,77.605897,12.932643,77.610463,170007,17,1700,Auto,17:00,4,20230420,requested,,1681990207217,order_requested,8861892589fffff,2023-04-20 17:00:07.217,no_cancellation,S3,Zone


In [51]:
#Filter df on the columns needed
df_filter = df_ttc_filter[['order_id', 'pickup_location_hex_8', 'hour', 'yyyymmdd', 'timestamp', 'event_type', 'new_cancel_reason', 'Segment', 'Label']]

#Filter on the respective order_status
df_cobra = df_filter[df_filter['new_cancel_reason'] == 'COBRA']
df_ocara = df_filter[df_filter['new_cancel_reason'] == 'OCARA']
df_cobrm = df_filter[df_filter['new_cancel_reason'] == 'COBRM']
df_order_created = df_filter[df_filter['event_type'] == 'order_requested']
df_accepted = df_filter[df_filter['event_type'] == 'accepted']

#Sort the dfs by timestamp
df_cobra = df_cobra.sort_values(by = 'timestamp', ascending=True)
df_cobra = df_cobra.drop_duplicates(subset='order_id')

df_ocara = df_ocara.sort_values(by = 'timestamp', ascending=True)
df_ocara = df_ocara.drop_duplicates(subset='order_id')

df_cobrm = df_cobrm.sort_values(by = 'timestamp', ascending=True)
df_cobrm = df_cobrm.drop_duplicates(subset='order_id')

df_accepted = df_accepted.sort_values(by = 'timestamp', ascending=True)
df_accepted = df_accepted.drop_duplicates(subset='order_id')

#Change the column names
df_cobra = df_cobra[['order_id', 'new_cancel_reason', 'timestamp']]
df_cobra.columns = ['order_id', 'COBRA', 'timestamp_cobra']

df_cobrm = df_cobrm[['order_id', 'new_cancel_reason', 'timestamp']]
df_cobrm.columns = ['order_id', 'COBRM', 'timestamp_cobrm']

df_ocara = df_ocara[['order_id', 'new_cancel_reason', 'timestamp']]
df_ocara.columns = ['order_id', 'OCARA', 'timestamp_ocara']

df_accepted = df_accepted[['order_id', 'event_type', 'timestamp']]
df_accepted.columns = ['order_id', 'Accepted', 'timestamp_accepted']

#Merge the dfs together
dfs_to_merge = [df_order_created, df_cobra, df_cobrm, df_ocara, df_accepted]
df_final = reduce(lambda  left, right: pd.merge(left, right,on=['order_id'], how='left'), dfs_to_merge).fillna('NA')
df_final.head()

Unnamed: 0,order_id,pickup_location_hex_8,hour,yyyymmdd,timestamp,event_type,new_cancel_reason,Segment,Label,COBRA,timestamp_cobra,COBRM,timestamp_cobrm,OCARA,timestamp_ocara,Accepted,timestamp_accepted
0,644122381c6d14472f498524,88618925d3fffff,17,20230420,2023-04-20 17:00:00.725,order_requested,no_cancellation,S3,Zone,,,,,,,,
1,6441223c246f7347ed4ee994,886189251bfffff,17,20230420,2023-04-20 17:00:04.112,order_requested,no_cancellation,S3,Zone,,,,,,,accepted,2023-04-20 17:00:08.748000
2,6441223c9e36a256b02295d1,8861892583fffff,17,20230420,2023-04-20 17:00:04.254,order_requested,no_cancellation,S4,Zone,,,,,,,,
3,6441223d67e55167dd9ebca6,88618925c5fffff,17,20230420,2023-04-20 17:00:05.546,order_requested,no_cancellation,S3,Zone,,,,,OCARA,2023-04-20 17:07:00.372000,accepted,2023-04-20 17:02:31.398000
4,6441223f2937c94eff8091aa,8861892589fffff,17,20230420,2023-04-20 17:00:07.217,order_requested,no_cancellation,S3,Zone,,,,,,,,


In [52]:
#Filter on the respective order_status
df_cobra = df_final[df_final['COBRA'] == 'COBRA']
df_ocara = df_final[df_final['OCARA'] == 'OCARA']
df_cobrm = df_final[df_final['COBRM'] == 'COBRM']
df_accepted = df_final[df_final['Accepted'] == 'accepted']

#Calculate TTC for cobrm, ocara, and cobra and tta
df_cobra['ttc_cobra'] = pd.to_datetime(df_cobra['timestamp_cobra']) - pd.to_datetime(df_cobra['timestamp'])
df_cobra['ttc_cobra'] = df_cobra['ttc_cobra'].apply(lambda x: x.total_seconds())

df_cobrm['ttc_cobrm'] = pd.to_datetime(df_cobrm['timestamp_cobrm']) - pd.to_datetime(df_cobrm['timestamp'])
df_cobrm['ttc_cobrm'] = df_cobrm['ttc_cobrm'].apply(lambda x: x.total_seconds())

df_ocara['ttc_ocara'] = pd.to_datetime(df_ocara['timestamp_ocara']) - pd.to_datetime(df_ocara['timestamp'])
df_ocara['ttc_ocara'] = df_ocara['ttc_ocara'].apply(lambda x: x.total_seconds())

df_accepted['tta'] = pd.to_datetime(df_accepted['timestamp_accepted']) - pd.to_datetime(df_accepted['timestamp'])
df_accepted['tta'] = df_accepted['tta'].apply(lambda x: x.total_seconds())

#Filter only on the new columns made
df_cobra = df_cobra[['order_id', 'ttc_cobra']]
df_cobrm = df_cobrm[['order_id', 'ttc_cobrm']]
df_ocara = df_ocara[['order_id', 'ttc_ocara']]
df_accepted = df_accepted[['order_id', 'tta']]

#Merge the dfs together
dfs_to_merge = [df_final, df_cobra, df_cobrm, df_ocara, df_accepted]
df_final = reduce(lambda  left, right: pd.merge(left, right,on=['order_id'], how='left'), dfs_to_merge)
df_final.head()

Unnamed: 0,order_id,pickup_location_hex_8,hour,yyyymmdd,timestamp,event_type,new_cancel_reason,Segment,Label,COBRA,timestamp_cobra,COBRM,timestamp_cobrm,OCARA,timestamp_ocara,Accepted,timestamp_accepted,ttc_cobra,ttc_cobrm,ttc_ocara,tta
0,644122381c6d14472f498524,88618925d3fffff,17,20230420,2023-04-20 17:00:00.725,order_requested,no_cancellation,S3,Zone,,,,,,,,,,,,
1,6441223c246f7347ed4ee994,886189251bfffff,17,20230420,2023-04-20 17:00:04.112,order_requested,no_cancellation,S3,Zone,,,,,,,accepted,2023-04-20 17:00:08.748000,,,,4.636
2,6441223c9e36a256b02295d1,8861892583fffff,17,20230420,2023-04-20 17:00:04.254,order_requested,no_cancellation,S4,Zone,,,,,,,,,,,,
3,6441223d67e55167dd9ebca6,88618925c5fffff,17,20230420,2023-04-20 17:00:05.546,order_requested,no_cancellation,S3,Zone,,,,,OCARA,2023-04-20 17:07:00.372000,accepted,2023-04-20 17:02:31.398000,,,414.826,145.852
4,6441223f2937c94eff8091aa,8861892589fffff,17,20230420,2023-04-20 17:00:07.217,order_requested,no_cancellation,S3,Zone,,,,,,,,,,,,


In [53]:
#Create a date column
df_final['date'] = df_final['yyyymmdd'].astype(str).apply(lambda x: x[6:])
df_final['count'] = 1

#Get average ttc, tta
df_cobra_grouped = df_final[df_final['COBRA'] == 'COBRA'].groupby(['yyyymmdd', 'hour', 'Segment', 'Label'])['ttc_cobra'].median().reset_index()
df_ocara_grouped = df_final[df_final['OCARA'] == 'OCARA'].groupby(['yyyymmdd', 'hour', 'Segment', 'Label'])['ttc_ocara'].median().reset_index()
df_cobrm_grouped = df_final[df_final['COBRM'] == 'COBRM'].groupby(['yyyymmdd', 'hour', 'Segment', 'Label'])['ttc_cobrm'].median().reset_index()
df_accepted_grouped = df_final[df_final['Accepted'] == 'accepted'].groupby(['yyyymmdd', 'hour', 'Segment', 'Label'])['tta'].median().reset_index()

#Merge the dfs together
dfs_to_merge = [df_cobra_grouped, df_cobrm_grouped, df_ocara_grouped, df_accepted_grouped]
df_metrics_final = reduce(lambda  left, right: pd.merge(left, right,on=['yyyymmdd', 'hour', 'Segment', 'Label'], how='left'), dfs_to_merge).fillna('NA')
df_metrics_final.head()

Unnamed: 0,yyyymmdd,hour,Segment,Label,ttc_cobra,ttc_cobrm,ttc_ocara,tta
0,20230420,17,S1,Holdout,83.2675,156.331,340.537,10.347
1,20230420,17,S1,Zone,92.433,,253.351,24.178
2,20230420,17,S2,Holdout,80.849,,317.3615,9.887
3,20230420,17,S2,Zone,113.851,,210.576,15.913
4,20230420,17,S3,Holdout,111.92,,198.8575,11.1755


In [54]:
#Merge ttc with metrics_df
metrics_df = metrics_df.merge(df_metrics_final, on = ['yyyymmdd', 'hour', 'Segment', 'Label'], how = 'left')
metrics_df.head()

Unnamed: 0,yyyymmdd,hour,Segment,Label,OCARA%,COBRA%,COBRM%,G2N,expiry_mapped%,Stockout%,total,FM,eta,number_of_captains,ttc_cobra,ttc_cobrm,ttc_ocara,tta
0,20230420,17,S1,Holdout,0.1141,0.2886,0.0034,0.4262,0.151,0.0134,298,0.813923,2.912752,43.0,83.2675,156.331,340.537,10.347
1,20230420,17,S1,Zone,0.0861,0.3795,0.0,0.2971,0.2372,0.0,801,11.541598,2.700375,84.0,92.433,,253.351,24.178
2,20230420,17,S2,Holdout,0.105,0.3094,0.0,0.3149,0.2597,0.011,181,23.619686,2.729282,20.0,80.849,,317.3615,9.887
3,20230420,17,S2,Zone,0.0966,0.4034,0.0,0.2159,0.2841,0.0,176,0.85497,2.823864,24.0,113.851,,210.576,15.913
4,20230420,17,S3,Holdout,0.1258,0.2742,0.0,0.3888,0.2045,0.0045,445,0.993022,3.125843,47.0,111.92,,198.8575,11.1755


In [25]:
#Query data from order_logs_snapshot

q = """
select order_id, accept_to_pickup_distance, captain_location_hex_8, captain_location_latitude,
captain_location_longitude, city_name, customer_location_hex_8, drop_location_hex_8, drop_location_latitude, drop_location_longitude, customer_location_latitude, 
customer_location_longitude, hhmmss, hour, quarter_hour, service_obj_service_name, time_bucket, weekday, yyyymmdd,
order_status, cancel_reason, updated_epoch, event_type, captain_id, spd_fraud_flag, pickup_location_hex_8
from orders.order_logs_immutable
where yyyymmdd >= '20230420'
and yyyymmdd <= '20230501'
and service_obj_service_name = 'Auto'
and event_type in ('accepted', 'rider_busy', 'rider_reject', 'dropped')
and pickup_location_hex_8 in ('88618925c3fffff', '88618925cbfffff', '88618924bdfffff', '88618925c7fffff', '88618925c1fffff', '8861892513fffff', '8860145a0dfffff',
 '886014586dfffff', '8861892cc9fffff', '8861892ec9fffff', '8861892c19fffff', '886189263dfffff', '8861892cddfffff', '8860145ac9fffff', '88618920adfffff',
   '8860145841fffff', '8861892c1dfffff', '8860145a25fffff', '88618925ddfffff', '8860145b15fffff', '8861892631fffff', '8861892c55fffff', '8861892607fffff',
     '88601451e5fffff', '88601459a9fffff', '8860145933fffff', '8861892623fffff', '886014586bfffff', '886014c939fffff', '8861892615fffff', '8861892cd9fffff',
       '886189268dfffff', '8860145137fffff', '8860145149fffff', '8860145833fffff', '8860145ad3fffff', '8861892693fffff', '8860145927fffff', '8861893483fffff',
         '88601451adfffff', '88601696e3fffff', '8861892f05fffff', '88618925e9fffff', '8861892561fffff', '8860145323fffff', '88618925e1fffff', '8861892cd3fffff',
           '88618921d7fffff', '88601458c9fffff', '8861892469fffff', '8861892519fffff', '8861892e15fffff', '8861892c81fffff', '88618924d3fffff', '8860145911fffff',
             '8861892e3bfffff', '88618925e3fffff', '886189241dfffff', '8861892687fffff', '8861892517fffff', '88618920d1fffff', '8861892511fffff', '88618925edfffff',
               '8861892f51fffff', '88618924d1fffff', '8861892017fffff', '886189275dfffff', '8861892cdbfffff', '8861892f55fffff', '8861892711fffff',
                 '8861892c23fffff', '8861892703fffff', '88618920c7fffff', '8860169651fffff', '8860144b69fffff', '8861892abbfffff', '8861892f5bfffff',
                   '886014583bfffff', '8860169741fffff', '88618926e3fffff', '8861892011fffff', '886189359dfffff', '8861892035fffff', '8861892d85fffff',
                     '8861892515fffff', '8861892507fffff', '886189250bfffff', '8861892c27fffff', '8860145ab1fffff', '88618926d5fffff', '8861892113fffff',
                       '886016975bfffff', '8861892695fffff', '8861892ab3fffff', '886016906bfffff', '8860145ab7fffff', '88618925cdfffff', '8861892599fffff',
                         '886189251bfffff', '886189258bfffff', '8861892581fffff', '88618924c5fffff', '88618925c5fffff', '8861892587fffff', '8861892557fffff',
                           '88618925d1fffff', '8860145a45fffff', '8861892589fffff', '88618925d7fffff', '8861892427fffff', '88618925d3fffff', '8861892663fffff',
                             '88618925ebfffff', '8861892487fffff', '88618925d5fffff', '886189258dfffff', '88618925c9fffff', '8861892425fffff', '886189259dfffff',
                               '8861892493fffff', '8861892435fffff', '886016966bfffff', '8860145ac5fffff', '8860145901fffff', '88618935bbfffff', '8860145ad1fffff',
                                 '8860145b25fffff', '8861892ddbfffff', '886189266bfffff', '8861892db9fffff', '8861892a9bfffff', '8860145825fffff', '8860145a03fffff',
                                   '8860145ae5fffff', '8860145325fffff', '88601696d5fffff', '886014c935fffff', '8860145349fffff', '886016968dfffff',
                                     '8860145331fffff', '8860169685fffff', '8861892ecbfffff', '8861892e9bfffff', '886189256dfffff', '88618920a5fffff',
                                       '8861892ed7fffff', '88618925b3fffff', '8861892c9dfffff', '8861892e55fffff', '8861892585fffff', '8861892533fffff',
                                         '8861892583fffff', '8861892c95fffff', '8861892095fffff', '886014594dfffff', '88618926c5fffff', '886014c903fffff',
                                           '8861892085fffff', '8860169649fffff', '886016965dfffff', '88618921c9fffff', '88618920c1fffff', '8861892709fffff',
                                             '8861892187fffff', '88618921a1fffff', '88618922a7fffff', '886189251dfffff', '8861892c21fffff', '886189229dfffff',
                                               '8860144b51fffff', '886014c9adfffff', '8860144a65fffff', '8861892287fffff')
and hour in ('17', '18', '19', '20', '21')
"""

#Load data into pandas table
df_apr_copy = pd.read_sql(q, connection)

In [55]:
#Load and view dataset
df_apr = df_apr_copy.copy()
df_apr = df_apr[df_apr['pickup_location_hex_8'].isin(hyderabad_new_zone)]
df_apr = df_apr.merge(hexes, on = 'pickup_location_hex_8', how = 'left')
df_apr.head()

Unnamed: 0,order_id,accept_to_pickup_distance,captain_location_hex_8,captain_location_latitude,captain_location_longitude,city_name,customer_location_hex_8,drop_location_hex_8,drop_location_latitude,drop_location_longitude,customer_location_latitude,customer_location_longitude,hhmmss,hour,quarter_hour,service_obj_service_name,time_bucket,weekday,yyyymmdd,order_status,cancel_reason,updated_epoch,event_type,captain_id,spd_fraud_flag,pickup_location_hex_8,Segment,Label
0,6446a9b103d5a46895f10acb,,8861892581fffff,12.940999,77.602692,Bangalore,886189258dfffff,88618925c7fffff,12.934619,77.616392,12.940338,77.608971,213921,21,2130,Auto,21:59,1,20230424,new,,1682352666090,rider_reject,6388c9c735021c49a5aace12,,886189258dfffff,S3,Zone
1,6446a9b4c8861032e1b00f0c,,8861892513fffff,12.930336,77.633701,Bangalore,88618925c9fffff,88618925cbfffff,12.915308,77.615959,12.921772,77.62162,213924,21,2130,Auto,21:59,1,20230424,new,,1682352599814,rider_reject,6256a03691957f550091fd03,,88618925cdfffff,S3,Holdout
2,6446aaa109a254684890493c,,88618925c3fffff,12.928213,77.609242,Bangalore,8861892589fffff,88618925cbfffff,12.917201,77.613471,12.930253,77.608222,214321,21,2130,Auto,21:59,1,20230424,new,,1682352950583,rider_reject,60e90d16e5cb35659758a4ee,,8861892589fffff,S3,Zone
3,6446ab20ec23876d6c075ce5,,88618924e7fffff,12.905938,77.58569,Bangalore,88618925d3fffff,88618925d7fffff,12.927814,77.595482,12.915991,77.587585,214528,21,2145,Auto,21:59,1,20230424,new,,1682352958380,rider_busy,5c5833fef2edc733674d3344,,88618925d3fffff,S3,Zone
4,6446abb32a803956b692a95b,,88618925c3fffff,12.925224,77.608837,Bangalore,8861892589fffff,88618925e9fffff,12.939089,77.626205,12.932362,77.608803,214755,21,2145,Auto,21:59,1,20230424,new,,1682353178904,rider_busy,635b53f2a6f3c7c933675a60,,8861892589fffff,S3,Zone


In [56]:
#Create a count column
df_apr['count'] = 1

#Create a column to identify total pings and accepted ping
df_apr['total_pings'] = np.where(df_apr['event_type'].isin(['accepted', 'rider_busy', 'rider_reject']), 1, 0)
df_apr['accepted_pings'] = np.where(df_apr['event_type'].isin(['accepted']), 1, 0)
df_me = df_apr[(df_apr['order_status'] == 'dropped') & (df_apr['spd_fraud_flag'] != True)]

#Groupby on date and hour to get the APR and ME on an hourly basis
apr_hyderabad = df_apr.groupby(['yyyymmdd', 'hour', 'Segment', 'Label'])[['total_pings', 'accepted_pings']].sum().reset_index()
apr_hyderabad['APR'] = round(apr_hyderabad['accepted_pings']/apr_hyderabad['total_pings'], 4)
me_hyderabad = df_me.groupby(['yyyymmdd', 'hour', 'Segment', 'Label'])['order_id'].nunique().reset_index()
me_hyderabad = me_hyderabad.rename(columns={'order_id':'net_orders'})
apr_hyderabad = apr_hyderabad.merge(me_hyderabad, on = ['yyyymmdd', 'hour', 'Segment', 'Label'], how = 'left')
apr_hyderabad['ME'] = round(apr_hyderabad['total_pings']/apr_hyderabad['net_orders'], 2)

#Merge APR and ME with metrics_df
metrics_df = metrics_df.merge(apr_hyderabad, on = ['yyyymmdd', 'hour', 'Segment', 'Label'], how = 'left')
metrics_df.head()

Unnamed: 0,yyyymmdd,hour,Segment,Label,OCARA%,COBRA%,COBRM%,G2N,expiry_mapped%,Stockout%,total,FM,eta,number_of_captains,ttc_cobra,ttc_cobrm,ttc_ocara,tta,total_pings,accepted_pings,APR,net_orders,ME
0,20230420,17,S1,Holdout,0.1141,0.2886,0.0034,0.4262,0.151,0.0134,298,0.813923,2.912752,43.0,83.2675,156.331,340.537,10.347,3677,210,0.0571,127.0,28.95
1,20230420,17,S1,Zone,0.0861,0.3795,0.0,0.2971,0.2372,0.0,801,11.541598,2.700375,84.0,92.433,,253.351,24.178,11065,378,0.0342,238.0,46.49
2,20230420,17,S2,Holdout,0.105,0.3094,0.0,0.3149,0.2597,0.011,181,23.619686,2.729282,20.0,80.849,,317.3615,9.887,2180,98,0.045,57.0,38.25
3,20230420,17,S2,Zone,0.0966,0.4034,0.0,0.2159,0.2841,0.0,176,0.85497,2.823864,24.0,113.851,,210.576,15.913,2306,70,0.0304,38.0,60.68
4,20230420,17,S3,Holdout,0.1258,0.2742,0.0,0.3888,0.2045,0.0045,445,0.993022,3.125843,47.0,111.92,,198.8575,11.1755,5840,284,0.0486,173.0,33.76


In [28]:
#Query data to calculate gsr
q = """
select 
        pickup_location_hex_8, yyyymmdd, hour,
        count(distinct order_id) as Gross_Orders,
        count(case when (time_diff >=20 or time_diff is NULL) then customer_id end) as GSR,
        count(distinct case when event_type in ('dropped') and spd_fraud_flag != True then order_id end) as net_orders,
        (case when count(distinct case when event_type in ('dropped') and spd_fraud_flag != True then order_id end) = 0 then 0 else
        sum(case when event_type in ('accepted', 'rider_busy', 'rider_reject') then 1 else 0 end)
        /cast(count(distinct case when event_type in ('dropped') and spd_fraud_flag != True then order_id end) as real) end) as ME
from
(
select
        *, 
        date_diff('minute',  LAG(FROM_UNIXTIME(CAST(epoch AS double) /1000)) OVER(PARTITION BY yyyymmdd, hour, city_name, customer_id, order_type, customer_location_hex_8
        ORDER BY yyyymmdd, customer_id, city_name, customer_location_hex_8, order_type, epoch), FROM_UNIXTIME(CAST(epoch AS double) /1000)) as time_diff,
        date_diff('minute',  LAG(FROM_UNIXTIME(CAST(epoch AS double) /1000)) OVER(PARTITION BY yyyymmdd, hour, city_name, customer_id, order_type, order_status, customer_location_hex_8, spd_fraud_flag 
        ORDER BY yyyymmdd, customer_id, city_name, customer_location_hex_8, order_type, epoch) , FROM_UNIXTIME(CAST(epoch AS double) /1000)) as time_diff_drop
from
(
select 
    order_id, city_name, order_type, cancel_reason, yyyymmdd, customer_id, epoch, substr(hhmmss,1,2) as hour,
    row_number() over (partition by order_id order by updated_epoch desc) as Rank_1,
    
    event_type, order_status, captain_id, pickup_cluster, customer_location_hex_8, spd_fraud_flag, pickup_location_hex_8

from
    orders.order_logs_immutable 
where
    yyyymmdd >= '20230420'
    and yyyymmdd <= '20230501'
    and service_obj_service_name IN ('Auto')
    and pickup_location_hex_8 in ('88618925c3fffff', '88618925cbfffff', '88618924bdfffff', '88618925c7fffff', '88618925c1fffff', '8861892513fffff', '8860145a0dfffff',
 '886014586dfffff', '8861892cc9fffff', '8861892ec9fffff', '8861892c19fffff', '886189263dfffff', '8861892cddfffff', '8860145ac9fffff', '88618920adfffff',
   '8860145841fffff', '8861892c1dfffff', '8860145a25fffff', '88618925ddfffff', '8860145b15fffff', '8861892631fffff', '8861892c55fffff', '8861892607fffff',
     '88601451e5fffff', '88601459a9fffff', '8860145933fffff', '8861892623fffff', '886014586bfffff', '886014c939fffff', '8861892615fffff', '8861892cd9fffff',
       '886189268dfffff', '8860145137fffff', '8860145149fffff', '8860145833fffff', '8860145ad3fffff', '8861892693fffff', '8860145927fffff', '8861893483fffff',
         '88601451adfffff', '88601696e3fffff', '8861892f05fffff', '88618925e9fffff', '8861892561fffff', '8860145323fffff', '88618925e1fffff', '8861892cd3fffff',
           '88618921d7fffff', '88601458c9fffff', '8861892469fffff', '8861892519fffff', '8861892e15fffff', '8861892c81fffff', '88618924d3fffff', '8860145911fffff',
             '8861892e3bfffff', '88618925e3fffff', '886189241dfffff', '8861892687fffff', '8861892517fffff', '88618920d1fffff', '8861892511fffff', '88618925edfffff',
               '8861892f51fffff', '88618924d1fffff', '8861892017fffff', '886189275dfffff', '8861892cdbfffff', '8861892f55fffff', '8861892711fffff',
                 '8861892c23fffff', '8861892703fffff', '88618920c7fffff', '8860169651fffff', '8860144b69fffff', '8861892abbfffff', '8861892f5bfffff',
                   '886014583bfffff', '8860169741fffff', '88618926e3fffff', '8861892011fffff', '886189359dfffff', '8861892035fffff', '8861892d85fffff',
                     '8861892515fffff', '8861892507fffff', '886189250bfffff', '8861892c27fffff', '8860145ab1fffff', '88618926d5fffff', '8861892113fffff',
                       '886016975bfffff', '8861892695fffff', '8861892ab3fffff', '886016906bfffff', '8860145ab7fffff', '88618925cdfffff', '8861892599fffff',
                         '886189251bfffff', '886189258bfffff', '8861892581fffff', '88618924c5fffff', '88618925c5fffff', '8861892587fffff', '8861892557fffff',
                           '88618925d1fffff', '8860145a45fffff', '8861892589fffff', '88618925d7fffff', '8861892427fffff', '88618925d3fffff', '8861892663fffff',
                             '88618925ebfffff', '8861892487fffff', '88618925d5fffff', '886189258dfffff', '88618925c9fffff', '8861892425fffff', '886189259dfffff',
                               '8861892493fffff', '8861892435fffff', '886016966bfffff', '8860145ac5fffff', '8860145901fffff', '88618935bbfffff', '8860145ad1fffff',
                                 '8860145b25fffff', '8861892ddbfffff', '886189266bfffff', '8861892db9fffff', '8861892a9bfffff', '8860145825fffff', '8860145a03fffff',
                                   '8860145ae5fffff', '8860145325fffff', '88601696d5fffff', '886014c935fffff', '8860145349fffff', '886016968dfffff',
                                     '8860145331fffff', '8860169685fffff', '8861892ecbfffff', '8861892e9bfffff', '886189256dfffff', '88618920a5fffff',
                                       '8861892ed7fffff', '88618925b3fffff', '8861892c9dfffff', '8861892e55fffff', '8861892585fffff', '8861892533fffff',
                                         '8861892583fffff', '8861892c95fffff', '8861892095fffff', '886014594dfffff', '88618926c5fffff', '886014c903fffff',
                                           '8861892085fffff', '8860169649fffff', '886016965dfffff', '88618921c9fffff', '88618920c1fffff', '8861892709fffff',
                                             '8861892187fffff', '88618921a1fffff', '88618922a7fffff', '886189251dfffff', '8861892c21fffff', '886189229dfffff',
                                               '8860144b51fffff', '886014c9adfffff', '8860144a65fffff', '8861892287fffff')
and hour in ('17', '18', '19', '20', '21')
))
group by 1,2,3
"""

df_gsr_copy = pd.read_sql(q, connection)

In [57]:
#Create a copy of the dataset and view it
df_gsr = df_gsr_copy.copy()
df_gsr = df_gsr[df_gsr['pickup_location_hex_8'].isin(hyderabad_new_zone)]
df_gsr = df_gsr.merge(hexes, on = 'pickup_location_hex_8', how = 'left')
df_gsr.head()

Unnamed: 0,pickup_location_hex_8,yyyymmdd,hour,Gross_Orders,GSR,net_orders,ME,Segment,Label
0,8861892435fffff,20230426,21,13,12,5,30.6,S3,Holdout
1,8861892585fffff,20230422,17,41,21,3,96.333336,S4,Zone
2,8861892487fffff,20230420,18,45,34,23,31.043478,S3,Holdout
3,8861892e9bfffff,20230430,21,478,258,7,310.0,S4,Holdout
4,88618920c7fffff,20230429,19,3,3,0,0.0,S2,Holdout


In [58]:
#Calculate GSR2Net
df_gsr_grouped = df_gsr.groupby(['yyyymmdd', 'hour', 'Segment', 'Label'])[['GSR', 'net_orders']].sum().reset_index()
df_gsr_grouped['GSR2Net'] = round(df_gsr_grouped['net_orders']/df_gsr_grouped['GSR'], 4)

#Merge GSR with metrics_df
metrics_df = metrics_df.merge(df_gsr_grouped, on = ['yyyymmdd', 'hour', 'Segment', 'Label'], how = 'left')
metrics_df.head()

Unnamed: 0,yyyymmdd,hour,Segment,Label,OCARA%,COBRA%,COBRM%,G2N,expiry_mapped%,Stockout%,total,FM,eta,number_of_captains,ttc_cobra,ttc_cobrm,ttc_ocara,tta,total_pings,accepted_pings,APR,net_orders_x,ME,GSR,net_orders_y,GSR2Net
0,20230420,17,S1,Holdout,0.1141,0.2886,0.0034,0.4262,0.151,0.0134,298,0.813923,2.912752,43.0,83.2675,156.331,340.537,10.347,3677,210,0.0571,127.0,28.95,231,127,0.5498
1,20230420,17,S1,Zone,0.0861,0.3795,0.0,0.2971,0.2372,0.0,801,11.541598,2.700375,84.0,92.433,,253.351,24.178,11065,378,0.0342,238.0,46.49,585,238,0.4068
2,20230420,17,S2,Holdout,0.105,0.3094,0.0,0.3149,0.2597,0.011,181,23.619686,2.729282,20.0,80.849,,317.3615,9.887,2180,98,0.045,57.0,38.25,129,57,0.4419
3,20230420,17,S2,Zone,0.0966,0.4034,0.0,0.2159,0.2841,0.0,176,0.85497,2.823864,24.0,113.851,,210.576,15.913,2306,70,0.0304,38.0,60.68,111,38,0.3423
4,20230420,17,S3,Holdout,0.1258,0.2742,0.0,0.3888,0.2045,0.0045,445,0.993022,3.125843,47.0,111.92,,198.8575,11.1755,5840,284,0.0486,173.0,33.76,355,173,0.4873


In [38]:
#Query data from supply_login_servicelevel_kpi
q = """
select
        location as pickup_location_hex_8, yyyymmdd, substr(hhmm, 1, 2) as hour,
        sum(case when cast(status AS varchar) IN ('2','3','6','7','8','10') then duration end) as login_time,
        sum(case when cast(status AS varchar) IN ('2') then duration end) as idle_time
from 
(select 
        a.*, city, cluster
        
from 
        hive.datasets.supplycursory_history a 
        left join datasets.city_cluster_hex b 
        on a.location = b.hex_id and a.resolution = cast(b.resolution as varchar)
where 
        yyyymmdd >= '20230420'
        and yyyymmdd <= '20230501'
        and city in ('Bangalore')
        and a.resolution = '8' 
        and service in ('auto')
        and location in ('88618925c3fffff', '88618925cbfffff', '88618924bdfffff', '88618925c7fffff', '88618925c1fffff', '8861892513fffff', '8860145a0dfffff',
 '886014586dfffff', '8861892cc9fffff', '8861892ec9fffff', '8861892c19fffff', '886189263dfffff', '8861892cddfffff', '8860145ac9fffff', '88618920adfffff',
   '8860145841fffff', '8861892c1dfffff', '8860145a25fffff', '88618925ddfffff', '8860145b15fffff', '8861892631fffff', '8861892c55fffff', '8861892607fffff',
     '88601451e5fffff', '88601459a9fffff', '8860145933fffff', '8861892623fffff', '886014586bfffff', '886014c939fffff', '8861892615fffff', '8861892cd9fffff',
       '886189268dfffff', '8860145137fffff', '8860145149fffff', '8860145833fffff', '8860145ad3fffff', '8861892693fffff', '8860145927fffff', '8861893483fffff',
         '88601451adfffff', '88601696e3fffff', '8861892f05fffff', '88618925e9fffff', '8861892561fffff', '8860145323fffff', '88618925e1fffff', '8861892cd3fffff',
           '88618921d7fffff', '88601458c9fffff', '8861892469fffff', '8861892519fffff', '8861892e15fffff', '8861892c81fffff', '88618924d3fffff', '8860145911fffff',
             '8861892e3bfffff', '88618925e3fffff', '886189241dfffff', '8861892687fffff', '8861892517fffff', '88618920d1fffff', '8861892511fffff', '88618925edfffff',
               '8861892f51fffff', '88618924d1fffff', '8861892017fffff', '886189275dfffff', '8861892cdbfffff', '8861892f55fffff', '8861892711fffff',
                 '8861892c23fffff', '8861892703fffff', '88618920c7fffff', '8860169651fffff', '8860144b69fffff', '8861892abbfffff', '8861892f5bfffff',
                   '886014583bfffff', '8860169741fffff', '88618926e3fffff', '8861892011fffff', '886189359dfffff', '8861892035fffff', '8861892d85fffff',
                     '8861892515fffff', '8861892507fffff', '886189250bfffff', '8861892c27fffff', '8860145ab1fffff', '88618926d5fffff', '8861892113fffff',
                       '886016975bfffff', '8861892695fffff', '8861892ab3fffff', '886016906bfffff', '8860145ab7fffff', '88618925cdfffff', '8861892599fffff',
                         '886189251bfffff', '886189258bfffff', '8861892581fffff', '88618924c5fffff', '88618925c5fffff', '8861892587fffff', '8861892557fffff',
                           '88618925d1fffff', '8860145a45fffff', '8861892589fffff', '88618925d7fffff', '8861892427fffff', '88618925d3fffff', '8861892663fffff',
                             '88618925ebfffff', '8861892487fffff', '88618925d5fffff', '886189258dfffff', '88618925c9fffff', '8861892425fffff', '886189259dfffff',
                               '8861892493fffff', '8861892435fffff', '886016966bfffff', '8860145ac5fffff', '8860145901fffff', '88618935bbfffff', '8860145ad1fffff',
                                 '8860145b25fffff', '8861892ddbfffff', '886189266bfffff', '8861892db9fffff', '8861892a9bfffff', '8860145825fffff', '8860145a03fffff',
                                   '8860145ae5fffff', '8860145325fffff', '88601696d5fffff', '886014c935fffff', '8860145349fffff', '886016968dfffff',
                                     '8860145331fffff', '8860169685fffff', '8861892ecbfffff', '8861892e9bfffff', '886189256dfffff', '88618920a5fffff',
                                       '8861892ed7fffff', '88618925b3fffff', '8861892c9dfffff', '8861892e55fffff', '8861892585fffff', '8861892533fffff',
                                         '8861892583fffff', '8861892c95fffff', '8861892095fffff', '886014594dfffff', '88618926c5fffff', '886014c903fffff',
                                           '8861892085fffff', '8860169649fffff', '886016965dfffff', '88618921c9fffff', '88618920c1fffff', '8861892709fffff',
                                             '8861892187fffff', '88618921a1fffff', '88618922a7fffff', '886189251dfffff', '8861892c21fffff', '886189229dfffff',
                                               '8860144b51fffff', '886014c9adfffff', '8860144a65fffff', '8861892287fffff')
and substr(hhmm, 1, 2) in ('17', '18', '19', '20', '21')
)
group by 1,2,3
"""

#Load data into pandas table
df_tse_copy = pd.read_sql(q, connection)

In [59]:
#Create a copy of the df and view it
df_tse = df_tse_copy.copy()
df_tse = df_tse[df_tse['pickup_location_hex_8'].isin(hyderabad_new_zone)]
df_tse = df_tse.merge(hexes, on = 'pickup_location_hex_8', how = 'left')
df_tse.head()

Unnamed: 0,pickup_location_hex_8,yyyymmdd,hour,login_time,idle_time,Segment,Label
0,8860145a0dfffff,20230423,18,41287.0,7847.0,S1,Holdout
1,8860145a0dfffff,20230426,21,20349.0,6223.0,S1,Holdout
2,8860145a0dfffff,20230423,20,15438.0,1260.0,S1,Holdout
3,8861892f55fffff,20230424,19,6724.0,2511.0,S2,Holdout
4,8861892f55fffff,20230429,20,12992.0,6450.0,S2,Holdout


In [60]:
#Calculate time_spent_earning
df_tse_grouped = df_tse.groupby(['yyyymmdd', 'hour', 'Segment', 'Label'])[['login_time', 'idle_time']].sum().reset_index()
df_tse_grouped['time_spent_earning'] = round((df_tse_grouped['login_time'] - df_tse_grouped['idle_time'])/df_tse_grouped['login_time'], 4)
df_tse_grouped.head()

Unnamed: 0,yyyymmdd,hour,Segment,Label,login_time,idle_time,time_spent_earning
0,20230420,17,S1,Holdout,893717.0,518935.0,0.4194
1,20230420,17,S1,Zone,800880.0,73289.0,0.9085
2,20230420,17,S2,Holdout,472340.0,249663.0,0.4714
3,20230420,17,S2,Zone,223182.0,30846.0,0.8618
4,20230420,17,S3,Holdout,888605.0,373839.0,0.5793


In [61]:
#Merge GSR with metrics_df
metrics_df = metrics_df.merge(df_tse_grouped, on = ['yyyymmdd', 'hour', 'Segment', 'Label'], how = 'left')
metrics_df.head()

Unnamed: 0,yyyymmdd,hour,Segment,Label,OCARA%,COBRA%,COBRM%,G2N,expiry_mapped%,Stockout%,total,FM,eta,number_of_captains,ttc_cobra,ttc_cobrm,ttc_ocara,tta,total_pings,accepted_pings,APR,net_orders_x,ME,GSR,net_orders_y,GSR2Net,login_time,idle_time,time_spent_earning
0,20230420,17,S1,Holdout,0.1141,0.2886,0.0034,0.4262,0.151,0.0134,298,0.813923,2.912752,43.0,83.2675,156.331,340.537,10.347,3677,210,0.0571,127.0,28.95,231,127,0.5498,893717.0,518935.0,0.4194
1,20230420,17,S1,Zone,0.0861,0.3795,0.0,0.2971,0.2372,0.0,801,11.541598,2.700375,84.0,92.433,,253.351,24.178,11065,378,0.0342,238.0,46.49,585,238,0.4068,800880.0,73289.0,0.9085
2,20230420,17,S2,Holdout,0.105,0.3094,0.0,0.3149,0.2597,0.011,181,23.619686,2.729282,20.0,80.849,,317.3615,9.887,2180,98,0.045,57.0,38.25,129,57,0.4419,472340.0,249663.0,0.4714
3,20230420,17,S2,Zone,0.0966,0.4034,0.0,0.2159,0.2841,0.0,176,0.85497,2.823864,24.0,113.851,,210.576,15.913,2306,70,0.0304,38.0,60.68,111,38,0.3423,223182.0,30846.0,0.8618
4,20230420,17,S3,Holdout,0.1258,0.2742,0.0,0.3888,0.2045,0.0045,445,0.993022,3.125843,47.0,111.92,,198.8575,11.1755,5840,284,0.0486,173.0,33.76,355,173,0.4873,888605.0,373839.0,0.5793


In [62]:
#Create a copy of the dataset and view it
df_captains_mapped = df_copy.copy()
df_captains_mapped = df_captains_mapped[df_captains_mapped['pickup_location_hex_8'].isin(hyderabad_new_zone)]
df_captains_mapped = df_captains_mapped.merge(hexes, on = 'pickup_location_hex_8', how = 'left')
df_captains_mapped.head()

Unnamed: 0,order_id,accept_to_pickup_distance,city_name,hhmmss,hour,service_obj_service_name,time_bucket,weekday,yyyymmdd,order_status,cancel_reason,eta,map_riders,pickup_location_hex_8,Segment,Label
0,6442a86d78e11a7274caec0f,,Bangalore,204453,20,Auto,20:59,5,20230421,customerCancelled,order cancelled before rider accepted,6.0,"[""60ebffe0e5cb35096b5a4eea"",""62fdaaca020cc0112...",8861892589fffff,S3,Zone
1,6442a89f44636e16f07f81fa,,Bangalore,204543,20,Auto,20:59,5,20230421,expired,,4.0,"[""61909b14ae4f730f1a1898cd"",""60e824c6569b260ad...",88618925cbfffff,S1,Zone
2,6442a8f35902af3a45018d49,,Bangalore,204707,20,Auto,20:59,5,20230421,customerCancelled,order cancelled before rider accepted,0.0,"[""63f89547f22ead414d84a11f"",""62209ca4a657e2da0...",88618925c1fffff,S1,Zone
3,6442a92574b9cf25c0a6b851,,Bangalore,204757,20,Auto,20:59,5,20230421,expired,,0.0,"[""6173b373fbce662b0a9fe646"",""63f641e7b85f46570...",886189258dfffff,S3,Zone
4,6442a932acdc254ceefde991,,Bangalore,204810,20,Auto,20:59,5,20230421,expired,,7.0,"[""64099ef5371734f82655d906"",""5fa80af236c93c857...",886189251bfffff,S3,Zone


In [63]:
#Get the number of captains mapped
df_captains_mapped['map_riders'] = df_captains_mapped['map_riders'].apply(lambda x: json.loads(x))
df_captains_mapped_grouped = df_captains_mapped.groupby(['yyyymmdd', 'hour', 'Segment', 'Label'])['map_riders'].apply(list).reset_index()
df_captains_mapped_grouped['map_riders'] = df_captains_mapped_grouped['map_riders'].apply(lambda x: list(set([item for sublist in x for item in sublist])))
master_list = []
for x in df_captains_mapped_grouped['map_riders']:
    captains_list = []
    for captain in x:
        if captain in captains_list:
            continue
        else:
            captains_list.append(captain)
    master_list.append(captains_list)
df_captains_mapped_grouped['map_riders'] = master_list
df_captains_mapped_grouped['number_of_mapped_captains'] = df_captains_mapped_grouped['map_riders'].apply(lambda x: len(x))
metrics_df = metrics_df.merge(df_captains_mapped_grouped[['yyyymmdd', 'hour', 'Segment', 'Label', 'number_of_mapped_captains']], 
on = ['yyyymmdd', 'hour', 'Segment', 'Label'], how = 'left')
metrics_df.head()

Unnamed: 0,yyyymmdd,hour,Segment,Label,OCARA%,COBRA%,COBRM%,G2N,expiry_mapped%,Stockout%,total,FM,eta,number_of_captains,ttc_cobra,ttc_cobrm,ttc_ocara,tta,total_pings,accepted_pings,APR,net_orders_x,ME,GSR,net_orders_y,GSR2Net,login_time,idle_time,time_spent_earning,number_of_mapped_captains
0,20230420,17,S1,Holdout,0.1141,0.2886,0.0034,0.4262,0.151,0.0134,298,0.813923,2.912752,43.0,83.2675,156.331,340.537,10.347,3677,210,0.0571,127.0,28.95,231,127,0.5498,893717.0,518935.0,0.4194,2253
1,20230420,17,S1,Zone,0.0861,0.3795,0.0,0.2971,0.2372,0.0,801,11.541598,2.700375,84.0,92.433,,253.351,24.178,11065,378,0.0342,238.0,46.49,585,238,0.4068,800880.0,73289.0,0.9085,1022
2,20230420,17,S2,Holdout,0.105,0.3094,0.0,0.3149,0.2597,0.011,181,23.619686,2.729282,20.0,80.849,,317.3615,9.887,2180,98,0.045,57.0,38.25,129,57,0.4419,472340.0,249663.0,0.4714,1344
3,20230420,17,S2,Zone,0.0966,0.4034,0.0,0.2159,0.2841,0.0,176,0.85497,2.823864,24.0,113.851,,210.576,15.913,2306,70,0.0304,38.0,60.68,111,38,0.3423,223182.0,30846.0,0.8618,360
4,20230420,17,S3,Holdout,0.1258,0.2742,0.0,0.3888,0.2045,0.0045,445,0.993022,3.125843,47.0,111.92,,198.8575,11.1755,5840,284,0.0486,173.0,33.76,355,173,0.4873,888605.0,373839.0,0.5793,2370


In [64]:
metrics_df.to_csv('metrics_holdout_hsr_upper.csv', index = False)