In [32]:
import pandas as pd

# Load the cleaned bookings table. Every cell below reads this `df` and relies
# on its columns (e.g. 'Booking Status', 'is_cancelled', 'hour', 'Vehicle Type').
# The path is relative to the notebook's working directory.
df = pd.read_csv('data_cleaned.csv')

In [33]:
# Share of all bookings per 'Booking Status', expressed in percent.
(
    df['Booking Status']
    .value_counts(normalize=True)
    .mul(100)
)

Booking Status
Completed                62.0
Cancelled by Driver      18.0
No Driver Found           7.0
Cancelled by Customer     7.0
Incomplete                6.0
Name: proportion, dtype: float64

In [34]:
# Among cancelled rides only (is_cancelled == 1): percentage split by
# 'cancellation_type'.
(
    df.loc[df['is_cancelled'] == 1, 'cancellation_type']
    .value_counts(normalize=True)
    .mul(100)
)

cancellation_type
Driver      72.0
Customer    28.0
Name: proportion, dtype: float64

In [35]:
# Number of bookings per 'hour' value (counts non-null 'Booking ID' entries).
(
    df['Booking ID']
    .groupby(df['hour'])
    .count()
)

hour
0      1373
1      1360
2      1339
3      1383
4      1321
5      2786
6      4160
7      5450
8      6861
9      8234
10     9577
11     8390
12     7006
13     5470
14     7031
15     8202
16     9633
17    11044
18    12397
19    11047
20     9630
21     8103
22     5441
23     2762
Name: Booking ID, dtype: int64

In [36]:
# Cancellation rate per 'hour' value, in percent: mean of the 'is_cancelled'
# column within each hour, scaled by 100.
(
    df
    .groupby('hour')['is_cancelled']
    .mean()
    .mul(100)
)

hour
0     24.763292
1     25.294118
2     22.180732
3     25.668836
4     25.586677
5     26.417803
6     24.158654
7     25.357798
8     24.981781
9     24.908914
10    24.903414
11    24.052443
12    24.735941
13    25.246801
14    26.084483
15    25.676664
16    24.519880
17    24.891344
18    25.377107
19    25.527292
20    24.589823
21    23.978773
22    25.565153
23    25.416365
Name: is_cancelled, dtype: float64

In [37]:
# Cancellation rate (percent) for each value of the 'is_weekend' column, to
# compare weekday and weekend bookings.
(
    df
    .groupby('is_weekend')['is_cancelled']
    .mean()
    .mul(100)
)

is_weekend
0    25.059780
1    24.850955
Name: is_cancelled, dtype: float64

In [38]:
# Per vehicle type: booking volume, completion rate and cancellation rate.
# NOTE: unlike the percentage cells elsewhere in this notebook, the two rates
# here are left as fractions (mean of the flag columns), not multiplied by 100.
df.groupby('Vehicle Type').agg(
    total_bookings=pd.NamedAgg(column='Booking ID', aggfunc='count'),
    completion_rate=pd.NamedAgg(column='is_completed', aggfunc='mean'),
    cancellation_rate=pd.NamedAgg(column='is_cancelled', aggfunc='mean'),
)

Unnamed: 0_level_0,total_bookings,completion_rate,cancellation_rate
Vehicle Type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Auto,37419,0.618803,0.249152
Bike,22517,0.623262,0.25101
Go Mini,29806,0.622324,0.249178
Go Sedan,27141,0.614421,0.252865
Premier Sedan,18111,0.62128,0.249351
Uber XL,4449,0.625534,0.244774
eBike,10557,0.620536,0.249124


In [39]:
# Mean of the 'Avg VTAT' and 'Avg CTAT' columns for each vehicle type.
(
    df
    .groupby('Vehicle Type')[['Avg VTAT', 'Avg CTAT']]
    .agg('mean')
)

Unnamed: 0_level_0,Avg VTAT,Avg CTAT
Vehicle Type,Unnamed: 1_level_1,Unnamed: 2_level_1
Auto,8.448299,29.142302
Bike,8.500043,29.199297
Go Mini,8.468101,29.159237
Go Sedan,8.401596,29.040217
Premier Sedan,8.43875,29.218741
Uber XL,8.575762,29.209852
eBike,8.478422,29.177204


In [40]:
# Total 'Booking Value' per vehicle type, largest contributor first.
(
    df
    .groupby('Vehicle Type')['Booking Value']
    .sum()
    .sort_values(ascending=False)
)

Vehicle Type
Auto             12878422.0
Go Mini          10338496.0
Go Sedan          9369719.0
Bike              7837697.0
Premier Sedan     6275332.0
eBike             3618485.0
Uber XL           1528032.0
Name: Booking Value, dtype: float64

In [41]:
# Average of the per-row 'revenue_per_km' column for each vehicle type.
(
    df
    .groupby('Vehicle Type')['revenue_per_km']
    .agg('mean')
)

Vehicle Type
Auto             38.085136
Bike             38.168779
Go Mini          37.642931
Go Sedan         38.220880
Premier Sedan    37.855816
Uber XL          37.932004
eBike            36.314851
Name: revenue_per_km, dtype: float64

In [42]:
# Percentage breakdown of the stated reason on rides whose
# 'cancellation_type' is 'Customer'.
(
    df.loc[df['cancellation_type'] == 'Customer', 'Reason for cancelling by Customer']
    .value_counts(normalize=True)
    .mul(100)
)

Reason for cancelling by Customer
Wrong Address                                   22.495238
Change of plans                                 22.409524
Driver is not moving towards pickup location    22.238095
Driver asked to cancel                          21.857143
AC is not working                               11.000000
Name: proportion, dtype: float64

In [43]:
# Percentage breakdown of the stated reason on rides whose
# 'cancellation_type' is 'Driver'.
(
    df.loc[df['cancellation_type'] == 'Driver', 'Driver Cancellation Reason']
    .value_counts(normalize=True)
    .mul(100)
)

Driver Cancellation Reason
Customer related issue                 25.322222
The customer was coughing/sick         25.003704
Personal & Car related issues          24.911111
More than permitted people in there    24.762963
Name: proportion, dtype: float64

In [44]:
# Mean customer and driver ratings split by the 'is_completed' flag.
# In the recorded output the is_completed == 0 row is NaN for both columns,
# so this comparison carries no information for non-completed rides.
(
    df
    .groupby('is_completed')[['Customer Rating', 'Driver Ratings']]
    .agg('mean')
)

Unnamed: 0_level_0,Customer Rating,Driver Ratings
is_completed,Unnamed: 1_level_1,Unnamed: 2_level_1
0,,
1,4.404584,4.230992


In [45]:
# Mean customer and driver ratings per vehicle type, restricted to completed
# rides (is_completed == 1).
(
    df.loc[df['is_completed'] == 1]
    .groupby('Vehicle Type')[['Customer Rating', 'Driver Ratings']]
    .agg('mean')
)

Unnamed: 0_level_0,Customer Rating,Driver Ratings
Vehicle Type,Unnamed: 1_level_1,Unnamed: 2_level_1
Auto,4.402,4.232369
Bike,4.40394,4.230056
Go Mini,4.404297,4.227694
Go Sedan,4.409996,4.231812
Premier Sedan,4.403457,4.234865
Uber XL,4.404851,4.23834
eBike,4.403954,4.225614


In [46]:
# Cancellation rate (percent) across quintiles of 'Avg VTAT'.
# pd.qcut(..., q=5) splits 'Avg VTAT' into five equal-frequency bins and returns
# a categorical; rows with a missing 'Avg VTAT' (if any) get no bin and are
# left out of the groupby.
# observed=False is passed explicitly: grouping by a categorical without it
# triggers a pandas FutureWarning because the default is changing. Setting it
# keeps the current output and silences the warning.
(
    df
    .groupby(pd.qcut(df['Avg VTAT'], q=5), observed=False)['is_cancelled']
    .mean()
    .mul(100)
)

  df.groupby(pd.qcut(df['Avg VTAT'], 5))['is_cancelled'].mean() * 100


Avg VTAT
(1.999, 4.8]    19.236999
(4.8, 7.1]      30.475636
(7.1, 9.4]      30.832335
(9.4, 11.9]     33.297449
(11.9, 20.0]    20.924336
Name: is_cancelled, dtype: float64