In [22]:
#Import libraries
from pyhive import presto
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import warnings
import h3
from shapely.geometry import Polygon, Point
#Silence every warning for the rest of the session (blanket filter, so deprecation notices are hidden too)
warnings.filterwarnings('ignore')
#Allow up to 500 columns to be shown when a DataFrame is displayed
pd.set_option('display.max_columns', 500)

In [23]:
#Define presto connection settings
#Each value can be overridden through an environment variable (PRESTO_HOST, PRESTO_PORT,
#PRESTO_USERNAME) so the notebook can be re-run by another user without editing this cell.
#The defaults are the values that were previously hardcoded here.
import os

presto_host = os.environ.get('PRESTO_HOST', 'presto-gateway.serving.data.plectrum.dev')
presto_port = os.environ.get('PRESTO_PORT', '80')
username = os.environ.get('PRESTO_USERNAME', 'aditya.bhattar@rapido.bike')

#Create connection to presto host
connection = presto.connect(presto_host,presto_port,username = username)

In [24]:
#Query data from order_logs_snapshot
#Selects Delhi 'Auto' orders with yyyymmdd between 20230605 and 20230611, hours 17 to 21 only.
#NOTE(review): the result lands in df_copy, the same name the later Hyderabad query assigns to.

q = """
select order_id, accept_to_pickup_distance, city_name, customer_location_hex_8, customer_location_latitude, 
customer_location_longitude, hhmmss, hour, quarter_hour, service_obj_service_name, time_bucket, weekday, yyyymmdd,
order_status, cancel_reason, eta, map_riders, drop_location_hex_8, pickup_location_hex_8, distance_final_distance
from orders.order_logs_snapshot
where yyyymmdd >= '20230605'
and yyyymmdd <= '20230611'
and city_name in ('Delhi')
and service_obj_service_name = 'Auto'
and hour in ('17', '18', '19', '20', '21')
"""


#Load data into pandas table
df_copy = pd.read_sql(q, connection)

In [25]:
#Count distinct orders per pickup hex and show the 20 busiest hexes
grouped = (
    df_copy.groupby(['pickup_location_hex_8'])['order_id']
    .nunique()
    .reset_index()
    .sort_values(by = 'order_id', ascending = False)
)
grouped.head(20)

Unnamed: 0,pickup_location_hex_8,order_id
1373,883da11a83fffff,2473
751,883da111a1fffff,2357
755,883da111a9fffff,2240
1375,883da11a87fffff,2114
741,883da11185fffff,1850
1383,883da11a99fffff,1805
1399,883da11abdfffff,1663
757,883da111adfffff,1579
756,883da111abfffff,1310
1197,883da1180bfffff,1241


In [21]:
#Read the hex batch file and collect the distinct values of its 'hex_id' column
#NOTE(review): the name 'hexes' is reassigned to plain lists by later cells
hexes = pd.read_csv('hyd_batch.csv')
hexes_list = hexes['hex_id'].unique().tolist()
#Print the ids and how many distinct hexes the file contains
print(hexes_list)
print(len(hexes_list))

['8860a24b61fffff', '8860a24b6bfffff', '8860b196b1fffff', '8860a24b0dfffff', '8860b196b7fffff', '8860b196b3fffff', '8860a24b57fffff', '8860a24b6dfffff', '8860a24b67fffff', '8860b19691fffff', '8860a24b41fffff', '8860a24b65fffff', '8860a24b2dfffff', '8860b196bbfffff', '8860a24b05fffff', '8860a24b51fffff', '8860a24b69fffff', '8860a24b09fffff', '8860b196a3fffff', '8860a24b29fffff', '8860b194dbfffff', '8860a24b63fffff', '8860a24b2bfffff', '8860a24b0bfffff', '8860a24b1dfffff', '8860b1969dfffff', '8860b196bdfffff', '8860a259a5fffff', '8860a24b21fffff', '8860a24b07fffff', '8860b19697fffff', '8860a24b03fffff', '8860a24b5bfffff', '8860a259a1fffff', '8860a24b53fffff', '8860a24b5dfffff', '8860a259a3fffff', '8860b19695fffff', '8860b196b9fffff', '8860b196b5fffff', '8860a259abfffff', '8860b19687fffff', '8860a24b49fffff', '8860a24b01fffff', '8860b19699fffff', '8860b19683fffff', '8860a24b47fffff', '8860b194d9fffff', '8860a24b45fffff', '8860a24b43fffff', '8860a259a7fffff', '8860a24b4dfffff', '8860a259bd

In [14]:
#Query data from order_logs_snapshot
#Selects Hyderabad 'Auto' orders with yyyymmdd between 20230605 and 20230611, hours 17 to 21 only.
#The query previously ended with the unfinished predicate "and pickup_location_", which is not
#valid SQL; it has been removed so the cell runs.
#NOTE(review): if a pickup-hex filter was intended (e.g. on hexes_list), add the complete predicate.

q = """
select order_id, accept_to_pickup_distance, captain_location_hex_8, captain_location_latitude,
captain_location_longitude, city_name, customer_location_hex_8, customer_location_latitude, 
customer_location_longitude, hhmmss, hour, quarter_hour, service_obj_service_name, time_bucket, weekday, yyyymmdd,
order_status, cancel_reason, pickup_location_hex_8
from orders.order_logs_snapshot
where yyyymmdd >= '20230605'
and yyyymmdd <= '20230611'
and city_name in ('Hyderabad')
and service_obj_service_name = 'Auto'
and hour in ('17', '18', '19', '20', '21')
"""

#Load data into pandas table
df_copy = pd.read_sql(q, connection)

In [15]:
#Create a working copy of the frame retrieved from presto so df_copy keeps the raw query result
df = df_copy.copy()

In [16]:
#Load data from cluster table
#Selects every column of datasets.city_cluster_hex for Hyderabad at resolution 8
q = """
select * from datasets.city_cluster_hex
where city in ('Hyderabad')
and resolution = 8
"""
#Load data into pandas table
df_cluster_copy = pd.read_sql(q, connection)

In [17]:
#Attach the cluster of each order's pickup hex
#rename() returns a new frame, so df_cluster_copy itself is left untouched
df_cluster = df_cluster_copy.rename(columns = {'hex_id':'pickup_location_hex_8'})
#Drop any 'cluster' column left by an earlier run before merging: without this, re-running the
#cell produced cluster_x / cluster_y instead of a single 'cluster' column
df = df.drop(columns = ['cluster'], errors = 'ignore').merge(
    df_cluster[['pickup_location_hex_8', 'cluster']], on = 'pickup_location_hex_8', how = 'left')
df.head()

Unnamed: 0,order_id,accept_to_pickup_distance,captain_location_hex_8,captain_location_latitude,captain_location_longitude,city_name,customer_location_hex_8,customer_location_latitude,customer_location_longitude,hhmmss,hour,quarter_hour,service_obj_service_name,time_bucket,weekday,yyyymmdd,order_status,cancel_reason,pickup_location_hex_8,cluster
0,6481ea756268a04438c6afb4,3.117,8860b19643fffff,17.472235,78.502953,Hyderabad,8860a24a6dfffff,17.445278,78.375404,201925,20,2015,Auto,20:00,4,20230608,dropped,Heavy Traffic,8860a24a6dfffff,Hitech City
1,6481ea981698fd4e571d8744,,8860a25b21fffff,17.406615,78.496895,Hyderabad,8860b52ddbfffff,17.461769,78.543649,202000,20,2015,Auto,20:00,4,20230608,expired,,8860a25b3dfffff,Basheerbagh
2,6481eab51982f112e599e54b,,8860a24a65fffff,17.448948,78.369087,Hyderabad,8860a24a65fffff,17.44879,78.36924,202029,20,2015,Auto,20:00,4,20230608,customerCancelled,order cancelled before rider accepted,8860a24a65fffff,Hitech City
3,6481eab67b77e0585e54f0b1,,8860a25863fffff,17.393957,78.442184,Hyderabad,8860a25863fffff,17.393957,78.442184,202030,20,2015,Auto,20:00,4,20230608,customerCancelled,order cancelled before rider accepted,8860a25829fffff,Mehedipatman 3
4,6481eac0fa22b174d4fa11c7,2.152,8860a25969fffff,17.441456,78.48568,Hyderabad,8860a25961fffff,17.442318,78.485466,202040,20,2015,Auto,20:00,4,20230608,customerCancelled,Taking longer than expected,8860a25961fffff,Bharath Nagar Metro Station


In [18]:
#Count distinct orders per cluster and show the 20 largest clusters
cluster_grouped = (
    df.groupby(['cluster'])['order_id']
    .nunique()
    .reset_index()
    .sort_values(by = 'order_id', ascending = False)
)
cluster_grouped.head(20)

Unnamed: 0,cluster,order_id
62,Hitech City,58611
13,Attapur 2,38729
20,Banjara Hills,23629
120,Mehedipatnam,23030
174,Shamshabad,20640
53,Golconda,19699
1,AOC Center,15240
21,Banjara Hills 2,13474
63,Hitech City 2,11506
112,Manikonda,10812


In [20]:
#List the distinct pickup hexes of the orders whose cluster is 'Hitech City'
print(df[df['cluster'] == 'Hitech City']['pickup_location_hex_8'].unique().tolist())

['8860a24a6dfffff', '8860a24a65fffff', '8860a25995fffff', '8860a259b9fffff', '8860a259bbfffff', '8860a24a61fffff', '8860a25997fffff']


In [13]:
#Count distinct orders per day (yyyymmdd); reuses the name 'grouped' from the pickup-hex cell
grouped = df.groupby(['yyyymmdd'])['order_id'].nunique().reset_index()
grouped

Unnamed: 0,yyyymmdd,order_id
0,20230605,7971
1,20230606,6498
2,20230607,8762
3,20230608,10308
4,20230609,10791
5,20230610,14058
6,20230611,13343


In [5]:
#Export the distinct pickup hexes seen in df (the frame index is written as the first CSV column)
df_pickup = df[['pickup_location_hex_8']].drop_duplicates()
df_pickup.to_csv('hyderabad_hexes.csv')

In [5]:
#View the first rows of the working dataset
df.head()

Unnamed: 0,order_id,accept_to_pickup_distance,captain_location_hex_8,captain_location_latitude,captain_location_longitude,city_name,customer_location_hex_8,customer_location_latitude,customer_location_longitude,hhmmss,hour,quarter_hour,service_obj_service_name,time_bucket,weekday,yyyymmdd,order_status,cancel_reason
0,63e67a9c54520e4d96673f39,1.737,8860a24a49fffff,17.414194,78.364893,Hyderabad,8860a259bbfffff,17.442769,78.377374,224052,22,2230,Link,22:59,5,20230210,customerCancelled,Change of plans
1,63e67ad2d6161345149b260b,0.635,8860b525e3fffff,17.345427,78.533798,Hyderabad,8860b52517fffff,17.337988,78.552909,224146,22,2230,Link,22:59,5,20230210,dropped,
2,63e67af054520e4d96673fb4,,8860a25a35fffff,17.357952,78.448738,Hyderabad,8860a25a35fffff,17.357952,78.448738,224216,22,2230,Link,22:59,5,20230210,customerCancelled,order cancelled before rider accepted
3,63e67b676dde8027bf6b7f71,0.951,8860a25b05fffff,17.394968,78.498154,Hyderabad,8860a25b63fffff,17.393127,78.502083,224415,22,2230,Link,22:59,5,20230210,customerCancelled,Others
4,63e67b8b13cf522e20a9c2d6,0.212,8860b196b5fffff,17.507944,78.421524,Hyderabad,8860b196b9fffff,17.495689,78.420471,224451,22,2230,Link,22:59,5,20230210,dropped,


In [6]:
#Make columns for second and minute and date
df['second'] = df['hhmmss'].apply(lambda x: x[4:])
df['minute'] = df['hhmmss'].apply(lambda x: x[2:4])
df['date'] = df['yyyymmdd'].apply(lambda x: x[6:])
df.head()

Unnamed: 0,order_id,accept_to_pickup_distance,captain_location_hex_8,captain_location_latitude,captain_location_longitude,city_name,customer_location_hex_8,customer_location_latitude,customer_location_longitude,hhmmss,hour,quarter_hour,service_obj_service_name,time_bucket,weekday,yyyymmdd,order_status,cancel_reason,second,minute,date
0,63e67a9c54520e4d96673f39,1.737,8860a24a49fffff,17.414194,78.364893,Hyderabad,8860a259bbfffff,17.442769,78.377374,224052,22,2230,Link,22:59,5,20230210,customerCancelled,Change of plans,52,40,10
1,63e67ad2d6161345149b260b,0.635,8860b525e3fffff,17.345427,78.533798,Hyderabad,8860b52517fffff,17.337988,78.552909,224146,22,2230,Link,22:59,5,20230210,dropped,,46,41,10
2,63e67af054520e4d96673fb4,,8860a25a35fffff,17.357952,78.448738,Hyderabad,8860a25a35fffff,17.357952,78.448738,224216,22,2230,Link,22:59,5,20230210,customerCancelled,order cancelled before rider accepted,16,42,10
3,63e67b676dde8027bf6b7f71,0.951,8860a25b05fffff,17.394968,78.498154,Hyderabad,8860a25b63fffff,17.393127,78.502083,224415,22,2230,Link,22:59,5,20230210,customerCancelled,Others,15,44,10
4,63e67b8b13cf522e20a9c2d6,0.212,8860b196b5fffff,17.507944,78.421524,Hyderabad,8860b196b9fffff,17.495689,78.420471,224451,22,2230,Link,22:59,5,20230210,dropped,,51,44,10


In [7]:
#Create 15-sec batches and 20-sec batches
df['second'] = df['second'].astype(float)
df['minute'] = df['minute'].astype(float)
df['20_sec_batch'] = pd.cut(df['second'], bins = [-1, 20, 40, 61], labels = ['0-20', '20-40', '40-60'])
df['count'] = 1
df.head()

Unnamed: 0,order_id,accept_to_pickup_distance,captain_location_hex_8,captain_location_latitude,captain_location_longitude,city_name,customer_location_hex_8,customer_location_latitude,customer_location_longitude,hhmmss,hour,quarter_hour,service_obj_service_name,time_bucket,weekday,yyyymmdd,order_status,cancel_reason,second,minute,date,20_sec_batch,count
0,63e67a9c54520e4d96673f39,1.737,8860a24a49fffff,17.414194,78.364893,Hyderabad,8860a259bbfffff,17.442769,78.377374,224052,22,2230,Link,22:59,5,20230210,customerCancelled,Change of plans,52.0,40.0,10,40-60,1
1,63e67ad2d6161345149b260b,0.635,8860b525e3fffff,17.345427,78.533798,Hyderabad,8860b52517fffff,17.337988,78.552909,224146,22,2230,Link,22:59,5,20230210,dropped,,46.0,41.0,10,40-60,1
2,63e67af054520e4d96673fb4,,8860a25a35fffff,17.357952,78.448738,Hyderabad,8860a25a35fffff,17.357952,78.448738,224216,22,2230,Link,22:59,5,20230210,customerCancelled,order cancelled before rider accepted,16.0,42.0,10,0-20,1
3,63e67b676dde8027bf6b7f71,0.951,8860a25b05fffff,17.394968,78.498154,Hyderabad,8860a25b63fffff,17.393127,78.502083,224415,22,2230,Link,22:59,5,20230210,customerCancelled,Others,15.0,44.0,10,0-20,1
4,63e67b8b13cf522e20a9c2d6,0.212,8860b196b5fffff,17.507944,78.421524,Hyderabad,8860b196b9fffff,17.495689,78.420471,224451,22,2230,Link,22:59,5,20230210,dropped,,51.0,44.0,10,40-60,1


In [8]:
#Check the number of rides coming from each hex
df_grouped = df.groupby(['customer_location_hex_8'])['count'].sum().reset_index()

#Filter out those hexes where atleast 1000 rides have come in the last two weeks
df_grouped = df_grouped[df_grouped['count'] >= 100].reset_index(drop = True)

#Add a column to keep for analysis
df_grouped['keep_for_analysis'] = 'yes'
df_grouped.head()

Unnamed: 0,customer_location_hex_8,count,keep_for_analysis
0,8860a2410dfffff,132,yes
1,8860a2412dfffff,148,yes
2,8860a24163fffff,331,yes
3,8860a24165fffff,748,yes
4,8860a24169fffff,152,yes


In [9]:
#Merge the number of rides to the original df
df = df.merge(df_grouped[['customer_location_hex_8', 'keep_for_analysis']], on = 'customer_location_hex_8', how = 'left')
df['keep_for_analysis'] = df['keep_for_analysis'].fillna('no')
df.head()

Unnamed: 0,order_id,accept_to_pickup_distance,captain_location_hex_8,captain_location_latitude,captain_location_longitude,city_name,customer_location_hex_8,customer_location_latitude,customer_location_longitude,hhmmss,hour,quarter_hour,service_obj_service_name,time_bucket,weekday,yyyymmdd,order_status,cancel_reason,second,minute,date,20_sec_batch,count,keep_for_analysis
0,63e67a9c54520e4d96673f39,1.737,8860a24a49fffff,17.414194,78.364893,Hyderabad,8860a259bbfffff,17.442769,78.377374,224052,22,2230,Link,22:59,5,20230210,customerCancelled,Change of plans,52.0,40.0,10,40-60,1,yes
1,63e67ad2d6161345149b260b,0.635,8860b525e3fffff,17.345427,78.533798,Hyderabad,8860b52517fffff,17.337988,78.552909,224146,22,2230,Link,22:59,5,20230210,dropped,,46.0,41.0,10,40-60,1,yes
2,63e67af054520e4d96673fb4,,8860a25a35fffff,17.357952,78.448738,Hyderabad,8860a25a35fffff,17.357952,78.448738,224216,22,2230,Link,22:59,5,20230210,customerCancelled,order cancelled before rider accepted,16.0,42.0,10,0-20,1,yes
3,63e67b676dde8027bf6b7f71,0.951,8860a25b05fffff,17.394968,78.498154,Hyderabad,8860a25b63fffff,17.393127,78.502083,224415,22,2230,Link,22:59,5,20230210,customerCancelled,Others,15.0,44.0,10,0-20,1,yes
4,63e67b8b13cf522e20a9c2d6,0.212,8860b196b5fffff,17.507944,78.421524,Hyderabad,8860b196b9fffff,17.495689,78.420471,224451,22,2230,Link,22:59,5,20230210,dropped,,51.0,44.0,10,40-60,1,yes


In [10]:
#Filter the df only on hexes to keep
df_filter = df.loc[df['keep_for_analysis'].eq('yes')]

In [11]:
#Groupby on hex to see number of orders coming in 15 and 20 second batches
df_hex_grouped = df_filter.groupby(['customer_location_hex_8', 'date', 'hour', 'minute', '20_sec_batch'])['count'].sum().reset_index()
df_hex_grouped = df_hex_grouped.sort_values(by = ['customer_location_hex_8', 'date', 'hour', 'minute', '20_sec_batch'])
df_hex_grouped.head()

Unnamed: 0,customer_location_hex_8,date,hour,minute,20_sec_batch,count
0,8860a2410dfffff,6,0,0.0,0-20,0
1,8860a2410dfffff,6,0,0.0,20-40,0
2,8860a2410dfffff,6,0,0.0,40-60,0
3,8860a2410dfffff,6,0,1.0,0-20,0
4,8860a2410dfffff,6,0,1.0,20-40,0


In [12]:
#Identify high demand hexes
df_hex_grouped_filter2 = df_hex_grouped[df_hex_grouped['count'] >= 1]
count_df = pd.DataFrame(df_hex_grouped_filter2['customer_location_hex_8'].value_counts().reset_index())
count_df.columns = ['customer_location_hex_8', 'count']
count_df.head(20)

Unnamed: 0,customer_location_hex_8,count
0,8860a259bbfffff,11080
1,8860a259b9fffff,8750
2,8860a25915fffff,7929
3,8860a24a65fffff,7480
4,8860a24a61fffff,7291
5,8860a24b6dfffff,7001
6,8860a259b1fffff,6984
7,8860a24b61fffff,6362
8,8860a2591dfffff,6281
9,8860a259bdfffff,6251


In [13]:
#Write the customer hex and 'count' column of every row in df to CSV, without the index
#NOTE(review): 'count' is set to 1 for each row of df, so this exports one line per order;
#if the per-hex ranking was intended, the frame to export is presumably count_df - confirm.
df[['customer_location_hex_8', 'count']].to_csv('jaipur_hexes.csv', index = False)

In [13]:
#Look at the average demand for each minute
df_hex_grouped['time'] = (pd.to_datetime(df_hex_grouped['hour'].astype(str) + ':' + df_hex_grouped['minute'].astype(int).astype(str), format='%H:%M').dt.time)
df_hex_grouped.head()

Unnamed: 0,customer_location_hex_8,date,hour,minute,20_sec_batch,count,time
0,8860a2410dfffff,6,0,0.0,0-20,0,00:00:00
1,8860a2410dfffff,6,0,0.0,20-40,0,00:00:00
2,8860a2410dfffff,6,0,0.0,40-60,0,00:00:00
3,8860a2410dfffff,6,0,1.0,0-20,0,00:01:00
4,8860a2410dfffff,6,0,1.0,20-40,0,00:01:00


In [14]:
#Load data from cluster table
#Same Hyderabad / resolution 8 query as the earlier cluster cell; it reloads df_cluster_copy
q = """
select * from datasets.city_cluster_hex
where city in ('Hyderabad')
and resolution = 8
"""
#Load data into pandas table
df_cluster_copy = pd.read_sql(q, connection)

In [15]:
#Collect the distinct hex ids belonging to the listed clusters
#NOTE(review): the next cell assigns 'hexes' again, so this value only survives if that cell is skipped
hexes = df_cluster_copy[df_cluster_copy['cluster'].isin(['Nagole', 'Dilsukhnagar', 'Victoria Memorial Metro', 'Saroor Nagar', 'Saroor Nagar Lake Park',
 'shilparamam', 'Bahadurguda', 'Shamshabad', 'kharmanghat', 'L B Nagar Metro'])]['hex_id'].unique().tolist()

In [15]:
#Filter on the hexes identified as high demand
high_demand_hexes = ['883da219ddfffff', '883da219d1fffff', '883da219d3fffff', '883da218a5fffff', '883da219d5fffff', '883da219d7fffff', '883da21999fffff',
 '883da2198bfffff', '883da2199dfffff', '883da219b9fffff', '883da21995fffff', '883da219b3fffff', '883da20a6dfffff', '883da219c7fffff']
hexes = []
for x in high_demand_hexes:
    hex_ids = h3.k_ring(x, 1)
    hexes.append(hex_ids)

hexes = [item for sublist in hexes for item in sublist]

In [16]:
#Get the total number of rides per hour
df_hex_high_demand = df_hex_grouped[df_hex_grouped['customer_location_hex_8'].isin(hexes)]
df_level_1_grouped = round(df_hex_high_demand.groupby(['date', 'hour'])['count'].sum().reset_index(), 0)
df_level_1_grouped = df_level_1_grouped.rename(columns = {'count':'rides_per_hour'})
df_level_1_grouped['rides_per_20_sec'] = df_level_1_grouped['rides_per_hour'].apply(lambda x: round(x/180, 2))
df_level_1_grouped

Unnamed: 0,date,hour,rides_per_hour,rides_per_20_sec
0,06,00,322,1.79
1,06,01,193,1.07
2,06,02,112,0.62
3,06,03,80,0.44
4,06,04,169,0.94
...,...,...,...,...
115,10,19,1544,8.58
116,10,20,1465,8.14
117,10,21,1140,6.33
118,10,22,869,4.83


In [21]:
#Distinct customer hexes that are present in the high-demand subset
jaipur_hexes = df_hex_high_demand['customer_location_hex_8'].unique().tolist()
print(jaipur_hexes)

['883da20a61fffff', '883da20a65fffff', '883da20a69fffff', '883da20a6dfffff', '883da20b59fffff', '883da21837fffff', '883da218a1fffff', '883da218a5fffff', '883da218a7fffff', '883da218adfffff', '883da21981fffff', '883da21983fffff', '883da21987fffff', '883da21989fffff', '883da2198bfffff', '883da2198dfffff', '883da21991fffff', '883da21995fffff', '883da21997fffff', '883da21999fffff', '883da2199bfffff', '883da2199dfffff', '883da219b1fffff', '883da219b3fffff', '883da219b7fffff', '883da219b9fffff', '883da219bbfffff', '883da219bdfffff', '883da219c1fffff', '883da219c3fffff', '883da219c5fffff', '883da219c7fffff', '883da219cbfffff', '883da219d1fffff', '883da219d3fffff', '883da219d5fffff', '883da219d7fffff', '883da219d9fffff', '883da219dbfffff', '883da219ddfffff', '883da219ebfffff']


In [22]:
#Load data from cluster table
q = """
select * from datasets.city_cluster_hex
where city in ('Jaipur')
and resolution = 8
and hex_id in ('883da20a61fffff', '883da20a65fffff', '883da20a69fffff', '883da20a6dfffff', '883da20b59fffff', '883da21837fffff', '883da218a1fffff',
 '883da218a5fffff', '883da218a7fffff', '883da218adfffff', '883da21981fffff', '883da21983fffff', '883da21987fffff', '883da21989fffff', '883da2198bfffff',
  '883da2198dfffff', '883da21991fffff', '883da21995fffff', '883da21997fffff', '883da21999fffff', '883da2199bfffff', '883da2199dfffff', '883da219b1fffff',
   '883da219b3fffff', '883da219b7fffff', '883da219b9fffff', '883da219bbfffff', '883da219bdfffff', '883da219c1fffff', '883da219c3fffff', '883da219c5fffff',
    '883da219c7fffff', '883da219cbfffff', '883da219d1fffff', '883da219d3fffff', '883da219d5fffff', '883da219d7fffff', '883da219d9fffff', '883da219dbfffff',
     '883da219ddfffff', '883da219ebfffff')
"""
#Load data into pandas table
df_cluster_copy = pd.read_sql(q, connection)

In [23]:
#Number of returned hex rows per cluster name
df_cluster_copy['cluster'].value_counts()

Gopalpura_JAI                 8
Malviya_Nagar_JAI 2           5
Mansarovar 2_JAI              3
Nirman_Nagar_JAI              2
Sudarshanpura_JAI             2
Malviya Nagar_JAI             2
Maharani Farm_JAI             2
MNIT_JAI                      2
Durgapura_JAI                 2
                              2
Swej Farm_JAI                 2
Satkar_Malviya_Nagar_JAI      1
Triveni_Nagar_JAI             1
VT Road Mansarovar_JAI        1
Jhalana Dungari_JAI           1
Lalkothi_JAI                  1
Narayan Vihar_JAI             1
Landscape_Park_JAI            1
Swarna Path Mansarovar_JAI    1
Gurjar_Ki_Thadi_JAI           1
Name: cluster, dtype: int64

In [17]:
#Show only hours 08-11 and 17-21; the comparison is against zero-padded strings, so 'hour' must hold strings like '08'
df_level_1_grouped[df_level_1_grouped['hour'].isin(['08', '09', '10', '11', '17', '18', '19', '20', '21'])]

Unnamed: 0,date,hour,rides_per_hour,rides_per_20_sec
8,6,8,2214,12.3
9,6,9,1811,10.06
10,6,10,1406,7.81
11,6,11,1155,6.42
17,6,17,1351,7.51
18,6,18,1409,7.83
19,6,19,1331,7.39
20,6,20,1177,6.54
21,6,21,953,5.29
32,7,8,1429,7.94


In [18]:
#Median of rides_per_20_sec for each hour, taken across the dates in df_level_1_grouped
df_level_1_grouped.groupby(['hour'])['rides_per_20_sec'].median()

hour
00    0.38
01    0.23
02    0.17
03    0.16
04    0.73
05    1.46
06    1.02
07    1.38
08    2.18
09    4.96
10    4.12
11    3.29
12    3.76
13    3.79
14    3.56
15    3.57
16    3.56
17    3.73
18    4.57
19    4.21
20    3.13
21    2.37
22    1.59
23    0.76
Name: rides_per_20_sec, dtype: float64

In [19]:
#Mean of rides_per_20_sec for each hour, taken across the dates in df_level_1_grouped
df_level_1_grouped.groupby(['hour'])['rides_per_20_sec'].mean()

hour
00    0.388
01    0.224
02    0.170
03    0.152
04    0.656
05    1.504
06    1.020
07    1.366
08    2.252
09    4.652
10    4.164
11    3.356
12    3.716
13    3.892
14    3.974
15    3.574
16    3.478
17    3.590
18    4.510
19    4.254
20    3.348
21    2.420
22    1.714
23    0.766
Name: rides_per_20_sec, dtype: float64

In [44]:
#Total the counts per high-demand hex and save them to CSV
#The aggregation is computed here because this cell previously used df_hex_high_demand_grouped
#before the cell that defines it, which raises NameError when the notebook is run top to bottom.
df_hex_high_demand_grouped = df_hex_high_demand.groupby(['customer_location_hex_8'])['count'].sum().reset_index()
df_hex_high_demand_grouped.to_csv('demand_chennai.csv', index = False)

In [43]:
#Sum of 'count' per customer hex over the high-demand subset, displayed as a table
df_hex_high_demand_grouped = df_hex_high_demand.groupby(['customer_location_hex_8'])['count'].sum().reset_index()
df_hex_high_demand_grouped

Unnamed: 0,customer_location_hex_8,count
0,88618c4881fffff,1095
1,88618c4883fffff,3407
2,88618c4885fffff,3705
3,88618c4887fffff,1339
4,88618c4889fffff,2186
5,88618c488bfffff,1321
6,88618c488dfffff,369
7,88618c4891fffff,1183
8,88618c4893fffff,816
9,88618c4895fffff,1343
