## Load libraries and data

In [1]:
import pandas as pd 
import numpy as np
import datetime
import time
import geopandas as gpd

In [2]:
# Load the geofence polygons from the zipped shapefile export.
# NOTE(review): the variable name `zipfile` shadows the standard-library module of the same name.
zipfile = "TMSUITE-Geofences-asof-0429 SHP.zip"
geofences = gpd.read_file(zipfile)
# Preview the first rows of the geofence table.
geofences.head()

Unnamed: 0,name,address,type,location,window_tim,window_t_1,group_ids,group_name,category,geofence_c,...,barangay,municipali,province,region,created,modified,client_ids,client_nam,store_code,geometry
0,City Supermarket - Market Square,"Inside CSI Market Square, Downtown District, D...",rectangle,"{'lat': 16.043410180944765, 'lon': 120.3362203...",1990-12-12 00:00:00,1990-12-12 01:00:00,"[343, 30]","['Nestle', 'Webcast']",Pickup/Dropoff,PHEC0000343131,...,Barangay II (Nueva),DAGUPAN CITY,PANGASINAN,REGION I (ILOCOS REGION),2020/06/10 10:42:55.000,2020/09/10 07:09:45.000,,,,"POLYGON ((120.33711 16.04253, 120.33533 16.042..."
1,Puregold Price Club - Agora,"1 N Domingo St.Cor F Blumentritt, Pedro Cruz, ...",polygon,"{'lat': 14.605119723980204, 'lon': 121.0232510...",1990-12-12 00:00:00,1990-12-12 01:00:00,"[343, 30]","['Nestle', 'Webcast']",Pickup/Dropoff,PHEC0002174857,...,Pedro Cruz,CITY OF SAN JUAN,"NCR, SECOND DISTRICT",NCR (NATIONAL CAPITAL REGION),2020/06/10 10:43:47.000,2020/07/30 06:59:55.000,,,,"POLYGON ((121.02351 14.60612, 121.02432 14.605..."
2,Ultra Mega Multi Sales - Kalookan,"163 Teofilo Samson Ave, Caloocan, 1420 Metro M...",rectangle,"{'lat': 14.739714110694091, 'lon': 121.0251580...",1990-12-12 00:00:00,1990-12-12 01:00:00,"[343, 30]","['Nestle', 'Webcast']",Pickup/Dropoff,PHEC0002168891,...,Barangay 168,CALOOCAN CITY,"NCR, THIRD DISTRICT",NCR (NATIONAL CAPITAL REGION),2020/06/10 10:43:47.000,2020/09/10 04:53:41.000,,,,"POLYGON ((121.02558 14.73925, 121.02473 14.739..."
3,Rustan's - Marikina,"27 Royal Palm St, Marikina, 1800 Metro Manila,...",rectangle,"{'lat': 14.6503079761005, 'lon': 121.116088176...",Invalid date,Invalid date,"[343, 30]","['Nestle', 'Webcast']",Pickup/Dropoff,PHEC0002598218,...,Marikina Heights (Concepcion),CITY OF MARIKINA,"NCR, SECOND DISTRICT",NCR (NATIONAL CAPITAL REGION),2020/06/10 10:43:55.000,2020/08/20 04:21:18.000,,,,"POLYGON ((121.11639 14.65008, 121.11579 14.650..."
4,Rustan's - Fairview,"6 General Aguinaldo Ave, Cubao, Quezon City, 1...",polygon,"{'lat': 14.622015808686214, 'lon': 121.0535702...",Invalid date,Invalid date,"[343, 30]","['Nestle', 'Webcast']",Pickup/Dropoff,PHEC0002578706,...,Socorro,QUEZON CITY,"NCR, SECOND DISTRICT",NCR (NATIONAL CAPITAL REGION),2020/06/10 10:43:51.000,2020/08/20 04:14:16.000,,,,"POLYGON ((121.05288 14.62244, 121.05419 14.622..."


In [3]:
# Read the raw GPS pings, then attach a point geometry built from each
# row's longitude (x) and latitude (y).
gps_points = pd.read_csv('Nestle_March_Kodigo_GPS-Formatted.csv')
gps_points = gpd.GeoDataFrame(
    gps_points,
    geometry=gpd.points_from_xy(x=gps_points['longitude'], y=gps_points['latitude']),
)
gps_points.head()

Unnamed: 0,created,plateno,longitude,latitude,geometry
0,2021-03-09 13:30:57,WQC386,125.532698,11.193735,POINT (125.53270 11.19374)
1,2021-03-09 13:32:22,KAD4602,124.758065,8.520773,POINT (124.75807 8.52077)
2,2021-03-09 13:29:23,KAD4602,124.758065,8.520773,POINT (124.75807 8.52077)
3,2021-03-01 09:06:31,WQC386,124.607978,11.010027,POINT (124.60798 11.01003)
4,2021-03-09 13:30:48,WQC386,125.533362,11.195668,POINT (125.53336 11.19567)


In [4]:
# Label the GPS points as EPSG:4326 in place. set_crs only assigns the CRS
# to the frame; it does not transform the coordinates.
gps_points.set_crs(epsg=4326, inplace=True)

Unnamed: 0,created,plateno,longitude,latitude,geometry
0,2021-03-09 13:30:57,WQC386,125.532698,11.193735,POINT (125.53270 11.19374)
1,2021-03-09 13:32:22,KAD4602,124.758065,8.520773,POINT (124.75807 8.52077)
2,2021-03-09 13:29:23,KAD4602,124.758065,8.520773,POINT (124.75807 8.52077)
3,2021-03-01 09:06:31,WQC386,124.607978,11.010027,POINT (124.60798 11.01003)
4,2021-03-09 13:30:48,WQC386,125.533362,11.195668,POINT (125.53336 11.19567)
...,...,...,...,...,...
149994,2021-03-21 09:18:06,KAD4602,124.650352,8.478460,POINT (124.65035 8.47846)
149995,2021-03-21 09:22:51,ABK1494,121.130035,14.206833,POINT (121.13004 14.20683)
149996,2021-03-21 09:23:03,KAD4602,124.650352,8.478460,POINT (124.65035 8.47846)
149997,2021-03-21 09:22:03,WQC386,123.925433,10.368815,POINT (123.92543 10.36881)


In [5]:
# Both layers must report the same coordinate reference system before they are joined.
print(gps_points.crs, geofences.crs, sep='\n')

epsg:4326
epsg:4326


In [6]:
### Spatial join
# Inner join on the 'within' relation: a GPS point is paired with every geofence
# it lies within, and points that fall in no geofence are dropped.
# NOTE(review): later geopandas releases name this argument `predicate` instead
# of `op` — confirm against the installed version before upgrading.
points_inside_geofence = gpd.sjoin(gps_points, geofences, how = 'inner', op = 'within')

In [7]:
# Keep the vehicle plate, the geofence name and the GPS timestamp
# ('created_left' is the 'created' column of the left/GPS frame), under clearer names.
points_inside_geofence = (
    points_inside_geofence
    .loc[:, ['plateno', 'name', 'created_left']]
    .rename(columns={'name': 'geofence_name', 'created_left': 'datestamp'})
)

In [8]:
# Parse the timestamp column into datetime values.
points_inside_geofence = points_inside_geofence.assign(
    datestamp=lambda frame: pd.to_datetime(frame['datestamp'])
)

In [9]:
# Working frame for the steps below: the joined points in chronological order.
data = points_inside_geofence.sort_values('datestamp')

## Generate CICO

In [10]:
# Make sure 'datestamp' is a datetime column before the .dt accessors used below,
# then show the dtype of every column.
data['datestamp'] = pd.to_datetime(data['datestamp'])
data.dtypes

plateno                  object
geofence_name            object
datestamp        datetime64[ns]
dtype: object

In [11]:
# Assign every ping to the 30-minute window it falls in (timestamp floored to
# the half hour). '30min' is used instead of '30T' because the single-letter
# 'T' alias is deprecated in current pandas; both spell the same frequency.
data['track_period'] = data['datestamp'].dt.floor('30min')

In [12]:
# Per (vehicle, geofence, 30-minute window): first ping, last ping, number of
# pings and the seconds between first and last ping.
# The grouped 'datestamp' column is selected once and reused, so each transform
# yields a Series. Previously 'time_start' was assigned from a whole-DataFrame
# transform, which only works while exactly one non-key column exists.
period_stamps = data.groupby(['plateno', 'geofence_name', 'track_period'])['datestamp']
data['time_start'] = period_stamps.transform('min')
data['time_end'] = period_stamps.transform('max')
data['count'] = period_stamps.transform('count')
data['duration'] = (data['time_end'] - data['time_start']).dt.total_seconds()


In [13]:
# Keep only windows with more than 5 pings spanning more than 300 seconds,
# order them by vehicle, geofence and window, and retain the per-window columns.
data = (
    data
    .loc[data['count'].gt(5) & data['duration'].gt(300), :]
    .sort_values(by=['plateno', 'geofence_name', 'track_period'])
    .loc[:, ['plateno', 'geofence_name', 'track_period',
             'time_start', 'time_end', 'duration', 'count']]
)

In [14]:
# Every ping of a window carries the same aggregate values, so dropping
# duplicates leaves one row per (plateno, geofence_name, track_period).
data = data.drop_duplicates()
# One explicit three-key sort. The shift() calls that follow need each vehicle's
# windows at a geofence in chronological order; two successive sorts only give
# that order when the second sort happens to be stable.
data = data.sort_values(by=['plateno', 'geofence_name', 'track_period'])

In [15]:
# For each row, look up the end of the previous window and the start of the
# next window of the same vehicle at the same geofence (NaT at group edges).
data['time_end_prev_loc'] = (
    data.groupby(['plateno', 'geofence_name'])['time_end'].shift(periods=1)
)
data['time_start_next_loc'] = (
    data.groupby(['plateno', 'geofence_name'])['time_start'].shift(periods=-1)
)


In [16]:
# Gap in seconds between this window and its neighbouring windows
# (NaN where there is no neighbour).
data['time_diff_next_loc'] = data['time_start_next_loc'].sub(data['time_end']).dt.total_seconds()
data['time_diff_prev_loc'] = data['time_start'].sub(data['time_end_prev_loc']).dt.total_seconds()

In [17]:
# A window "continues" into a neighbour when the gap is under 300 seconds; a
# missing neighbour (NaN gap) compares as False. 'continuing' marks windows
# that continue on both sides.
data['continue_next_loc'] = data['time_diff_next_loc'].lt(300)
data['continue_prev_loc'] = data['time_diff_prev_loc'].lt(300)
data['continuing'] = data['continue_prev_loc'] & data['continue_next_loc']

In [18]:
# Drop the windows that continue on both sides; the remaining rows are the
# ones not flagged as 'continuing'.
data = data.loc[~data['continuing'], :]

In [19]:
# From the next remaining row of the same vehicle and geofence, pull its gap to
# its own predecessor and its end time.
data['lead_time_diff_prev_loc'] = (
    data.groupby(['plateno', 'geofence_name'])['time_diff_prev_loc'].shift(periods=-1)
)
data['lead_time_end'] = (
    data.groupby(['plateno', 'geofence_name'])['time_end'].shift(periods=-1)
)

In [20]:
# c1: the row has no previous window in its group.
# c2: the next remaining row followed its own predecessor by under 300 seconds.
# c3: this row's next window started within 300 seconds of this row's end.
# NOTE(review): c3 uses <= 300 while the 'continue_next_loc' flag uses < 300 — confirm which bound is intended.
c1 = data['time_end_prev_loc'].isna()
c2 = data['lead_time_diff_prev_loc'].lt(300)
c3 = data['time_diff_next_loc'].le(300)

# Where the row runs on into the next remaining row, take that row's end time;
# otherwise keep the row's own end time.
condition = ((c1 | c2) & c3)

data['actual_time_end'] = np.where(condition, data['lead_time_end'], data['time_end'])

In [21]:
# Rows with no next window in their group always end at their own end time.
data['actual_time_end'] = np.where(
    data['time_diff_next_loc'].isna(),
    data['time_end'],
    data['actual_time_end'],
)

In [22]:
# Discard rows without a resolved end time and keep only the visit columns.
data = (
    data
    .loc[data['actual_time_end'].notna(), :]
    .loc[:, ['plateno', 'geofence_name', 'time_start', 'actual_time_end']]
)

In [23]:
# Visit length in seconds, from the start time to the resolved end time.
data['duration'] = data['actual_time_end'].sub(data['time_start']).dt.total_seconds()

In [24]:
# Rank rows that share vehicle, geofence and end time by duration, longest
# first (rank 1 = longest; ties receive the average rank).
data['ranking'] = (
    data
    .groupby(['plateno', 'geofence_name', 'actual_time_end'])['duration']
    .rank(method='average', ascending=False)
)

In [25]:
# Keep only the top-ranked (longest) row of each group. The previous `!= 2`
# test removed the runner-up alone, so any row ranked 3 or lower in a group of
# three or more would have survived as a spurious shorter visit.
data = data.loc[data['ranking'] < 2, :]

In [26]:
# Keep the visit columns only; the helper 'ranking' column is left out.
data = data.loc[:, ['plateno','geofence_name','time_start','actual_time_end','duration']]

In [27]:
# Switch to the output column names and renumber the rows from 0.
data = (
    data
    .rename(columns={
        'time_start': 'datestamp_entry',
        'actual_time_end': 'datestamp_left',
        'duration': 'dwell_time',
    })
    .reset_index(drop=True)
)

In [28]:
# Order the visits by entry time and renumber the rows.
data = (
    data
    .sort_values('datestamp_entry')
    .reset_index(drop=True)
)

In [29]:
# Render the dwell time as HH:MM:SS with a running hour count. Formatting via
# to_datetime(...).strftime('%H:%M:%S') wraps the hour field at 24, so a
# 25-hour stay was shown as 01:00:00. Missing dwell times stay missing.
data['dwell_time_hms'] = data['dwell_time'].map(
    lambda secs: '{:02d}:{:02d}:{:02d}'.format(
        int(secs) // 3600, int(secs) % 3600 // 60, int(secs) % 60
    ),
    na_action='ignore',
)

### Normalizing CICO

In [57]:
# Order visits per vehicle by entry time, with the geofence name as tie-breaker,
# so that neighbouring rows can be compared.
data = data.sort_values(['plateno', 'datestamp_entry', 'geofence_name'])

In [58]:
# Compare each visit's geofence with the previous/next visit of the SAME vehicle.
# The shift is done within each plateno: a plain shift over the whole frame also
# compared the last visit of one vehicle with the first visit of the next, so
# visits of different vehicles to the same geofence could be treated as one run.
data['prev_match'] = data['geofence_name'].eq(data.groupby('plateno')['geofence_name'].shift(1))
data['next_match'] = data['geofence_name'].eq(data.groupby('plateno')['geofence_name'].shift(-1))
# 1 when the visit sits between two visits to the same geofence, else 0.
data['continuing'] = np.where((data['prev_match']) & (data['next_match']), 1, 0)

data

Unnamed: 0,plateno,geofence_name,datestamp_entry,datestamp_left,dwell_time,dwell_time_hms,prev_match,next_match,continuing
0,ABK1494,BICOL EXCL'T SLS & TRADING-CALAMBA,2021-03-01 00:00:58,2021-03-01 07:27:49,26811.0,07:26:51,False,False,0
5,ABK1494,PH Batino Grocery - Destination,2021-03-01 07:34:01,2021-03-01 16:16:45,31364.0,08:42:44,False,False,0
4,ABK1494,PH Batino Grocery - Source,2021-03-01 07:34:01,2021-03-01 16:16:45,31364.0,08:42:44,False,False,0
16,ABK1494,BICOL EXCL'T SLS & TRADING-CALAMBA,2021-03-01 16:32:11,2021-03-02 03:47:18,40507.0,11:15:07,False,False,0
21,ABK1494,PUREGOLD PRICE CLUB - PARIAN JR,2021-03-02 03:32:06,2021-03-02 03:43:09,663.0,00:11:03,False,False,0
...,...,...,...,...,...,...,...,...,...
471,WQC386,PH Mandaue Grocery - Destination,2021-03-18 21:51:05,2021-03-19 03:06:32,18927.0,05:15:27,False,False,0
472,WQC386,PH Mandaue Grocery - Source,2021-03-18 21:52:06,2021-03-19 02:59:58,18472.0,05:07:52,False,False,0
507,WQC386,Puregold Price Club - Kananga,2021-03-20 01:43:11,2021-03-20 12:24:49,38498.0,10:41:38,False,False,0
545,WQC386,PH Mandaue Grocery - Destination,2021-03-21 08:38:04,2021-03-21 10:59:59,8515.0,02:21:55,False,False,0


In [59]:
# Keep the rows not flagged as continuing, and only the visit columns plus the flag.
data = data.loc[
    data['continuing'].eq(0),
    ['plateno', 'geofence_name', 'datestamp_entry', 'datestamp_left',
     'dwell_time', 'dwell_time_hms', 'continuing'],
]

In [60]:
# 1 when the next visit of the SAME vehicle is at the same geofence, else 0.
# Shifting within each plateno keeps the last visit of one vehicle from being
# compared with the first visit of the next vehicle, which a plain shift did.
data['has_same_next'] = np.where(
    data['geofence_name'].eq(data.groupby('plateno')['geofence_name'].shift(-1)), 1, 0
)

In [61]:
# Display the frame with the new 'has_same_next' flag.
data

Unnamed: 0,plateno,geofence_name,datestamp_entry,datestamp_left,dwell_time,dwell_time_hms,continuing,has_same_next
0,ABK1494,BICOL EXCL'T SLS & TRADING-CALAMBA,2021-03-01 00:00:58,2021-03-01 07:27:49,26811.0,07:26:51,0,0
5,ABK1494,PH Batino Grocery - Destination,2021-03-01 07:34:01,2021-03-01 16:16:45,31364.0,08:42:44,0,0
4,ABK1494,PH Batino Grocery - Source,2021-03-01 07:34:01,2021-03-01 16:16:45,31364.0,08:42:44,0,0
16,ABK1494,BICOL EXCL'T SLS & TRADING-CALAMBA,2021-03-01 16:32:11,2021-03-02 03:47:18,40507.0,11:15:07,0,0
21,ABK1494,PUREGOLD PRICE CLUB - PARIAN JR,2021-03-02 03:32:06,2021-03-02 03:43:09,663.0,00:11:03,0,0
...,...,...,...,...,...,...,...,...
471,WQC386,PH Mandaue Grocery - Destination,2021-03-18 21:51:05,2021-03-19 03:06:32,18927.0,05:15:27,0,0
472,WQC386,PH Mandaue Grocery - Source,2021-03-18 21:52:06,2021-03-19 02:59:58,18472.0,05:07:52,0,0
507,WQC386,Puregold Price Club - Kananga,2021-03-20 01:43:11,2021-03-20 12:24:49,38498.0,10:41:38,0,0
545,WQC386,PH Mandaue Grocery - Destination,2021-03-21 08:38:04,2021-03-21 10:59:59,8515.0,02:21:55,0,0


In [62]:
# When the following row is the same geofence, extend this visit to that row's
# departure time; otherwise keep its own departure time.
data['new_datestamp_left'] = np.where(
    data['has_same_next'].eq(1),
    data['datestamp_left'].shift(periods=-1),
    data['datestamp_left'],
)

In [63]:
# A row whose predecessor was flagged 'has_same_next' has been absorbed into
# that predecessor, so it is dropped; only the output columns are kept.
data['to_remove'] = data['has_same_next'].shift(1).eq(1)
data = data.loc[
    ~data['to_remove'],
    ['plateno', 'geofence_name', 'datestamp_entry', 'new_datestamp_left',
     'dwell_time', 'dwell_time_hms'],
]

In [64]:
# The extended departure time takes over the regular column name.
data = data.rename(columns={'new_datestamp_left': 'datestamp_left'})

In [65]:
# Recompute the dwell time in seconds from the (possibly extended) departure time.
data['dwell_time'] = (data['datestamp_left'] - data['datestamp_entry']).dt.total_seconds()
# Render it as HH:MM:SS with a running hour count. Formatting via
# to_datetime(...).strftime('%H:%M:%S') wraps the hour field at 24, so stays of
# a day or more were reported too short. Missing dwell times stay missing.
data['dwell_time_hms'] = data['dwell_time'].map(
    lambda secs: '{:02d}:{:02d}:{:02d}'.format(
        int(secs) // 3600, int(secs) % 3600 // 60, int(secs) % 60
    ),
    na_action='ignore',
)
# Renumber the rows from 0.
data = data.reset_index(drop=True)