## Load libraries and data

In [32]:
import pandas as pd 
import numpy as np
import datetime
import time
import geopandas as gpd

In [33]:
#load geofence data
zipfile = "TMSUITE-Geofences-asof-0429 SHP.zip"
geofences = gpd.read_file(zipfile)
geofences.head()

Unnamed: 0,name,address,type,location,window_tim,window_t_1,group_ids,group_name,category,geofence_c,...,barangay,municipali,province,region,created,modified,client_ids,client_nam,store_code,geometry
0,City Supermarket - Market Square,"Inside CSI Market Square, Downtown District, D...",rectangle,"{'lat': 16.043410180944765, 'lon': 120.3362203...",1990-12-12 00:00:00,1990-12-12 01:00:00,"[343, 30]","['Nestle', 'Webcast']",Pickup/Dropoff,PHEC0000343131,...,Barangay II (Nueva),DAGUPAN CITY,PANGASINAN,REGION I (ILOCOS REGION),2020/06/10 10:42:55.000,2020/09/10 07:09:45.000,,,,"POLYGON ((120.33711 16.04253, 120.33533 16.042..."
1,Puregold Price Club - Agora,"1 N Domingo St.Cor F Blumentritt, Pedro Cruz, ...",polygon,"{'lat': 14.605119723980204, 'lon': 121.0232510...",1990-12-12 00:00:00,1990-12-12 01:00:00,"[343, 30]","['Nestle', 'Webcast']",Pickup/Dropoff,PHEC0002174857,...,Pedro Cruz,CITY OF SAN JUAN,"NCR, SECOND DISTRICT",NCR (NATIONAL CAPITAL REGION),2020/06/10 10:43:47.000,2020/07/30 06:59:55.000,,,,"POLYGON ((121.02351 14.60612, 121.02432 14.605..."
2,Ultra Mega Multi Sales - Kalookan,"163 Teofilo Samson Ave, Caloocan, 1420 Metro M...",rectangle,"{'lat': 14.739714110694091, 'lon': 121.0251580...",1990-12-12 00:00:00,1990-12-12 01:00:00,"[343, 30]","['Nestle', 'Webcast']",Pickup/Dropoff,PHEC0002168891,...,Barangay 168,CALOOCAN CITY,"NCR, THIRD DISTRICT",NCR (NATIONAL CAPITAL REGION),2020/06/10 10:43:47.000,2020/09/10 04:53:41.000,,,,"POLYGON ((121.02558 14.73925, 121.02473 14.739..."
3,Rustan's - Marikina,"27 Royal Palm St, Marikina, 1800 Metro Manila,...",rectangle,"{'lat': 14.6503079761005, 'lon': 121.116088176...",Invalid date,Invalid date,"[343, 30]","['Nestle', 'Webcast']",Pickup/Dropoff,PHEC0002598218,...,Marikina Heights (Concepcion),CITY OF MARIKINA,"NCR, SECOND DISTRICT",NCR (NATIONAL CAPITAL REGION),2020/06/10 10:43:55.000,2020/08/20 04:21:18.000,,,,"POLYGON ((121.11639 14.65008, 121.11579 14.650..."
4,Rustan's - Fairview,"6 General Aguinaldo Ave, Cubao, Quezon City, 1...",polygon,"{'lat': 14.622015808686214, 'lon': 121.0535702...",Invalid date,Invalid date,"[343, 30]","['Nestle', 'Webcast']",Pickup/Dropoff,PHEC0002578706,...,Socorro,QUEZON CITY,"NCR, SECOND DISTRICT",NCR (NATIONAL CAPITAL REGION),2020/06/10 10:43:51.000,2020/08/20 04:14:16.000,,,,"POLYGON ((121.05288 14.62244, 121.05419 14.622..."


## Get points inside geofences

In [34]:
## load data
gps_points = pd.read_csv('Nestle_April_Kodigo_GPS-Formatted-v2.csv')
gps_points = gpd.GeoDataFrame(gps_points, geometry=gpd.points_from_xy(gps_points['longitude'], gps_points['latitude']))
gps_points.head()

Unnamed: 0,created,plateno,longitude,latitude,geometry
0,2021-04-06 03:19:10,KAD4602,124.758758,8.521865,POINT (124.75876 8.52187)
1,2021-04-11 10:59:15,KAD4602,124.758278,8.520842,POINT (124.75828 8.52084)
2,2021-04-21 10:28:54,KAD4602,124.758073,8.520575,POINT (124.75807 8.52057)
3,2021-04-07 12:41:40,KAD4602,124.758327,8.520905,POINT (124.75833 8.52091)
4,2021-04-06 03:22:08,KAD4602,124.758758,8.521865,POINT (124.75876 8.52187)


In [35]:
# set coordinate referece system to 4326
gps_points.set_crs(epsg=4326, inplace=True)

Unnamed: 0,created,plateno,longitude,latitude,geometry
0,2021-04-06 03:19:10,KAD4602,124.758758,8.521865,POINT (124.75876 8.52187)
1,2021-04-11 10:59:15,KAD4602,124.758278,8.520842,POINT (124.75828 8.52084)
2,2021-04-21 10:28:54,KAD4602,124.758073,8.520575,POINT (124.75807 8.52057)
3,2021-04-07 12:41:40,KAD4602,124.758327,8.520905,POINT (124.75833 8.52091)
4,2021-04-06 03:22:08,KAD4602,124.758758,8.521865,POINT (124.75876 8.52187)
...,...,...,...,...,...
58070,2021-04-01 02:08:32,KAD4602,124.758870,8.522120,POINT (124.75887 8.52212)
58071,2021-04-01 02:07:32,KAD4602,124.758870,8.522120,POINT (124.75887 8.52212)
58072,2021-04-01 02:13:33,KAD4602,124.758870,8.522120,POINT (124.75887 8.52212)
58073,2021-04-01 02:12:32,KAD4602,124.758870,8.522120,POINT (124.75887 8.52212)


In [36]:
#make sure they have the same coordinate reference system
print(gps_points.crs)
print(geofences.crs)

epsg:4326
epsg:4326


In [37]:
### Spatial join
points_inside_geofence = gpd.sjoin(gps_points, geofences, how = 'inner', op = 'within')

In [38]:
# select and rename columns
points_inside_geofence = points_inside_geofence.loc[:, ['plateno', 'name','created_left']]
points_inside_geofence = points_inside_geofence.rename(columns = {'name' :'geofence_name','created_left':'datestamp'})

In [39]:
# convert datestamp
points_inside_geofence['datestamp'] = pd.to_datetime(points_inside_geofence['datestamp'])

In [40]:
# sort values by datestamp
data = points_inside_geofence.sort_values(by = 'datestamp')

### Final data of points inside geofence

In [41]:
data

Unnamed: 0,plateno,geofence_name,datestamp
57716,KAD4602,PH Cagayan Grocery - Source,2021-04-01 00:00:33
57716,KAD4602,PH Cagayan Grocery - Destination,2021-04-01 00:00:33
58026,KAD4602,PH Cagayan Grocery - Source,2021-04-01 00:01:33
58026,KAD4602,PH Cagayan Grocery - Destination,2021-04-01 00:01:33
57613,KAD4602,PH Cagayan Grocery - Destination,2021-04-01 00:02:33
...,...,...,...
54354,KAD4602,PH Cagayan Grocery - Source,2021-04-30 16:15:37
54360,KAD4602,PH Cagayan Grocery - Source,2021-04-30 16:15:43
54360,KAD4602,PH Cagayan Grocery - Destination,2021-04-30 16:15:43
54352,KAD4602,PH Cagayan Grocery - Source,2021-04-30 16:15:49


In [43]:
# export to csv
data.to_csv('points_inside_geofences.csv', index=False)

## Generate CICO

In [11]:
# make sure datestamp column is a datestamp object
data['datestamp'] = pd.to_datetime(data['datestamp'])
data.dtypes

plateno                  object
geofence_name            object
datestamp        datetime64[ns]
dtype: object

In [12]:
# create a tracking period of 30 mins
data['track_period'] = data['datestamp'].dt.floor('30T')

In [13]:
# groupby and get timestart, end, count, duration
data['time_start'] = data.groupby(['plateno','geofence_name','track_period']).transform('min')
data['time_end'] = data.groupby(['plateno','geofence_name','track_period']).transform('max')['datestamp']
data['count'] = data.groupby(['plateno','geofence_name','track_period']).transform('count')['datestamp']
data['duration'] = (data['time_end'] - data['time_start']).dt.total_seconds()


In [14]:
# filter those that are less than 5 points and 5 mins
#data = data.loc[(data['count'] > 5) & (data['duration'] > 300), :]

# sort by plateno, geofence, and track period
data = data.sort_values(by = ['plateno','geofence_name','track_period'])
data = data.loc[:, ['plateno','geofence_name','track_period','time_start','time_end','duration','count']]

In [15]:
# remove duplicates and sort values
data.drop_duplicates(inplace = True)
data = data.sort_values(by = ['plateno','track_period'])
data = data.sort_values(by = ['plateno','geofence_name'])

In [16]:
# get time end at prev loc and time at next loc
data['time_end_prev_loc'] = data.groupby(['plateno','geofence_name'])['time_end'].shift(1)
data['time_start_next_loc'] = data.groupby(['plateno','geofence_name'])['time_start'].shift(-1)


In [17]:
#get time diff from previous and next locations
data['time_diff_next_loc'] = (data['time_start_next_loc'] - data['time_end']).dt.total_seconds()
data['time_diff_prev_loc'] = (data['time_start'] - data['time_end_prev_loc']).dt.total_seconds()

In [18]:
# check if continuing
# continuing if the timestamp from previous and next location is less than 5 mins
data['continue_next_loc']= data['time_diff_next_loc'] < 300
data['continue_prev_loc'] = data['time_diff_prev_loc'] < 300
data['continuing'] = data['continue_prev_loc'] & data['continue_next_loc']

In [19]:
#filter not continuing CICO
data = data.loc[(data['continuing'] == False), :]

In [20]:
# adjust for time end
data['lead_time_diff_prev_loc'] = data.groupby(['plateno','geofence_name'])['time_diff_prev_loc'].shift(-1)
data['lead_time_end'] = data.groupby(['plateno','geofence_name'])['time_end'].shift(-1)

In [21]:
# comppute for actual time end
c1 = data['time_end_prev_loc'].isnull()
c2 = data['lead_time_diff_prev_loc'] < 300
c3 = data['time_diff_next_loc'] <= 300

condition = ((c1 | c2) & c3)

data['actual_time_end'] = np.where(condition, data['lead_time_end'], data['time_end'])

data['actual_time_end'] = np.where(data['time_diff_next_loc'].isnull(), data['time_end'], data['actual_time_end'])

In [22]:
# filter those with actual time and select relevant columns
data = data.loc[data['actual_time_end'].notnull(),:]
data = data.loc[:, ['plateno','geofence_name','time_start','actual_time_end']]

In [23]:
# compute for dwell time
data['duration'] = (data['actual_time_end'] - data['time_start']).dt.total_seconds()

In [24]:
# remove cico that were used as a reference to adjust time
data['ranking'] = data.groupby(['plateno','geofence_name','actual_time_end'])['duration'].rank(ascending = False)
data = data.loc[data['ranking'] != 2, :]

In [25]:
# select columns
data = data.loc[:, ['plateno','geofence_name','time_start','actual_time_end','duration']]

In [26]:
# rename columns
data = data.rename(columns= {'time_start':'datestamp_entry', 'actual_time_end': 'datestamp_left','duration' : 'dwell_time'}).reset_index(drop=True)

In [27]:
# sory by datestamp
data = data.sort_values(by = 'datestamp_entry').reset_index(drop=True)

In [28]:
# add dwell time on HMS
data['dwell_time_hms'] = pd.to_datetime(data['dwell_time'], unit='s').dt.strftime("%H:%M:%S")

### Final CICO data

In [29]:
data = data.reset_index().rename(columns= {'index' : 'cico_id'})

In [30]:
data

Unnamed: 0,cico_id,plateno,geofence_name,datestamp_entry,datestamp_left,dwell_time,dwell_time_hms
0,0,KAD4602,PH Cagayan Grocery - Source,2021-04-01 00:00:33,2021-04-03 07:39:30,200337.0,07:38:57
1,1,KAD4602,PH Cagayan Grocery - Destination,2021-04-01 00:00:33,2021-04-03 07:39:30,200337.0,07:38:57
2,2,KAD4602,Robinson's Supermarket - Gusa,2021-04-03 08:15:54,2021-04-03 09:43:29,5255.0,01:27:35
3,3,KAD4602,Puregold Price Club - Cagayan De,2021-04-03 09:49:15,2021-04-03 09:51:09,114.0,00:01:54
4,4,KAD4602,McDonald's CDO Limketkai,2021-04-03 09:50:54,2021-04-03 09:51:24,30.0,00:00:30
...,...,...,...,...,...,...,...
354,354,KAD4602,McDonald's CDO Ororama,2021-04-30 11:23:17,2021-04-30 11:23:29,12.0,00:00:12
355,355,KAD4602,Gaisano City - JR Borja,2021-04-30 11:24:38,2021-04-30 11:24:38,0.0,00:00:00
356,356,KAD4602,Robinson's Supermarket - Gusa,2021-04-30 11:38:18,2021-04-30 11:38:18,0.0,00:00:00
357,357,KAD4602,PH Cagayan Grocery - Destination,2021-04-30 14:33:07,2021-04-30 16:15:49,6162.0,01:42:42


In [31]:
data.to_csv('KAD4602_ver2_python.csv', index=False)

## Normalizing CICO

In [34]:
# sort by plateno and datestamp entry
data = data.sort_values(by = ['plateno', 'datestamp_entry', 'geofence_name'])

In [35]:
# check if continuing
data['prev_match'] = data['geofence_name'].eq(data['geofence_name'].shift(1))
data['next_match'] = data['geofence_name'].eq(data['geofence_name'].shift(-1))
data['continuing'] = np.where((data['prev_match']) & (data['next_match']), 1, 0)

In [36]:
# select relevant columns and those that are not continuing
data = data.loc[data['continuing'] == 0, ['plateno', 'geofence_name','datestamp_entry', 'datestamp_left', 'dwell_time','dwell_time_hms', 'continuing']]

In [37]:
#check if same next
data['has_same_next'] =  np.where(data['geofence_name'].eq(data['geofence_name'].shift(-1)), 1, 0)

In [38]:
# adjust datestamp left
data['new_datestamp_left'] = np.where(data['has_same_next'] == 1, data['datestamp_left'].shift(-1), data['datestamp_left'])

In [39]:
# remove those with same next
data['to_remove'] = data['has_same_next'].shift(1) == 1
data = data.loc[data['to_remove'] == False, ['plateno', 'geofence_name', 'datestamp_entry','new_datestamp_left' ,'dwell_time', 'dwell_time_hms']]

In [40]:
data.rename(columns= {'new_datestamp_left' :'datestamp_left'}, inplace=True)

In [41]:
data['dwell_time'] = (data['datestamp_left'] - data['datestamp_entry']).dt.total_seconds()
data['dwell_time_hms'] = pd.to_datetime(data['dwell_time'], unit='s').dt.strftime("%H:%M:%S")
data.reset_index(drop = True, inplace = True)

### Final normalized data

In [43]:
data.to_csv('normalized_cico.csv', index=False)