# About

In this notebook we estimate how well RFID activations match the GPS clusters when different time-shift adjustments are applied to the GPS timestamps.

In [1]:
import os
from utils.idGenerators import generate_point_id, generate_trip_id
import numpy as np
import pandas as pd
import logging
from shapely.geometry import Point
import plotly.express as px
import geopandas as gpd
import plotly.express as px
from multiprocessing import Pool
from scipy.stats import zscore
from GPSOdyssey import Polaris, Kepler, Void, Vega
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = 'all'

# 1. Data

* Good RFID candidates
* RFID merge


### 1.1. GPS records

In [2]:
%%time
# Directory that holds the clustered GPS CSV files.
S_CLUSTERED_GPS = '/Data/Outputs/GPS_Clusters/'

# Options handed to Vega.read_from_dir.
# NOTE(review): presumably `args` is forwarded to the CSV reader and
# `amt_in_parallel` caps concurrent reads - confirm against GPSOdyssey.
gps_read_options = {
    'directory': S_CLUSTERED_GPS,
    'file_extensions': '.csv',
    'args': {'parse_dates': ['time']},
    'concatenate': True,
    'amt_in_parallel': 6,
}

vega = Vega(engine='pandas')
gps = vega.read_from_dir(**gps_read_options)

CPU times: user 953 ms, sys: 208 ms, total: 1.16 s
Wall time: 1.27 s


#### 1.1.1 Time shift experiments

In [3]:
def preprocess_gps(gps, time_shift='-02:00'):
    """Run the Polaris preprocessing chain on GPS records and attach trip/point ids.

    Parameters
    ----------
    gps
        Records accepted by ``Polaris``. They must provide the columns
        ``truck_id``, ``lon``, ``lat``, ``time``, ``lon_match``, ``lat_match``
        and ``cluster_id``, which are the ones selected below.
    time_shift : str, default '-02:00'
        Passed as ``offset`` to ``Polaris.construct_datetime``.

    Returns
    -------
    The frame produced by the chain, passed through ``generate_trip_id`` and
    ``generate_point_id``, with ``unixtime`` cast to ``int``.
    """
    kept_columns = ['truck_id', 'lon', 'lat', 'time', 'lon_match', 'lat_match', 'cluster_id']
    renamed_columns = {'truck_id': 'vehicle_id', 'time': 'datetime'}

    prepared = (
        Polaris(gps)
        .select_columns(kept_columns)
        .pandas_df_operation(func_name='rename', arguments={'columns': renamed_columns})
        .construct_datetime(datetime='datetime', offset=time_shift)
        .remove_tz(time_col='datetime')
        .add_date_col('datetime', 'date')
        .add_time_col('datetime', 'time')
        .add_unixtime('datetime', 'unixtime')
        .df
    )

    # Trip ids are generated first, then point ids on the result.
    prepared = generate_point_id(generate_trip_id(prepared))
    prepared['unixtime'] = prepared['unixtime'].astype('int')
    return prepared


# Split the records on the calendar date of `time`. ISO-formatted date strings
# order lexicographically, so the string comparison selects every date after
# 2020-10-24.
gps_dates = gps['time'].dt.date.astype('str')
is_after_2020_10_24 = gps_dates > '2020-10-24'
gps_1h = gps[is_after_2020_10_24]
gps_2h = gps[~is_after_2020_10_24]

# Records after 2020-10-24 are preprocessed with a '-01:00' shift, the rest with
# '-02:00'; the parts are stacked (later part first) under a fresh 0..n-1 index.
shifted_parts = [
    preprocess_gps(part, time_shift=shift)
    for part, shift in ((gps_1h, '-01:00'), (gps_2h, '-02:00'))
]
gps = pd.concat(shifted_parts).reset_index(drop=True)

### 1.2. RFID

In [4]:
S_RFID_OCTOBER = '/Data/Source/RFID/RFID_october.csv'
RFID_COLUMNS = ['date', 'time', 'unixtime', 'vehicle_id',
                'RFID', 'is_rfid_active', 'Latitude', 'Longitude']

# Load the RFID log, flag every row as an activation, keep the needed columns
# under lower-case names, order by unixtime and build a text `datetime` key
# from the `date` and `time` columns.
rfid = (
    pd.read_csv(S_RFID_OCTOBER)
    .assign(is_rfid_active=True)
    .loc[:, RFID_COLUMNS]
    .rename(columns=lambda name: str(name).lower())
    .sort_values(by='unixtime')
    .assign(datetime=lambda frame: frame['date'] + ' ' + frame['time'])
)

# Trip ids are generated first, then point ids on the result.
rfid = generate_point_id(generate_trip_id(rfid))

# Select only RFID records whose trip also appears in the GPS data
JS_SE_TRIPS = gps[['trip_id']].drop_duplicates(keep='last')
rfid = rfid.merge(JS_SE_TRIPS, how='inner', on=['trip_id'])
rfid.head()

# Remove the RFID record with an incorrect location
rfid = rfid[rfid['point_id'] != '2020-10-02 16:38:12 XE-5629T']

Unnamed: 0,date,time,unixtime,vehicle_id,rfid,is_rfid_active,latitude,longitude,datetime,trip_id,point_id
0,2020-10-01,06:50:46,1601535000.0,XE-5629T,E28011057000020F9BDEA06B,True,1.343322,103.738312,2020-10-01 06:50:46,2020-10-01 XE-5629T,2020-10-01 06:50:46 XE-5629T
1,2020-10-01,07:10:25,1601536000.0,XE-5629T,E200001C2611006620802535,True,1.348958,103.751651,2020-10-01 07:10:25,2020-10-01 XE-5629T,2020-10-01 07:10:25 XE-5629T
2,2020-10-01,07:26:38,1601537000.0,XE-5629T,E28011057000020F9BDDFB4C,True,1.351008,103.747145,2020-10-01 07:26:38,2020-10-01 XE-5629T,2020-10-01 07:26:38 XE-5629T
3,2020-10-01,07:42:50,1601538000.0,XE-5629T,E28011057000020F9BDDBD3C,True,1.357562,103.748499,2020-10-01 07:42:50,2020-10-01 XE-5629T,2020-10-01 07:42:50 XE-5629T
4,2020-10-01,07:55:22,1601539000.0,XE-5629T,E28011057000020F9BDD9341,True,1.357328,103.751373,2020-10-01 07:55:22,2020-10-01 XE-5629T,2020-10-01 07:55:22 XE-5629T


### 1.3. RFID & GPS assignment output

In [5]:
# Destination CSV for the RFID activations and the GPS cluster id assigned to each of them.
I_GPS_RFID_ASSIGNMENT = '/Data/Intermediate/assignments/rfid_gps_clusters_assignments.csv'

# 2. Merge: RFID & clustered GPS

In [6]:
# `datetime` is one of the merge keys, so store it as text in both frames.
for frame in (rfid, gps):
    frame['datetime'] = frame['datetime'].astype('str')

#### 2.1. Merge with ffill of GPS

In [7]:
# 1. Full outer join on vehicle and timestamp: RFID activations whose timestamp
#    has no exact GPS counterpart are kept as rows with empty GPS columns.
merge_columns = ['vehicle_id', 'datetime']
gps_m = gps.merge(rfid, on=merge_columns, how='outer', suffixes=['_gps', '_rfid'])

# 2. Sort by vehicle, then by timestamp, so each RFID-only row sits directly
#    after the latest earlier GPS record of the same vehicle.
gps_m = gps_m.sort_values(merge_columns)

# 3. Forward-fill the GPS columns (everything up to `point_id_gps`) per vehicle.
#    Filling the whole frame at once would let an RFID row that precedes a
#    vehicle's first GPS record inherit the last GPS values (including
#    `cluster_id`) of the previous vehicle; grouping keeps the fill inside one
#    vehicle and leaves such rows empty instead. The merge keys themselves are
#    never missing after the join, so they are excluded from the fill.
#    NOTE(review): assumes `vehicle_id` has no missing values - rows with a
#    missing group key are not filled by groupby.
gps_columns = list(gps_m.loc[:, :'point_id_gps'].columns)
fill_columns = [column for column in gps_columns if column not in merge_columns]
gps_m[fill_columns] = gps_m.groupby('vehicle_id', sort=False)[fill_columns].ffill()

# 3. Metrics computing

* How many RFID activations (count and percentage) were not captured in any cluster?
* How many RFID activations (count and percentage) were captured in a cluster?

In [8]:
# Keep only the rows that come from an RFID activation, together with the GPS
# date/time and cluster id attached to them by the merge.
activation_columns = ['vehicle_id', 'date_gps', 'time_gps', 'cluster_id',
                      'rfid', 'date_rfid', 'time_rfid']
is_activation = gps_m['is_rfid_active'].eq(True)
rfid_activations = gps_m.loc[is_activation, activation_columns]

# Persist the assignment for later use.
rfid_activations.to_csv(I_GPS_RFID_ASSIGNMENT, index=False)

In [9]:
# Count the RFID activations that were assigned to a GPS cluster.
# A `cluster_id` of -1 marks "not captured in a cluster". A missing `cluster_id`
# (activation with no GPS record to inherit from) is also treated as not
# captured: NaN != -1 evaluates to True, so it would otherwise be counted as a
# capture.
cluster_ids = rfid_activations['cluster_id']
total_amt = int(rfid_activations.shape[0])
captured_amt = int((cluster_ids.notna() & (cluster_ids != -1)).sum())
not_captured_amt = total_amt - captured_amt

# With no activations at all the shares are undefined; report NaN instead of
# dividing by zero.
captured_share = captured_amt / total_amt if total_amt else float('nan')
not_captured_share = not_captured_amt / total_amt if total_amt else float('nan')

pd.DataFrame({
    'RFID_AMT_captured_in_cluster': [captured_amt],
    'RFID_AMT_not_captured_in_cluster': [not_captured_amt],
    'RFID_AMT_total': [total_amt],
    'RFID_PERC_captured_in_cluster': [captured_share],
    'RFID_PERC_not_captured_in_cluster': [not_captured_share],
})

Unnamed: 0,RFID_AMT_captured_in_cluster,RFID_AMT_not_captured_in_cluster,RFID_AMT_total,RFID_PERC_captured_in_cluster,RFID_PERC_not_captured_in_cluster
0,2164,9,2173,0.995858,0.004142


# 4. RFID activations visualization

In [10]:
MAP_CONFIG = 'MAP_CONFIGS/06_RFID_MERGE_ANALYSIS_JS_SE.json'

# Only rows flagged as RFID activations go on the map; reset_index() turns the
# current index into a regular column of the layer data.
rfid_active_rows = gps_m[gps_m['is_rfid_active'] == True].reset_index()
kepler = Kepler({'gps': rfid_active_rows}, config_path=MAP_CONFIG, height=800)

kepler.render_kepler_map()
kepler.get_rendered_map()

User Guide: https://docs.kepler.gl/docs/keplergl-jupyter


KeplerGl(config={'version': 'v1', 'config': {'visState': {'filters': [{'dataId': ['gps'], 'id': 'amfe018bq', '…

In [11]:
# kepler.save_map_config(path=MAP_CONFIG, overwrite_config=True)