# About

In this notebook we estimate how well RFID activations match the clustered GPS records when different time-shift adjustments are applied to the GPS timestamps.

In [1]:
import os
from utils.idGenerators import generate_point_id, generate_trip_id
import numpy as np
import pandas as pd
import logging
from shapely.geometry import Point
import plotly.express as px
import geopandas as gpd
import plotly.express as px
from multiprocessing import Pool
from scipy.stats import zscore
from GPSOdyssey import Polaris, Kepler, Void, Vega
from IPython.core.interactiveshell import InteractiveShell
# Display the result of every top-level expression in a cell, not only the last one.
InteractiveShell.ast_node_interactivity = 'all'

# 1. Data

* Good RFID candidates
* RFID merge


### 1.1. GPS records

In [2]:
%%time
S_CLUSTERED_GPS = '/Data/Outputs/GPS_Clusters/'

# Read every `.csv` under the clustered-GPS directory into a single frame.
# `args` is presumably forwarded to the underlying pandas reader so that `time`
# is parsed as a datetime column — TODO confirm against Vega.read_from_dir.
gps_read_options = {'parse_dates': ['time']}

vega = Vega(engine='pandas')
gps = vega.read_from_dir(
    directory=S_CLUSTERED_GPS,
    file_extensions='.csv',
    args=gps_read_options,
    concatenate=True,
    amt_in_parallel=6,
)

CPU times: user 746 ms, sys: 225 ms, total: 971 ms
Wall time: 1.13 s


#### 1.1.1 Time shift experiments

In [3]:
def preprocess_gps(gps, time_shift='-02:00'):
    """Prepare raw clustered GPS records for merging with RFID data.

    The frame is passed through a Polaris pipeline that keeps the columns
    needed downstream, renames `truck_id` -> `vehicle_id` and `time` ->
    `datetime`, rebuilds `datetime` with the given offset, and derives the
    `date`, `time` and `unixtime` columns. Trip and point ids are then
    attached via `generate_trip_id` / `generate_point_id`.

    Parameters
    ----------
    gps : DataFrame
        Raw GPS records; must contain `truck_id`, `lon`, `lat`, `time`,
        `lon_match`, `lat_match` and `cluster_id`.
    time_shift : str
        Offset string handed to `Polaris.construct_datetime` as `offset`
        (presumably a UTC-offset such as '-02:00' — TODO confirm semantics).

    Returns
    -------
    DataFrame
        The preprocessed records with `unixtime` cast to int.
    """
    selected_columns = ['truck_id', 'lon', 'lat', 'time', 'lon_match', 'lat_match', 'cluster_id']
    renamed_columns = {'truck_id': 'vehicle_id', 'time': 'datetime'}

    prepared = (
        Polaris(gps)
        .select_columns(selected_columns)
        .pandas_df_operation(func_name='rename', arguments={'columns': renamed_columns})
        .construct_datetime(datetime='datetime', offset=time_shift)
        .remove_tz(time_col='datetime')
        .add_date_col('datetime', 'date')
        .add_time_col('datetime', 'time')
        .add_unixtime('datetime', 'unixtime')
        .df
    )

    # Trip ids are generated first, then point ids, matching the original order.
    prepared = generate_point_id(generate_trip_id(prepared))
    prepared['unixtime'] = prepared['unixtime'].astype('int')
    return prepared


# Records dated after 2020-10-24 get a -01:00 shift, everything up to and
# including that date gets -02:00 (presumably the 2020 DST switch — TODO confirm).
# The comparison is done on ISO date strings, so it is lexicographic.
record_day = gps['time'].dt.date.astype('str')
is_after_2020_10_24 = record_day > '2020-10-24'

gps_1h = gps[is_after_2020_10_24]
gps_2h = gps[~is_after_2020_10_24]

# NOTE(review): `gps` is rebound here, so re-running this cell without reloading
# the raw data would feed already-preprocessed records back into the split.
shifted_parts = [
    preprocess_gps(gps_1h, time_shift='-01:00'),
    preprocess_gps(gps_2h, time_shift='-02:00'),
]
gps = pd.concat(shifted_parts).reset_index(drop=True)

### 1.2. RFID

In [4]:
S_RFID_OCTOBER = '/Data/Source/RFID/RFID_october.csv'
RFID_COLUMNS = ['date', 'time', 'unixtime', 'vehicle_id',
                'RFID', 'is_rfid_active', 'Latitude', 'Longitude']

# Load the RFID log, flag every row as an activation, keep the working columns,
# lower-case their names and order the rows by unixtime.
rfid = (
    pd.read_csv(S_RFID_OCTOBER)
    .assign(is_rfid_active=True)
    .loc[:, RFID_COLUMNS]
    .rename(columns=lambda name: str(name).lower())
    .sort_values(by='unixtime')
)
rfid['datetime'] = rfid['date'] + ' ' + rfid['time']

rfid = generate_point_id(generate_trip_id(rfid))

# Keep only RFID records whose trip also exists in the GPS data:
# one row per distinct GPS trip_id, then an inner join on it.
is_last_row_of_trip = ~gps['trip_id'].duplicated(keep='last')
JS_SE_TRIPS = gps.loc[is_last_row_of_trip, ['trip_id']]
rfid = rfid.merge(JS_SE_TRIPS, how='inner', on=['trip_id'])

# Drop the single RFID record known to carry an incorrect location.
INCORRECT_LOCATION_POINT_ID = '2020-10-02 16:38:12 XE-5629T'
rfid = rfid.loc[rfid['point_id'].ne(INCORRECT_LOCATION_POINT_ID)]

### 1.3. RFID & GPS assignment output

In [5]:
# Path of the RFID <-> GPS-cluster assignment CSV.
# NOTE(review): not referenced by any other cell visible in this notebook — confirm it is still needed.
I_GPS_RFID_ASSIGNMENT = '/Data/Intermediate/assignments/rfid_gps_clusters_assignments.csv'

# 2. Merge: RFID & clustered GPS

In [6]:
# `datetime` is a join key below; cast it to str in both frames so the
# merge compares values of the same type.
for frame in (rfid, gps):
    frame['datetime'] = frame['datetime'].astype('str')

### 2.1. Merge with forward-fill of GPS columns

In [7]:
# 1. Full outer join on (vehicle, datetime): RFID activations with no GPS record
#    at exactly the same timestamp are kept as rows of their own.
merge_columns = ['vehicle_id', 'datetime']
gps_m = gps.merge(rfid, on=merge_columns, how='outer', suffixes=['_gps', '_rfid'])

# 2. Sort by vehicle, then by the datetime string (reassigned rather than sorted
#    in place so the cell's data flow stays explicit).
gps_m = gps_m.sort_values(merge_columns)

# 3. Forward-fill the GPS-side columns so each RFID-only row inherits the
#    preceding GPS record. The fill is done per vehicle: a plain frame-wide
#    ffill would copy the last GPS row of one vehicle into the leading RFID-only
#    rows of the next vehicle in sort order.
#    Assumes `vehicle_id` is never null (it is a merge key on both sides).
gps_columns = list(gps_m.loc[:, :'point_id_gps'].columns)
fill_columns = [col for col in gps_columns if col not in merge_columns]
gps_m[fill_columns] = gps_m.groupby('vehicle_id', sort=False)[fill_columns].ffill()

# 3. RFID activations visualization

In [8]:
# Distinct vehicle ids present in the merged frame.
gps_m['vehicle_id'].unique()

array(['XE-5577J', 'XE-5629T', 'XE-5638S'], dtype=object)

In [9]:
# Column layout after the outer merge; overlapping names carry the _gps / _rfid suffixes.
gps_m.columns

Index(['vehicle_id', 'lon', 'lat', 'datetime', 'lon_match', 'lat_match',
       'cluster_id', 'date_gps', 'time_gps', 'unixtime_gps', 'trip_id_gps',
       'point_id_gps', 'date_rfid', 'time_rfid', 'unixtime_rfid', 'rfid',
       'is_rfid_active', 'latitude', 'longitude', 'trip_id_rfid',
       'point_id_rfid'],
      dtype='object')

### 3.1. Aggregation of cluster coordinate means & back merge

In [10]:
cluster_group_cols = ['trip_id_gps', 'cluster_id']
cluster_agg_cols = ['cluster_lon_mean', 'cluster_lat_mean', 'rfid_amt_in_cluster']

# Per (trip, cluster): mean matched coordinates and the number of RFID activations.
# Rows with cluster_id == -1 are excluded (presumably the "no cluster" label — TODO confirm).
gps_clusters_agg = (
    gps_m[gps_m['cluster_id'] != -1]
    .groupby(cluster_group_cols)
    .agg(cluster_lon_mean=('lon_match', 'mean'),
         cluster_lat_mean=('lat_match', 'mean'),
         rfid_amt_in_cluster=('is_rfid_active', 'sum'))
    .reset_index()
)

# Attach the aggregates back to every row. Any aggregate columns left over from a
# previous run of this cell are dropped first, so re-running it does not produce
# duplicated `_x` / `_y` columns.
gps_m = (
    gps_m
    .drop(columns=cluster_agg_cols, errors='ignore')
    .merge(gps_clusters_agg, on=cluster_group_cols, how='left')
)

In [11]:
# Human-readable cluster label, e.g. for use as a categorical field on the map.
cluster_id_text = gps_m['cluster_id'].astype('str')
gps_m['cluster'] = 'cluster: ' + cluster_id_text

In [12]:
MAP_CONFIG = 'MAP_CONFIGS/09_RFID_GPS_CLUSTER_VISUALIZATION.json'

# Expose the merged frame to the map under the dataset name 'gps'.
kepler_datasets = {'gps': gps_m}
kepler = Kepler(kepler_datasets, config_path=MAP_CONFIG, height=1000)

# Render first, then return the rendered map so the notebook displays it.
kepler.render_kepler_map()
kepler.get_rendered_map()

User Guide: https://docs.kepler.gl/docs/keplergl-jupyter


KeplerGl(config={'version': 'v1', 'config': {'visState': {'filters': [{'dataId': ['gps'], 'id': '1ffcx43y5', '…

In [None]:
# kepler.save_map_to_html_local(path='RFID_GPS_CLUSTER_VISUALIZATION.html', use_existing_render=True)

In [14]:
# Save the current map configuration to MAP_CONFIG — the same path the map was
# initialised from — with overwrite_config=True.
# NOTE(review): presumably this replaces the stored config on every run; confirm
# that overwriting it during a plain "Run All" is intended.
kepler.save_map_config(path=MAP_CONFIG, overwrite_config=True)

