# About
In notebook 04, we've found that time shifts are different for different type of trucks.
So, in this notebook i'll try to detect time shifts for different trucks and routes

In [1]:
import os
import numpy as np
import pandas as pd
import logging
from shapely.geometry import Point
import plotly.express as px
import geopandas as gpd
import plotly.express as px
from multiprocessing import Pool
from scipy.stats import zscore
from GPSOdyssey import Polaris, Kepler, Void, Vega
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = 'all'

## Time shifts

* RFID: by default client provides time shifts in +08:00
* GPS: Different for different trucks

In [2]:
def generate_trip_id(gps):
    gps['trip_id'] = gps['date'].astype('str') + ' ' + gps['vehicle_id']
    return gps

def generate_point_id(gps):
    gps['point_id'] = gps['datetime'].astype('str') + ' ' + gps['vehicle_id']
    return gps

## Data

### Map matching report

In [3]:
map_matching_report = pd.read_csv('/Data/Intermediate/MapMatchingReports/MapMatching_october2020.csv')

map_matching_report = Polaris(map_matching_report) \
    .pandas_df_operation(func_name='rename', arguments={'columns': {'truck_id': 'vehicle_id'}}).df

map_matching_report['vehicle_id'] = map_matching_report['vehicle_id'].str.replace(' ', '-')

### 1. Route vehicle assignment

In [5]:
I_ROUTE_VEHICLE_ASSIGNMENT = '/Data/Intermediate/assignments/route_vehicle_assignments_october.csv'

route_vehic_assign = pd.read_csv(I_ROUTE_VEHICLE_ASSIGNMENT)
route_vehic_assign = route_vehic_assign[route_vehic_assign['route_type'] == 'JS-SE'] \
    .groupby(['route_type']).agg({'vehicle_id': 'unique'}).reset_index()

route_vehic_assign = route_vehic_assign.explode('vehicle_id')
route_vehic_assign

Unnamed: 0,route_type,vehicle_id
0,JS-SE,XE-5638S
0,JS-SE,XE-5577J
0,JS-SE,XE-5629T


### 2. RFID

In [5]:
S_RFID_OCTOBER = '/Data/Source/RFID/RFID_october.csv'


rfid = pd.read_csv(S_RFID_OCTOBER)
rfid['is_rfid_active'] = True

RFID_COLUMNS = ['date', 'time', 'unixtime', 'vehicle_id',
                'RFID', 'is_rfid_active', 'Latitude', 'Longitude']
rfid = rfid.loc[:, RFID_COLUMNS]
rfid.columns = [str(x).lower() for x in rfid.columns]

rfid = rfid.sort_values(by='unixtime')
rfid['datetime'] = rfid['date'] + ' ' + rfid['time']


rfid = generate_trip_id(rfid)
rfid = generate_point_id(rfid)

In [6]:
# Select all RFID of JS SE
rfid = rfid.merge(route_vehic_assign[['vehicle_id']],
           on='vehicle_id', how='inner')

### 3. GPS

In [7]:
map_matching_report = map_matching_report.merge(route_vehic_assign[['vehicle_id']],
                          on=['vehicle_id'], how='inner')

In [8]:
%%time
S_CLUSTERED_GPS = '/Data/Source/OctoberGPS/'


vega = Vega(engine='pandas')
gps = vega.read_from_dir(directory=S_CLUSTERED_GPS,
                         file_extensions='.csv',
                         args={'parse_dates': ['time']},
                         concatenate=True, amt_in_parallel=6,
                         filenames=map_matching_report['csv_file'])


gps = Polaris(gps) \
    .select_columns(['truck_id', 'lon', 'lat', 'time', 'lon_match', 'lat_match']) \
    .pandas_df_operation(func_name='rename', arguments={'columns': {'truck_id': 'vehicle_id', 'time': 'datetime'}}) \
    .construct_datetime(datetime='datetime', offset='-02:00') \
    .remove_tz(time_col='datetime') \
    .add_date_col('datetime', 'date') \
    .add_time_col('datetime', 'time') \
    .add_unixtime('datetime', 'unixtime') \
    .df

gps = generate_trip_id(gps)
gps = generate_point_id(gps)
gps['unixtime'] = gps['unixtime'].astype('int')

CPU times: user 11.8 s, sys: 348 ms, total: 12.1 s
Wall time: 12.3 s


# 4. RFID -> GPS

In [9]:
gps['datetime'] = gps['datetime'].astype('str')
rfid['datetime'] = rfid['datetime'].astype('str')

In [10]:
gps_m = gps.merge(rfid[['rfid', 'vehicle_id', 'datetime', 'unixtime',
                        'is_rfid_active', 'latitude', 'longitude']],
                  on=['vehicle_id', 'datetime'], how='outer')

gps_m['is_overlap'] = gps_m['lat_match'].isna()
gps_m.sort_values(['vehicle_id', 'datetime'], inplace=True)

cols2fill = ['lon', 'lat', 'time', 'vehicle_id', 'date', 'datetime', 'time',
             'unixtime_x', 'lon_match', 'lat_match', 'trip_id', 'point_id']
gps_m[cols2fill] = gps_m[cols2fill].fillna(method='ffill')

gps_m['is_rfid_active'] = ~gps_m['rfid'].isna()

In [11]:
gps_m[['latitude', 'longitude']] = gps_m[['latitude', 'longitude']].astype('float', errors='ignore')

In [12]:
gps_m = gps_m[~gps_m['rfid'].isna()]


In [13]:
gps

Unnamed: 0,vehicle_id,lon,lat,datetime,lon_match,lat_match,date,time,unixtime,trip_id,point_id
0,XE 5638S,103.648211,1.330482,2020-10-30 08:16:07,103.648069,1.33034,2020-10-30,08:16:07,1604045767,2020-10-30 XE 5638S,2020-10-30 08:16:07 XE 5638S
1,XE 5638S,103.648185,1.330485,2020-10-30 08:16:12,103.648069,1.33034,2020-10-30,08:16:12,1604045772,2020-10-30 XE 5638S,2020-10-30 08:16:12 XE 5638S
2,XE 5638S,103.648161,1.330488,2020-10-30 08:16:17,103.648069,1.33034,2020-10-30,08:16:17,1604045777,2020-10-30 XE 5638S,2020-10-30 08:16:17 XE 5638S
3,XE 5638S,103.648122,1.330515,2020-10-30 08:16:23,103.648069,1.33034,2020-10-30,08:16:23,1604045783,2020-10-30 XE 5638S,2020-10-30 08:16:23 XE 5638S
4,XE 5638S,103.648069,1.330520,2020-10-30 08:16:29,103.647979,1.33043,2020-10-30,08:16:29,1604045789,2020-10-30 XE 5638S,2020-10-30 08:16:29 XE 5638S
...,...,...,...,...,...,...,...,...,...,...,...
630507,XE 5629T,103.648240,1.330482,2020-10-05 17:55:40,103.648289,1.33012,2020-10-05,17:55:40,1601920540,2020-10-05 XE 5629T,2020-10-05 17:55:40 XE 5629T
630508,XE 5629T,103.648241,1.330485,2020-10-05 17:55:46,103.648289,1.33012,2020-10-05,17:55:46,1601920546,2020-10-05 XE 5629T,2020-10-05 17:55:46 XE 5629T
630509,XE 5629T,103.648241,1.330487,2020-10-05 17:55:52,103.648289,1.33012,2020-10-05,17:55:52,1601920552,2020-10-05 XE 5629T,2020-10-05 17:55:52 XE 5629T
630510,XE 5629T,103.648241,1.330490,2020-10-05 17:55:58,103.648289,1.33012,2020-10-05,17:55:58,1601920558,2020-10-05 XE 5629T,2020-10-05 17:55:58 XE 5629T


# 4. Visualization

In [14]:
MAP_CONF_PATH = 'MAP_CONFIGS/05_TIME_SHIFT_ANALYSIS_1.json'

kepler = Kepler(data={'rfid': rfid, 'gps': gps}, height=800,
                config_path=MAP_CONF_PATH)
kepler.render_kepler_map()
kepler.get_rendered_map()

User Guide: https://docs.kepler.gl/docs/keplergl-jupyter


KeplerGl(config={'version': 'v1', 'config': {'visState': {'filters': [{'dataId': ['rfid'], 'id': 'dqcbs7kgb', …

In [15]:
kepler.save_map_config(path=MAP_CONF_PATH, overwrite_config=True)



In [16]:
# kepler.save_map_to_html_local('time_shifts_of_same_vehicle.html', overwrite_html=True)

# 5. Time shifts

RFID - GPS

In [17]:
route_vehic_assign

Unnamed: 0,route_type,vehicle_id
0,JS-SE,XE-5638S
0,JS-SE,XE-5577J
0,JS-SE,XE-5629T


In [18]:
{
    '2020-10-01 XE-5577J': '-02:00',
    '2020-10-01 XE-5629T': '-02:00',
    '2020-10-01 XE 5638S': '-02:00',
    '2020-10-02 XE 5577J': '-02:00',
}

SyntaxError: EOL while scanning string literal (<ipython-input-18-39a1d34b5381>, line 5)

# Summary

Visual detection is not the way of find time shifts.
Takes extremely much time.