# Mobility data converter

## Import modules

In [1]:
import sys
sys.path.insert(1, '../src/utils/')
from my_utils import *

import pandas as pd
import numpy as np

## Introduction

The goal is to convert the raw data into the following format (the first rows of an example file are shown below):

In [2]:
# Load an already-converted example file and preview its first two rows
example_path = '../data/processed/example_5nodes.csv'
df_example = pd.read_csv(example_path)
df_example.head(n=2)

Unnamed: 0,timestamp,node_id,latitude,longitude,neighbours,rsu
0,1423807260,101,40.641606,-8.652702,[{'node_id': '102'| 'rssi': '46'| 'rsu': 0}],1
1,1423807260,102,40.641211,-8.653796,[{'node_id': '101'| 'rssi': '46'| 'rsu': 1}|{'...,0


## Variables

In [3]:
# Column names assigned, in file order, to the raw CSV when it is loaded
cols = [
    'time_index', 'entity_id', 'location', 'speed', 'heading', 'altitude',
    'rssi', 'obu_id', 'road', 'class', 'test', 'receiverid',
    'receivertype', 'stationid', 'stationtype', 'semimajorconf', 'semiminorconf', 'semimajororient',
    'altitudeconf', 'headingconf', 'speedconf', 'vehiclerole', 'drivedirection', 'length',
    'width', 'acceleration', 'accelerationconf', 'curvature', 'curvatureconf', 'yawrate',
    'yawrateconf', 'brakepedal', 'gaspedal', 'emergencybrake', 'collisionwarning', 'accengaged',
    'cruisecontrol', 'speedlimiter', 'specialvehicle',
]

# Subset of the columns above that is kept for the conversion
features = [
    'time_index',
    'entity_id',
    'location',
    'rssi',
    'stationid',
]

In [4]:
# Selected ids: RSUs are given by their short post name ('p' + post number),
# OBUs by their numeric station id
rsu_ids = ['p' + str(post_number) for post_number in (3, 5, 6, 26, 19)]
obu_ids = [
    50, 51, 52, 60, 86,
    87, 89, 90, 97, 99,
]

In [5]:
# Converts short ids to long ids.
# This cell overwrites rsu_ids with its own result, so running it a second time
# would pass the already-converted long ids back into build_posts_ids. Keeping
# only the part after the last ':' restores the short id first (short ids contain
# no ':' and are left unchanged), which makes the cell safe to re-run.
rsu_ids = build_posts_ids([rsu_id.rsplit(':', 1)[-1] for rsu_id in rsu_ids])
rsu_ids

['urn:ngsi-ld:Values:aveiro_cam:p3',
 'urn:ngsi-ld:Values:aveiro_cam:p5',
 'urn:ngsi-ld:Values:aveiro_cam:p6',
 'urn:ngsi-ld:Values:aveiro_cam:p26',
 'urn:ngsi-ld:Values:aveiro_cam:p19']

In [6]:
# selected time period, written as timestamp strings with a '+00' offset;
# the filter in "Converting data" keeps rows with data_init <= time_index < data_end
data_init = '2022-03-07 08:00:00.00+00'
data_end = '2022-03-07 10:00:00.00+00'

In [7]:
# file containing the raw data (loaded in "Loading data" with the column names from `cols`)
filename = '../data/raw/cams.part.01.csv'

In [8]:
# radius (meters) for two entities being considered neighbours
# (not referenced in the cells up to "Saving data" - presumably used later; verify)
radius = 50
# interval the time_index values are rounded to (passed to .dt.round in "Converting data")
# NOTE(review): the name `time` would hide the standard-library `time` module if that is ever imported here - consider renaming
time = '5s'

## Loading data

In [9]:
# read the raw file; it has no header row, so every line is data and the
# column names are supplied from `cols`
df = pd.read_csv(filename, header=None, names=cols)

In [10]:
# summary statistics of the raw rssi values at the listed percentiles (given as fractions)
rssi_percentiles = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.98]
df['rssi'].describe(percentiles=rssi_percentiles)

count    1000000.000000
mean      -17322.096087
std        28832.587239
min       -65535.000000
10%       -65535.000000
20%       -65535.000000
30%          -88.000000
40%          -86.000000
50%          -84.000000
60%          -82.000000
70%          -79.000000
80%          -74.000000
90%          -67.000000
98%          -63.000000
max            0.000000
Name: rssi, dtype: float64

## Converting data

In [11]:
# Parse the bounds of the selected period so the time filter below compares
# instants instead of raw strings. A string comparison is character by character:
# '...10:00:00+00' sorts before '...10:00:00.00+00' although both are the same
# instant, so rows falling exactly on a bound could be kept or dropped wrongly.
period_start = pd.to_datetime(data_init)
period_end = pd.to_datetime(data_end)

df = df[features] # select columns
df = df.loc[df['entity_id'].isin(rsu_ids)] # select only relevant rsu's
df = df.loc[df['stationid'].isin(obu_ids)] # select only relevant obu's
# parse the timestamps of the remaining rows; assign() builds a new frame instead of
# writing into a filtered one
df = df.assign(time_index=pd.to_datetime(df['time_index']))
df = df.loc[(df['time_index']>=period_start) & (df['time_index']<period_end)] # include data_init and exclude data_end
df = df.loc[(df['rssi']!=0)] # filter by rssi values
# NOTE(review): the describe() output in "Loading data" also shows rssi values of -65535,
# which look like a placeholder; they are not removed here - confirm whether they should be.
df = df.sort_values(by='time_index') # chronological order
df = df.assign(time_index=df['time_index'].dt.round(time)) # round time_index to the nearest `time` interval
df = df.reset_index(drop=True) # reset index

df.head(2)

Unnamed: 0,time_index,entity_id,location,rssi,stationid
0,2022-03-07 08:01:00+00:00,urn:ngsi-ld:Values:aveiro_cam:p26,0101000000D0381E7DDD4B21C0270BA9EC05524440,-89,87
1,2022-03-07 08:01:00+00:00,urn:ngsi-ld:Values:aveiro_cam:p26,01010000007F9763C3E44B21C04B0169FF03524440,-89,87


In [12]:
# Collapse all rows sharing the same (rounded time, rsu, obu) key into a single row:
# keep the first location of the group and average its rssi values
group_keys = ['time_index', 'entity_id', 'stationid']
aggregations = {'location':'first', 'rssi':'mean'}
df = df.groupby(group_keys, as_index=False).agg(aggregations)
df

Unnamed: 0,time_index,entity_id,stationid,location,rssi
0,2022-03-07 08:01:00+00:00,urn:ngsi-ld:Values:aveiro_cam:p26,87,0101000000D0381E7DDD4B21C0270BA9EC05524440,-89.000000
1,2022-03-07 08:01:05+00:00,urn:ngsi-ld:Values:aveiro_cam:p26,87,010100000099507754EB4B21C052DE7D4402524440,-87.833333
2,2022-03-07 08:01:10+00:00,urn:ngsi-ld:Values:aveiro_cam:p26,87,01010000005AB09EA40A4C21C0153B1A87FA514440,-86.600000
3,2022-03-07 08:01:15+00:00,urn:ngsi-ld:Values:aveiro_cam:p26,87,0101000000279133034B4C21C0FD6BD49EED514440,-86.666667
4,2022-03-07 08:01:20+00:00,urn:ngsi-ld:Values:aveiro_cam:p26,87,0101000000277854466D4C21C08462D0AEE7514440,-86.800000
...,...,...,...,...,...
658,2022-03-07 09:52:10+00:00,urn:ngsi-ld:Values:aveiro_cam:p26,86,01010000002943B005184D21C007A0072DDA514440,-85.400000
659,2022-03-07 09:52:15+00:00,urn:ngsi-ld:Values:aveiro_cam:p26,86,0101000000CE7234A20F4D21C0632BC313D5514440,-84.000000
660,2022-03-07 09:52:20+00:00,urn:ngsi-ld:Values:aveiro_cam:p26,86,0101000000027DC7951F4D21C096F551FCCE514440,-76.400000
661,2022-03-07 09:52:25+00:00,urn:ngsi-ld:Values:aveiro_cam:p26,86,01010000006626625B504D21C000DF1225C6514440,-78.800000


In [13]:
# Decode every location value with convert_wkb_to_lat_lon, then split the
# resulting pair into its first (latitude) and second (longitude) element
gps_points = df['location'].map(convert_wkb_to_lat_lon)
df['gps'] = gps_points
df['latitude'] = gps_points.apply(lambda point: point[0])
df['longitude'] = gps_points.apply(lambda point: point[1])
df.head(2)

Unnamed: 0,time_index,entity_id,stationid,location,rssi,gps,latitude,longitude
0,2022-03-07 08:01:00+00:00,urn:ngsi-ld:Values:aveiro_cam:p26,87,0101000000D0381E7DDD4B21C0270BA9EC05524440,-89.0,"(40.6408058, -8.6481742)",40.640806,-8.648174
1,2022-03-07 08:01:05+00:00,urn:ngsi-ld:Values:aveiro_cam:p26,87,010100000099507754EB4B21C052DE7D4402524440,-87.833333,"(40.6406942, -8.6482798)",40.640694,-8.64828


In [14]:
# distinct OBU station ids that remain after the conversion, in order of first appearance
pd.unique(df['stationid'])

array([87, 60, 86, 90, 52, 89, 97, 99])

In [15]:
# distinct RSU entity ids that remain after the conversion, in order of first appearance
pd.unique(df['entity_id'])

array(['urn:ngsi-ld:Values:aveiro_cam:p26',
       'urn:ngsi-ld:Values:aveiro_cam:p5',
       'urn:ngsi-ld:Values:aveiro_cam:p6',
       'urn:ngsi-ld:Values:aveiro_cam:p19'], dtype=object)

## Saving data