# 3 - Data Class Labeling

This file creates the labels for the following data classes:

- **Road Surface Type**

Description | Label
------------ | -------------
Dirt Road | dirt_road
Cobblestone Road | cobblestone_road
Asphalt Road | asphalt_road

- **Road Surface Condition**

Description | Label
------------ | -------------
Paved Road | paved_road
Unpaved Road | unpaved_road


- **Speed Bump**

Description | Label
------------ | -------------
No Speed Bump | no_speed_bump
Speed Bump in Asphalt | speed_bump_asphalt
Speed Bump in Cobblestone | speed_bump_cobblestone


- **Road Roughness Condition**

Description | Label
------------ | ------------- 
Good | good_road_left, good_road_right
Regular | regular_road_left, regular_road_right
Bad | bad_road_left, bad_road_right

## Importing Packages

In [None]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
from geopy.distance import geodesic
%matplotlib inline
# %matplotlib notebook
# Render floats with 10 decimal places when pandas displays them.
pd.set_option("float_format", '{:0.10f}'.format)
# Show up to 30 columns before pandas truncates the displayed frame.
pd.set_option('display.max_columns', 30)
# Default matplotlib figure size as (width, height).
plt.rcParams['figure.figsize'] = (24.0, 8.0)

## Utility Functions

In [None]:
# Root folder of the datasets; getData() expects one "PVS <n>" subfolder per
# dataset inside it. Hard-coded Windows path - adjust it for the local machine.
datasets_folder = "E:\\DataSets\\100 Hz"

In [None]:
# Hand-specified sample ranges for every dataset ("pvs_1" .. "pvs_9") and data class.
#
# Each tuple is an inclusive (first_sample, last_sample) pair: mapToLabel() tests
# first <= sample <= last, and processData() feeds it the row positions
# 0 .. len(data_left) - 1.
#
# Within each of the three class groups below, the ranges tile the dataset without
# gaps or overlaps, so every sample gets exactly one label per group:
#   - paved_road / unpaved_road
#   - dirt_road / cobblestone_road / asphalt_road
#   - no_speed_bump / speed_bump_asphalt / speed_bump_cobblestone
#
# NOTE(review): three ranges originally overlapped their predecessor (pvs_2 asphalt at
# 98890, pvs_2 speed bump at 70824, pvs_9 no_speed_bump at 67816-67817), which labelled
# the shared samples with two exclusive classes. They were resolved by starting the
# later range at "previous end + 1", matching every other boundary in this table.
# The true boundary may differ by 1-2 samples - confirm against the annotation source.
data_class_sample_ranges = {
    "pvs_1": {
        "paved_road": [
            (0,30830),
            (35846,38500),
            (43825,69018),
            (71327,96339),
            (109561,118357),
            (118358,144037)
        ],
        "unpaved_road": [
            (30831,35845),
            (38501,43824),
            (69019,71326),
            (96340,109560)
        ],
        "dirt_road": [
            (30831,35845),
            (38501,43824),
            (69019,71326),
            (96340,109560)
        ],
        "cobblestone_road": [
            (35846,38500),
            (43825,69018),
            (71327,96339),
            (109561,118357)
        ],
        "asphalt_road": [
            (0,30830),
            (118358,144037)
        ],
        "no_speed_bump": [
            (0,29864),
            (30175,45783),
            (46092,51122),
            (51513,54077),
            (54385,58180),
            (58552,73915),
            (74221,82991),
            (83250,87005),
            (87329,96009),
            (96313,110532),
            (110797,119624),
            (119941,144037)
        ],
        "speed_bump_asphalt": [
            (29865,30174),
            (119625,119940)
        ],
        "speed_bump_cobblestone": [
            (45784,46091),
            (51123,51512),
            (54078,54384),
            (58181,58551),
            (73916,74220),
            (82992,83249),
            (87006,87328),
            (96010,96312),
            (110533,110796)
        ],
    },
    "pvs_2": {
        "paved_road": [
            (0,7020),
            (51639,78153),
            (78154,98890),
            # Was (98890,124685); kept in step with the asphalt_road fix below.
            (98891,124685)
        ],
        "unpaved_road": [
            (7021,51638)
        ],
        "dirt_road": [
            (7021,51638)
        ],
        "cobblestone_road": [
            (78154,98890)
        ],
        "asphalt_road": [
            (0,7020),
            (51639,78153),
            # Was (98890,124685): sample 98890 was also inside cobblestone_road.
            (98891,124685)
        ],
        "no_speed_bump": [
            (0,66980),
            (67319,69586),
            (69869,70261),
            (70532,70824),
            (71106,72654),
            (72955,75355),
            (75589,100097),
            (100399,124685)
        ],
        "speed_bump_asphalt": [
            (66981,67318),
            (69587,69868),
            (70262,70531),
            # Was (70824,71105): sample 70824 was also inside no_speed_bump.
            (70825,71105),
            (72655,72954),
            (75356,75588),
            (100098,100398)
        ],
        "speed_bump_cobblestone": []
    },
    "pvs_3": {
        "paved_road": [
            (0,24494),
            (48210,66640),
            (71585,79296),
            (79297,105817)
        ],
        "unpaved_road": [
            (24495,48209),
            (66641,71584)
        ],
        "dirt_road": [
            (24495,48209),
            (66641,71584)
        ],
        "cobblestone_road": [
            (48210,66640),
            (71585,79296)
        ],
        "asphalt_road": [
            (0,24494),
            (79297,105817)
        ],
        "no_speed_bump": [
            (0,56636),
            (56947,80581),
            (80881,105817)
        ],
        "speed_bump_asphalt": [
            (80582,80880)
        ],
        "speed_bump_cobblestone": [
            (56637,56946)
        ]
    },
    "pvs_4": {
        "paved_road": [
            (0,25775),
            (30976,34094),
            (38802,60711),
            (63106,86730),
            (98333,107348),
            (107349,132493)
        ],
        "unpaved_road": [
            (25776,30975),
            (34095,38801),
            (60712,63105),
            (86731,98332)
        ],
        "dirt_road": [
            (25776,30975),
            (34095,38801),
            (60712,63105),
            (86731,98332)
        ],
        "cobblestone_road": [
            (30976,34094),
            (38802,60711),
            (63106,86730),
            (98333,107348)
        ],
        "asphalt_road": [
            (0,25775),
            (107349,132493)
        ],
        "no_speed_bump": [
            (0,24302),
            (24546,40665),
            (40956,45429),
            (45720,47855),
            (48130,51742),
            (52020,65765),
            (66186,74859),
            (75116,78975),
            (79263,86485),
            (86726,99289),
            (99560,108702),
            (108990,132493)
        ],
        "speed_bump_asphalt": [
            (24303,24545),
            (108703,108989)
        ],
        "speed_bump_cobblestone": [
            (40666,40955),
            (45430,45719),
            (47856,48129),
            (51743,52019),
            (65766,66185),
            (74860,75115),
            (78976,79262),
            (86486,86725),
            (99290,99559)
        ]
    },
    "pvs_5": {
        "paved_road": [
            (0,7095),
            (67635,92254),
            (92255,110397),
            (110398,133878)
        ],
        "unpaved_road": [
            (7096,67634)
        ],
        "dirt_road": [
            (7096,67634)
        ],
        "cobblestone_road": [
            (92255,110397)
        ],
        "asphalt_road": [
            (0,7095),
            (67635,92254),
            (110398,133878)
        ],
        "no_speed_bump": [
            (0,82677),
            (82974,84911),
            (85201,85554),
            (85831,86061),
            (86288,87707),
            (87969,89908),
            (90193,111619),
            (111931,133878)
        ],
        "speed_bump_asphalt": [
            (82678,82973),
            (84912,85200),
            (85555,85830),
            (86062,86287),
            (87708,87968),
            (89909,90192),
            (111620,111930)
        ],
        "speed_bump_cobblestone": []
    },
    "pvs_6": {
        "paved_road": [
            (0,15990),
            (35216,59581),
            (64245,71519),
            (71520,96280)
        ],
        "unpaved_road": [
            (15991,35215),
            (59582,64244)
        ],
        "dirt_road": [
            (15991,35215),
            (59582,64244)
        ],
        "cobblestone_road": [
            (35216,59581),
            (64245,71519)
        ],
        "asphalt_road": [
            (0,15990),
            (71520,96280)
        ],
        "no_speed_bump": [
            (0,50323),
            (50649,72818),
            (73100,96280)
        ],
        "speed_bump_asphalt": [
            (72819,73099)
        ],
        "speed_bump_cobblestone": [
            (50324,50648)
        ]
    },
    "pvs_7": {
        "paved_road": [
            (0,27466),
            (33518,36725),
            (41550,62819),
            (65404,86442),
            (96762,105468),
            (105469,128549)
        ],
        "unpaved_road": [
            (27467,33517),
            (36726,41549),
            (62820,65403),
            (86443,96761)
        ],
        "dirt_road": [
            (27467,33517),
            (36726,41549),
            (62820,65403),
            (86443,96761)
        ],
        "cobblestone_road": [
            (33518,36725),
            (41550,62819),
            (65404,86442),
            (96762,105468)
        ],
        "asphalt_road": [
            (0,27466),
            (105469,128549)
        ],
        "no_speed_bump": [
            (0,26315),
            (26604,43545),
            (43801,48288),
            (48582,50827),
            (51108,54336),
            (54596,67474),
            (67732,75246),
            (75473,78793),
            (79071,86128),
            (86366,97731),
            (97956,106723),
            (107004,128549)
        ],
        "speed_bump_asphalt": [
            (26316,26603),
            (106724,107003)
        ],
        "speed_bump_cobblestone": [
            (43546,43800),
            (48289,48581),
            (50828,51107),
            (54337,54595),
            (67475,67731),
            (75247,75472),
            (78794,79070),
            (86129,86365),
            (97732,97955)
        ]
    },
    "pvs_8": {
        "paved_road": [
            (0,7963),
            (52903,79277),
            (79278,98102),
            (98103,123619)
        ],
        "unpaved_road": [
            (7964,52902)
        ],
        "dirt_road": [
            (7964,52902)
        ],
        "cobblestone_road": [
            (79278,98102)
        ],
        "asphalt_road": [
            (0,7963),
            (52903,79277),
            (98103,123619)
        ],
        "no_speed_bump": [
            (0,68138),
            (68427,70613),
            (70924,71355),
            (71581,71848),
            (72093,73605),
            (73841,76153),
            (76425,99472),
            (99737,123619)
        ],
        "speed_bump_asphalt": [
            (68139,68426),
            (70614,70923),
            (71356,71580),
            (71849,72092),
            (73606,73840),
            (76154,76424),
            (99473,99736)
        ],
        "speed_bump_cobblestone": []
    },
    "pvs_9": {
        "paved_road": [
            (0,17735),
            (36629,54636),
            (58897,66070),
            (66071,91556)
        ],
        "unpaved_road": [
            (17736,36628),
            (54637,58896)
        ],
        "dirt_road": [
            (17736,36628),
            (54637,58896)
        ],
        "cobblestone_road": [
            (36629,54636),
            (58897,66070)
        ],
        "asphalt_road": [
            (0,17735),
            (66071,91556)
        ],
        "no_speed_bump": [
            (0,43904),
            (44240,67509),
            # Was (67816,91556): samples 67816-67817 were also inside speed_bump_asphalt.
            (67818,91556)
        ],
        "speed_bump_asphalt": [
            (67510,67817)
        ],
        "speed_bump_cobblestone": [
            (43905,44239)
        ]
    }
}

In [None]:
# Constant subtracted from the acceleration magnitude in parameters().
# Presumably standard gravity in m/s^2 - confirm the sensor units.
GRAVITY = 9.80665
# Name of the labels CSV that processData() writes into each dataset folder.
FILE_NAME = 'dataset_labels.csv'

In [None]:
def getData(pvs):
    """Load the raw data and label ranges of one PVS dataset.

    Parameters
    ----------
    pvs : int
        Dataset number; selects the ``"pvs_<n>"`` entry of
        ``data_class_sample_ranges`` and the ``"PVS <n>"`` subfolder of
        ``datasets_folder``.

    Returns
    -------
    dict
        ``"folder"`` (dataset path), ``"data_class_ranges"`` (label ranges),
        ``"data_left"`` / ``"data_right"`` (DataFrames read from the left and
        right GPS/MPU CSV files) and ``"data_labels"`` (DataFrame of the
        existing labels file, or ``None`` when that file does not exist yet).
    """
    data_class_ranges = data_class_sample_ranges['pvs_' + str(pvs)]
    folder = os.path.join(datasets_folder, "PVS " + str(pvs))
    data_left = pd.read_csv(os.path.join(folder, 'dataset_gps_mpu_left.csv'), float_precision="high")
    data_right = pd.read_csv(os.path.join(folder, 'dataset_gps_mpu_right.csv'), float_precision="high")

    # The labels file is the output of this notebook, so it is missing on a
    # first run; reading it unconditionally would abort before it is created.
    labels_path = os.path.join(folder, 'dataset_labels.csv')
    data_labels = pd.read_csv(labels_path) if os.path.exists(labels_path) else None

    return {
        "folder": folder,
        "data_class_ranges": data_class_ranges,
        "data_left": data_left,
        "data_right": data_right,
        "data_labels": data_labels
    }

In [None]:
# Label each sample based on the data classes sample ranges
def mapToLabel(sample, class_ranges):
    """Return 1 when ``sample`` falls inside any range of ``class_ranges``, else 0.

    Each range is indexable as ``(lower, upper)`` and both ends are inclusive.
    """
    inside_any = any(bounds[0] <= sample <= bounds[1] for bounds in class_ranges)
    return 1 if inside_any else 0

# Adds a data class as a column to the dataframe
def addClassToData(classes, class_name, class_ranges, values_to_map):
    """Append a 0/1 label column named ``class_name`` to ``classes`` (in place).

    Every element of ``values_to_map`` is labelled with ``mapToLabel`` against
    ``class_ranges``; the resulting column is inserted after the existing ones.
    """
    labels = pd.Series(values_to_map).apply(
        lambda value: mapToLabel(value, class_ranges)
    )
    classes.insert(
        loc=classes.shape[1],
        column=class_name,
        value=labels,
        allow_duplicates=True,
    )

In [None]:
def parameters(data, placement):
    """Extract the speed and the gravity-compensated acceleration magnitude.

    Reads the ``"speed"`` column and the ``acc_x_/acc_y_/acc_z_<placement>``
    columns of ``data``. Returns ``(speed, acceleration)`` as numpy arrays,
    where ``acceleration`` is the Euclidean norm of the three axes minus
    ``GRAVITY``.
    """
    speed = data["speed"].to_numpy()
    squared_x, squared_y, squared_z = (
        np.power(data["acc_" + axis + "_" + placement].to_numpy(), 2)
        for axis in ("x", "y", "z")
    )
    magnitude = np.sqrt(squared_x + squared_y + squared_z)
    return speed, magnitude - GRAVITY

def boundaries(pvs, iri, datasets, cluster_by_car=True):
    """Cluster IRI values into three groups and return each group's value range.

    A 3-cluster KMeans model is fitted and then used to assign every value of
    ``iri`` to a cluster; the (min, max) of ``iri`` within each cluster is
    returned in ascending order.

    Parameters
    ----------
    pvs : int
        Dataset number. With ``cluster_by_car`` it selects the training group:
        datasets 1-3, 4-6 or 7-9.
    iri : numpy.ndarray
        One-dimensional IRI proxy values to derive the bounds for.
    datasets : dict
        Per-dataset dicts keyed ``"pvs_<n>"``, each holding ``"iri_left"`` and
        ``"iri_right"`` arrays (only read when ``cluster_by_car`` is true).
    cluster_by_car : bool
        When true, fit on the left and right IRI of the whole group; otherwise
        fit on ``iri`` alone.

    Returns
    -------
    list of tuple
        Three ``(min, max)`` pairs, sorted ascending.

    Notes
    -----
    KMeans is created without ``random_state``, so the fitted clusters (and
    therefore the bounds) are not guaranteed to be identical between runs.
    """
    if cluster_by_car:
        if pvs <= 3:
            group = [1, 2, 3]
        elif pvs <= 6:
            group = [4, 5, 6]
        else:
            group = [7, 8, 9]

        group_iri = [
            datasets["pvs_" + str(member)]["iri_" + side]
            for member in group
            for side in ["left", "right"]
        ]
        train_data = np.concatenate(group_iri)
    else:
        train_data = iri

    kmeans = KMeans(n_clusters=3, verbose=0, max_iter=100000)
    kmeans.fit(train_data.reshape(-1, 1))
    assignments = kmeans.predict(iri.reshape(-1, 1))

    table = pd.DataFrame()
    table.insert(len(table.columns), "iri", iri, True)
    table.insert(len(table.columns), "clusters", assignments, True)

    # One (min, max) pair per cluster id, then ordered by value.
    per_cluster = (table[table["clusters"] == label][["iri"]] for label in range(0, 3))
    bounds = sorted(
        (members.min().values[0], members.max().values[0]) for members in per_cluster
    )

    print("IRI Boundaries " + str(bounds))
    return bounds

In [None]:
# https://www.sciencedirect.com/science/article/pii/S0198971517301333
# https://www.sciencedirect.com/science/article/abs/pii/S1574119218300518
def calcIRIproxy(data, window_size=500, placement="dashboard"):
    """Compute a per-sample roughness (IRI) proxy over a centred sliding window.

    For sample ``i`` the window spans ``[i - window_size/2, i + window_size/2)``,
    truncated at both ends of the data. The proxy is the root mean square of
    the acceleration in the window divided by the mean speed in the window,
    scaled by 100.

    Parameters
    ----------
    data : pandas.DataFrame
        Must provide the columns read by ``parameters`` (``"speed"`` and the
        three ``acc_*_<placement>`` columns).
    window_size : int
        Window length in samples; should be at least 2 so the window is never
        empty.
    placement : str
        Sensor placement suffix used to select the acceleration columns.

    Returns
    -------
    numpy.ndarray
        One proxy value per sample; 0 where the mean speed in the window is
        below 1.4 (vehicle treated as stopped; speed unit presumably m/s -
        confirm).
    """
    speed, acceleration = parameters(data, placement)

    samples_size = len(acceleration)
    side_size = int(window_size/2)
    iri_proxy = []

    for i in range(0, samples_size):

        start = max(i - side_size, 0)
        # Clamp to samples_size, not samples_size + 1: slicing stops at the end
        # of the array, so a larger `end` made N exceed the number of samples
        # actually in the window and skewed the RMS and mean speed at the tail.
        end = min(i + side_size, samples_size)
        N = end - start

        acc_sequence = acceleration[start:end]
        acc_root_mean_square = np.sqrt(np.sum(np.power(acc_sequence, 2))/N)

        speed_block = speed[start:end]
        speed_sum = np.sum(speed_block)

        # Mean speed below the threshold: report 0 instead of dividing by a
        # (near) zero speed.
        car_stopped = speed_sum/N < 1.4
        iri_proxy.append(0 if car_stopped else ((N * acc_root_mean_square)/speed_sum) * 100)

    return np.array(iri_proxy)

def addIRIproxyClass(pvs, datasets, classes, side):
    """Add the good/regular/bad roughness label columns for one side.

    Uses the ``"iri_<side>"`` values of dataset ``pvs`` and the three sorted
    ranges returned by ``boundaries``: the lowest range labels
    ``good_road_<side>``, the middle one ``regular_road_<side>`` and the
    highest ``bad_road_<side>``. Columns are appended to ``classes`` in place.
    """
    print(side)
    iri = datasets["pvs_" + str(pvs)]["iri_" + side]
    bounds = boundaries(pvs, iri, datasets)

    column_prefixes = ('good_road_', 'regular_road_', 'bad_road_')
    for position, prefix in enumerate(column_prefixes):
        addClassToData(classes, prefix + side, [bounds[position]], iri)

In [None]:
# Creates all data class labels for the passed dataset
def processData(pvs, datasets):
    """Build every label column for dataset ``pvs`` and save them as CSV.

    The range-based classes are labelled per sample index (one row per row of
    ``data_left``), then the IRI-based roughness classes are added for the
    left and right sides. The frame is written to ``FILE_NAME`` inside the
    dataset folder and also stored under ``"classes"`` in the dataset's dict.
    """
    print("Processing PVS " + str(pvs))

    dataset = datasets["pvs_" + str(pvs)]
    data_class_ranges = dataset["data_class_ranges"]
    folder = dataset["folder"]
    classes = pd.DataFrame()

    sample_indexes = range(0, len(dataset["data_left"]))

    range_based_classes = (
        'paved_road',
        'unpaved_road',
        'dirt_road',
        'cobblestone_road',
        'asphalt_road',
        'no_speed_bump',
        'speed_bump_asphalt',
        'speed_bump_cobblestone',
    )
    for class_name in range_based_classes:
        addClassToData(classes, class_name, data_class_ranges[class_name], sample_indexes)

    for side in ("left", "right"):
        addIRIproxyClass(pvs, datasets, classes, side)

    output_path = os.path.join(folder, FILE_NAME)
    classes.to_csv(output_path, index=False)

    print("Saved in ", output_path)
    dataset["classes"] = classes

# Method that creates all data class labels for all datasets
def createLabels():
    """Load datasets 1-9, compute their IRI proxies and write all label files.

    All datasets are loaded and their left/right IRI proxies computed first,
    because ``processData`` needs the IRI of other datasets when deriving the
    roughness bounds. Returns the dict of per-dataset data keyed ``"pvs_<n>"``.
    """
    datasets = {}
    pvs_numbers = range(1, 10)

    for pvs in pvs_numbers:
        entry = getData(pvs)
        datasets["pvs_" + str(pvs)] = entry
        for side in ("left", "right"):
            entry["iri_" + side] = calcIRIproxy(entry["data_" + side])

    for pvs in pvs_numbers:
        processData(pvs, datasets)

    return datasets

## Data Class Labeling

In [None]:
# Generate and save the labels of all datasets; keep the returned per-dataset
# dict (raw data, IRI proxies and label frames) for inspection.
logs = createLabels()