In [4]:
from hyperopt import hp, fmin, tpe, Trials
from functools import partial
from sklearn import metrics
import pandas as pd
import numpy as np
import math
import warnings
warnings.filterwarnings("ignore")

In [5]:
dataset_path = './groundTruthGenerator/groundTruth'

In [6]:
class BVA:
    """Threshold-based stop detector using point-to-point speed and acceleration.

    Each sample after the first is labelled a stop (``True``) when the speed
    over the segment leading to it is at most ``st`` AND the change of speed
    relative to the previous segment, divided by the segment duration, is at
    most ``at``. The first sample of a trajectory is always labelled ``True``.

    Distances come from the haversine formula in kilometres, so speeds are in
    kilometres per unit of the time column (per second when the time column
    holds ``pd.Timestamp`` values).

    Parameters
    ----------
    st : float
        Speed threshold (inclusive upper bound for a stop).
    at : float
        Acceleration threshold (inclusive upper bound for a stop).
    lat, lng, t : str
        Column names read by ``predict`` for latitude, longitude and time.
        NOTE(review): the lat/lng values are converted with ``math.radians``,
        i.e. they are treated as degrees - confirm the default 'x'/'y'
        columns really hold geographic degrees.
    """

    def __init__(self, st:float, at:float, lat='x', lng='y', t='timestep') -> None:
        self.st = st
        self.at = at
        self.lat = lat
        self.lng = lng
        self.t = t

    def haversine_distance(self, p1, p2) -> float:
        """Return the great-circle distance in kilometres between two points.

        ``p1`` and ``p2`` are ``(latitude, longitude)`` pairs in degrees.
        """
        lat1, lon1 = p1
        lat2, lon2 = p2
        # Radius of the Earth in kilometers
        earth_radius = 6371
        # Convert latitude and longitude from degrees to radians
        lat1 = math.radians(lat1)
        lon1 = math.radians(lon1)
        lat2 = math.radians(lat2)
        lon2 = math.radians(lon2)
        # Haversine formula
        dlat = lat2 - lat1
        dlon = lon2 - lon1
        a = math.sin(dlat / 2)**2 + math.cos(lat1) * math.cos(lat2) * math.sin(dlon / 2)**2
        # Rounding can push `a` a hair above 1 for near-antipodal points, which
        # would make sqrt(1 - a) raise. Only the upper bound is clamped so that
        # NaN inputs and invalid latitudes still surface instead of being hidden.
        if a > 1.0:
            a = 1.0
        c = 2 * math.atan2(math.sqrt(a), math.sqrt(1 - a))
        # Calculate the distance
        return earth_radius * c

    def delta_duration(self, d1, d2):
        """Return ``d2 - d1``, converted to seconds when the difference is a timedelta.

        Timestamp-like inputs (e.g. ``pd.Timestamp``) yield a float number of
        seconds; plain numbers yield their plain difference.
        """
        delta = d2 - d1
        # pd.Timedelta / datetime.timedelta expose total_seconds(); numbers do not.
        if hasattr(delta, 'total_seconds'):
            return delta.total_seconds()
        return delta

    def _BVA(self, trajectories) -> list:
        """Label every sample of one trajectory as stop (True) or move (False).

        Parameters
        ----------
        trajectories : sequence of (lat, lng, t) rows
            Consecutive samples of a single trajectory.

        Returns
        -------
        list of bool
            One label per input row; empty input gives an empty list.
        """
        if len(trajectories) == 0:
            # No samples, no labels: keeps the output aligned with the input.
            return []

        pred = [True]
        crr_speed = 0
        for (lat1, lon1, t1), (lat2, lon2, t2) in zip(trajectories[:-1], trajectories[1:]):
            distance = self.haversine_distance([lat1, lon1], [lat2, lon2])
            elapsed = self.delta_duration(t1, t2)
            if not elapsed > 0:
                # Duplicate, out-of-order or NaN timestamps give no usable rate.
                # The sample is labelled "not a stop" and the speed state is
                # left untouched, so no division by zero happens and no
                # inf/NaN speed leaks into the next segment's acceleration.
                pred.append(False)
                continue
            prv_speed = crr_speed
            crr_speed = distance / elapsed
            acceleration = (crr_speed - prv_speed) / elapsed
            pred.append(bool(crr_speed <= self.st and acceleration <= self.at))
        return pred

    def predict(self, X:pd.DataFrame) -> list:
        """Label the rows of ``X`` (one trajectory) using its lat/lng/time columns.

        Returns a list of booleans with one entry per row of ``X``.
        """
        trajectories = X[[self.lat, self.lng, self.t]].values
        return self._BVA(trajectories)

### Hyperparameter Optimization - Hyperopt

In [4]:
def optimize(params, data):
    """Hyperopt objective: negated mean per-vehicle F1 of a BVA built from ``params``.

    ``params`` is unpacked into the ``BVA`` constructor. ``data`` must carry an
    'id' column (used to split trajectories) and a 'stop' column (the labels).
    The mean F1 is negated because the caller minimises the returned value.
    """
    detector = BVA(**params)
    per_vehicle_f1 = []
    for vehicle in data['id'].unique():
        rows = data[data['id'] == vehicle]
        # The label column is removed before the rows are handed to the model.
        predicted = detector.predict(rows.drop(columns=['stop']))
        per_vehicle_f1.append(metrics.f1_score(rows['stop'], predicted))
    return -1.0 * np.mean(per_vehicle_f1)

In [5]:
# Search space for the two BVA thresholds. quniform(label, 0, 2, 1) draws
# round(uniform(0, 2) / 1) * 1, so each threshold can only be 0.0, 1.0 or 2.0.
param_space = {
    label: hp.quniform(label, 0, 2, 1)
    for label in ('st', 'at')
}

In [6]:
# Training split; the row index is also stored as an explicit 'index' column.
move_stop_train = pd.read_csv(f'{dataset_path}/stop_train.csv')
move_stop_train = move_stop_train.assign(index=move_stop_train.index)
veh_id_unique = move_stop_train['id'].unique()

In [7]:
# Bind the training frame so the objective only takes the sampled parameters.
optimization_function = partial(optimize, data=move_stop_train)

In [8]:
# Run 50 TPE evaluations of the objective; `trials` keeps the per-evaluation history.
# NOTE(review): no `rstate` is passed to fmin - confirm whether run-to-run
# reproducibility of the search is required.
trials = Trials()
best_params = fmin(
    optimization_function,
    param_space,
    algo=tpe.suggest,
    max_evals=50,
    trials=trials,
)
print(best_params)

100%|██████████| 50/50 [21:32<00:00, 25.85s/trial, best loss: -0.7950952751851184]
{'at': 2.0, 'st': 0.0}


### Test / Validation

In [9]:
# Held-out split; the row index is also stored as an explicit 'index' column.
move_stop_test = pd.read_csv(f'{dataset_path}/stop_test.csv')
move_stop_test = move_stop_test.assign(index=move_stop_test.index)
# Vehicle ids of the test split, iterated by the evaluation cell.
veh_id_unique = move_stop_test['id'].unique()

In [10]:
# Score the tuned detector on the test split, one vehicle trajectory at a time,
# then report the mean of each metric across vehicles.
model = BVA(**best_params)
data = move_stop_test
ac_list, pr_list, re_list, f1_list = [], [], [], []

for veh_id in veh_id_unique:
    trajectory = data[data['id'] == veh_id]
    y_true = trajectory['stop']
    y_pred = model.predict(trajectory.drop(columns=['stop']))

    # Each metric feeds its own per-vehicle list.
    for score_list, score_fn in (
        (ac_list, metrics.accuracy_score),
        (pr_list, metrics.precision_score),
        (re_list, metrics.recall_score),
        (f1_list, metrics.f1_score),
    ):
        score_list.append(score_fn(y_true, y_pred))

ac_mean, pr_mean, re_mean, f1_mean = (
    np.mean(scores) for scores in (ac_list, pr_list, re_list, f1_list)
)

for label, value in (
    ('Accuracy mean:', ac_mean),
    ('Precision mean:', pr_mean),
    ('Recall mean:', re_mean),
    ('F1 mean:', f1_mean),
):
    print(label, value)

Accuracy mean: 0.9911283020121459
Precision mean: 0.765344108472839
Recall mean: 0.8693115145249825
F1 mean: 0.8032004137344664
