In [23]:
from hyperopt import hp, fmin, tpe, Trials
from functools import partial
from sklearn import metrics
import pandas as pd
import numpy as np
import math
import warnings
warnings.filterwarnings("ignore")

In [24]:
dataset_path = './groundTruthGenerator/groundTruth'

In [25]:
class BDT:
    def __init__(self, dt:float, tt:float, lat='x', lng='y', t='timestep') -> None:
        self.dt = dt
        self.tt = tt
        self.lat = lat
        self.lng = lng
        self.t = t

    def haversine_distance(self, p1, p2) -> float:
        lat1, lon1 = p1
        lat2, lon2 = p2
        # Radius of the Earth in kilometers
        earth_radius = 6371
        # Convert latitude and longitude from degrees to radians
        lat1 = math.radians(lat1)
        lon1 = math.radians(lon1)
        lat2 = math.radians(lat2)
        lon2 = math.radians(lon2)
        # Haversine formula
        dlat = lat2 - lat1
        dlon = lon2 - lon1
        a = math.sin(dlat / 2)**2 + math.cos(lat1) * math.cos(lat2) * math.sin(dlon / 2)**2
        c = 2 * math.atan2(math.sqrt(a), math.sqrt(1 - a))
        # Calculate the distance
        distance = earth_radius * c
        return distance
    

    def _BDT(self, P:[[float, float, float]]) -> [bool]:
        pred = []
        i = 0
        while i < len(P):
            j = i + 1; token = 0
            while j < len(P):
                dist = self.haversine_distance(P[i, :-1], P[i+1, :-1])
                if dist > self.dt:
                    pjt = P[j, -1]
                    pit = P[i, -1]
                    dt = pjt - pit
                    if dt > self.tt:
                        i = j
                        token = 1
                        pred += [True for _ in range( j - i + 1)]
                    break
                j += 1
            if token != 1:
                pred.append(False)
                i += 1
        return pred

    def predict(self, X:pd.DataFrame) -> [bool]:
        trajectories = X[[self.lat, self.lng, self.t]].values
        return self._BDT(trajectories)

### Hyperparameter Optimization - Hyperopt

In [26]:
def optimize(params, data):
    model = BDT(**params)
    f1s = []
    veh_id_unique = data['id'].unique()
    for veh_id in veh_id_unique:
        trajectory = data[data['id'] == veh_id]
        y_true = trajectory['stop']
        y_pred = model.predict(trajectory.drop(columns=['stop']))
        f1s.append(metrics.f1_score(y_true, y_pred))
    return -1.0 * np.mean(f1s)

In [27]:
param_space = {
    'dt': hp.uniform('dt',0, 0.001),
    'tt': hp.uniform('tt',0, 0.001),
}

In [28]:
move_stop_train = pd.read_csv(f'{dataset_path}/stop_train.csv')
move_stop_train['index'] = move_stop_train.index
veh_id_unique = move_stop_train['id'].unique()

In [29]:
optimization_function = partial(
    optimize,
    data=move_stop_train,
)

In [30]:
trials = Trials()
best_params = fmin(
    fn=optimization_function,
    space=param_space,
    algo=tpe.suggest,
    max_evals=15,
    trials=trials,
)
print(best_params)

  0%|          | 0/15 [00:00<?, ?trial/s, best loss=?]

100%|██████████| 15/15 [34:38<00:00, 138.59s/trial, best loss: -0.020531525048725435]
{'dt': 7.300901956605499e-05, 'tt': 0.0007859913389860952}


### Test / Validation

In [31]:
move_stop_test = pd.read_csv(f'{dataset_path}/stop_test.csv')
move_stop_test['index'] = move_stop_test.index
veh_id_unique = move_stop_test['id'].unique()

In [32]:
ac_list = []
pr_list = []
re_list = []
f1_list = []
model = BDT(**best_params)
data = move_stop_test
for veh_id in veh_id_unique:
    trajectory = data[data['id'] == veh_id]
    y_true = trajectory['stop']
    y_pred = model.predict(trajectory.drop(columns=['stop']))

    ac_list.append(metrics.accuracy_score(y_true, y_pred))
    pr_list.append(metrics.precision_score(y_true, y_pred))
    re_list.append(metrics.recall_score(y_true, y_pred))
    f1_list.append(metrics.f1_score(y_true, y_pred))

ac_mean = np.mean(ac_list)
pr_mean = np.mean(pr_list)
re_mean = np.mean(re_list)
f1_mean = np.mean(f1_list)

print('Accuracy mean:', ac_mean)
print('Precision mean:', pr_mean)
print('Recall mean:', re_mean)
print('F1 mean:', f1_mean)

Accuracy mean: 0.03308500661470595
Precision mean: 0.010992386922691007
Recall mean: 0.5043343587228479
F1 mean: 0.020666115015534423
