In [1]:
from hyperopt import hp, fmin, tpe, Trials
from datetime import datetime
from functools import partial
from sklearn import metrics
import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings("ignore")

In [6]:
# Directory holding the ground-truth CSV files; the loading cells read
# stop_train.csv and stop_test.csv from it.
dataset_path = './groundTruthGenerator/groundTruth'

In [7]:
class LinearSpeedBounded:
    """Predicate accepting two points when the speed between them is bounded.

    Each point is a 4-item sequence ``(id, lat, lon, t)``. The speed is the
    haversine distance (kilometres) divided by the difference of the two
    ``t`` values, so ``threshold`` is expressed in km per unit of ``t``.
    """

    # Mean Earth radius in kilometres used by the haversine formula.
    EARTH_RADIUS_KM = 6371.0

    def __init__(self, threshold):
        """Store the maximum admissible speed."""
        self.threshold = threshold

    def __call__(self, p1: list, p2: list) -> bool:
        """Return whether ``p2`` can follow ``p1`` without exceeding the threshold.

        Args:
            p1: Earlier point as ``(id, lat, lon, t)``.
            p2: Later point as ``(id, lat, lon, t)``.

        Returns:
            False when ``p2`` is not strictly later than ``p1``; otherwise
            whether ``distance / elapsed <= threshold``.
        """
        _, lat0, lon0, t1 = p1
        _, lat1, lon1, t2 = p2
        tdiff = t2 - t1
        # Written as `not > 0` so that a NaN time difference is also rejected.
        if not tdiff > 0:
            return False
        distance = self.calculate_distance(lat0, lon0, lat1, lon1)
        return distance / tdiff <= self.threshold

    @staticmethod
    def calculate_distance(lat1, lon1, lat2, lon2):
        """Return the great-circle distance in kilometres (haversine formula).

        Coordinates are converted with ``radians``, i.e. they are taken to be
        in degrees.
        """
        from math import radians, sin, cos, sqrt, atan2

        lat1 = radians(lat1)
        lon1 = radians(lon1)
        lat2 = radians(lat2)
        lon2 = radians(lon2)

        dlon = lon2 - lon1
        dlat = lat2 - lat1
        a = sin(dlat / 2)**2 + cos(lat1) * cos(lat2) * sin(dlon / 2)**2
        # Rounding can push `a` marginally above 1 for near-antipodal points,
        # which would make sqrt(1 - a) raise ValueError. Only that case is
        # clamped, so NaN inputs still propagate as NaN.
        if a > 1.0:
            a = 1.0
        c = 2 * atan2(sqrt(a), sqrt(1 - a))

        return LinearSpeedBounded.EARTH_RADIUS_KM * c


In [8]:
# Maximum Physically Consistent Trajectories (MPCT)
# published in SIGSPATIAL 2019
class MPCT:
    """Label the points of a trajectory that form its longest consistent chain.

    Points are visited in row order and chained greedily: a point is appended
    to every existing chain whose last point is compatible with it according
    to the predicate, and starts a new chain when no chain accepts it. The
    points of the first longest chain are the ones flagged by ``predict``.

    Args:
        threshold: Value passed to ``Predicate`` to build the predicate.
        Predicate: Class (or factory) called as ``Predicate(threshold)``; the
            result is called as ``predicate(prev_point, point)`` where each
            point is a list ``[inx, lat, lng, t]``.
        inx: Name of the column holding the row identifier.
        lat: Name of the column passed as the second point component.
        lng: Name of the column passed as the third point component.
        t: Name of the column passed as the fourth point component.
    """

    def __init__(self, threshold, Predicate, inx='index', lat='x', lng='y', t='timestep'):
        self.threshold = threshold
        self._Predicate = Predicate
        self.predicate = Predicate(threshold)
        self.inx = inx
        self.lat = lat
        self.lng = lng
        self.t = t

    def _mpct(self, input_data):
        """Return every longest chain of mutually consistent points.

        Args:
            input_data: Iterable of points, each a list ``[inx, lat, lng, t]``.

        Returns:
            The chains of maximal length, in creation order; an empty list
            when ``input_data`` is empty.
        """
        sequences = []

        for item in input_data:
            extended = False

            # A point may extend several chains at once; chains are never
            # empty because each one is created with its first point.
            for sequence in sequences:
                if self.predicate(sequence[-1], item):
                    sequence.append(item)
                    extended = True

            if not extended:
                sequences.append([item])

        if not sequences:
            return []

        max_size = max(len(seq) for seq in sequences)
        return [seq for seq in sequences if len(seq) == max_size]

    def predict(self, df: pd.DataFrame) -> pd.Series:
        """Flag the rows that belong to the first longest consistent chain.

        Args:
            df: Frame containing the ``inx``, ``lat``, ``lng`` and ``t``
                columns configured on this instance.

        Returns:
            Boolean Series named ``'pred'`` aligned with ``df``.

        Side effects:
            Writes the same values to ``df['pred']``.
        """
        trajectory = df[[self.inx, self.lat, self.lng, self.t]].values.tolist()
        sequences = self._mpct(trajectory)
        # An empty trajectory yields no chain, hence no flagged rows.
        stop_indexes = [point[0] for point in sequences[0]] if sequences else []
        # Match on the configured identifier column rather than a literal
        # 'index', so a custom `inx` works.
        df['pred'] = df[self.inx].isin(stop_indexes)
        return df['pred']

### Hyperparameter Optimization - Hyperopt

In [9]:
# Detector class tuned by the hyperparameter search.
Model = MPCT


def optimize(params, data):
    """Objective for hyperopt: negated mean per-vehicle F1 score.

    Args:
        params: Keyword arguments forwarded to ``Model`` (the search space
            supplies ``threshold``).
        data: Frame with an ``id`` column identifying each trajectory and a
            ``stop`` column holding the ground-truth labels.

    Returns:
        ``-1.0 * mean(F1)`` over all distinct ``id`` values, so that
        minimising the loss maximises F1.
    """
    detector = Model(Predicate=LinearSpeedBounded, **params)
    per_vehicle_f1 = []
    for vehicle in data['id'].unique():
        rows = data[data['id'] == vehicle]
        # The labels are dropped before prediction so the detector only sees
        # the remaining columns.
        predicted = detector.predict(rows.drop(columns=['stop']))
        per_vehicle_f1.append(metrics.f1_score(rows['stop'], predicted))
    return -1.0 * np.mean(per_vehicle_f1)

In [10]:
# Search space sampled by hyperopt. The threshold is the speed bound handed to
# the predicate (haversine km per unit of the time column).
# NOTE(review): confirm the units of the time column before changing the range.
param_space = dict(
    threshold=hp.uniform('threshold', 0, 1e-6),
)

In [11]:
# Training split, with the row position copied into an explicit 'index'
# column so that predictions can be mapped back to rows.
move_stop_train = (
    pd.read_csv(f'{dataset_path}/stop_train.csv')
    .assign(index=lambda frame: frame.index)
)

In [12]:
# Bind the training data so hyperopt only has to supply `params`.
optimization_function = partial(optimize, data=move_stop_train)

In [13]:
# Run the TPE search over `param_space`, keeping the full history in `trials`.
# NOTE(review): no random state is passed to fmin, so repeated runs may select
# a different threshold; consider seeding the search for reproducibility.
trials = Trials()
search_settings = dict(
    fn=optimization_function,
    space=param_space,
    algo=tpe.suggest,
    max_evals=15,
    trials=trials,
)
best_params = fmin(**search_settings)
print(best_params)

100%|██████████| 15/15 [51:49<00:00, 207.31s/trial, best loss: -0.6615087541557705]
{'threshold': 7.820434877143118e-07}


### Test / Validation

In [14]:
# Held-out split, prepared the same way as the training data.
move_stop_test = (
    pd.read_csv(f'{dataset_path}/stop_test.csv')
    .assign(index=lambda frame: frame.index)
)
# Distinct trajectory identifiers, in order of first appearance.
veh_id_unique = move_stop_test['id'].unique()

In [15]:
# Evaluate the tuned detector on the test split: each metric is computed per
# vehicle and then averaged across vehicles.
model = MPCT(Predicate=LinearSpeedBounded, **best_params)
data = move_stop_test
ac_list, pr_list, re_list, f1_list = [], [], [], []

# Pair each result list with the scikit-learn scorer that fills it.
scoreboard = (
    (ac_list, metrics.accuracy_score),
    (pr_list, metrics.precision_score),
    (re_list, metrics.recall_score),
    (f1_list, metrics.f1_score),
)

for veh_id in veh_id_unique:
    trajectory = data[data['id'] == veh_id]
    y_true = trajectory['stop']
    y_pred = model.predict(trajectory.drop(columns=['stop']))

    for scores, scorer in scoreboard:
        scores.append(scorer(y_true, y_pred))

ac_mean, pr_mean, re_mean, f1_mean = (
    np.mean(values) for values in (ac_list, pr_list, re_list, f1_list)
)

print('Accuracy mean:', ac_mean)
print('Precision mean:', pr_mean)
print('Recall mean:', re_mean)
print('F1 mean:', f1_mean)

Accuracy mean: 0.9831251053428723
Precision mean: 0.701355101722921
Recall mean: 0.677282392132765
F1 mean: 0.6762428990395408
