In [23]:
from sklearn.model_selection import train_test_split
from datetime import datetime
from sklearn import metrics
import seaborn as sns
import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings("ignore")

In [2]:
dataset_path = './groundTruthGenerator/groundTruth'

In [3]:
stop_train = pd.read_csv(f'{dataset_path}/stop_train.csv')
stop_train['index'] = stop_train.index

In [4]:
# Split in train and validation
tra_ids, val_ids = train_test_split(stop_train['id'].unique(), test_size=0.4, random_state=0)

tra_data = stop_train[stop_train['id'].isin(tra_ids)]
val_data = stop_train[stop_train['id'].isin(val_ids)]

In [5]:
tes_data = pd.read_csv(f'{dataset_path}/stop_test.csv')
tes_data['index'] = tes_data.index

In [6]:
features = ['speed']
label = 'stop'
tra_x, tra_y = tra_data[features], tra_data[label]
val_x, val_y = val_data[features], val_data[label]
tes_x, tes_y = tes_data[features], tes_data[label]

In [7]:
from tensorflow import keras

dff = keras.Sequential(
    [
        keras.layers.Dense(
            256, activation="relu", input_shape=(tra_x.shape[-1],)
        ),
        keras.layers.Dense(256, activation="relu"),
        keras.layers.Dropout(0.3),
        keras.layers.Dense(256, activation="relu"),
        keras.layers.Dropout(0.3),
        keras.layers.Dense(1, activation="sigmoid"),
    ]
)
dff.summary()

2023-10-18 16:59:54.366956: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2023-10-18 16:59:54.866965: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2023-10-18 16:59:54.870423: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 256)               512       
                                                                 
 dense_1 (Dense)             (None, 256)               65792     
                                                                 
 dropout (Dropout)           (None, 256)               0         
                                                                 
 dense_2 (Dense)             (None, 256)               65792     
                                                                 
 dropout_1 (Dropout)         (None, 256)               0         
                                                                 
 dense_3 (Dense)             (None, 1)                 257       
                                                                 
Total params: 132353 (517.00 KB)
Trainable params: 13235

2023-10-18 17:00:00.023664: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:981] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2023-10-18 17:00:00.025065: W tensorflow/core/common_runtime/gpu/gpu_device.cc:1960] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform.
Skipping registering GPU devices...


In [8]:
dff.compile(optimizer='adam',
              loss='binary_crossentropy', 
              metrics=[
    keras.metrics.Precision(name="precision"),
    keras.metrics.Recall(name="recall"),
    keras.metrics.BinaryAccuracy(name='binary_accuracy')
])

In [9]:
callbacks = [keras.callbacks.ModelCheckpoint("DFF/stop_model_at_epoch_{epoch}.h5")]
class_weight = {False: 0.5, True: 1}

In [10]:
dff.fit(
    np.asarray(tra_x).astype(np.float64),
    np.asarray(tra_y).astype(np.float64),
    batch_size=2048,
    epochs=30,
    verbose=2,
    callbacks=callbacks,
    validation_data=(np.asarray(val_x).astype(np.float64),
                     np.asarray(val_y).astype(np.float64)),
    class_weight=class_weight,
)

Epoch 1/30
93/93 - 6s - loss: 0.0370 - precision: 0.7555 - recall: 0.5186 - binary_accuracy: 0.9843 - val_loss: 0.0155 - val_precision: 0.7841 - val_recall: 1.0000 - val_binary_accuracy: 0.9932 - 6s/epoch - 62ms/step
Epoch 2/30
93/93 - 3s - loss: 0.0066 - precision: 0.8474 - recall: 1.0000 - binary_accuracy: 0.9956 - val_loss: 0.0083 - val_precision: 0.8699 - val_recall: 1.0000 - val_binary_accuracy: 0.9963 - 3s/epoch - 35ms/step
Epoch 3/30
93/93 - 3s - loss: 0.0040 - precision: 0.9048 - recall: 1.0000 - binary_accuracy: 0.9975 - val_loss: 0.0051 - val_precision: 0.9328 - val_recall: 1.0000 - val_binary_accuracy: 0.9982 - 3s/epoch - 32ms/step
Epoch 4/30
93/93 - 4s - loss: 0.0030 - precision: 0.9280 - recall: 1.0000 - binary_accuracy: 0.9981 - val_loss: 0.0035 - val_precision: 0.9306 - val_recall: 1.0000 - val_binary_accuracy: 0.9981 - 4s/epoch - 40ms/step
Epoch 5/30
93/93 - 3s - loss: 0.0021 - precision: 0.9526 - recall: 1.0000 - binary_accuracy: 0.9988 - val_loss: 0.0022 - val_precisi

<keras.src.callbacks.History at 0x7ff9147b1970>

In [11]:
ac_list = []
pr_list = []
re_list = []
f1_list = []
data = tes_data
veh_id_unique = data['id'].unique()
for veh_id in veh_id_unique:
    trajectory = data[data['id'] == veh_id]
    y_true = trajectory['stop']
    X = np.asarray(trajectory['speed']).astype('float64')
    y_pred = dff.predict(X).astype('bool')


    ac_list.append(metrics.accuracy_score(y_true, y_pred))
    pr_list.append(metrics.precision_score(y_true, y_pred))
    re_list.append(metrics.recall_score(y_true, y_pred))
    f1_list.append(metrics.f1_score(y_true, y_pred))

ac_mean = np.mean(ac_list)
pr_mean = np.mean(pr_list)
re_mean = np.mean(re_list)
f1_mean = np.mean(f1_list)



In [12]:
print('Accuracy mean:', ac_mean)
print('Precision mean:', pr_mean)
print('Recall mean:', re_mean)
print('F1 mean:', f1_mean)

Accuracy mean: 0.9860863039213453
Precision mean: 0.6394613212078764
Recall mean: 0.889261744966443
F1 mean: 0.723051048989328


### Staga Score

In [24]:
staga = pd.read_csv('./STAGA/gps aggregated.csv')

In [25]:
import math
def haversine_distance(p1, p2) -> float:
    lat1, lon1 = p1
    lat2, lon2 = p2
    # Radius of the Earth in kilometers
    earth_radius = 6371
    # Convert latitude and longitude from degrees to radians
    lat1 = math.radians(lat1)
    lon1 = math.radians(lon1)
    lat2 = math.radians(lat2)
    lon2 = math.radians(lon2)
    # Haversine formula
    dlat = lat2 - lat1
    dlon = lon2 - lon1
    a = math.sin(dlat / 2)**2 + math.cos(lat1) * math.cos(lat2) * math.sin(dlon / 2)**2
    c = 2 * math.atan2(math.sqrt(a), math.sqrt(1 - a))
    # Calculate the distance
    distance = earth_radius * c
    return distance

In [33]:
speed = pd.Series([0])
values = staga[['latitude', 'longitude', 'ts']].values
for i in range(len(values)-1):
    p1 = values[i]
    p2 = values[i+1]
    distance = haversine_distance(p1[:-1], p2[:-1])
    d1 = datetime.fromisoformat(p1[-1]).timestamp()
    d2 = datetime.fromisoformat(p2[-1]).timestamp()
    insterval = d2 - d1
    speed[i+1] = distance / insterval


In [34]:
staga['speed'] = speed

In [35]:
staga['speed'].unique()

array([0.00000000e+00, 3.59177262e-05, 4.45335169e-05, ...,
       8.77481589e-05, 3.98905893e-04, 2.22150193e-04])

In [38]:
X = np.asarray(staga['speed']).astype('float64')
y_true = staga['diary_label'].apply(lambda x: x == 'stop')
y_pred = dff.predict(X).astype('bool')

ac_mean = metrics.accuracy_score(y_true, y_pred)
pr_mean = metrics.precision_score(y_true, y_pred)
re_mean = metrics.recall_score(y_true, y_pred)
f1_mean = metrics.f1_score(y_true, y_pred)



In [39]:
print('Accuracy mean:', ac_mean)
print('Precision mean:', pr_mean)
print('Recall mean:', re_mean)
print('F1 mean:', f1_mean)

Accuracy mean: 0.6421731483290991
Precision mean: 0.6421806929685273
Recall mean: 0.9999746402079503
F1 mean: 0.7820994694302573
