In [1]:
import json
import csv
import pandas as pd
import numpy as np
import os
import glob
import re
import matplotlib.pyplot as plt

from feature_extraction import get_dataframe
# Disable pandas' display truncation: show every column and every row.
pd.options.display.max_columns = None
pd.options.display.max_rows = None

In [2]:
# Folder containing the trace files of one simulation run.
# NOTE(review): machine-specific absolute path; adjust it before running elsewhere.
directory = r"C:\Users\ganes\verimi-extension\grid-sybil\VeReMi_25200_28800_2025-11-15_13.57.9\VeReMi_25200_28800_2025-11-15_13_57_9"

# glob returns matches in an arbitrary, filesystem-dependent order. Sorting makes
# the lists handed to get_dataframe identical on every run and every machine.
log_files = sorted(glob.glob(os.path.join(directory, "traceJSON-*.json")))
# Kept as a list (not a single path) because it is passed to get_dataframe as-is.
ground_truth_file = sorted(glob.glob(os.path.join(directory, "traceGroundTruthJSON-*.json")))

In [3]:
# Number of trace log files matched by the "traceJSON-*.json" pattern.
len(log_files)

1845

In [4]:
# Show the path(s) matched by the "traceGroundTruthJSON-*.json" pattern.
ground_truth_file

['C:\\Users\\ganes\\verimi-extension\\grid-sybil\\VeReMi_25200_28800_2025-11-15_13.57.9\\VeReMi_25200_28800_2025-11-15_13_57_9\\traceGroundTruthJSON-7.json']

In [5]:
# Build the log DataFrame from all matched trace files using the project helper
# get_dataframe (imported from feature_extraction; its parsing logic is not shown here).
logs = get_dataframe(log_files)

In [6]:
# Build the ground-truth DataFrame with the same project helper, from the
# list of matched ground-truth file(s).
ground_truth = get_dataframe(ground_truth_file)

In [7]:
# (rows, columns) of the log frame as loaded, before any cleaning.
logs.shape

(2142064, 31)

In [8]:
# Number of distinct messageID values in the logs (nunique skips NaN by default).
logs['messageID'].nunique()

344927

In [9]:
# (rows, columns) of the ground-truth frame.
ground_truth.shape

(353793, 31)

In [10]:
# List the columns available in the log frame.
logs.columns

Index(['type', 'rcvTime', 'sendTime', 'sender', 'senderPseudo', 'messageID',
       'pos_x', 'pos_y', 'pos_z', 'pos_noise_x', 'pos_noise_y', 'pos_noise_z',
       'spd_x', 'spd_y', 'spd_z', 'spd_noise_x', 'spd_noise_y', 'spd_noise_z',
       'acl_x', 'acl_y', 'acl_z', 'acl_noise_x', 'acl_noise_y', 'acl_noise_z',
       'hed_x', 'hed_y', 'hed_z', 'hed_noise_x', 'hed_noise_y', 'hed_noise_z',
       'receiver'],
      dtype='object')

In [11]:
# Remove the columns that are not needed for the comparison with the ground truth.
# With them gone, rows that share a messageID can collapse under drop_duplicates
# (the de-duplicated row count equals the number of unique messageIDs).
# errors="ignore" keeps this cell re-runnable: `logs` is overwritten in place, so a
# second execution would otherwise raise KeyError for the already-dropped columns.
logs = logs.drop(
    columns=['type', 'rcvTime', 'sendTime', 'receiver'],
    errors="ignore",
)

In [12]:
# Keep only the first occurrence of every fully identical row.
logs = logs[~logs.duplicated(keep="first")]

In [13]:
# (rows, columns) of the log frame after the column drop and de-duplication
# performed in the two preceding cells.
logs.shape

(344927, 27)

In [14]:
# Number of distinct messageID values in the ground truth (NaN is not counted).
ground_truth['messageID'].nunique()

353793

In [15]:
# Collect the message identifiers of each source as sets for membership arithmetic.
gt_messageIDs, log_messageIDs = (
    set(frame['messageID']) for frame in (ground_truth, logs)
)

In [16]:
# IDs that appear in the ground truth but in none of the log rows.
messages_in_gt_not_in_logs = gt_messageIDs.difference(log_messageIDs)
len(messages_in_gt_not_in_logs)

19142

In [17]:
# IDs that appear in the logs but have no ground-truth record.
messages_in_logs_not_in_gt = log_messageIDs.difference(gt_messageIDs)
len(messages_in_logs_not_in_gt)

10276

In [18]:
# Pair every de-duplicated log message with its ground-truth record by messageID.
# Columns present in both frames get the "_log" / "_truth" suffixes.
#
# how="inner" keeps only IDs found in BOTH frames, so the messages that exist in
# just one source (counted in the two set-difference cells) are dropped here.
# NOTE(review): if the log-only messages are the ones of interest (e.g. injected
# or fabricated messages), this join removes them before any comparison is made.
# Confirm that an inner join is really what the analysis needs.
#
# validate="one_to_one" makes pandas raise MergeError if messageID is duplicated
# in either frame, instead of silently multiplying rows.
merged = logs.merge(
    ground_truth,
    on="messageID",
    how="inner",
    suffixes=("_log", "_truth"),
    validate="one_to_one",
)

In [19]:
# (rows, columns) of the inner-joined frame.
merged.shape

(334651, 57)

In [20]:
def _vector_error(frame, quantity):
    """Return the Euclidean distance between logged and ground-truth vectors.

    Parameters
    ----------
    frame : DataFrame containing the columns ``<quantity>_{x,y,z}_log`` and
        ``<quantity>_{x,y,z}_truth`` (the suffixes produced by the merge).
    quantity : column prefix, e.g. ``"pos"``, ``"spd"`` or ``"acl"``.

    Returns
    -------
    Series with one error magnitude per row. Missing values propagate: if any
    component is NaN, the error for that row is NaN.
    """
    squared_gap = sum(
        (frame[f"{quantity}_{axis}_log"] - frame[f"{quantity}_{axis}_truth"]) ** 2
        for axis in ("x", "y", "z")
    )
    return np.sqrt(squared_gap)


# Position, speed and acceleration error: one shared formula applied per
# quantity, producing the pos_error, spd_error and acl_error columns.
for _quantity in ("pos", "spd", "acl"):
    merged[f"{_quantity}_error"] = _vector_error(merged, _quantity)

In [21]:
# Error tolerances for position (meters), speed (m/s) and acceleration (m/s^2).
# They are compared with a strict ">", so 0 flags any non-zero error.
POS_THR, SPD_THR, ACL_THR = 0, 0, 0

In [22]:
# Flag each quantity whose error strictly exceeds its tolerance.
# A NaN error compares as False, so rows with missing values are not flagged.
merged["pos_mismatch"] = merged["pos_error"].gt(POS_THR)
merged["spd_mismatch"] = merged["spd_error"].gt(SPD_THR)
merged["acl_mismatch"] = merged["acl_error"].gt(ACL_THR)

# A message counts as forged when at least one of the three flags is set.
merged["is_forged"] = merged[
    ["pos_mismatch", "spd_mismatch", "acl_mismatch"]
].any(axis=1)

In [23]:
# Summarise the flag: row count, number of distinct values, most frequent
# value and how often it occurs.
merged['is_forged'].describe()

count     334651
unique         1
top        False
freq      334651
Name: is_forged, dtype: object

In [24]:
# NOTE(review): this cell repeats the previous one verbatim and produces the
# same output; it can be removed or replaced with a different summary.
merged['is_forged'].describe()

count     334651
unique         1
top        False
freq      334651
Name: is_forged, dtype: object