# **PACKAGES AND INSTALLS**

In [30]:
import tensorflow as tf
import pandas as pd
import numpy as np
import joblib
import matplotlib.pyplot as plt

from tensorflow import keras
from keras.models import Sequential
from keras.layers import Dense
from keras.callbacks import EarlyStopping

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.utils.class_weight import compute_class_weight

import warnings
warnings.filterwarnings("ignore")
warnings.warn("This function is deprecated", DeprecationWarning)


# **PATH AND SETTINGS**

In [14]:
DATA_PATH = "./dataset/Telematicsdata.csv"

SENSOR_VARS= ["Vehicle speed", "ENGINE RPM", "ACCELERATION X", "IDLING", "THROTTLE POSITION"]
HARD_BRAKE = -3.0   # m/s²
HARD_ACCEL =  3.0


# **Exploratory Data Analysis and Feature engineering**

In [10]:
# RAW DATA
data = pd.read_csv(DATA_PATH)

# Load & Filter Only Relevant Sensors

data = data[data["variable"].isin(SENSOR_VARS)].copy()
data["time"] = pd.to_datetime(data["timestamp"])
data["value"] = pd.to_numeric(data["value"], errors="coerce")
data = data.dropna(subset=["value"])
wide = (data.pivot_table(
    index=["deviceId", "time"],
    columns="variable",
    values="value",
    aggfunc="last").reset_index().sort_values("time").set_index("time"))

# sensor sanity filters
wide.loc[wide["Vehicle speed"] > 200, "Vehicle speed"] = np.nan
wide.loc[wide["ENGINE RPM"] > 8000, "ENGINE RPM"] = np.nan
wide.loc[wide["ACCELERATION X"].abs() > 10, "ACCELERATION X"] = np.nan
num_cols = wide.select_dtypes(include=[np.number]).columns
wide[num_cols] = wide[num_cols].interpolate()

# derive input variables by 30 sec windows
features = (
    wide[["deviceId"]+SENSOR_VARS]
    .groupby("deviceId")
    .resample("30s")
    .agg(   avg_speed=("Vehicle speed", "mean"),
            speed_std=("Vehicle speed", "std"),
            max_speed=("Vehicle speed", "max"),
            avg_acceleration=("ACCELERATION X", "mean"),
            throttle_variance=("THROTTLE POSITION", "std"),
            rpm_mean=("ENGINE RPM", "mean"),
            idle_time_ratio=("IDLING", lambda x: (x > 0).mean()))
    .fillna(0)
    .reset_index()
)

# HARD EVENTS
# calculate Hard Braking & Acceleration Counts defines physics-based thresholds
events = (
    wide[["deviceId","ACCELERATION X"]]
    .groupby("deviceId")
    .resample("30s")
    .agg(   hard_brake_count=("ACCELERATION X", lambda x: (x < HARD_BRAKE).sum()),
            hard_accel_count=("ACCELERATION X", lambda x: (x > HARD_ACCEL).sum()))
    .fillna(0)
    .reset_index()
)
features = features.merge(events, on=["deviceId", "time"], how="left" ).fillna(0)
features = features[features["avg_speed"] > 5].copy()
# critical Features
# derive output labels
def label_row(r):
    # Aggressive driving
    if r["hard_brake_count"] + r["hard_accel_count"] > 1:
        return 1
    # Distracted driving
    if r["speed_std"] > 12 and r["throttle_variance"] > 5:
        return 2
    # Fatigued driving (monotonous, steady driving)
    if (    r["avg_speed"] > 20 and
            r["speed_std"] < 5 and
            abs(r["avg_acceleration"]) < 0.2):
        return 3
    # Normal driving
    return 0

# 0 normal 1 aggressive 2 distracted 3 fatigue
# put aggressive distracted fatigue as 1

features["label"] = features.apply(label_row, axis=1)
features["label"] = features["label"].replace({2: 1}).replace({3: 1})

print(features["label"].value_counts())
features.info()


label
0    661
1     79
Name: count, dtype: int64
<class 'pandas.core.frame.DataFrame'>
Index: 740 entries, 1 to 24451
Data columns (total 12 columns):
 #   Column             Non-Null Count  Dtype         
---  ------             --------------  -----         
 0   deviceId           740 non-null    object        
 1   time               740 non-null    datetime64[ns]
 2   avg_speed          740 non-null    float64       
 3   speed_std          740 non-null    float64       
 4   max_speed          740 non-null    float64       
 5   avg_acceleration   740 non-null    float64       
 6   throttle_variance  740 non-null    float64       
 7   rpm_mean           740 non-null    float64       
 8   idle_time_ratio    740 non-null    float64       
 9   hard_brake_count   740 non-null    int64         
 10  hard_accel_count   740 non-null    int64         
 11  label              740 non-null    int64         
dtypes: datetime64[ns](1), float64(7), int64(3), object(1)
memory usage: 75.2+

In [11]:

X = features.drop(columns=["deviceId", "time", "label"])
y = features["label"].astype(int)
assert X.isnull().sum().sum() == 0
assert y.isnull().sum() == 0
assert X.shape[0] == y.shape[0]
assert X.shape[1] >= 6 
print(X.shape,y.shape)

scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.25, stratify=y, random_state=42)

print(X_train.shape, X_test.shape)
features["label"].value_counts()


(740, 9) (740,)
(555, 9) (185, 9)


label
0    661
1     79
Name: count, dtype: int64

In [24]:

model = Sequential()
model.add(Dense(units=9,activation='relu'))
model.add(Dense(units=32,activation='relu'))
model.add(Dense(units=16,activation='relu'))
model.add(Dense(units=1, activation='sigmoid'))
model.compile(optimizer='adam',loss='binary_crossentropy', metrics=['accuracy'])

early_stopping=EarlyStopping(
    monitor="val_loss",
    min_delta=0.0001,
    patience=15,    
    verbose=1,  
    mode="auto",
    baseline=None,  
    restore_best_weights=False
)

classes = np.unique(y_train)
weights = compute_class_weight(class_weight="balanced", classes=classes, y=y_train)
class_weight = dict(zip(classes, weights))

model.fit(X_train,y_train, epochs = 1000, batch_size=32, validation_split=0.25, callbacks=early_stopping, class_weight=class_weight)



Epoch 1/1000
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 23ms/step - accuracy: 0.8149 - loss: 0.7015 - val_accuracy: 0.8417 - val_loss: 0.6325
Epoch 2/1000
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 0.7764 - loss: 0.6829 - val_accuracy: 0.7122 - val_loss: 0.6346
Epoch 3/1000
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - accuracy: 0.7332 - loss: 0.6642 - val_accuracy: 0.7122 - val_loss: 0.6246
Epoch 4/1000
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 0.7067 - loss: 0.6499 - val_accuracy: 0.6547 - val_loss: 0.6150
Epoch 5/1000
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 0.6875 - loss: 0.6329 - val_accuracy: 0.6619 - val_loss: 0.6002
Epoch 6/1000
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 0.6731 - loss: 0.6178 - val_accuracy: 0.6619 - val_loss: 0.5831
Epoch 7/1000
[1m13/13[0m

<keras.src.callbacks.history.History at 0x24856a21480>

In [32]:
y_prob = model.predict(X_test).ravel()

for t in [0.6, 0.5, 0.4, 0.3]:
    y_pred = (y_prob >= t).astype(int)
    print(f"\nThreshold {t}")
    print(confusion_matrix(y_test, y_pred))

print("confusion_matrix: \n",confusion_matrix(y_test,y_pred))
print("Classification report: \n",classification_report(y_test, y_pred))

model.save("model/driving_model.keras")
joblib.dump(scaler, "model/scaler.pkl")


[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step 

Threshold 0.6
[[139  26]
 [  9  11]]

Threshold 0.5
[[137  28]
 [  8  12]]

Threshold 0.4
[[136  29]
 [  8  12]]

Threshold 0.3
[[133  32]
 [  7  13]]
confusion_matrix: 
 [[133  32]
 [  7  13]]
Classification report: 
               precision    recall  f1-score   support

           0       0.95      0.81      0.87       165
           1       0.29      0.65      0.40        20

    accuracy                           0.79       185
   macro avg       0.62      0.73      0.64       185
weighted avg       0.88      0.79      0.82       185



['model/scaler.pkl']