In [1]:
import pandas as pd

file_path = 'mixalldata_transformed_euclidean.csv'
df = pd.read_csv(file_path)

print("Column names in the dataset:")
print(df.columns)

Column names in the dataset:
Index(['type', 'sendTime', 'sender', 'senderPseudo', 'messageID', 'class',
       'pos', 'pos_n', 'spd', 'spd_n', 'acl', 'acl_n', 'hed', 'hed_n'],
      dtype='object')


In [2]:
print(df.head())

   type      sendTime  sender  senderPseudo  messageID  class         pos  \
0     4  72002.302942  130137     101301377  422013806      0  268.933600   
1     4  72003.302942  130137     101301377  422023410      0  269.064287   
2     4  72004.302942  130137     101301377  422032081      0  269.236040   
3     4  72005.302942  130137     101301377  422040712      0  272.719875   
4     4  72006.302942  130137     101301377  422052949      0  273.570521   

      pos_n       spd     spd_n       acl     acl_n  hed      hed_n  
0  4.917271  1.212767  0.000000  2.131618  0.001219  1.0  26.631126  
1  5.032356  3.149541  0.001046  2.110864  0.001046  1.0  25.042812  
2  4.933492  5.078437  0.002716  1.794447  0.001670  1.0  24.027949  
3  4.731177  7.237398  0.004379  1.821157  0.001663  1.0  23.398577  
4  4.700795  9.328836  0.006241  2.504642  0.001862  1.0  23.150423  


In [3]:
from sklearn.metrics import precision_score, recall_score, f1_score
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler

features = ['sendTime', 'sender', 'senderPseudo', 'messageID', 
            'pos', 'pos_n', 'spd', 'spd_n', 'acl', 'acl_n', 'hed', 'hed_n']
X = df[features]
Y = (df['class'] != 0).astype(int)

scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

X_train, X_test, Y_train, Y_test = train_test_split(X_scaled, Y, test_size=0.2, random_state=42)

lr = LogisticRegression(random_state=42)

lr.fit(X_train, Y_train)

Y_pred = lr.predict(X_test)

accuracy = accuracy_score(Y_test, Y_pred)
precision = precision_score(Y_test, Y_pred)
recall = recall_score(Y_test, Y_pred)
f1 = f1_score(Y_test, Y_pred)

print("Logistic Regression Accuracy:", accuracy)
print("Logistic Regression Precision:", precision)
print("Logistic Regression Recall:", recall)
print("Logistic Regression F1-score:", f1)

Logistic Regression Accuracy: 0.7274000644795778
Logistic Regression Precision: 0.9155241777986095
Logistic Regression Recall: 0.3615954521437635
Logistic Regression F1-score: 0.518431274969242


In [5]:
from sklearn.metrics import precision_score, recall_score, f1_score
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score
import pandas as pd
import warnings

warnings.filterwarnings("ignore", category=FutureWarning)

features = ['sendTime', 'sender', 'senderPseudo', 'messageID', 
            'pos', 'pos_n', 'spd', 'spd_n', 'acl', 'acl_n', 'hed', 'hed_n']
X = df[features]
Y = (df['class'] != 0).astype(int)

X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=42)

n_neighbor_amount = 10

knn = KNeighborsClassifier(n_neighbors=n_neighbor_amount)
knn.fit(X_train, Y_train)
Y_pred = knn.predict(X_test)
accuracy = accuracy_score(Y_test, Y_pred)
print("KNN Accuracy when n_neighbors =", n_neighbor_amount, ":", accuracy)
precision = precision_score(Y_test, Y_pred)
recall = recall_score(Y_test, Y_pred)
f1 = f1_score(Y_test, Y_pred)

print("KNN Precision:", precision)
print("KNN Recall:", recall)
print("KNN F1-score:", f1)

KNN Accuracy when n_neighbors = 10 : 0.6871957956811203
KNN Precision: 0.7908520407663765
KNN Recall: 0.3115466645068978
KNN F1-score: 0.44700218576211165


In [8]:
from sklearn.metrics import precision_score, recall_score, f1_score
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

features = ['sendTime', 'sender', 'senderPseudo', 'messageID', 
            'pos', 'pos_n', 'spd', 'spd_n', 'acl', 'acl_n', 'hed', 'hed_n']
X = df[features]
Y = (df['class'] != 0).astype(int)

X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=42)

rf = RandomForestClassifier(n_estimators=20, random_state=42)

rf.fit(X_train, Y_train)

Y_pred = rf.predict(X_test)

accuracy = accuracy_score(Y_test, Y_pred)
precision = precision_score(Y_test, Y_pred)
recall = recall_score(Y_test, Y_pred)
f1 = f1_score(Y_test, Y_pred)

print("Random Forest Accuracy:", accuracy)
print("Random Forest Precision:", precision)
print("Random Forest Recall:", recall)
print("Random Forest F1-score:", f1)

Random Forest Accuracy: 0.9517796050469355
Random Forest Precision: 0.9800036975722077
Random Forest Recall: 0.8995244651679413
Random Forest F1-score: 0.9380410673241085


In [7]:
from sklearn.metrics import precision_score, recall_score, f1_score
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score

features = ['sendTime', 'sender', 'senderPseudo', 'messageID', 
            'pos', 'pos_n', 'spd', 'spd_n', 'acl', 'acl_n', 'hed', 'hed_n']
X = df[features]
Y = (df['class'] != 0).astype(int)

X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.1, random_state=42)

dt = DecisionTreeClassifier(random_state=42)

dt.fit(X_train, Y_train)

Y_pred = dt.predict(X_test)

accuracy = accuracy_score(Y_test, Y_pred)
precision = precision_score(Y_test, Y_pred)
recall = recall_score(Y_test, Y_pred)
f1 = f1_score(Y_test, Y_pred)

print("Decision Tree Accuracy:", accuracy)
print("Decision Tree Precision:", precision)
print("Decision Tree Recall:", recall)
print("Decision Tree F1-score:", f1)

Decision Tree Accuracy: 0.9168244746948958
Decision Tree Precision: 0.8896159274269628
Decision Tree Recall: 0.9079739951626072
Decision Tree F1-score: 0.8987012194889507


In [10]:
from sklearn.metrics import precision_score, recall_score, f1_score
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score

features = ['sendTime', 'sender', 'senderPseudo', 'messageID', 
            'pos', 'pos_n', 'spd', 'spd_n', 'acl', 'acl_n', 'hed', 'hed_n']
X = df[features]
Y = (df['class'] != 0).astype(int)

X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.31, random_state=42)

nb = GaussianNB()

nb.fit(X_train, Y_train)

Y_pred = nb.predict(X_test)

accuracy = accuracy_score(Y_test, Y_pred)
precision = precision_score(Y_test, Y_pred)
recall = recall_score(Y_test, Y_pred)
f1 = f1_score(Y_test, Y_pred)

print("Naive Bayes Accuracy:", accuracy)
print("Naive Bayes Precision:", precision)
print("Naive Bayes Recall:", recall)
print("Naive Bayes F1-score:", f1)

Naive Bayes Accuracy: 0.657990631982722
Naive Bayes Precision: 1.0
Naive Bayes Recall: 0.15657673584741225
Naive Bayes F1-score: 0.2707589232852588


In [11]:
from sklearn.metrics import precision_score, recall_score, f1_score
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler

features = ['sendTime', 'sender', 'senderPseudo', 'messageID', 
            'pos', 'pos_n', 'spd', 'spd_n', 'acl', 'acl_n', 'hed', 'hed_n']
X = df[features]
Y = (df['class'] != 0).astype(int)

scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

X_train, X_test, Y_train, Y_test = train_test_split(X_scaled, Y, test_size=0.2, random_state=42)

# Adjust max_iter and tol
mlp = MLPClassifier(hidden_layer_sizes=(100, 50), activation='relu', solver='adam', random_state=42,
                    max_iter=100, tol=0.001)

mlp.fit(X_train, Y_train)

Y_pred = mlp.predict(X_test)

accuracy = accuracy_score(Y_test, Y_pred)
precision = precision_score(Y_test, Y_pred)
recall = recall_score(Y_test, Y_pred)
f1 = f1_score(Y_test, Y_pred)

print("MLP Accuracy:", accuracy)
print("MLP Precision:", precision)
print("MLP Recall:", recall)
print("MLP F1-score:", f1)



MLP Accuracy: 0.8167355805196553
MLP Precision: 0.9458142597353735
MLP Recall: 0.5817067573769606
MLP F1-score: 0.7203652715059091


In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import tensorflow as tf 
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization, Input
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.optimizers import Adam

features = ['sendTime', 'sender', 'senderPseudo', 'messageID', 
            'pos', 'pos_n', 'spd', 'spd_n', 'acl', 'acl_n', 'hed', 'hed_n']
X = df[features]
Y = (df['class'] != 0).astype(int)

X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=42)

model = Sequential([
    Input(shape=(len(features),)),
    Dense(128, activation='relu'),
    BatchNormalization(),
    Dropout(0.3),
    
    Dense(128, activation='relu'),
    BatchNormalization(),
    Dropout(0.3),
    
    Dense(64, activation='relu'),
    BatchNormalization(),
    Dropout(0.3),
    
    Dense(32, activation='relu'),
    BatchNormalization(),
    Dropout(0.3),
    
    Dense(1, activation='sigmoid')
])

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

model.fit(X_train, Y_train, epochs=10, batch_size=32, validation_split=0.2)

Y_pred_prob = model.predict(X_test)
Y_pred = (Y_pred_prob > 0.5).astype(int)

loss, accuracy = model.evaluate(X_test, Y_test)

Y_test = np.squeeze(Y_test)
Y_pred = np.squeeze(Y_pred)

precision = precision_score(Y_test, Y_pred)
recall = recall_score(Y_test, Y_pred)
f1 = f1_score(Y_test, Y_pred)

print("ANN Accuracy:", accuracy)
print("ANN Precision:", precision)
print("ANN Recall:", recall)
print("ANN F1-score:", f1)

Epoch 1/10
[1m63897/63897[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m188s[0m 3ms/step - accuracy: 0.6678 - loss: 0.6126 - val_accuracy: 0.4947 - val_loss: 1.5087
Epoch 2/10
[1m63897/63897[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m222s[0m 3ms/step - accuracy: 0.6852 - loss: 0.5942 - val_accuracy: 0.4944 - val_loss: 1.6373
Epoch 3/10
[1m51698/63897[0m [32m━━━━━━━━━━━━━━━━[0m[37m━━━━[0m [1m34s[0m 3ms/step - accuracy: 0.6869 - loss: 0.5924