In [18]:
import pandas as pd
from sklearn.feature_selection import VarianceThreshold
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, ConfusionMatrixDisplay, confusion_matrix, cohen_kappa_score, roc_curve, roc_auc_score, f1_score, precision_score, recall_score
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.layers import Dense, BatchNormalization, Input
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.optimizers import Adam
from keras.metrics import AUC
from keras.models import Model
from tensorflow.keras.utils import to_categorical, plot_model
import time
import numpy as np
import ember
import lightgbm as lgb
import warnings

**Loading the Data**

In [31]:
#UPDATE THIS PATH
X_train, y_train, X_test, y_test = ember.read_vectorized_features("/Users/alexandrutodea/Downloads/ember2018_data")



Dataset Contents:

- Training Data: 300,000 malicious samples / 300,000 benign samples / 200,000 unlabeled samples (not taken into account)
- Testing Data: 100,000 malicious samples / 100,000 benign samples

The feature groups in the dataset:

- 0 to 255: Byte Histogram
- 256 to 511: Byte-entropy Histogram
- 512 to 615: String Information
- 616 to 625: General File Information
- 626 to 687: Header Information
- 688 to 942: Section Information
- 943 to 2222: Imported Functions
- 2223 to 2350: Exported Functions
- 2351 to 2380: Data Directories

**Finds the positions of all benign samples in the training set**

In [32]:
ben=np.where(y_train==0)

**Finds the positions of all malicious samples in the training set**

In [33]:
mal=np.where(y_train==1)

**Combines all training indices (both benign and malicious)**

In [34]:
n = np.concatenate((ben, mal), axis=1).reshape(600000)

**Random seed set by the authors of https://ieeexplore.ieee.org/abstract/document/10460035**

In [35]:
np.random.seed(314)

**Randomly shuffles the indices in n**

In [36]:
np.random.shuffle(n)

**Picks benign and malicious samples from y_train in the shuffled order**

In [37]:
y_train = y_train[n]

**Picks benign and malicious samples from X_train in the shuffled order**

In [38]:
X_train = X_train[n]

**Checks the Sizes to Ensure Everything is OK**

In [39]:
X_train_rows, X_train_columns = X_train.shape
X_test_rows, X_test_columns = X_test.shape
y_train_shape = y_train.shape
y_test_shape = y_test.shape
print("Number of rows in X_train: ", X_train_rows)
print("Number of columns in X_train: ", X_train_columns)
print("Number of rows in X_test: ", X_test_rows)
print("Number of columns in X_test: ", X_test_columns)
print("y_train Shape (should be 1D): ", y_train_shape)
print("y_test Shape (should be 1D): ", y_test_shape)

Number of rows in X_train:  600000
Number of columns in X_train:  2381
Number of rows in X_test:  200000
Number of columns in X_test:  2381
y_train Shape (should be 1D):  (600000,)
y_test Shape (should be 1D):  (200000,)


**PART I:** Replicating Results from https://ieeexplore.ieee.org/abstract/document/10460035 (Code Source: https://github.com/CollinConnors/Machine-learning-for-detecting-malware-in-pe-files)

In [40]:
y_train = keras.utils.to_categorical(y_train, 2)
y_test = keras.utils.to_categorical(y_test, 2)

**Defining the Model**


In [41]:
modinput = Input(shape=(X_train.shape[1],), dtype='float32', name='Input')
BNOne = BatchNormalization(name='BatchNormalization_Input')(modinput)
denseLayerOne = Dense(512, activation='tanh', name='Dense1')(BNOne)
denseLayerTwo = Dense(128, activation='tanh', name='Dense2')(denseLayerOne)
BNTwo = BatchNormalization(name='BatchNormalization_Dense2')(denseLayerTwo)
denseLayerThree = Dense(8, activation='tanh', name='Dense3')(BNTwo)
output = Dense(2, activation='softmax', name='Out')(denseLayerThree)

DLModel = Model(modinput, output, name='DLModel')
DLModel.summary()

**Training the Model and Saving Its Weights After the Best Epoch**

In [42]:
# UPDATE THIS PATH
path = "./DLModel.weights.h5"

model_checkpoint_callback = keras.callbacks.ModelCheckpoint(filepath=path, save_weights_only=True,
                                                            monitor='val_acc', mode='max', save_best_only=True)

train_start = time.time()
DLModel.compile(loss="categorical_crossentropy", optimizer='Adam', metrics=['acc', AUC()])
DLModel.fit(X_train, y_train, epochs=75, batch_size=200, verbose=1, validation_data=(X_test, y_test),
          callbacks=[model_checkpoint_callback])
train_end = time.time()

Epoch 1/75
[1m3000/3000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 5ms/step - acc: 0.9119 - auc_5: 0.9708 - loss: 0.2117 - val_acc: 0.9148 - val_auc_5: 0.9745 - val_loss: 0.2066
Epoch 2/75
[1m3000/3000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 6ms/step - acc: 0.9521 - auc_5: 0.9910 - loss: 0.1198 - val_acc: 0.9319 - val_auc_5: 0.9802 - val_loss: 0.1797
Epoch 3/75
[1m3000/3000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 6ms/step - acc: 0.9600 - auc_5: 0.9934 - loss: 0.1016 - val_acc: 0.9258 - val_auc_5: 0.9795 - val_loss: 0.1835
Epoch 4/75
[1m3000/3000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 6ms/step - acc: 0.9643 - auc_5: 0.9947 - loss: 0.0905 - val_acc: 0.9341 - val_auc_5: 0.9799 - val_loss: 0.1824
Epoch 5/75
[1m3000/3000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 6ms/step - acc: 0.9666 - auc_5: 0.9954 - loss: 0.0847 - val_acc: 0.9352 - val_auc_5: 0.9797 - val_loss: 0.1842
Epoch 6/75
[1m3000/3000[0m [32m━━━━━━━━━━━

**Evaluating the Model's Performance**


In [43]:
DLModel.load_weights(path)

# Evaluate the model
modStats = DLModel.evaluate(X_test, y_test)
loss = modStats[0]
acc = modStats[1]
auc = modStats[2]

# Get predictions
start_inference = time.time()
y_pred_probs = DLModel.predict(X_test)
end_inference = time.time()
y_pred = np.argmax(y_pred_probs, axis=1)
y_true = np.argmax(y_test, axis=1)

# Compute metrics
precision = precision_score(y_true, y_pred, average='weighted')
recall = recall_score(y_true, y_pred, average='weighted')
f1 = f1_score(y_true, y_pred, average='weighted')
kappa = cohen_kappa_score(y_true, y_pred)

# Convert training time to minutes and seconds
training_total_seconds = train_end - train_start
train_minutes = int(training_total_seconds // 60)
train_seconds = training_total_seconds % 60

# Convert inference time to minutes and seconds
inference_total_seconds = end_inference - start_inference
inference_minutes = int(inference_total_seconds // 60)
inference_seconds = inference_total_seconds % 60

cm = confusion_matrix(y_true, y_pred)

print("\nConfusion Matrix:")
print(cm)

tn, fp, fn, tp = cm.ravel()
print(f"\nTrue Negatives (TN): {tn}")
print(f"False Positives (FP): {fp}")
print(f"False Negatives (FN): {fn}")
print(f"True Positives (TP): {tp}")

print(f"Test Accuracy: {acc:.4f}")
print(f"Test AUC: {auc:.4f}")
print(f"Test Loss: {loss:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1-Score: {f1:.4f}")
print(f"Cohen's Kappa Score: {kappa:.4f}")
#print(f"Training Time: {training_time_minutes:.2f} minutes")
print(f"Training Time: {train_minutes} minutes and {train_seconds:.2f} seconds")
print(f"Inference Time: {inference_minutes} minutes and {inference_seconds:.2f} seconds")

[1m6250/6250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 957us/step - acc: 0.9550 - auc_5: 0.9850 - loss: 0.1561
[1m6250/6250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 897us/step

Confusion Matrix:
[[95289  4711]
 [ 5184 94816]]

True Negatives (TN): 95289
False Positives (FP): 4711
False Negatives (FN): 5184
True Positives (TP): 94816
Test Accuracy: 0.9505
Test AUC: 0.9832
Test Loss: 0.1702
Precision: 0.9505
Recall: 0.9505
F1-Score: 0.9505
Cohen's Kappa Score: 0.9011
Training Time: 22 minutes and 55.58 seconds
Inference Time: 0 minutes and 6.08 seconds


**PART II: Applying a Variance Threshold on the Training and Testing Data**

**Dropping features with variance < 0.001**. "No change in the features implies no conditional change on the target from which to learn." (Machine Learning for Tabular Data by Mark Ryan and Luca Massaron)


In [44]:
vt = VarianceThreshold(threshold=1e-3)
X_train_vt = vt.fit_transform(X_train)
X_test_vt = vt.transform(X_test)

**Verifying that the columns were dropped**

In [45]:
print(X_train_vt.shape[1])
print(X_test_vt.shape[1])

1658
1658


**Placing the Dropped Columns in a List**

In [46]:
selected_mask = vt.get_support()
dropped_columns = np.where(~selected_mask)[0]
dropped_columns = dropped_columns.tolist()

**Displaying the Number of Columns That were Dropped from Each Feature Group**

In [47]:
# Define the buckets
buckets = {
    "Byte Histogram": (0, 255),
    "Byte-entropy Histogram": (256, 511),
    "String Information": (512, 615),
    "General File Information": (616, 625),
    "Header Information": (626, 687),
    "Section Information": (688, 942),
    "Imported Functions": (943, 2222),
    "Exported Functions": (2223, 2350),
    "Data Directories": (2351, 2380)
}

# Initialize counter for each bucket
bucket_counts = {name: 0 for name in buckets}

# Classify each dropped column into the corresponding bucket
for val in dropped_columns:
    found = False
    for name, (start, end) in buckets.items():
        if start <= val <= end:
            bucket_counts[name] += 1
            found = True
            break
    if not found:
        print(f"Value {val} does not fall into any defined range.")

# Display the counts
print("Dropped Column Counts by Category:")
for name, count in bucket_counts.items():
    print(f"{name}: {count}")

Dropped Column Counts by Category:
Byte Histogram: 254
Byte-entropy Histogram: 249
String Information: 95
General File Information: 0
Header Information: 27
Section Information: 66
Imported Functions: 32
Exported Functions: 0
Data Directories: 0


**PART III: Training and Evaluating the Random Forest Model on the Feature-Reduced Dataset**

**Resetting y_train and y_test**

In [48]:
X_train_second, y_train, X_test_second, y_test = ember.read_vectorized_features("/Users/alexandrutodea/Downloads/ember2018_data")
y_train = y_train[n]



**Training the Random Forest Model on the Feature-Reduced Training Data**


In [52]:
rfm_reduced = RandomForestClassifier(n_estimators=100) #max_depth=None, min_samples_split=2
rf_train_start = time.time()
rfm_reduced.fit(X_train_vt, y_train)
rf_train_end = time.time()

**Evaluating the Random Forest Model on the Feature-Reduced Testing Data**

In [53]:
# Measure inference time
rf_infer_start = time.time()
rfm_reduced_ypred = rfm_reduced.predict(X_test_vt)
rf_infer_end = time.time()

# Calculate inference time
rf_inference_total_seconds = rf_infer_end - rf_infer_start
rf_infer_minutes = int(rf_inference_total_seconds // 60)
rf_infer_seconds = rf_inference_total_seconds % 60

# Calculate training time
rf_training_total_seconds = rf_train_end - rf_train_start
rf_train_minutes = int(rf_training_total_seconds // 60)
rf_train_seconds = rf_training_total_seconds % 60

cm = confusion_matrix(y_test, rfm_reduced_ypred)
print("\nConfusion Matrix:")
print(cm)

tn, fp, fn, tp = cm.ravel()
print(f"\nTrue Negatives (TN): {tn}")
print(f"False Positives (FP): {fp}")
print(f"False Negatives (FN): {fn}")
print(f"True Positives (TP): {tp}")

print(f"Accuracy: {accuracy_score(y_test, rfm_reduced_ypred):.4f}")
rfmy_pred_proba = np.array(rfm_reduced.predict_proba(X_test_vt))[:, 1]  # Probabilities for the positive class
print(f"AUC: {roc_auc_score(y_test, rfmy_pred_proba):.4f}")
print(f"Precision: {precision_score(y_test, rfm_reduced_ypred):.4f}")
print(f"Recall: {recall_score(y_test, rfm_reduced_ypred):.4f}")
print(f"F1-Score: {f1_score(y_test, rfm_reduced_ypred):.4f}")
print(f"Cohen's Kappa Score: {cohen_kappa_score(y_test, rfm_reduced_ypred):.4f}")
print(f"Training Time: {rf_train_minutes} minutes and {rf_train_seconds:.2f} seconds")
print(f"Inference Time: {rf_infer_minutes} minutes and {rf_infer_seconds:.2f} seconds")


Confusion Matrix:
[[97881  2119]
 [ 4083 95917]]

True Negatives (TN): 97881
False Positives (FP): 2119
False Negatives (FN): 4083
True Positives (TP): 95917
Accuracy: 0.9690
AUC: 0.9949
Precision: 0.9784
Recall: 0.9592
F1-Score: 0.9687
Cohen's Kappa Score: 0.9380
Training Time: 3 minutes and 13.83 seconds
Inference Time: 0 minutes and 3.64 seconds


**Putting the Results into Perspective**

**1,85% increase in accuracy over the DL Model, which translates to 3700 more correctly labeled samples (2592 false positives correctly reclassified as true negatives; 1101 false negatives correctly reclassified as true positives)**

**This Random Forest model trains 7,1 times faster than the DL Model (3 minutes and 13,83 seconds vs. 22 minutes and 55,58 seconds)**


**The inference time for the 200,000 samples is 1,67 times faster than the DL Model (6,08 seconds vs. 3,64 seconds)**

**PART IV: Training and Evaluating the LightGBM Model on the Feature-Reduced Dataset**

**Training the LightGBM Model on the Feature-Reduced Training Data**

In [67]:
lgbm_reduced = lgb.LGBMClassifier() #boosting_type='gbdt; num_leaves=31: (Maximum number of leaves in one tree); max_depth=-1: (No limit on tree depth); learning_rate=0.1; n_estimators=100
lgb_train_start = time.time()
lgbm_reduced.fit(X_train_vt, y_train)
lgb_train_end = time.time()

[LightGBM] [Info] Number of positive: 300000, number of negative: 300000
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.406354 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 60340
[LightGBM] [Info] Number of data points in the train set: 600000, number of used features: 1658
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000


**Evaluating the LightGBM Model on the Feature-Reduced Testing Data**

In [68]:
lgb_training_total_seconds = lgb_train_end - lgb_train_start
lgb_train_minutes = int(lgb_training_total_seconds // 60)
lgb_train_seconds = lgb_training_total_seconds % 60

with warnings.catch_warnings():
    warnings.simplefilter("ignore", UserWarning)
    lgb_infer_start = time.time()
    lgb_reduced_ypred = lgbm_reduced.predict(X_test_vt)
    lgb_infer_end = time.time()
    lgby_pred_proba = np.array(lgbm_reduced.predict_proba(X_test_vt))[:, 1]  # Probabilities for the positive class

lgb_infer_total_seconds = lgb_infer_end - lgb_infer_start
lgb_infer_minutes = int(lgb_infer_total_seconds // 60)
lgb_infer_seconds = lgb_infer_total_seconds % 60

cm = confusion_matrix(y_test, lgb_reduced_ypred)
print("\nConfusion Matrix:")
print(cm)

tn, fp, fn, tp = cm.ravel()
print(f"\nTrue Negatives (TN): {tn}")
print(f"False Positives (FP): {fp}")
print(f"False Negatives (FN): {fn}")
print(f"True Positives (TP): {tp}")

print(f"Accuracy: {accuracy_score(y_test, lgb_reduced_ypred):.4f}")
print(f"AUC: {roc_auc_score(y_test, lgby_pred_proba):.4f}")
print(f"Precision: {precision_score(y_test, lgb_reduced_ypred):.4f}")
print(f"Recall: {recall_score(y_test, lgb_reduced_ypred):.4f}")
print(f"F1-Score: {f1_score(y_test, lgb_reduced_ypred):.4f}")
print(f"Cohen's Kappa Score: {cohen_kappa_score(y_test, lgb_reduced_ypred):.4f}")
print(f"Training Time: {lgb_train_minutes} minutes and {lgb_train_seconds:.2f} seconds")
print(f"Inference Time: {lgb_infer_minutes} minutes and {lgb_infer_seconds:.2f} seconds")


Confusion Matrix:
[[91211  8789]
 [ 5097 94903]]

True Negatives (TN): 91211
False Positives (FP): 8789
False Negatives (FN): 5097
True Positives (TP): 94903
Accuracy: 0.9306
AUC: 0.9833
Precision: 0.9152
Recall: 0.9490
F1-Score: 0.9318
Cohen's Kappa Score: 0.8611
Training Time: 0 minutes and 6.33 seconds
Inference Time: 0 minutes and 0.21 seconds


**Putting the Results into Perspective**

**The accuracy is 1,99% worse than the accuracy of the DL model, which means 3980 less correctly labeled samples (4078 less true negatives, 4078 more false positives, 87 less false negatives, 87 more true positives)**

**This LightGBM model was trained 217,31 times faster than the DL Model (6,33 seconds vs 22 minutes and 55,58 seconds)**

**The inference time for the 200,000 samples is 28,95 times faster than the DL Model (6,08 seconds vs. 0,21 seconds)**

**PART V: Training and Evaluating the Random Forest Model on the Full Dataset**

In [58]:
rfm_full = RandomForestClassifier(n_estimators=100) #max_depth=None, min_samples_split=2
rf_train_start = time.time()
rfm_full.fit(X_train, y_train)
rf_train_end = time.time()

In [61]:
# Measure inference time
rf_infer_start = time.time()
rfm_full_ypred = rfm_full.predict(X_test)
rf_infer_end = time.time()

# Calculate inference time
rf_inference_total_seconds = rf_infer_end - rf_infer_start
rf_infer_minutes = int(rf_inference_total_seconds // 60)
rf_infer_seconds = rf_inference_total_seconds % 60

# Calculate training time
rf_training_total_seconds = rf_train_end - rf_train_start
rf_train_minutes = int(rf_training_total_seconds // 60)
rf_train_seconds = rf_training_total_seconds % 60

cm = confusion_matrix(y_test, rfm_full_ypred)
print("\nConfusion Matrix:")
print(cm)

tn, fp, fn, tp = cm.ravel()
print(f"\nTrue Negatives (TN): {tn}")
print(f"False Positives (FP): {fp}")
print(f"False Negatives (FN): {fn}")
print(f"True Positives (TP): {tp}")

print(f"Accuracy: {accuracy_score(y_test, rfm_full_ypred):.4f}")
rfmy_pred_proba = np.array(rfm_full.predict_proba(X_test))[:, 1]  # Probabilities for the positive class
print(f"AUC: {roc_auc_score(y_test, rfmy_pred_proba):.4f}")
print(f"Precision: {precision_score(y_test, rfm_full_ypred):.4f}")
print(f"Recall: {recall_score(y_test, rfm_full_ypred):.4f}")
print(f"F1-Score: {f1_score(y_test, rfm_full_ypred):.4f}")
print(f"Cohen's Kappa Score: {cohen_kappa_score(y_test, rfm_full_ypred):.4f}")
print(f"Training Time: {rf_train_minutes} minutes and {rf_train_seconds:.2f} seconds")
print(f"Inference Time: {rf_infer_minutes} minutes and {rf_infer_seconds:.2f} seconds")


Confusion Matrix:
[[95854  4146]
 [ 5291 94709]]

True Negatives (TN): 95854
False Positives (FP): 4146
False Negatives (FN): 5291
True Positives (TP): 94709
Accuracy: 0.9528
AUC: 0.9899
Precision: 0.9581
Recall: 0.9471
F1-Score: 0.9525
Cohen's Kappa Score: 0.9056
Training Time: 16 minutes and 19.46 seconds
Inference Time: 0 minutes and 4.32 seconds


**Putting the Results into Perspective**

**The accuracy is 1,62% worse than the accuracy of the RF model trained on the feature-reduced dataset, which means 3240 less correctly labeled samples (2027 less true negatives, 2027 more false positives, 1208 more false negatives, 1208 less true positives)**

**Training an RF model on the feature-reduced dataset was 5,05 times faster (3 minutes and 13,83 seconds vs. 16 minutes and 19,46 seconds)**

**The inference time for the 200,000 samples for the RF model trained on the feature-reduced dataset is 1,19 faster (3,64 seconds vs. 4,32 seconds)**

**PART VI: Training and Evaluating the LightGBM Model on the Full Dataset**

In [69]:
lgbm_full = lgb.LGBMClassifier() #boosting_type='gbdt; num_leaves=31: (Maximum number of leaves in one tree); max_depth=-1: (No limit on tree depth); learning_rate=0.1; n_estimators=100
lgb_train_start = time.time()
lgbm_full.fit(X_train, y_train)
lgb_train_end = time.time()

[LightGBM] [Info] Number of positive: 300000, number of negative: 300000
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 1.081702 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 213082
[LightGBM] [Info] Number of data points in the train set: 600000, number of used features: 2339
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000


In [70]:
lgb_training_total_seconds = lgb_train_end - lgb_train_start
lgb_train_minutes = int(lgb_training_total_seconds // 60)
lgb_train_seconds = lgb_training_total_seconds % 60

with warnings.catch_warnings():
    warnings.simplefilter("ignore", UserWarning)
    lgb_infer_start = time.time()
    lgb_full_ypred = lgbm_full.predict(X_test)
    lgb_infer_end = time.time()
    lgby_pred_proba = np.array(lgbm_full.predict_proba(X_test))[:, 1]  # Probabilities for the positive class

lgb_infer_total_seconds = lgb_infer_end - lgb_infer_start
lgb_infer_minutes = int(lgb_infer_total_seconds // 60)
lgb_infer_seconds = lgb_infer_total_seconds % 60

cm = confusion_matrix(y_test, lgb_full_ypred)
print("\nConfusion Matrix:")
print(cm)

tn, fp, fn, tp = cm.ravel()
print(f"\nTrue Negatives (TN): {tn}")
print(f"False Positives (FP): {fp}")
print(f"False Negatives (FN): {fn}")
print(f"True Positives (TP): {tp}")

print(f"Accuracy: {accuracy_score(y_test, lgb_full_ypred):.4f}")
print(f"AUC: {roc_auc_score(y_test, lgby_pred_proba):.4f}")
print(f"Precision: {precision_score(y_test, lgb_full_ypred):.4f}")
print(f"Recall: {recall_score(y_test, lgb_full_ypred):.4f}")
print(f"F1-Score: {f1_score(y_test, lgb_full_ypred):.4f}")
print(f"Cohen's Kappa Score: {cohen_kappa_score(y_test, lgb_full_ypred):.4f}")
print(f"Training Time: {lgb_train_minutes} minutes and {lgb_train_seconds:.2f} seconds")
print(f"Inference Time: {lgb_infer_minutes} minutes and {lgb_infer_seconds:.2f} seconds")


Confusion Matrix:
[[92342  7658]
 [ 4490 95510]]

True Negatives (TN): 92342
False Positives (FP): 7658
False Negatives (FN): 4490
True Positives (TP): 95510
Accuracy: 0.9393
AUC: 0.9858
Precision: 0.9258
Recall: 0.9551
F1-Score: 0.9402
Cohen's Kappa Score: 0.8785
Training Time: 0 minutes and 18.42 seconds
Inference Time: 0 minutes and 0.18 seconds


**Putting the Results into Perspective**

**The accuracy is 0,87% better than the accuracy of the LightGBM model trained on the feature-reduced dataset (1131 false positives correctly reclassified as true negatives; 607 false negatives correctly reclassified as true positives).**

**It underperforms both RF models (the one trained on the feature-reduced dataset as well as the one trained on the full dataset).**

**It took 2,9 times more seconds to train this LightGBM model compared to the LightGBM model trained on the feature-reduced dataset (18,42 seconds vs. 6,33 seconds)**

**The inference time is about the same (0,21 seconds for the LightGBM model trained on the feature-reduced dataset vs. 0,18 seconds for this model)**