In [10]:
# Load train and validation sets
# Import baseline models and metrics
from pathlib import Path

import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, f1_score
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier


# Location of the pre-computed CSV splits. Defined once so the notebook can be
# pointed at a different data directory by editing a single line.
SPLITS_DIR = Path('/home/danial/Data Science/Fraud Detection/Data/splits')

# Name of the label column that is separated from the features below.
TARGET_COL = 'Class'

train = pd.read_csv(SPLITS_DIR / 'train.csv')
val = pd.read_csv(SPLITS_DIR / 'validation.csv')

# Split each frame into a feature matrix (every column except the label)
# and the label vector.
X_train = train.drop(columns=TARGET_COL)
y_train = train[TARGET_COL]
X_val = val.drop(columns=TARGET_COL)
y_val = val[TARGET_COL]

# Fail fast if the two splits disagree on feature names or their order;
# models fitted on X_train are later asked to predict on X_val.
assert list(X_train.columns) == list(X_val.columns), (
    "train and validation splits have different feature columns"
)


In [11]:
# Baseline 1: logistic regression, fitted on the training split and scored
# on the validation split.
# The iteration cap and seed are set explicitly so a re-run is repeatable.
lr = LogisticRegression(max_iter=1000, random_state=42).fit(X_train, y_train)
y_val_pred_lr = lr.predict(X_val)

# Validation metrics; f1_score is called with its default settings.
# NOTE(review): accuracy close to 1.0 next to a much lower F1 hints at class
# imbalance, in which case F1 is the more informative number - confirm.
lr_acc = accuracy_score(y_true=y_val, y_pred=y_val_pred_lr)
lr_f1 = f1_score(y_true=y_val, y_pred=y_val_pred_lr)

print(f"Logistic Regression - Accuracy: {lr_acc:.4f}, F1-Score: {lr_f1:.4f}")


Logistic Regression - Accuracy: 0.9991, F1-Score: 0.7200


In [12]:
# Baseline 2: decision tree, fitted on the training split and scored on the
# validation split.
# Only the seed is set; every other hyper-parameter is left at its default.
dt = DecisionTreeClassifier(random_state=42).fit(X_train, y_train)
y_val_pred_dt = dt.predict(X_val)

# Validation metrics; f1_score is called with its default settings.
dt_acc = accuracy_score(y_true=y_val, y_pred=y_val_pred_dt)
dt_f1 = f1_score(y_true=y_val, y_pred=y_val_pred_dt)

print(f"Decision Tree - Accuracy: {dt_acc:.4f}, F1-Score: {dt_f1:.4f}")


Decision Tree - Accuracy: 0.9992, F1-Score: 0.7746


In [13]:
# Baseline 3: Gaussian naive Bayes, fitted on the training split and scored
# on the validation split. The estimator is built with default arguments.
nb = GaussianNB().fit(X_train, y_train)
y_val_pred_nb = nb.predict(X_val)

# Validation metrics; f1_score is called with its default settings.
nb_acc = accuracy_score(y_true=y_val, y_pred=y_val_pred_nb)
nb_f1 = f1_score(y_true=y_val, y_pred=y_val_pred_nb)

print(f"Naive Bayes - Accuracy: {nb_acc:.4f}, F1-Score: {nb_f1:.4f}")


Naive Bayes - Accuracy: 0.9779, F1-Score: 0.1227


In [14]:
# Side-by-side comparison of the three baselines and choice of a first candidate.
# Gather the validation metrics computed by the earlier cells, keyed by
# display name; insertion order determines the print order below.
results = {
    name: {"Accuracy": acc, "F1-Score": f1}
    for name, acc, f1 in [
        ("Logistic Regression", lr_acc, lr_f1),
        ("Decision Tree", dt_acc, dt_f1),
        ("Naive Bayes", nb_acc, nb_f1),
    ]
}

for model, scores in results.items():
    print(f"{model}: Accuracy={scores['Accuracy']:.4f}, F1-Score={scores['F1-Score']:.4f}")

# Select purely on validation F1; accuracy is reported but not used here.
# On a tie, max() keeps the entry that appears first in `results`.
best_model = max(results.items(), key=lambda entry: entry[1]['F1-Score'])[0]
print(f"\nSelected initial candidate model: {best_model}")


Logistic Regression: Accuracy=0.9991, F1-Score=0.7200
Decision Tree: Accuracy=0.9992, F1-Score=0.7746
Naive Bayes: Accuracy=0.9779, F1-Score=0.1227

Selected initial candidate model: Decision Tree
