In [1]:
# Read the pre-made train / validation splits and separate features from target
import pandas as pd

train = pd.read_csv('/home/danial/Data Science/Fraud Detection/Data/splits/train.csv')
val = pd.read_csv('/home/danial/Data Science/Fraud Detection/Data/splits/validation.csv')

# 'Class' is the target column; every remaining column is kept as a feature.
X_train, y_train = train.drop(columns='Class'), train['Class']
X_val, y_val = val.drop(columns='Class'), val['Class']


In [2]:
# Logistic Regression pipeline: standardize the features, then classify
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

lr_pipeline = Pipeline(steps=[
    # Step 1: feature scaling, applied before the classifier sees the data
    ('scaler', StandardScaler()),
    # Step 2: the classifier itself, with a fixed seed and a raised iteration cap
    ('classifier', LogisticRegression(max_iter=1000, random_state=42)),
])


In [3]:
# Fit the Logistic Regression pipeline, then score it on both splits
from sklearn.metrics import accuracy_score, f1_score

lr_pipeline.fit(X_train, y_train)

# Class predictions for the training and validation features
y_train_pred_lr = lr_pipeline.predict(X_train)
y_val_pred_lr = lr_pipeline.predict(X_val)

# One report line per split: accuracy followed by F1
for label, y_true, y_pred in (
    ("Logistic Regression - Train Accuracy:", y_train, y_train_pred_lr),
    ("Logistic Regression - Validation Accuracy:", y_val, y_val_pred_lr),
):
    print(label, accuracy_score(y_true, y_pred),
          "F1-Score:", f1_score(y_true, y_pred))


Logistic Regression - Train Accuracy: 0.9992145192249186 F1-Score: 0.74822695035461
Logistic Regression - Validation Accuracy: 0.9990707039417641 F1-Score: 0.7096774193548387


In [4]:
# Decision Tree pipeline: a single classifier step, with no scaler in front of it
from sklearn.tree import DecisionTreeClassifier

dt_pipeline = Pipeline(steps=[
    ('classifier', DecisionTreeClassifier(random_state=42)),
])

# Fit on the training split
dt_pipeline.fit(X_train, y_train)

# Class predictions for the training and validation features
y_train_pred_dt = dt_pipeline.predict(X_train)
y_val_pred_dt = dt_pipeline.predict(X_val)

# One report line per split: accuracy followed by F1
for label, y_true, y_pred in (
    ("Decision Tree - Train Accuracy:", y_train, y_train_pred_dt),
    ("Decision Tree - Validation Accuracy:", y_val, y_val_pred_dt),
):
    print(label, accuracy_score(y_true, y_pred),
          "F1-Score:", f1_score(y_true, y_pred))


Decision Tree - Train Accuracy: 1.0 F1-Score: 1.0
Decision Tree - Validation Accuracy: 0.9991739590593459 F1-Score: 0.7746478873239436


In [5]:
# Persist both fitted pipelines to disk so they can be reloaded later
import os

import joblib

MODELS_DIR = '/home/danial/Data Science/Fraud Detection/Models'

# Make sure the output folder exists before writing; the dump would fail with
# FileNotFoundError if it were missing. This is a no-op when it already exists.
os.makedirs(MODELS_DIR, exist_ok=True)

joblib.dump(lr_pipeline, os.path.join(MODELS_DIR, 'lr_pipeline.pkl'))
# Kept as the cell's last expression so the written path is still displayed.
joblib.dump(dt_pipeline, os.path.join(MODELS_DIR, 'dt_pipeline.pkl'))


['/home/danial/Data Science/Fraud Detection/Models/dt_pipeline.pkl']