# Credit Card Fraud Detection with Logistic Regression

### 1. Dataset Overview
This dataset contains transaction details including 28 anonymized features (V1-V28), time, amount, and a binary target column (`Class`).

In [4]:

# Load the dataset and libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, roc_auc_score, confusion_matrix
import joblib

# Load dataset
file_path = 'creditcard.csv'
data = pd.read_csv(file_path)

# Target variable and feature scaling
scaler = StandardScaler()
target = 'Class'
X = data.drop(columns=[target])
y = data[target]
X['Amount'] = scaler.fit_transform(X[['Amount']])

# Save the scaler for later use
scaler_path = "scaler.pkl"
joblib.dump(scaler, scaler_path)
    

['scaler.pkl']

### 2. Data Sampling for Performance
We sample 20,000 rows due to computational constraints. This will be enough however to train our model propperly.

In [7]:

# Sample dataset
subset_size = 20000
data_subset = data.sample(n=subset_size, random_state=42)
X_subset = data_subset.drop(columns=[target])
y_subset = data_subset[target]
X_subset['Amount'] = scaler.fit_transform(X_subset[['Amount']])

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(
    X_subset, y_subset, test_size=0.2, random_state=42, stratify=y_subset
)
    

### 3. Model Training and Evaluation

In [6]:

# Train logistic regression model
logistic_model = LogisticRegression(max_iter=1000, class_weight='balanced', random_state=42)
logistic_model.fit(X_train, y_train)

# Evaluate the model
y_pred = logistic_model.predict(X_test)
y_pred_proba = logistic_model.predict_proba(X_test)[:, 1]
classification_metrics = classification_report(y_test, y_pred, output_dict=True)
roc_auc = roc_auc_score(y_test, y_pred_proba)
conf_matrix = confusion_matrix(y_test, y_pred)

print(f"ROC-AUC Score: {roc_auc}")
print(f"Classification Report: {classification_metrics}")
print(f"Confusion Matrix: {conf_matrix}")

# Save the trained model
model_path = "logistic_model.pkl"
joblib.dump(logistic_model, model_path)

print(f"Scaler saved to {scaler_path}")
print(f"Model saved to {model_path}")

ROC-AUC Score: 0.9555715304252523
Classification Report: {'0': {'precision': 0.9996886351842241, 'recall': 0.96484022838826, 'f1-score': 0.9819553471301865, 'support': 9983}, '1': {'precision': 0.038356164383561646, 'recall': 0.8235294117647058, 'f1-score': 0.07329842931937172, 'support': 17}, 'accuracy': 0.9646, 'macro avg': {'precision': 0.5190223997838929, 'recall': 0.8941848200764829, 'f1-score': 0.5276268882247791, 'support': 10000}, 'weighted avg': {'precision': 0.998054369983863, 'recall': 0.9646, 'f1-score': 0.9804106303699082, 'support': 10000}}
Confusion Matrix: [[9632  351]
 [   3   14]]
Scaler saved to scaler.pkl
Model saved to logistic_model.pkl
