In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, confusion_matrix, precision_score, recall_score
from sklearn.svm import SVC
from imblearn.under_sampling import RandomUnderSampler

In [2]:
# Load the transactions table from the CSV file in the working directory.
df = pd.read_csv(filepath_or_buffer="creditcard.csv")

In [3]:
# Separate the label from the predictors: 'Class' holds the fraud labels and
# every remaining column is used as a feature.
y = df['Class']
X = df.drop(columns=['Class'])

In [4]:
# Hold out 20% of the rows for testing; stratifying on y maintains the class
# distribution in both partitions.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

In [5]:
# Standardise the features only (the target column is left untouched).
# The scaler is fitted on the training split alone, and that same fitted
# scaler is then applied to both the training and the test features.
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [6]:
# Baseline classifier: RBF-kernel SVM fitted on the scaled training features.
svm_model = SVC(
    C=1.0,
    kernel='rbf',
    gamma='scale',
    random_state=42,
)
svm_model.fit(X_train_scaled, y_train)

In [7]:
# Baseline SVM predictions for the scaled test features.
y_pred = svm_model.predict(X=X_test_scaled)

In [8]:
# Per-class metrics returned as a nested dictionary keyed by class label
report = classification_report(y_test, y_pred, output_dict=True)

# Pull out the scores for the '1' class (fraudulent transactions)
fraud_scores = report['1']
precision, recall = fraud_scores['precision'], fraud_scores['recall']

# Start the comparison table with a single row for the baseline model
results_df = pd.DataFrame(
    [[precision, recall]],
    index=["SVM"],
    columns=["Precision", "Recall"],
)
print(results_df)

     Precision    Recall
SVM   0.957746  0.693878


In [9]:
from imblearn.combine import SMOTEENN

# Resample the scaled training data with SMOTEENN
# (SMOTE combined with Edited Nearest Neighbours under-sampling)
smoteenn = SMOTEENN(random_state=42, n_jobs=-1)
X_train_balanced, y_train_balanced = smoteenn.fit_resample(X_train_scaled, y_train)

# Fit a second SVM, using balanced class weights, on the resampled data
svm_smoteenn = SVC(class_weight='balanced')
svm_smoteenn.fit(X_train_balanced, y_train_balanced)

# Predict on the scaled test split and score those predictions
y_pred_smoteenn = svm_smoteenn.predict(X_test_scaled)
precision, recall = (
    metric(y_test, y_pred_smoteenn) for metric in (precision_score, recall_score)
)

# Add this model's row to the comparison table and show the table
results_df.loc["SVM (Balanced)"] = [precision, recall]
print(results_df)

                Precision    Recall
SVM              0.957746  0.693878
SVM (Balanced)   0.086139  0.887755


In [10]:
from sklearn.metrics import confusion_matrix

In [19]:
# Confusion-matrix counts for y_test against y_pred as it stands when this
# cell runs (in top-to-bottom order, the baseline SVM predictions).
# labels=[0, 1] pins the matrix to 2x2 so the four-way unpacking cannot fail
# when one of the classes is absent from both y_test and y_pred.
tn, fp, fn, tp = confusion_matrix(y_test, y_pred, labels=[0, 1]).ravel()
print(f"False Positives (FP): {fp}")
print(f"False Negatives (FN): {fn}")

False Positives (FP): 3
False Negatives (FN): 30


In [15]:
# Cost assigned to each misclassification: one false positive (FP_cost)
# and one false negative (FN_cost)
FP_cost, FN_cost = 5, 50

In [18]:
# Weight each error count by its unit cost, then add the two parts together
fp_cost_total = fp * FP_cost
fn_cost_total = fn * FN_cost
total_cost = fp_cost_total + fn_cost_total

print(f"Total Cost of Misclassification: £{total_cost}")

Total Cost of Misclassification: £1515


In [21]:
import time

In [22]:
# Time a single prediction pass of the SMOTEENN-trained SVM over the test set.
# That model was fitted on standardised features, so it must be given
# X_test_scaled; the raw X_test is on a different scale from the data the
# model was trained on.
# time.perf_counter() is used because it is the clock intended for measuring
# elapsed intervals.
# NOTE: this reassigns y_pred, replacing the baseline predictions made earlier.
start_pred_time = time.perf_counter()
y_pred = svm_smoteenn.predict(X_test_scaled)
end_pred_time = time.perf_counter()

pred_time = end_pred_time - start_pred_time
print(f"Prediction Time: {pred_time:.4f} seconds")



Prediction Time: 294.2073 seconds
