In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, precision_recall_curve, auc


In [2]:
# Load the dataset
data = pd.read_csv('creditcard.csv')


In [3]:
# Preprocessing
# Scale 'Amount'
scaler = StandardScaler()
data['Amount'] = scaler.fit_transform(data['Amount'].values.reshape(-1, 1))


In [4]:
# Drop 'Time' as it's not used in this example
data.drop(['Time'], axis=1, inplace=True)


In [6]:
data = data.dropna()
y = data['Class']



In [7]:
y = y.fillna(y.median())


In [9]:
# Split the dataset
X = data.drop('Class', axis=1)
y = data['Class']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42, stratify=y)


In [10]:
# Model Selection
clf = RandomForestClassifier(random_state=42, class_weight='balanced')


In [11]:
# Model Evaluation
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)
print(classification_report(y_test, y_pred))


              precision    recall  f1-score   support

         0.0       1.00      1.00      1.00     41538
         1.0       0.98      0.80      0.88        79

    accuracy                           1.00     41617
   macro avg       0.99      0.90      0.94     41617
weighted avg       1.00      1.00      1.00     41617



In [12]:
# Calculate AUPRC
precision, recall, _ = precision_recall_curve(y_test, y_pred)
auprc = auc(recall, precision)
print(f"AUPRC: {auprc}")


AUPRC: 0.8911139063523213
