In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score
from sklearn.utils import resample
from imblearn.over_sampling import SMOTE

In [2]:
data = pd.read_csv('C:/Users/koona/OneDrive/Pictures/JAYACHANDRA/Brainwave_Matrix_Intern/TASK 2/credit_card_fraud/creditcard_2023.csv')

In [14]:
print(data.head())
print(data.info())
print(data['Class'].value_counts())

   id        V1        V2        V3        V4        V5        V6        V7  \
0   0 -0.260648 -0.469648  2.496266 -0.083724  0.129681  0.732898  0.519014   
1   1  0.985100 -0.356045  0.558056 -0.429654  0.277140  0.428605  0.406466   
2   2 -0.260272 -0.949385  1.728538 -0.457986  0.074062  1.419481  0.743511   
3   3 -0.152152 -0.508959  1.746840 -1.090178  0.249486  1.143312  0.518269   
4   4 -0.206820 -0.165280  1.527053 -0.448293  0.106125  0.530549  0.658849   

         V8        V9  ...       V21       V22       V23       V24       V25  \
0 -0.130006  0.727159  ... -0.110552  0.217606 -0.134794  0.165959  0.126280   
1 -0.133118  0.347452  ... -0.194936 -0.605761  0.079469 -0.577395  0.190090   
2 -0.095576 -0.261297  ... -0.005020  0.702906  0.945045 -1.154666 -0.605564   
3 -0.065130 -0.205698  ... -0.146927 -0.038212 -0.214048 -1.893131  1.003963   
4 -0.212660  1.049921  ... -0.106984  0.729727 -0.161666  0.312561 -0.414116   

        V26       V27       V28    Amount  C

In [4]:
X = data.drop('Class', axis=1)  
y = data['Class']  

In [5]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

In [6]:
smote = SMOTE(random_state=42)
X_train_resampled, y_train_resampled = smote.fit_resample(X_train, y_train)

In [7]:
print("Before resampling:/n", y_train.value_counts())
print("After resampling:/n", pd.Series(y_train_resampled).value_counts())

Before resampling:/n Class
0    227452
1    227452
Name: count, dtype: int64
After resampling:/n Class
0    227452
1    227452
Name: count, dtype: int64


In [8]:
model = RandomForestClassifier(random_state=42, n_estimators=100, max_depth=10, class_weight='balanced')
model.fit(X_train_resampled, y_train_resampled)

In [9]:
y_pred = model.predict(X_test)

In [10]:
print("Confusion Matrix:/n", confusion_matrix(y_test, y_pred))
print("Classification Report:/n", classification_report(y_test, y_pred))
print("ROC-AUC Score:", roc_auc_score(y_test, model.predict_proba(X_test)[:, 1]))

Confusion Matrix:/n [[56855     8]
 [   16 56847]]
Classification Report:/n               precision    recall  f1-score   support

           0       1.00      1.00      1.00     56863
           1       1.00      1.00      1.00     56863

    accuracy                           1.00    113726
   macro avg       1.00      1.00      1.00    113726
weighted avg       1.00      1.00      1.00    113726

ROC-AUC Score: 0.9999961146171176


In [11]:
import joblib
joblib.dump(model, 'credit_card_fraud_model.pkl')

['credit_card_fraud_model.pkl']

In [13]:
# Future predictions
loaded_model = joblib.load('credit_card_fraud_model.pkl')
sample_transaction = X_test.iloc[0].values.reshape(1, -1)
prediction = loaded_model.predict(sample_transaction)
print("Fraudulent" if prediction[0] == 1 else "Non-fraudulent")

Fraudulent


