In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix
from imblearn.over_sampling import SMOTE

In [None]:
df = pd.read_csv("creditcard.csv")

In [None]:
print(df.head())
print(df["Class"].value_counts())

   Time        V1        V2        V3        V4        V5        V6        V7  \
0     0 -1.359807 -0.072781  2.536347  1.378155 -0.338321  0.462388  0.239599   
1     0  1.191857  0.266151  0.166480  0.448154  0.060018 -0.082361 -0.078803   
2     1 -1.358354 -1.340163  1.773209  0.379780 -0.503198  1.800499  0.791461   
3     1 -0.966272 -0.185226  1.792993 -0.863291 -0.010309  1.247203  0.237609   
4     2 -1.158233  0.877737  1.548718  0.403034 -0.407193  0.095921  0.592941   

         V8        V9  ...       V21       V22       V23       V24       V25  \
0  0.098698  0.363787  ... -0.018307  0.277838 -0.110474  0.066928  0.128539   
1  0.085102 -0.255425  ... -0.225775 -0.638672  0.101288 -0.339846  0.167170   
2  0.247676 -1.514654  ...  0.247998  0.771679  0.909412 -0.689281 -0.327642   
3  0.377436 -1.387024  ... -0.108300  0.005274 -0.190321 -1.175575  0.647376   
4 -0.270533  0.817739  ... -0.009431  0.798278 -0.137458  0.141267 -0.206010   

        V26       V27       V28 

In [None]:
X = df.drop("Class", axis=1)
y = df["Class"]

In [None]:
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

In [None]:
!pip install imblearn

Collecting imblearn
  Downloading imblearn-0.0-py2.py3-none-any.whl.metadata (355 bytes)
Downloading imblearn-0.0-py2.py3-none-any.whl (1.9 kB)
Installing collected packages: imblearn
Successfully installed imblearn-0.0


In [None]:
# Check for NaNs in y
print("Number of NaNs in y:", y.isna().sum())

# Drop rows where y is NaN (and corresponding X rows)
if y.isna().sum() > 0:
    non_nan_indices = ~y.isna()
    X = X[non_nan_indices]
    y = y[non_nan_indices]


Number of NaNs in y: 1


In [None]:
# Drop rows where the target 'Class' is NaN
df = df.dropna(subset=["Class"])


In [None]:
# Prepare features and labels again
X = df.drop("Class", axis=1)
y = df["Class"]


In [None]:
print("Any NaNs left in y?", y.isna().sum())


Any NaNs left in y? 0


In [None]:
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

from imblearn.over_sampling import SMOTE
smote = SMOTE(random_state=42, k_neighbors=1)
X_resampled, y_resampled = smote.fit_resample(X_scaled, y)
print("After resampling:", np.bincount(y_resampled))


After resampling: [3970 3970]


In [None]:
X_train, X_test, y_train, y_test = train_test_split(
    X_resampled, y_resampled, test_size=0.3, random_state=42
)

In [None]:
log_model = LogisticRegression(max_iter=1000)
log_model.fit(X_train, y_train)


In [None]:
y_pred_log = log_model.predict(X_test)
print("Logistic Regression Results:")
print(classification_report(y_test, y_pred_log))
print(confusion_matrix(y_test, y_pred_log))


Logistic Regression Results:
              precision    recall  f1-score   support

         0.0       1.00      1.00      1.00      1206
         1.0       1.00      1.00      1.00      1176

    accuracy                           1.00      2382
   macro avg       1.00      1.00      1.00      2382
weighted avg       1.00      1.00      1.00      2382

[[1203    3]
 [   0 1176]]


In [None]:
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
rf_model.fit(X_train, y_train)


In [None]:
y_pred_rf = rf_model.predict(X_test)
print("Random Forest Results:")
print(classification_report(y_test, y_pred_rf))
print(confusion_matrix(y_test, y_pred_rf))


Random Forest Results:
              precision    recall  f1-score   support

         0.0       1.00      1.00      1.00      1206
         1.0       1.00      1.00      1.00      1176

    accuracy                           1.00      2382
   macro avg       1.00      1.00      1.00      2382
weighted avg       1.00      1.00      1.00      2382

[[1206    0]
 [   0 1176]]
