In [10]:
!pip install imbalanced-learn

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix
from imblearn.over_sampling import SMOTE

# Load dataset
df = pd.read_csv("creditcard.csv")

# Clean Class column
df['Class'] = pd.to_numeric(df['Class'], errors='coerce')
df = df.dropna(subset=['Class'])
df['Class'] = df['Class'].astype(int)

print("Class Distribution:")
print(df['Class'].value_counts())

# Split features & target
X = df.drop('Class', axis=1)
y = df['Class']

# Normalize data
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Train-Test split
X_train, X_test, y_train, y_test = train_test_split(
    X_scaled, y, test_size=0.2, random_state=42
)

# Handle imbalance using SMOTE
sm = SMOTE(random_state=42)
X_train_resampled, y_train_resampled = sm.fit_resample(X_train, y_train)

# Train Logistic Regression
model = LogisticRegression()
model.fit(X_train_resampled, y_train_resampled)

# Predictions
y_pred = model.predict(X_test)

# Evaluation
print("\nConfusion Matrix:")
print(confusion_matrix(y_test, y_pred))

print("\nClassification Report:")
print(classification_report(y_test, y_pred))


Class Distribution:
Class
0    49461
1      148
Name: count, dtype: int64

Confusion Matrix:
[[9754  135]
 [   0   33]]

Classification Report:
              precision    recall  f1-score   support

           0       1.00      0.99      0.99      9889
           1       0.20      1.00      0.33        33

    accuracy                           0.99      9922
   macro avg       0.60      0.99      0.66      9922
weighted avg       1.00      0.99      0.99      9922

