In [1]:
!pip install imbalanced-learn




In [3]:
import numpy as np
import pandas as pd
from imblearn.over_sampling import SMOTE
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras import Input
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import set_random_seed

In [5]:
# Read the transactions CSV from the working directory and preview the first five rows.
df = pd.read_csv(filepath_or_buffer="credit_card_fraud_dataset.csv")
df.head(5)

Unnamed: 0,TransactionID,TransactionDate,Amount,MerchantID,TransactionType,Location,IsFraud
0,1,2024-04-03 14:15:35.462794,4189.27,688,refund,San Antonio,0
1,2,2024-03-19 13:20:35.462824,2659.71,109,refund,Dallas,0
2,3,2024-01-08 10:08:35.462834,784.0,394,purchase,New York,0
3,4,2024-04-13 23:50:35.462850,3514.4,944,purchase,Philadelphia,0
4,5,2024-07-12 18:51:35.462858,369.07,475,purchase,Phoenix,0


In [7]:
# Build model inputs from the loaded frame.
# The drop is non-in-place and tolerates already-removed columns, so re-running this
# cell no longer raises KeyError; on a second pass get_dummies finds no string
# columns left to encode and leaves the frame as it is.
df = df.drop(columns=['TransactionID', 'TransactionDate'], errors='ignore')
df = pd.get_dummies(df)  # one-hot encode the remaining non-numeric columns
X = df.drop(columns='IsFraud')
y = df['IsFraud']

# Stratify on the label so the train and test splits keep the same fraud ratio.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, stratify=y, test_size=0.2, random_state=42
)

In [9]:
# Learn the scaling statistics from the training split only, then apply the same
# transform to both splits so no test-set information reaches the scaler.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

def build_ann(input_shape):
    """Build and compile a small feed-forward binary classifier.

    Args:
        input_shape: Number of input features; each sample is a 1-D vector
            of this length.

    Returns:
        A compiled Keras ``Sequential`` model with the layout
        Dense(32, relu) -> Dense(16, relu) -> Dense(1, sigmoid), using the
        Adam optimizer with default settings, binary cross-entropy loss and
        the accuracy metric.
    """
    model = Sequential([
        # Declare the input with an explicit Input layer rather than the
        # `input_shape=` keyword on the first Dense layer, which Keras warns about.
        Input(shape=(input_shape,)),
        Dense(32, activation='relu'),
        Dense(16, activation='relu'),
        Dense(1, activation='sigmoid'),
    ])
    model.compile(optimizer=Adam(), loss='binary_crossentropy', metrics=['accuracy'])
    return model

In [15]:
# Baseline: train on the original (imbalanced) training split.
# Seed Python, NumPy and the Keras backend so weight initialisation and batch
# shuffling repeat from run to run.
set_random_seed(42)
model_base = build_ann(X_train_scaled.shape[1])
model_base.fit(X_train_scaled, y_train, epochs=10, batch_size=32, verbose=0)
# Turn the sigmoid outputs into hard 0/1 labels with a 0.5 cut-off.
y_pred_base = (model_base.predict(X_test_scaled) > 0.5).astype("int32")

print("\n Classification Report (Before Balancing):")
# zero_division=0 reports 0.0 for a class that is never predicted, instead of relying
# on a process-wide warnings filter to hide the undefined-metric warning.
report_before = classification_report(y_test, y_pred_base, output_dict=True, zero_division=0)
print(classification_report(y_test, y_pred_base, zero_division=0))

[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 584us/step

 Classification Report (Before Balancing):
              precision    recall  f1-score   support

           0       0.99      1.00      0.99     19800
           1       0.00      0.00      0.00       200

    accuracy                           0.99     20000
   macro avg       0.49      0.50      0.50     20000
weighted avg       0.98      0.99      0.99     20000



In [17]:
# Oversample the minority class on the training split only; predictions below are
# still made on the untouched X_test_scaled.
# NOTE(review): plain SMOTE treats every column as continuous, including the one-hot
# columns produced earlier; consider whether SMOTENC is more appropriate here.
sm = SMOTE(random_state=42)
X_train_bal, y_train_bal = sm.fit_resample(X_train_scaled, y_train)

# Same seed as the baseline run, so both networks start from the same initial weights
# and the comparison reflects the resampling rather than random initialisation.
set_random_seed(42)
model_bal = build_ann(X_train_bal.shape[1])
model_bal.fit(X_train_bal, y_train_bal, epochs=10, batch_size=32, verbose=0)
# Turn the sigmoid outputs into hard 0/1 labels with a 0.5 cut-off.
y_pred_bal = (model_bal.predict(X_test_scaled) > 0.5).astype("int32")

print("\nClassification Report (After Balancing with SMOTE):")
report_after = classification_report(y_test, y_pred_bal, output_dict=True, zero_division=0)
print(classification_report(y_test, y_pred_bal, zero_division=0))


[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 705us/step

Classification Report (After Balancing with SMOTE):
              precision    recall  f1-score   support

           0       0.99      0.61      0.76     19800
           1       0.01      0.40      0.02       200

    accuracy                           0.61     20000
   macro avg       0.50      0.50      0.39     20000
weighted avg       0.98      0.61      0.75     20000



In [19]:
# Side-by-side view of the fraud-class (label '1') scores from both reports,
# rounded to four decimals.
fraud_keys = ('precision', 'recall', 'f1-score')
comparison = pd.DataFrame({
    'Metric': ['Precision', 'Recall', 'F1-Score'],
    'Before Balancing': [round(report_before['1'][key], 4) for key in fraud_keys],
    'After SMOTE': [round(report_after['1'][key], 4) for key in fraud_keys],
})

print("\n Performance Comparison:")
print(comparison.to_string(index=False))


 Performance Comparison:
   Metric  Before Balancing  After SMOTE
Precision               0.0       0.0102
   Recall               0.0       0.3950
 F1-Score               0.0       0.0200


In [21]:
# Closing commentary, printed one line at a time.
# NOTE(review): this text is static and is not derived from report_before/report_after;
# check it against the measured metrics before relying on it.
analysis_lines = [
    "\n Analysis:",
    "SMOTE (Synthetic Minority Oversampling) helped balance the dataset by generating synthetic minority class samples.",
    "This often improves recall and F1-score, especially for fraud detection where missing a fraud case is critical.",
    "However, it may reduce precision slightly due to more false positives.",
]
for line in analysis_lines:
    print(line)


 Analysis:
SMOTE (Synthetic Minority Oversampling) helped balance the dataset by generating synthetic minority class samples.
This often improves recall and F1-score, especially for fraud detection where missing a fraud case is critical.
However, it may reduce precision slightly due to more false positives.
