In [13]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from imblearn.over_sampling import SMOTE
from imblearn.under_sampling import RandomUnderSampler
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.layers import Dense, Input
from tensorflow.keras.models import Sequential


In [3]:
# Load the raw transactions; read_csv already returns a DataFrame, so no
# extra pd.DataFrame(...) wrapper is needed.
data = pd.read_csv('credit_card_fraud_dataset.csv')


In [4]:
# Force the IsFraud value of the row labelled 4 to 1.
# NOTE(review): this hand-edits a single label in the loaded data; the reason
# is not shown in the notebook -- confirm it is intentional before relying on it.
data.at[4, 'IsFraud'] = 1


In [5]:
# Parse the timestamp column once, store the parsed values back, and derive
# the calendar features from that same parsed series.
timestamps = pd.to_datetime(data['TransactionDate'])
data['TransactionDate'] = timestamps
data['Hour'] = timestamps.dt.hour
data['DayOfWeek'] = timestamps.dt.dayofweek
data['DayOfMonth'] = timestamps.dt.day


In [6]:
# One-hot encode the two categorical columns, then discard the identifier and
# the raw timestamp (its hour/day features were extracted in an earlier cell).
data = (
    pd.get_dummies(data, columns=['TransactionType', 'Location'])
    .drop(columns=['TransactionID', 'TransactionDate'])
)


In [7]:
X = data.drop('IsFraud', axis=1)
y = data['IsFraud']

scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)


In [8]:
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.3, random_state=42)

In [9]:
def create_model(input_dim=None):
    """Build and compile a small feed-forward binary classifier.

    The network is Dense(16, relu) -> Dense(8, relu) -> Dense(1, sigmoid),
    compiled with the Adam optimizer, binary cross-entropy loss and the
    accuracy metric. A new, untrained model is returned on every call.

    Parameters
    ----------
    input_dim : int, optional
        Number of input features. When omitted, the width of the
        notebook-level ``X_train`` matrix is used, which matches the
        original zero-argument behaviour.

    Returns
    -------
    Sequential
        The compiled, untrained model.
    """
    if input_dim is None:
        # Fall back to the notebook's training matrix so create_model() with
        # no arguments keeps working exactly as before.
        input_dim = X_train.shape[1]

    model = Sequential()
    # Declare the input shape with an explicit Input layer rather than the
    # legacy input_dim= keyword on the first Dense layer.
    model.add(Input(shape=(input_dim,)))
    model.add(Dense(16, activation='relu'))
    model.add(Dense(8, activation='relu'))
    model.add(Dense(1, activation='sigmoid'))
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    return model

# Baseline: train on the training split as-is (no resampling).
model = create_model()
model.fit(X_train, y_train, epochs=50, batch_size=32, verbose=0)

# Turn the sigmoid outputs into hard 0/1 labels with a 0.5 cut-off.
y_pred = model.predict(X_test)
y_pred = (y_pred > 0.5).astype(int)

print("Baseline Model (Imbalanced Data) Performance:")
# zero_division=0 reports precision/F1 as 0.0 when a class is never predicted,
# instead of emitting an undefined-metric warning for each such metric.
print(classification_report(y_test, y_pred, zero_division=0))





Baseline Model (Imbalanced Data) Performance:
              precision    recall  f1-score   support

           0       0.99      1.00      0.99     29686
           1       0.00      0.00      0.00       314

    accuracy                           0.99     30000
   macro avg       0.49      0.50      0.50     30000
weighted avg       0.98      0.99      0.98     30000



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [10]:
# Resample only the training split with RandomUnderSampler (default strategy);
# the test split is left untouched for evaluation.
rus = RandomUnderSampler(random_state=42)
X_train_rus, y_train_rus = rus.fit_resample(X_train, y_train)

# Train a fresh network on the resampled rows.
model_rus = create_model()
model_rus.fit(
    x=X_train_rus,
    y=y_train_rus,
    batch_size=32,
    epochs=50,
    verbose=0,
)

# Score the original test split and cut the sigmoid outputs at 0.5.
rus_scores = model_rus.predict(X_test)
y_pred_rus = (rus_scores > 0.5).astype(int)

rus_report = classification_report(y_test, y_pred_rus)
print("\nUndersampled Model Performance:")
print(rus_report)


Undersampled Model Performance:
              precision    recall  f1-score   support

           0       0.99      0.49      0.65     29686
           1       0.01      0.51      0.02       314

    accuracy                           0.49     30000
   macro avg       0.50      0.50      0.34     30000
weighted avg       0.98      0.49      0.65     30000



In [11]:
# Resample only the training split with SMOTE (default strategy); the test
# split is left untouched for evaluation.
smote = SMOTE(random_state=42)
X_train_smote, y_train_smote = smote.fit_resample(X_train, y_train)

# Train a fresh network on the resampled rows.
model_smote = create_model()
model_smote.fit(
    x=X_train_smote,
    y=y_train_smote,
    batch_size=32,
    epochs=50,
    verbose=0,
)

# Score the original test split and cut the sigmoid outputs at 0.5.
smote_scores = model_smote.predict(X_test)
y_pred_smote = (smote_scores > 0.5).astype(int)

smote_report = classification_report(y_test, y_pred_smote)
print("\nSMOTE Oversampled Model Performance:")
print(smote_report)


SMOTE Oversampled Model Performance:
              precision    recall  f1-score   support

           0       0.99      0.71      0.83     29686
           1       0.01      0.32      0.02       314

    accuracy                           0.70     30000
   macro avg       0.50      0.51      0.42     30000
weighted avg       0.98      0.70      0.82     30000

