In [14]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler,LabelEncoder
from imblearn.over_sampling import SMOTE
import tensorflow as tf
from tensorflow.keras import layers, models
from datetime import datetime
from sklearn.metrics import classification_report


In [None]:

df = pd.read_csv('credit_card_fraud_dataset.csv')  
df.head()

Unnamed: 0,TransactionID,TransactionDate,Amount,MerchantID,TransactionType,Location,IsFraud
0,1,2024-04-03 14:15:35.462794,4189.27,688,refund,San Antonio,0
1,2,2024-03-19 13:20:35.462824,2659.71,109,refund,Dallas,0
2,3,2024-01-08 10:08:35.462834,784.0,394,purchase,New York,0
3,4,2024-04-13 23:50:35.462850,3514.4,944,purchase,Philadelphia,0
4,5,2024-07-12 18:51:35.462858,369.07,475,purchase,Phoenix,0


In [None]:
df = df.drop(['TransactionID', 'TransactionDate'], axis=1)

df['TransactionType'] = LabelEncoder().fit_transform(df['TransactionType'])  
df['Location'] = LabelEncoder().fit_transform(df['Location'])  


X = df.drop('IsFraud', axis=1)
y = df['IsFraud']


scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)


In [None]:

X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, stratify=y, random_state=42)


In [None]:

sm = SMOTE(random_state=42)
X_train_res, y_train_res = sm.fit_resample(X_train, y_train)


In [None]:

model = models.Sequential([
    layers.Dense(128, activation='relu', input_shape=(X_train.shape[1],)),
    layers.Dense(128, activation='relu'),
    layers.Dense(1, activation='sigmoid')
])


In [20]:

model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])


In [None]:

log_dir = "logs/fraud_" + datetime.now().strftime("%Y%m%d-%H%M%S")
tensorboard_cb = tf.keras.callbacks.TensorBoard(log_dir=log_dir)


In [None]:

history = model.fit(X_train_res, y_train_res,
                    epochs=10,
                    batch_size=2048,
                    validation_data=(X_test, y_test),
                    callbacks=[tensorboard_cb])


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [None]:


model_unbalanced = tf.keras.models.clone_model(model)
model_unbalanced.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
model_unbalanced.fit(X_train, y_train, epochs=10, batch_size=2048, verbose=0)
y_pred_unbalanced = (model_unbalanced.predict(X_test) > 0.5).astype(int)

y_pred_balanced = (model.predict(X_test) > 0.5).astype(int)

print("Before SMOTE:")
print(classification_report(y_test, y_pred_unbalanced))

print("After SMOTE:")
print(classification_report(y_test, y_pred_balanced))


Before SMOTE:
              precision    recall  f1-score   support

           0       0.99      1.00      0.99     19800
           1       0.00      0.00      0.00       200

    accuracy                           0.99     20000
   macro avg       0.49      0.50      0.50     20000
weighted avg       0.98      0.99      0.99     20000

After SMOTE:
              precision    recall  f1-score   support

           0       0.99      0.66      0.79     19800
           1       0.01      0.31      0.02       200

    accuracy                           0.66     20000
   macro avg       0.50      0.49      0.41     20000
weighted avg       0.98      0.66      0.78     20000



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
