# In [None]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.utils import class_weight
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score

import tensorflow as tf
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping


# Dataset to train on; load_data() accepts "kaggle_raw", "kaggle_norm" or "devnet".
DATASET: str = "kaggle_norm"

def load_data(dataset_choice="kaggle_raw"):
    """Read one of the supported CSV files and split it into features and labels.

    Args:
        dataset_choice: ``"kaggle_raw"`` (reads ``creditcard.csv``),
            ``"kaggle_norm"`` (reads ``creditcardfraud_normalised.csv``) or
            ``"devnet"`` (reads ``fraud.csv``). Paths are relative to the
            current working directory.

    Returns:
        A tuple ``(X, y)`` of NumPy arrays: ``X`` contains every column except
        the label column, ``y`` contains the label column.

    Raises:
        ValueError: If ``dataset_choice`` is not one of the supported names.
    """
    # (dataset name, CSV file, name of the label column)
    sources = (
        ("kaggle_raw", "creditcard.csv", "Class"),
        ("kaggle_norm", "creditcardfraud_normalised.csv", "Class"),
        ("devnet", "fraud.csv", "label"),
    )

    for name, csv_path, label_column in sources:
        if dataset_choice == name:
            break
    else:
        # No entry matched: reject before touching the filesystem.
        raise ValueError("dataset_choice must be 'kaggle_raw', 'kaggle_norm', or 'devnet'")

    frame = pd.read_csv(csv_path)
    features = frame.drop(label_column, axis=1).values
    labels = frame[label_column].values
    return features, labels

# -------------------------------------------------
# Load the dataset selected by DATASET and report its size
# -------------------------------------------------
X, y = load_data(dataset_choice=DATASET)
print("Dataset:", DATASET)
# y.mean() is the positive-class share, assuming labels are encoded as 0/1.
print("X shape:", X.shape, "fraud ratio:", y.mean())


# Hold out 20% of the rows for testing. Stratifying on y keeps the class
# proportions equal in both partitions; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

# Learn the scaling statistics from the training partition only, then apply
# those same statistics to both partitions so the test rows never influence them.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)


# Weight each class inversely to its frequency in the training labels so that
# the rarer class is not drowned out in the loss.
train_classes = np.unique(y_train)
class_weights_arr = class_weight.compute_class_weight(
    class_weight="balanced",
    classes=train_classes,
    y=y_train,
)
# Key every weight by its actual class label instead of by its position in the
# array: the two only coincide when the labels happen to be exactly 0..n-1.
# Plain int/float values also keep the dict (and its printout) free of NumPy scalars.
class_weights = {
    int(label): float(weight)
    for label, weight in zip(train_classes, class_weights_arr)
}
print("Class weights:", class_weights)


# Number of feature columns fed to the network.
input_dim = X_train.shape[1]

# Feed-forward binary classifier: three ReLU layers (64 -> 32 -> 16) with
# dropout after the first two, ending in a single sigmoid unit that outputs a
# probability. The input is declared with an explicit Input object rather than
# the `input_shape=` layer argument, which newer Keras releases warn about.
model = Sequential([
    tf.keras.Input(shape=(input_dim,)),
    Dense(64, activation="relu"),
    Dropout(0.3),
    Dense(32, activation="relu"),
    Dropout(0.3),
    Dense(16, activation="relu"),
    Dense(1, activation="sigmoid"),
])

model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=1e-3),
    loss="binary_crossentropy",
    metrics=[
        # Kept so existing consumers of history.history["accuracy"] still work.
        "accuracy",
        # Accuracy alone says little when the positive class is rare (a model
        # predicting "not fraud" everywhere would still score highly), so also
        # track metrics that focus on the positive class.
        tf.keras.metrics.AUC(curve="PR", name="pr_auc"),
        tf.keras.metrics.Precision(name="precision"),
        tf.keras.metrics.Recall(name="recall"),
    ],
)

model.summary()

# Stop once the validation loss has gone 5 epochs without improving and roll
# the model back to the weights from its best epoch.
early_stop = EarlyStopping(monitor="val_loss", patience=5, restore_best_weights=True)

# Train for at most 50 epochs, holding out 20% of the training rows for
# validation and applying the per-class weights to the training loss.
history = model.fit(
    x=X_train,
    y=y_train,
    batch_size=2048,
    epochs=50,
    verbose=1,
    callbacks=[early_stop],
    validation_split=0.2,
    class_weight=class_weights,
)

# Predicted probability for every test row, flattened to a 1-D array
# (the final Dense(1) layer yields one value per row).
y_pred_prob = np.ravel(model.predict(X_test))
# Hard 0/1 labels: a row is flagged when its probability reaches 0.5.
y_pred = (y_pred_prob >= 0.5).astype(int)

# ROC-AUC is threshold-free and uses the probabilities; the confusion matrix
# and the per-class report use the thresholded labels.
print("ROC-AUC:", roc_auc_score(y_test, y_pred_prob))
print("\nConfusion matrix:\n", confusion_matrix(y_test, y_pred))
print(
    "\nClassification report:\n",
    classification_report(y_test, y_pred, digits=4),
)
