# In [None]:  (Jupyter notebook cell marker, kept as a comment so the file parses as Python)
import time
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import joblib

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

from tensorflow import keras
from tensorflow.keras.models import load_model

# Measure script execution time.
# NOTE(review): t0 is captured after the import statements have already run,
# so the "imports" figure printed below only covers the seaborn style call,
# not the real import cost. Capture t0 before the imports if that is wanted.
t0 = time.time()

# Set visualization style
sns.set_style("whitegrid")

t1 = time.time()
print("Time consumed for imports:", t1 - t0, "seconds")

# Load dataset (read from the current working directory)
data = pd.read_excel("ProjectCreditCard.xlsx")
print(data.head())

t2 = time.time()
print("Time consumed for loading data:", t2 - t1, "seconds")

# Basic dataset exploration
data.info()
# Use the full option name: abbreviated option names are resolved by pattern
# matching and can stop working if pandas adds another option that matches.
pd.set_option("display.float_format", "{:.2f}".format)
print("Total missing values:", data.isnull().sum().sum())
print("Dataset columns:", data.columns)

t3 = time.time()
print("Time consumed for initial exploration:", t3 - t2, "seconds")

# Class distribution visualization
LABELS = ["Normal", "Fraud"]
# value_counts() orders its result by frequency, not by class id. Reindex to
# [0, 1] so the first bar is always class 0 ("Normal") and the second is
# class 1 ("Fraud"), matching the tick labels below even if one class is
# missing or the fraud class happens to be the more frequent one.
count_classes = data['Class'].value_counts().reindex([0, 1], fill_value=0)
count_classes.plot(kind='bar', rot=0)
plt.title("Transaction Class Distribution")
plt.xticks(range(2), LABELS)
plt.xlabel("Class")
plt.ylabel("Frequency")
plt.show()

# Data separation: rows labelled 1 are fraud, rows labelled 0 are normal
fraud = data[data['Class'] == 1]
normal = data[data['Class'] == 0]
print(f"Fraudulent transactions: {fraud.shape}")
print(f"Non-fraudulent transactions: {normal.shape}")

t4 = time.time()
print("Time consumed for data separation:", t4 - t3, "seconds")

# Data preprocessing: features are every column except the 'Class' label
X = data.drop('Class', axis=1)
y = data['Class']
# Stratify both splits so the train / validation / test sets keep the same
# class ratio as the full dataset; with a rare positive class an unstratified
# split can leave a subset with very few (or no) fraud rows.
X_train_v, X_test, y_train_v, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42, stratify=y)
X_train, X_validate, y_train, y_validate = train_test_split(
    X_train_v, y_train_v, test_size=0.2, random_state=42, stratify=y_train_v)

# Standardization: fit on the training split only, then apply the same
# transform to the validation and test splits. From here on X_train,
# X_validate and X_test hold the *scaled* arrays.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_validate = scaler.transform(X_validate)
X_test = scaler.transform(X_test)

# Save the fitted scaler
joblib.dump(scaler, "scaler.pkl")
print("Scaler saved successfully!")

class_counts = y_train.value_counts()
print("Class counts:", class_counts)

# Class weights must be *inversely* proportional to class frequency so the
# rare class contributes as much to the loss as the common one. The previous
# code used each class's own frequency, which down-weighted the fraud class.
# Formula: n_samples / (n_classes * class_count), the "balanced" heuristic.
# A class that is absent from y_train gets weight 0.0 (it is never used).
n_samples = len(y_train)
n_normal = class_counts.get(0, 0)
n_fraud = class_counts.get(1, 0)
w_p = n_samples / (2 * n_normal) if n_normal else 0.0  # Non-fraud (class 0) weight
w_n = n_samples / (2 * n_fraud) if n_fraud else 0.0  # Fraud (class 1) weight

print(f"Fraud weight: {w_n}, Non-Fraud weight: {w_p}")

t6 = time.time()
print("Time consumed for preprocessing:", t6 - t4, "seconds")

# ANN Model: three identical hidden stages (Dense 256 + ReLU, batch
# normalisation, 30% dropout) followed by a single sigmoid output unit.
n_features = X_train.shape[-1]

# The first stage carries the input shape; the remaining two are built in a loop.
layer_stack = [
    keras.layers.Dense(256, activation='relu', input_shape=(n_features,)),
    keras.layers.BatchNormalization(),
    keras.layers.Dropout(0.3),
]
for _ in range(2):
    layer_stack.append(keras.layers.Dense(256, activation='relu'))
    layer_stack.append(keras.layers.BatchNormalization())
    layer_stack.append(keras.layers.Dropout(0.3))
layer_stack.append(keras.layers.Dense(1, activation='sigmoid'))

model = keras.Sequential(layer_stack)

# Adam with a 1e-4 learning rate, binary cross-entropy loss, and
# precision / recall tracked during training.
tracked_metrics = [keras.metrics.Precision(), keras.metrics.Recall()]
model.compile(
    optimizer=keras.optimizers.Adam(1e-4),
    loss='binary_crossentropy',
    metrics=tracked_metrics,
)

model.summary()

# Callbacks and training
# A checkpoint file is written with the epoch number substituted into its name.
checkpoint_cb = keras.callbacks.ModelCheckpoint('fraud_model_at_epoch_{epoch}.keras')
callbacks = [checkpoint_cb]

# Per-class loss weights: w_p applies to label 0, w_n to label 1.
class_weight = {0: w_p, 1: w_n}

# Collect the fit settings in one place, then launch training.
fit_options = dict(
    validation_data=(X_validate, y_validate),
    batch_size=2048,
    epochs=300,
    callbacks=callbacks,
    class_weight=class_weight,
)
r = model.fit(X_train, y_train, **fit_options)

t9 = time.time()
print("Time consumed for training:", t9 - t6, "seconds")

# Persist the trained network to disk
model.save("fraud_model.h5")
print("Model saved successfully!")

# Score the network on the test split
score = model.evaluate(X_test, y_test)
print("Model evaluation:", score)

# Draw the training and validation loss curves on one figure
plt.figure(figsize=(12, 6))
for history_key, curve_label in (('loss', 'Loss'), ('val_loss', 'Val Loss')):
    plt.plot(r.history[history_key], label=curve_label)
plt.title("Loss over epochs")
plt.legend()
plt.show()

# Reload and test model
print("Reloading the model for validation...")
fraud_model = load_model("fraud_model.h5")

# Recompile the model to include metrics
fraud_model.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy", "Precision", "Recall"])

# Load the saved scaler to confirm the persisted artifact can be read back.
scaler = joblib.load("scaler.pkl")

# X_test was already standardized with this scaler during preprocessing.
# Transforming it a second time would feed double-scaled features to the
# model and silently corrupt every prediction, so it is used as-is here.
# (Apply scaler.transform only to raw, unscaled feature data.)

# Make predictions
y_pred = fraud_model.predict(X_test)
y_pred = (y_pred > 0.5).astype(int)  # Convert probabilities to binary class labels

# Calculate accuracy
accuracy = accuracy_score(y_test, y_pred)
print(f"Model Accuracy: {accuracy:.4f}")

# Display confusion matrix
conf_matrix = confusion_matrix(y_test, y_pred)
print("Confusion Matrix:\n", conf_matrix)

# Classification report
class_report = classification_report(y_test, y_pred, target_names=["Normal", "Fraud"])
print("Classification Report:\n", class_report)
