In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/fruaddfdata/balanced_fraud_df.csv


In [2]:
!pip install mlflow


Collecting mlflow
  Downloading mlflow-2.17.1-py3-none-any.whl.metadata (29 kB)
Collecting mlflow-skinny==2.17.1 (from mlflow)
  Downloading mlflow_skinny-2.17.1-py3-none-any.whl.metadata (30 kB)
Collecting graphene<4 (from mlflow)
  Downloading graphene-3.4.1-py2.py3-none-any.whl.metadata (6.9 kB)
Collecting gunicorn<24 (from mlflow)
  Downloading gunicorn-23.0.0-py3-none-any.whl.metadata (4.4 kB)
Collecting cachetools<6,>=5.0.0 (from mlflow-skinny==2.17.1->mlflow)
  Downloading cachetools-5.5.0-py3-none-any.whl.metadata (5.3 kB)
Collecting databricks-sdk<1,>=0.20.0 (from mlflow-skinny==2.17.1->mlflow)
  Downloading databricks_sdk-0.36.0-py3-none-any.whl.metadata (38 kB)
Collecting graphql-core<3.3,>=3.1 (from graphene<4->mlflow)
  Downloading graphql_core-3.2.5-py3-none-any.whl.metadata (10 kB)
Collecting graphql-relay<3.3,>=3.1 (from graphene<4->mlflow)
  Downloading graphql_relay-3.2.0-py3-none-any.whl.metadata (12 kB)
Downloading mlflow-2.17.1-py3-none-any.whl (26.7 MB)
[2K   [9

In [3]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv1D, Flatten, SimpleRNN, LSTM, Dropout, Input
from tensorflow.keras.utils import to_categorical
import shap  # For model interpretability
import mlflow  # For experiment tracking
import mlflow.keras  # To log Keras models with MLflow
import logging  # For logging

# Configure logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

class ModelEvaluator:
    def __init__(self, data_path, target_column, scale_data=True):
        self.data = pd.read_csv(data_path)
        self.target_column = target_column
        self.scale_data = scale_data
        self.results = {}  # To store model evaluation results
        self.prepare_data()

    def prepare_data(self):
        X = self.data.drop(columns=[self.target_column])
        y = self.data[self.target_column]

        # Convert categorical target to numeric (for classification) if needed
        if y.dtype == 'O':  # Check if target is categorical
            y = pd.Categorical(y).codes
        y = to_categorical(y)  # Convert to one-hot for multi-class classification

        # Handle non-numeric data in features
        self.handle_non_numeric_data(X)
        
        # Scale features if required
        if self.scale_data:
            scaler = StandardScaler()
            X = pd.DataFrame(scaler.fit_transform(X), columns=X.columns)

        # Split into training and test sets
        self.X_train, self.X_test, self.y_train, self.y_test = train_test_split(X, y, test_size=0.2, random_state=42)
        
        # Define input shape based on the training data
        self.input_shape = (self.X_train.shape[1],)

    def handle_non_numeric_data(self, X):
        """Convert datetime and categorical columns to numeric."""
        for column in X.columns:
            if X[column].dtype == 'object':
                if pd.to_datetime(X[column], errors='coerce').notnull().all():
                    X[column] = pd.to_datetime(X[column])
                    X[column + '_year'] = X[column].dt.year
                    X[column + '_month'] = X[column].dt.month
                    X[column + '_day'] = X[column].dt.day
                    X.drop(columns=[column], inplace=True)
                else:
                    X[column] = X[column].astype('category').cat.codes  # Convert categorical to numeric codes

    def build_mlp(self):
        model = Sequential([
            Dense(128, activation='relu', input_shape=self.input_shape),
            Dense(64, activation='relu'),
            Dense(self.y_train.shape[1], activation='softmax')
        ])
        model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
        return model

    def build_cnn(self):
        model = Sequential([
            Input(shape=(self.input_shape[0], 1)),
            Conv1D(64, kernel_size=3, activation='relu'),
            Flatten(),
            Dense(self.y_train.shape[1], activation='softmax')
        ])
        model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
        return model

    def build_rnn(self):
        model = Sequential([
            Input(shape=(self.input_shape[0], 1)),
            SimpleRNN(64, activation='relu'),
            Dense(self.y_train.shape[1], activation='softmax')
        ])
        model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
        return model

    def build_lstm(self):
        model = Sequential([
            Input(shape=(self.input_shape[0], 1)),
            LSTM(64, activation='relu'),
            Dense(self.y_train.shape[1], activation='softmax')
        ])
        model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
        return model

    def train_model(self, model, model_name):
        logging.info(f"Training model: {model_name}")

        # Reshape data for CNN/RNN/LSTM models
        if 'CNN' in model_name or 'RNN' in model_name or 'LSTM' in model_name:
            X_train_reshaped = np.expand_dims(self.X_train.values, axis=2)
            X_test_reshaped = np.expand_dims(self.X_test.values, axis=2)
        else:
            X_train_reshaped = self.X_train.values
            X_test_reshaped = self.X_test.values

        with mlflow.start_run(run_name=model_name):
            history = model.fit(X_train_reshaped, self.y_train, validation_data=(X_test_reshaped, self.y_test),
                                epochs=20, batch_size=32, verbose=1)

            mlflow.keras.log_model(model, model_name)
            mlflow.log_params({"optimizer": "adam", "loss": "categorical_crossentropy", "epochs": 10, "batch_size": 32})
            for epoch, accuracy in enumerate(history.history['accuracy']):
                mlflow.log_metric(f"train_accuracy_epoch_{epoch+1}", accuracy)
            for epoch, val_accuracy in enumerate(history.history['val_accuracy']):
                mlflow.log_metric(f"val_accuracy_epoch_{epoch+1}", val_accuracy)

        return model

    def evaluate_model(self, model, model_name):
        logging.info(f"Evaluating model: {model_name}")
        
        y_pred = np.argmax(model.predict(self.X_test.values), axis=1)
        y_true = np.argmax(self.y_test, axis=1)

        accuracy = accuracy_score(y_true, y_pred)
        precision = precision_score(y_true, y_pred, average='weighted')
        recall = recall_score(y_true, y_pred, average='weighted')
        f1 = f1_score(y_true, y_pred, average='weighted')

        self.results[model_name] = {
            'accuracy': accuracy,
            'precision': precision,
            'recall': recall,
            'f1_score': f1
        }

        mlflow.log_metric("accuracy", accuracy)
        mlflow.log_metric("precision", precision)
        mlflow.log_metric("recall", recall)
        mlflow.log_metric("f1_score", f1)
        logging.info(f"Model evaluation completed for {model_name}")

    def explain_model(self, model, model_name):
        logging.info(f"Explaining model: {model_name}")
        
        # Configure SHAP kernel explainer for interpretability
        explainer = shap.KernelExplainer(model.predict, self.X_test.values[:100])
        shap_values = explainer.shap_values(self.X_test.values[:100])

        # Generate SHAP summary plot and save it as PNG
        plt.figure()
        shap.summary_plot(shap_values, self.X_test.values[:100], plot_type="bar", show=False)
        
        # Create directory if it doesn't exist
        if not os.path.exists("explainability_plots"):
            os.makedirs("explainability_plots")
        
        # Save the plot
        plot_path = f"explainability_plots/{model_name}_explainability.png"
        plt.savefig(plot_path)
        plt.close()  # Close plot to avoid displaying in the notebook directly
        
        logging.info(f"Explainability summary for {model_name} saved at {plot_path}")
    def show_results(self):
        """Displays the evaluation metrics for each model in a readable format."""
        if not self.results:
            logging.info("No results to display. Please run evaluate_model() first.")
            return

        print(f"{'Model Name':<10} {'Accuracy':<10} {'Precision':<10} {'Recall':<10} {'F1 Score':<10}")
        print("-" * 60)

        for model_name, metrics in self.results.items():
            print(f"{model_name:<10} {metrics['accuracy']:<10.4f} {metrics['precision']:<10.4f} "
                  f"{metrics['recall']:<10.4f} {metrics['f1_score']:<10.4f}")

        print("\n" + "-" * 60 + "\n")

In [4]:
# Step 1: Initialize the evaluator
evaluator = ModelEvaluator(data_path='/kaggle/input/fruaddfdata/balanced_fraud_df.csv', target_column='class')

# Step 2: Prepare the data
evaluator.prepare_data()


Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.
Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.
Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.
Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.
Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.
Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected,

In [5]:
# Step 3: Build and train models
mlp_model = evaluator.build_mlp()
cnn_model = evaluator.build_cnn()
rnn_model = evaluator.build_rnn()
lstm_model = evaluator.build_lstm()

Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead.


In [6]:
evaluator.train_model(mlp_model, model_name='MLP')
evaluator.train_model(cnn_model, model_name='CNN')
evaluator.train_model(rnn_model, model_name='RNN')
evaluator.train_model(lstm_model, model_name='LSTM')

Epoch 1/20


I0000 00:00:1730210457.642578     140 service.cc:145] XLA service 0x58bf098b1bd0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1730210457.642632     140 service.cc:153]   StreamExecutor device (0): Tesla P100-PCIE-16GB, Compute Capability 6.0


[1m108/708[0m [32m━━━[0m[37m━━━━━━━━━━━━━━━━━[0m [1m0s[0m 1ms/step - accuracy: 0.6550 - loss: 0.6187

I0000 00:00:1730210458.562862     140 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


[1m708/708[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 5ms/step - accuracy: 0.7167 - loss: 0.5460 - val_accuracy: 0.7485 - val_loss: 0.4932
Epoch 2/20
[1m708/708[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.7586 - loss: 0.4804 - val_accuracy: 0.7533 - val_loss: 0.4843
Epoch 3/20
[1m708/708[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.7666 - loss: 0.4666 - val_accuracy: 0.7542 - val_loss: 0.4798
Epoch 4/20
[1m708/708[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.7709 - loss: 0.4583 - val_accuracy: 0.7584 - val_loss: 0.4765
Epoch 5/20
[1m708/708[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.7649 - loss: 0.4610 - val_accuracy: 0.7577 - val_loss: 0.4762
Epoch 6/20
[1m708/708[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.7717 - loss: 0.4524 - val_accuracy: 0.7564 - val_loss: 0.4817
Epoch 7/20
[1m708/708[0m [32m━━━━━━━



Epoch 1/20
[1m708/708[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 3ms/step - accuracy: 0.7109 - loss: 0.5503 - val_accuracy: 0.7432 - val_loss: 0.5079
Epoch 2/20
[1m708/708[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.7497 - loss: 0.4957 - val_accuracy: 0.7446 - val_loss: 0.4961
Epoch 3/20
[1m708/708[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.7553 - loss: 0.4860 - val_accuracy: 0.7462 - val_loss: 0.4919
Epoch 4/20
[1m708/708[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.7631 - loss: 0.4733 - val_accuracy: 0.7538 - val_loss: 0.4903
Epoch 5/20
[1m708/708[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.7624 - loss: 0.4729 - val_accuracy: 0.7534 - val_loss: 0.4830
Epoch 6/20
[1m708/708[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.7640 - loss: 0.4698 - val_accuracy: 0.7570 - val_loss: 0.4814
Epoch 7/20
[1m708/708[0m 



Epoch 1/20
[1m708/708[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 5ms/step - accuracy: 0.7439 - loss: 0.5155 - val_accuracy: 0.7534 - val_loss: 0.4879
Epoch 2/20
[1m708/708[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.7634 - loss: 0.4722 - val_accuracy: 0.7534 - val_loss: 0.4815
Epoch 3/20
[1m708/708[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.7662 - loss: 0.4675 - val_accuracy: 0.7563 - val_loss: 0.4823
Epoch 4/20
[1m708/708[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.7678 - loss: 0.4656 - val_accuracy: 0.7575 - val_loss: 0.4748
Epoch 5/20
[1m708/708[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.7620 - loss: 0.4720 - val_accuracy: 0.7584 - val_loss: 0.4748
Epoch 6/20
[1m708/708[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.7690 - loss: 0.4630 - val_accuracy: 0.7587 - val_loss: 0.4734
Epoch 7/20
[1m708/708[0m 



Epoch 1/20
[1m708/708[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 5ms/step - accuracy: 0.7187 - loss: 0.5699 - val_accuracy: 0.7303 - val_loss: 0.5129
Epoch 2/20
[1m708/708[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.7533 - loss: 0.4890 - val_accuracy: 0.7490 - val_loss: 0.4892
Epoch 3/20
[1m708/708[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.7568 - loss: 0.4763 - val_accuracy: 0.7522 - val_loss: 0.4821
Epoch 4/20
[1m708/708[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.7664 - loss: 0.4706 - val_accuracy: 0.7561 - val_loss: 0.4786
Epoch 5/20
[1m708/708[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.7666 - loss: 0.4673 - val_accuracy: 0.7533 - val_loss: 0.4784
Epoch 6/20
[1m708/708[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.7612 - loss: 0.4711 - val_accuracy: 0.7563 - val_loss: 0.4752
Epoch 7/20
[1m708/708[0m 



<Sequential name=sequential_3, built=True>

In [7]:
# Step 4: Evaluate models
evaluator.evaluate_model(mlp_model, model_name='MLP')
evaluator.evaluate_model(cnn_model, model_name='CNN')
evaluator.evaluate_model(rnn_model, model_name='RNN')
evaluator.evaluate_model(lstm_model, model_name='LSTM')

[1m177/177[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
[1m177/177[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
[1m177/177[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step
[1m177/177[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step


In [8]:
# Step 5: Show results
evaluator.show_results()

Model Name Accuracy   Precision  Recall     F1 Score  
------------------------------------------------------------
MLP        0.7398     0.7791     0.7398     0.7309    
CNN        0.7603     0.8346     0.7603     0.7468    
RNN        0.7598     0.8344     0.7598     0.7462    
LSTM       0.7580     0.8307     0.7580     0.7445    

------------------------------------------------------------



In [9]:
# Step 5: Explain the Model
evaluator.explain_model(trained_model, model_name='MLP_Model')

NameError: name 'trained_model' is not defined