<a href="https://colab.research.google.com/github/alfi-sh1143/git-remote-add-origin-https-github.com-alfi1143-traffic-deadlock-manual/blob/main/Copy_of_XAI_for_Credit_Card_Fraud_Detection.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install shap lime -q
!pip install xgboost -q

import pandas as pd
import numpy as np
import shap
import lime
import lime.lime_tabular
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
import xgboost as xgb


[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/275.7 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m[90m━[0m [32m266.2/275.7 kB[0m [31m8.1 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m275.7/275.7 kB[0m [31m5.7 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
  Building wheel for lime (setup.py) ... [?25l[?25hdone


In [2]:
# Location of the transactions CSV. The name `url` is kept for compatibility,
# but this is a local path inside the Colab session: the file has to be
# uploaded there (or the path adjusted) before this cell can succeed.
url = "/content/creditcard.csv"

try:
    df = pd.read_csv(url)
except FileNotFoundError as err:
    # Re-raise with an actionable message instead of the bare errno text.
    raise FileNotFoundError(
        f"Dataset not found at '{url}'. Upload creditcard.csv to the runtime "
        "or point `url` at its actual location, then re-run this cell."
    ) from err

# Quick look at the first rows to confirm the file parsed as expected.
df.head()


FileNotFoundError: [Errno 2] No such file or directory: '/content/creditcard.csv'

In [None]:
# Keep only the rows that carry a label; rows with a missing 'Class' value
# cannot be used for supervised training or evaluation.
df_cleaned = df.dropna(subset=['Class'])

# Separate the target column from the feature matrix.
y = df_cleaned['Class']
X = df_cleaned.drop(columns='Class')

# Hold out 20% of the rows for testing. Stratifying on the label keeps the
# class proportions the same in both partitions, which matters because the
# classes are imbalanced.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

# Report the resulting partition sizes.
print("Data split successfully.")
print(f"Shape of X_train: {X_train.shape}")
print(f"Shape of X_test: {X_test.shape}")
print(f"Shape of y_train: {y_train.shape}")
print(f"Shape of y_test: {y_test.shape}")

In [None]:
# Gradient-boosted tree classifier evaluated with log-loss during training.
# `use_label_encoder` is deliberately not passed: current XGBoost releases
# have deprecated it and only emit an "unused parameter" warning for it.
model = xgb.XGBClassifier(eval_metric='logloss')
model.fit(X_train, y_train)

# Score the held-out split and print per-class precision / recall / F1.
y_pred = model.predict(X_test)
print(classification_report(y_test, y_pred))


In [None]:
# Wrap the fitted model in a SHAP explainer, then compute one Explanation
# object that covers every row of the test split. Later cells reuse both
# `explainer` and `shap_values`.
explainer = shap.Explainer(model)
shap_values = explainer(X_test)

# Global overview: distribution of per-feature SHAP values over the test set.
shap.summary_plot(shap_values, features=X_test)


In [None]:
# Load SHAP's JavaScript so the interactive force plot can render here.
shap.initjs()

# Take the first test row as a one-row DataFrame (double brackets keep it
# two-dimensional) and explain just that row.
first_test_row = X_test.iloc[[0]]
single_instance_explanation = explainer(first_test_row)

# The Explanation object is handed to force_plot directly; being the last
# expression of the cell, the plot is what the cell displays.
shap.force_plot(single_instance_explanation)

In [None]:
# Build a LIME tabular explainer from the raw training values. `verbose=True`
# makes LIME print details of the local surrogate fit.
explainer_lime = lime.lime_tabular.LimeTabularExplainer(
    training_data=X_train.values,
    feature_names=X_train.columns,
    class_names=["Not Fraud", "Fraud"],
    verbose=True,
    mode='classification',
)

# Position (row number) of the test instance to explain.
i = 2

# Explain that row using the model's class probabilities, keeping the ten
# most influential features, and render the result inline.
exp = explainer_lime.explain_instance(
    data_row=X_test.values[i],
    predict_fn=model.predict_proba,
    num_features=10,
)
exp.show_in_notebook(show_table=True)


## Model Interpretation with LIME

LIME (Local Interpretable Model-agnostic Explanations) is a technique that explains the predictions of any black-box model by approximating it with a local interpretable model around the prediction.

Let's generate LIME explanations for a few instances from the test set to understand why the model made those specific predictions.

In [None]:
# Relies on `model`, `X_train`, `X_test` and `y_test` from the earlier cells.

# LIME operates on NumPy arrays, so it is given the raw training values and
# the column names as a plain list.
explainer_lime = lime.lime_tabular.LimeTabularExplainer(
    training_data=X_train.values,
    feature_names=X_train.columns.tolist(),
    class_names=['Not Fraud', 'Fraud'],
    mode='classification'
)

# Start with the first two rows of the test set.
instances_to_explain = X_test.head(2)

# Add one row whose TRUE label is fraud (selected from y_test, not from the
# model's predictions), if the test split contains any.
fraud_indices = y_test[y_test == 1].index
if len(fraud_indices) > 0:
    # Double brackets keep the selection as a one-row DataFrame for concat.
    fraud_instance = X_test.loc[[fraud_indices[0]]]
    instances_to_explain = pd.concat([instances_to_explain, fraud_instance])
    print("Adding a fraud instance to explanations.")

# Drop repeats in case the fraud row was already among the first two rows.
instances_to_explain = instances_to_explain[~instances_to_explain.index.duplicated(keep='first')]


print(f"\nGenerating LIME explanations for {len(instances_to_explain)} instances...")

# Generate and render one LIME explanation per selected row.
for index, instance in instances_to_explain.iterrows():
    print(f"\nExplaining instance with index: {index}")
    # LIME expects a single sample as a 1-D NumPy array.
    explanation = explainer_lime.explain_instance(
        data_row=instance.values,
        predict_fn=model.predict_proba,  # class probabilities for classification mode
        num_features=len(X_train.columns)  # report every feature
    )

    print(f"LIME Explanation for instance {index}:")
    # as_html() only RETURNS an HTML string, which is discarded inside a loop;
    # show_in_notebook() actually renders the explanation in the cell output.
    explanation.show_in_notebook(show_table=True)

In [None]:
# Take the test row at position 1 as a Series and run it through the model.
# `sample` and `pred` are reused by the following cell.
sample = X_test.iloc[1]
pred = model.predict([sample])
predicted_label = "Fraud" if pred[0] else "Not Fraud"
print("Prediction:", predicted_label)

# Explain the same row; it is passed as a one-row DataFrame (double brackets)
# so the explainer receives two-dimensional input.
row_as_frame = X_test.iloc[[1]]
single_instance_explanation = explainer(row_as_frame)

# Load SHAP's JavaScript, then show the force plot as the cell's output.
shap.initjs()
shap.force_plot(single_instance_explanation)

In [None]:
# Show the feature values of the current `sample` as a single-row table,
# followed by the model's verdict for it (`pred` comes from the cell above).
print("Input Values:")
sample_as_row = sample.to_frame().T
display(sample_as_row)

print(f"Prediction: {'✅ Fraud' if pred[0] else '❌ Not Fraud'}")


In [None]:
# Positional (iloc) row numbers within X_test to explain; arbitrary examples.
sample_positions = [1, 30, 150]

# initjs() does not depend on the loop variable, so it is called once up
# front instead of once per plot.
shap.initjs()

for i in sample_positions:
    # Guard against a test split that is shorter than the requested position.
    if i >= len(X_test):
        print(f"\n⚠️ Sample {i} is out of range for X_test ({len(X_test)} rows). Skipping.")
        continue

    sample = X_test.iloc[i]
    pred = model.predict([sample])[0]

    print(f"\n🔍 Sample {i} — Prediction: {'Fraud' if pred else 'Not Fraud'}")

    # `shap_values` was computed from X_test, so position i refers to the same
    # row as X_test.iloc[i]. Indexing already yields a per-row Explanation, so
    # there is no need to rebuild one by hand.
    sample_shap_value = shap_values[i]

    # Passing the Series as `features` lets the plot pick up the column names.
    display(shap.force_plot(sample_shap_value.base_values, sample_shap_value.values, sample))


In [None]:
# Collect the row labels of every test-set transaction whose true class is
# 1 (fraud), as a plain Python list.
fraud_indices = y_test.loc[y_test == 1].index.tolist()

print("Fraud sample indexes in test set:", fraud_indices)


In [None]:
# Row labels of every test-set transaction whose true class is 0 (not fraud).
# The full list stays available in `not_fraud_indices` for later cells.
not_fraud_indices = y_test[y_test == 0].index.tolist()

# This list can contain a very large number of labels, so print a count and a
# short preview instead of flooding the cell output with every one of them.
print("Not Fraud sample count in test set:", len(not_fraud_indices))
print("Not Fraud sample indexes in test set (first 10):", not_fraud_indices[:10])



In [None]:
# Row labels of the test-set transactions whose true class is fraud.
fraud_indices_in_test = y_test.loc[y_test == 1].index.tolist()

if fraud_indices_in_test:
    # Work with the first fraud row found in the test split.
    index_to_explain = fraud_indices_in_test[0]
    sample = X_test.loc[index_to_explain]

    # Ask the model for its verdict on that row.
    pred = model.predict([sample])[0]
    verdict = "✅ Fraud" if pred else "❌ Not Fraud"
    print(f"Prediction for index {index_to_explain}:", verdict)

    # Explain the row; double brackets keep it a one-row DataFrame.
    single_instance_explanation = explainer(X_test.loc[[index_to_explain]])

    # Render the force plot showing what pushed the prediction up or down.
    shap.initjs()
    display(shap.force_plot(single_instance_explanation))
else:
    # An empty list means the test split holds no fraud rows at all.
    print("No fraud samples found in the test set (X_test). Cannot generate force plot for a fraud prediction.")

In [None]:
# Row label (original DataFrame index) of the fraud transaction to explain.
# The value depends on the exact dataset and split, so it is validated against
# the test set below instead of being trusted blindly.
fraud_index = 102442

# Labels of the test-set rows whose true class is fraud.
fraud_labels_in_test = y_test[y_test == 1].index

if fraud_index not in fraud_labels_in_test:
    if len(fraud_labels_in_test) > 0:
        # Fall back to a row that really is a fraud case in this split.
        print(f"Index {fraud_index} is not a fraud row in X_test; using index {fraud_labels_in_test[0]} instead.")
        fraud_index = fraud_labels_in_test[0]
    else:
        print(f"Error: Original index {fraud_index} not found in X_test. Please select a fraud index from X_test.")
        fraud_index = None

if fraud_index is not None:
    # Fetch the row by label and check what the model predicts for it.
    sample_fraud = X_test.loc[fraud_index]
    pred_fraud = model.predict([sample_fraud])[0]
    print(f"🔴 Fraud Sample - Index {fraud_index}")
    print("👉 Actual: Fraud (1), Predicted:", "✅ Fraud" if pred_fraud else "❌ Not Fraud")

    # `shap_values` is indexed by position, not by label, so translate the
    # label into its row position within X_test.
    positional_index = X_test.index.get_loc(fraud_index)
    print(f"Positional index of original index {fraud_index} in X_test: {positional_index}")

    # Per-row Explanation built from the precomputed test-set SHAP values,
    # with explicit feature names so the plot labels are readable.
    # `ex_fraud` is reused by a later cell.
    ex_fraud = shap.Explanation(
        values=shap_values[positional_index].values,
        base_values=shap_values[positional_index].base_values,
        data=shap_values[positional_index].data,
        feature_names=X_test.columns.tolist()
    )

    # Display force plot
    shap.initjs()
    display(shap.force_plot(ex_fraud))

else:
    positional_index = None
    print("Cannot generate force plot because the fraud index was not found in X_test.")

In [None]:
# Choose the first test-set row whose true class is 0 (not fraud).
not_fraud_index = y_test[y_test == 0].index[0]

# Fetch that row by its label and check the model's verdict.
sample_nf = X_test.loc[not_fraud_index]
pred_nf = model.predict([sample_nf])[0]
print(f"\n🟢 Not Fraud Sample - Index {not_fraud_index}")
print("👉 Actual: Not Fraud (0), Predicted:", "✅ Not Fraud" if pred_nf == 0 else "❌ Fraud")

# `shap_values` is addressed by row position rather than by label, so the
# label is translated to its position within X_test first.
try:
    positional_index_nf = X_test.index.get_loc(not_fraud_index)
except KeyError:
    print(f"Error: Original index {not_fraud_index} not found in X_test.")
    positional_index_nf = None  # signals "not found" to the check below
else:
    print(f"Positional index of original index {not_fraud_index} in X_test: {positional_index_nf}")

if positional_index_nf is None:
    print("Cannot generate force plot because the non-fraud index was not found in X_test.")
else:
    # Assemble a per-row Explanation from the SHAP values precomputed for the
    # whole test set; `ex_nf` is reused by a later cell.
    row_nf = shap_values[positional_index_nf]
    ex_nf = shap.Explanation(
        values=row_nf.values,
        base_values=row_nf.base_values,
        data=row_nf.data,
        feature_names=X_test.columns.tolist(),
    )

    # Load the JS support and render the force plot.
    shap.initjs()
    display(shap.force_plot(ex_nf))

In [None]:
# Side-by-side recap of the two explanations built in the previous cells
# (`ex_fraud` / `pred_fraud` and `ex_nf` / `pred_nf`).
shap.initjs()

# Derive each heading from the model's actual prediction instead of
# hard-coding an outcome that may not match what the model returned.
fraud_outcome = "Fraud ✅" if pred_fraud else "Not Fraud ❌"
print(f"\n🔍 Force Plot for FRAUD (Predicted as {fraud_outcome})")
# Feature names are passed explicitly: `.data` is a bare array, so without
# them the plot would fall back to generic feature labels.
display(shap.force_plot(ex_fraud.base_values, ex_fraud.values, ex_fraud.data,
                        feature_names=ex_fraud.feature_names))

not_fraud_outcome = "Not Fraud ✅" if pred_nf == 0 else "Fraud ❌"
print(f"\n🔍 Force Plot for NOT FRAUD (Predicted as {not_fraud_outcome})")
display(shap.force_plot(ex_nf.base_values, ex_nf.values, ex_nf.data,
                        feature_names=ex_nf.feature_names))


In [None]:
# Uses `model`, `X_test` and `shap_values` (the Explanation computed for the
# whole test set) from earlier cells.

# Each entry is an ORIGINAL row label, not a position. Per the notebook's
# earlier notes, 102442 and 77348 are fraud rows and 263020 is a non-fraud row.
for i in (102442, 263020, 77348):
    try:
        # Label-based lookup; raises KeyError when the label is not in X_test.
        sample = X_test.loc[i]
        pred = model.predict([sample])[0]

        outcome = 'Fraud' if pred else 'Not Fraud'
        print(f"\n🔍 Sample {i} — Prediction: {outcome}")

        # `shap_values` is addressed by row position, so convert the label.
        positional_index = X_test.index.get_loc(i)
        row_explanation = shap_values[positional_index]

        # Re-wrap the row's values with explicit feature names for the plot.
        ex_sample = shap.Explanation(
            values=row_explanation.values,
            base_values=row_explanation.base_values,
            data=row_explanation.data,
            feature_names=X_test.columns.tolist(),
        )

        # Load the JS support and render the force plot for this row.
        shap.initjs()
        display(shap.force_plot(ex_sample))

    except KeyError:
        # Label missing from this split: report it and move on.
        print(f"\n⚠️ Warning: Original index {i} not found in X_test. Skipping this instance.")
    except Exception as e:
        # Best-effort loop: report any other failure and continue with the rest.
        print(f"\n❌ Error processing index {i}: {e}")