In [None]:
# IMPORTING LIBRARIES
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import xgboost as xgb
from sklearn.model_selection import train_test_split, GridSearchCV, cross_val_score
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import (
    precision_score, recall_score, f1_score, accuracy_score,
    roc_auc_score, roc_curve, confusion_matrix,
    classification_report, precision_recall_curve, auc
)
from imblearn.over_sampling import SMOTE
from imblearn.under_sampling import RandomUnderSampler
from imblearn.pipeline import Pipeline as ImbPipeline
import warnings
warnings.filterwarnings('ignore')

In [None]:
# Global plotting defaults: seaborn "whitegrid" theme, then a wide default
# figure size for every figure created without an explicit figsize.
sns.set_style(style="whitegrid")
plt.rcParams.update({'figure.figsize': (12, 6)})

In [None]:
# LOADING DATASET
print("ETHEREUM FRAUD DETECTION - XGBOOST")

# Read the transaction CSV into the notebook-wide DataFrame `df`
DATASET_PATH = "/content/transaction_dataset.csv"
df = pd.read_csv(DATASET_PATH)

# Report the table dimensions (row count with thousands separators)
n_rows, n_cols = df.shape
print(f"Dataset Shape: {df.shape}")
print(f"   Rows: {n_rows:,} | Columns: {n_cols}")

ETHEREUM FRAUD DETECTION - XGBOOST
Dataset Shape: (9841, 51)
   Rows: 9,841 | Columns: 51


In [None]:
# EDA
# Prints a structural overview of `df`, reports missing values, locates the
# fraud label column and defines the target vector `y` used by later cells.
print("EXPLORATORY DATA ANALYSIS")

# Display basic info.
# DataFrame.info() writes its report to stdout itself and returns None, so it
# must be called directly; wrapping it in print() emits a stray "None" line.
print("Dataset Info:")
df.info()

print("\nFirst Few Rows:")
print(df.head())

print("\nStatistical Summary:")
print(df.describe())

# Check for missing values: per-column count and percentage of rows
print("\nMissing Values:")
missing = df.isnull().sum()
missing_pct = (missing / len(df)) * 100
missing_df = pd.DataFrame({
    'Missing Count': missing,
    'Percentage': missing_pct
})

if missing.sum() == 0:
    # Nothing to tabulate; avoid printing an empty DataFrame
    print("No missing values found!")
else:
    print(missing_df[missing_df['Missing Count'] > 0])

# Check class distribution.
# The label column is the first of these candidate names present in the data.
LABEL_CANDIDATES = ['FLAG', 'Class', 'isFraud', 'Fraud', 'is_fraud']
fraud_col = next((col for col in LABEL_CANDIDATES if col in df.columns), None)

if fraud_col:
    print(f"\nClass Distribution ({fraud_col}):")
    class_dist = df[fraud_col].value_counts()
    print(class_dist)

    # Labels are assumed to be encoded as 0 (clean) / 1 (fraud)
    n_clean_total = class_dist.get(0, 0)
    n_fraud_total = class_dist.get(1, 0)
    fraud_pct = (n_fraud_total / len(df)) * 100
    clean_pct = (n_clean_total / len(df)) * 100
    print(f"Fraud Percentage: {fraud_pct:.2f}%")
    print(f"Clean Percentage: {clean_pct:.2f}%")

    # Guard the denominator so a dataset without any fraud rows does not divide by zero
    imbalance_ratio = n_clean_total / max(class_dist.get(1, 1), 1)
    print(f"Imbalance Ratio: {imbalance_ratio:.2f}:1")

    y = df[fraud_col]
else:
    print("Warning: Could not find fraud label column!")
    print("Available columns:", df.columns.tolist())
    # Placeholder all-zero target, aligned to df's index, so later cells still run
    y = pd.Series(0, index=df.index)

EXPLORATORY DATA ANALYSIS
Dataset Info:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9841 entries, 0 to 9840
Data columns (total 51 columns):
 #   Column                                                Non-Null Count  Dtype  
---  ------                                                --------------  -----  
 0   Unnamed: 0                                            9841 non-null   int64  
 1   Index                                                 9841 non-null   int64  
 2   Address                                               9841 non-null   object 
 3   FLAG                                                  9841 non-null   int64  
 4   Avg min between sent tnx                              9841 non-null   float64
 5   Avg min between received tnx                          9841 non-null   float64
 6   Time Diff between first and last (Mins)               9841 non-null   float64
 7   Sent tnx                                              9841 non-null   int64  
 8   Received Tnx      

In [None]:
# DATA PREPROCESSING
# Builds the numeric feature matrix `X` from `df`: removes the label and the
# row-identifier columns, integer-encodes text columns and imputes missing values.
print("DATA PREPROCESSING")

# Columns that identify a row/account rather than describe its behaviour.
# Keeping them as features lets the model learn row position or identity
# instead of transaction patterns (label leakage), which inflates every metric.
# NOTE(review): presumably the CSV is ordered by label, which is why the row
# counter 'Unnamed: 0' is so predictive - confirm against the raw file.
ID_COLUMNS = ['Unnamed: 0', 'Index', 'Address']

# Separate features and target (errors='ignore' tolerates absent columns)
label_columns = [fraud_col] if fraud_col else []
X = df.drop(columns=label_columns + ID_COLUMNS, errors='ignore')

dropped_id_columns = [col for col in ID_COLUMNS if col in df.columns]
if dropped_id_columns:
    print(f"Dropped identifier columns: {dropped_id_columns}")

# Identify numeric and categorical columns
numeric_cols = X.select_dtypes(include=[np.number]).columns.tolist()
categorical_cols = X.select_dtypes(include=['object', 'category']).columns.tolist()

print(f"Numeric Features ({len(numeric_cols)}): {numeric_cols[:5]}{'...' if len(numeric_cols) > 5 else ''}")
print(f"Categorical Features ({len(categorical_cols)}): {categorical_cols}")

# Handle categorical features
if categorical_cols:
    print("Encoding categorical features...")
    le = LabelEncoder()
    for col in categorical_cols:
        # astype(str) turns missing entries into the literal 'nan', so they
        # receive their own integer code; the encoder is refit for every column
        X[col] = le.fit_transform(X[col].astype(str))
    print("Categorical encoding complete!")

# Handle missing values
# NOTE(review): medians are computed on the full dataset before the train/test
# split, so test rows influence the fill values - consider imputing after the split.
if X.isnull().sum().sum() > 0:
    print("Handling missing values...")
    X = X.fillna(X.median())
    print("Missing values filled with median!")

# Remove any non-numeric columns
X = X.select_dtypes(include=[np.number])

print(f"Final feature matrix shape: {X.shape}")

DATA PREPROCESSING
Numeric Features (47): ['Unnamed: 0', 'Index', 'Avg min between sent tnx', 'Avg min between received tnx', 'Time Diff between first and last (Mins)']...
Categorical Features (3): ['Address', ' ERC20 most sent token type', ' ERC20_most_rec_token_type']
Encoding categorical features...
Categorical encoding complete!
Handling missing values...
Missing values filled with median!
Final feature matrix shape: (9841, 50)


In [None]:
# VISUALISATION - FEATURE DISTRIBUTIONS
# Saves three PNG files: histograms of the first few features, a correlation
# heatmap, and (when a label column exists) the class balance as bar + pie.
print("GENERATING VISUALIZATIONS")

# Plot distributions of key features (first up-to-six columns of X, 2x3 grid)
n_features_to_plot = min(6, len(X.columns))
fig, axes = plt.subplots(2, 3, figsize=(15, 10))
fig.suptitle('Feature Distributions', fontsize=16, fontweight='bold')

for idx, col in enumerate(X.columns[:n_features_to_plot]):
    ax = axes[idx // 3, idx % 3]
    ax.hist(X[col], bins=50, edgecolor='black', alpha=0.7, color='steelblue')
    ax.set_title(f'{col}', fontweight='bold')
    ax.set_xlabel('Value')
    ax.set_ylabel('Frequency')

# Hide grid cells that received no feature so they do not render as empty axes
for unused_ax in axes.ravel()[n_features_to_plot:]:
    unused_ax.set_visible(False)

plt.tight_layout()
plt.savefig('feature_distributions.png', dpi=300, bbox_inches='tight')
print("Saved: feature_distributions.png")
plt.close()

# Correlation heatmap
print("Generating correlation heatmap...")
plt.figure(figsize=(14, 12))

# With many columns, restrict the heatmap to the 20 highest-variance features.
# The correlation matrix is computed once, only for the columns that are drawn.
if len(X.columns) > 20:
    top_features = X.var().nlargest(20).index
    correlation_matrix = X[top_features].corr()
else:
    correlation_matrix = X.corr()

sns.heatmap(correlation_matrix, annot=False, cmap='coolwarm',
            center=0, square=True, linewidths=0.5, cbar_kws={'label': 'Correlation'})
plt.title('Feature Correlation Heatmap', fontsize=16, fontweight='bold', pad=20)
plt.tight_layout()
plt.savefig('correlation_heatmap.png', dpi=300, bbox_inches='tight')
print("Saved: correlation_heatmap.png")
plt.close()

# Class distribution visualization
if fraud_col:
    fig, axes = plt.subplots(1, 2, figsize=(14, 5))

    # value_counts() orders by frequency, not by label. Reindex to [0, 1] so the
    # hard-coded 'Clean'/'Fraud' labels and colours always match the right class
    # (labels are assumed to be encoded as 0 = clean, 1 = fraud).
    class_counts = y.value_counts().reindex([0, 1], fill_value=0)
    class_labels = ['Clean', 'Fraud']
    class_colors = ['green', 'red']

    # Bar plot
    axes[0].bar(class_labels, class_counts.values, color=class_colors, alpha=0.7)
    axes[0].set_ylabel('Count', fontweight='bold')
    axes[0].set_title('Class Distribution', fontweight='bold')
    axes[0].grid(axis='y', alpha=0.3)

    # Pie chart
    axes[1].pie(class_counts.values, labels=class_labels, autopct='%1.2f%%',
                colors=class_colors, startangle=90)
    axes[1].set_title('Class Proportion', fontweight='bold')

    plt.tight_layout()
    plt.savefig('class_distribution.png', dpi=300, bbox_inches='tight')
    print("✓ Saved: class_distribution.png")
    plt.close()


GENERATING VISUALIZATIONS
✓ Saved: feature_distributions.png
Generating correlation heatmap...
✓ Saved: correlation_heatmap.png
✓ Saved: class_distribution.png


In [None]:
# TRAIN/TEST SPLIT
banner = "=" * 80
print("\n" + banner)
print("TRAIN/TEST SPLIT")
print(banner)

# 70/30 split with a fixed seed; stratified on the label only when a real
# label column was found.
stratify_labels = y if fraud_col else None
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42, stratify=stratify_labels
)

# Report the size and class counts of each partition
for split_name, split_features, split_labels in (
    ("Training set", X_train, y_train),
    ("Test set", X_test, y_test),
):
    n_clean_rows = (split_labels == 0).sum()
    n_fraud_rows = (split_labels == 1).sum()
    print(f"{split_name}: {split_features.shape[0]:,} samples")
    print(f"   Clean: {n_clean_rows:,} | Fraud: {n_fraud_rows:,}")

# Normalize features: the scaler is fitted on the training rows only and the
# same fitted transform is then applied to both partitions.
print("\nNormalizing features with StandardScaler...")
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)
print("Feature scaling complete!")


TRAIN/TEST SPLIT
Training set: 6,888 samples
   Clean: 5,363 | Fraud: 1,525
Test set: 2,953 samples
   Clean: 2,299 | Fraud: 654

Normalizing features with StandardScaler...
Feature scaling complete!


In [None]:
# HANDLE CLASS IMBALANCE
# Produces X_train_final / y_train_final. When the clean:fraud ratio exceeds
# 5:1 the training set is rebalanced with SMOTE followed by random
# under-sampling; otherwise the scaled training data is used unchanged.
print("HANDLING CLASS IMBALANCE")

# Default: no resampling
X_train_final = X_train_scaled
y_train_final = y_train

if fraud_col and y_train.sum() > 0:
    n_fraud = (y_train == 1).sum()
    n_clean = (y_train == 0).sum()
    imbalance_ratio = n_clean / n_fraud

    if imbalance_ratio > 5:
        print(f"Severe class imbalance detected (ratio: {imbalance_ratio:.2f}:1)")
        print("Applying SMOTE + Random Under-sampling...")

        # SMOTE target: minority/majority ratio after over-sampling
        # (three times the current ratio, capped at 0.5)
        over_sample_ratio = min(0.5, n_fraud / n_clean * 3)

        # Under-sampling target: minority/majority ratio after removing
        # majority rows. It must not be lower than the ratio SMOTE just
        # produced; otherwise the sampler would have to add majority samples
        # and raises. Deriving it from the post-SMOTE ratio (doubled, capped
        # at 0.8) keeps it >= over_sample_ratio for every input.
        under_sample_ratio = min(0.8, over_sample_ratio * 2)

        over = SMOTE(sampling_strategy=over_sample_ratio, random_state=42)
        under = RandomUnderSampler(sampling_strategy=under_sample_ratio, random_state=42)

        steps = [('over', over), ('under', under)]
        pipeline = ImbPipeline(steps=steps)

        X_train_resampled, y_train_resampled = pipeline.fit_resample(X_train_scaled, y_train)

        print(f"Resampling complete!")
        print(f"   Before: Clean={n_clean:,}, Fraud={n_fraud:,}")
        print(f"   After: Clean={(y_train_resampled == 0).sum():,}, Fraud={(y_train_resampled == 1).sum():,}")

        X_train_final = X_train_resampled
        y_train_final = y_train_resampled
    else:
        print(f"Class imbalance is manageable (ratio: {imbalance_ratio:.2f}:1)")
        print("Using scale_pos_weight parameter in XGBoost")

HANDLING CLASS IMBALANCE
Class imbalance is manageable (ratio: 3.52:1)
Using scale_pos_weight parameter in XGBoost


In [None]:
# MODEL TRAINING - XGBOOST
# Tunes an XGBoost classifier with an exhaustive grid search (F1, 3-fold CV)
# and exposes the winning, already-refit estimator as `final_model`.
print("MODEL TRAINING - XGBOOST")

# Calculate scale_pos_weight for imbalanced data: negatives / positives in the
# (possibly resampled) training labels; 1 when there is nothing to weight.
if fraud_col and y_train_final.sum() > 0:
    scale_pos_weight = (y_train_final == 0).sum() / (y_train_final == 1).sum()
else:
    scale_pos_weight = 1

print(f"Scale Pos Weight: {scale_pos_weight:.2f}")

# Define parameter grid for hyperparameter tuning
print("\nPerforming hyperparameter tuning with GridSearchCV...")

# 3^7 = 2187 combinations; with cv=3 this is 6561 model fits.
param_grid = {
    'max_depth': [3, 5, 7],
    'learning_rate': [0.01, 0.1, 0.3],
    'n_estimators': [100, 200, 300],
    'min_child_weight': [1, 3, 5],
    'gamma': [0, 0.1, 0.2],
    'subsample': [0.8, 0.9, 1.0],
    'colsample_bytree': [0.8, 0.9, 1.0]
}

# Initial model
# NOTE(review): `use_label_encoder` is believed to be deprecated/ignored in
# recent XGBoost releases - confirm against the installed version before removing.
xgb_model = xgb.XGBClassifier(
    objective='binary:logistic',
    scale_pos_weight=scale_pos_weight,
    random_state=42,
    eval_metric='logloss',
    use_label_encoder=False
)

# GridSearchCV with cross-validation
grid_search = GridSearchCV(
    estimator=xgb_model,
    param_grid=param_grid,
    scoring='f1',
    cv=3,
    verbose=1,
    n_jobs=-1
)

print("Training XGBoost with cross-validation...")
print("This may take a few minutes...")

grid_search.fit(X_train_final, y_train_final)

# Best model: with the default refit=True, GridSearchCV has already retrained
# the best parameter combination on the whole training set.
best_model = grid_search.best_estimator_

print("\nTraining complete!")
print(f"Best parameters found:")
for param, value in grid_search.best_params_.items():
    print(f"   {param}: {value}")
print(f"Best cross-validation F1-score: {grid_search.best_score_:.4f}")

# Final model: reuse the refit best estimator instead of fitting an identical
# model (same parameters, seed and training data) a second time.
print("\nUsing the refit best estimator as the final model...")
final_model = best_model
print("Final model training complete!")

MODEL TRAINING - XGBOOST
Scale Pos Weight: 3.52

Performing hyperparameter tuning with GridSearchCV...
Training XGBoost with cross-validation...
This may take a few minutes...
Fitting 3 folds for each of 2187 candidates, totalling 6561 fits

Training complete!
Best parameters found:
   colsample_bytree: 0.8
   gamma: 0
   learning_rate: 0.01
   max_depth: 3
   min_child_weight: 1
   n_estimators: 100
   subsample: 0.8
Best cross-validation F1-score: 1.0000

Training final model with best parameters...
Final model training complete!


In [None]:
# PREDICTIONS AND SCORING
print("PREDICTIONS AND SCORING")

# Hard class predictions and the probability of the positive class
# (second column of predict_proba) for every test row
y_pred = final_model.predict(X_test_scaled)
y_pred_proba = final_model.predict_proba(X_test_scaled)[:, 1]

# Rating on a 1-10 scale, rounded to one decimal place.
# The scale is inverted on purpose: probability 0 maps to 10, probability 1
# maps to 1, so a LOWER rating means a MORE risky transaction.
fraud_risk_rating = np.round((1 - y_pred_proba) * 9 + 1, 1)

n_scored = len(y_pred)
n_pred_fraud = y_pred.sum()
n_pred_clean = (y_pred == 0).sum()

print(f"Prediction Statistics:")
print(f"   Predicted Fraud: {n_pred_fraud:,} ({n_pred_fraud / n_scored * 100:.2f}%)")
print(f"   Predicted Clean: {n_pred_clean:,} ({n_pred_clean / n_scored * 100:.2f}%)")

# Same four summary statistics for both score arrays, each with its own precision
for heading, scores, spec in (
    ("\nFraud Probability Statistics:", y_pred_proba, ".4f"),
    ("\nFraud Risk Rating (1-10) Statistics:", fraud_risk_rating, ".1f"),
):
    print(heading)
    print(f"   Min: {scores.min():{spec}}")
    print(f"   Max: {scores.max():{spec}}")
    print(f"   Mean: {scores.mean():{spec}}")
    print(f"   Median: {np.median(scores):{spec}}")

PREDICTIONS AND SCORING
Prediction Statistics:
   Predicted Fraud: 654 (22.15%)
   Predicted Clean: 2,299 (77.85%)

Fraud Probability Statistics:
   Min: 0.1830
   Max: 0.8141
   Mean: 0.3273
   Median: 0.1915

Fraud Risk Rating (1-10) Statistics:
   Min: 2.7
   Max: 8.4
   Mean: 7.1
   Median: 8.3


In [None]:
# MODEL EVALUATION
print("MODEL EVALUATION")

# Metrics are only computed when the test labels contain at least one positive
if not (y_test.sum() > 0):
    print("No fraud samples in test set - skipping evaluation")
    roc_auc = None
else:
    # Threshold-based metrics use the hard predictions; ROC-AUC uses the probabilities
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred, zero_division=0)
    recall = recall_score(y_test, y_pred, zero_division=0)
    f1 = f1_score(y_test, y_pred, zero_division=0)
    roc_auc = roc_auc_score(y_test, y_pred_proba)

    print(f"Performance Metrics:")
    for metric_label, metric_value in (
        ("Accuracy:  ", accuracy),
        ("Precision: ", precision),
        ("Recall:    ", recall),
        ("F1-Score:  ", f1),
        ("ROC-AUC:   ", roc_auc),
    ):
        print(f"   {metric_label}{metric_value:.4f}")

    # Rows of the matrix are actual classes, columns are predicted classes
    print("\nConfusion Matrix:")
    cm = confusion_matrix(y_test, y_pred)
    print(f"                 Predicted")
    print(f"              Clean  Fraud")
    for row_label, cm_row in zip(("Actual Clean", "       Fraud"), cm):
        print(f"{row_label}  {cm_row[0]:5d}  {cm_row[1]:5d}")

    print("\nClassification Report:")
    report_text = classification_report(
        y_test, y_pred,
        target_names=['Clean', 'Fraud'],
        zero_division=0,
    )
    print(report_text)

MODEL EVALUATION
Performance Metrics:
   Accuracy:  1.0000
   Precision: 1.0000
   Recall:    1.0000
   F1-Score:  1.0000
   ROC-AUC:   1.0000

Confusion Matrix:
                 Predicted
              Clean  Fraud
Actual Clean   2299      0
       Fraud      0    654

Classification Report:
              precision    recall  f1-score   support

       Clean       1.00      1.00      1.00      2299
       Fraud       1.00      1.00      1.00       654

    accuracy                           1.00      2953
   macro avg       1.00      1.00      1.00      2953
weighted avg       1.00      1.00      1.00      2953



In [None]:
# FEATURE IMPORTANCE
print("FEATURE IMPORTANCE ANALYSIS")

# One row per column of X, ranked by the fitted model's importance score
importance_table = pd.DataFrame({
    'Feature': X.columns,
    'Importance': final_model.feature_importances_
})
feature_importance = importance_table.sort_values('Importance', ascending=False)

print("Top 10 Most Important Features:")
print(feature_importance.head(10).to_string(index=False))

# Plot feature importance: horizontal bars for the 20 highest-ranked features
top_features = feature_importance.head(20)
bar_positions = range(len(top_features))

importance_fig, importance_ax = plt.subplots(figsize=(12, 8))
importance_ax.barh(bar_positions, top_features['Importance'], color='steelblue')
importance_ax.set_yticks(bar_positions)
importance_ax.set_yticklabels(top_features['Feature'])
importance_ax.set_xlabel('Importance Score', fontweight='bold', fontsize=12)
importance_ax.set_ylabel('Features', fontweight='bold', fontsize=12)
importance_ax.set_title('Top 20 Feature Importances - XGBoost', fontweight='bold', fontsize=14)
importance_ax.invert_yaxis()  # most important feature at the top
importance_ax.grid(axis='x', alpha=0.3)
importance_fig.tight_layout()
importance_fig.savefig('feature_importance.png', dpi=300, bbox_inches='tight')
print("\nSaved: feature_importance.png")
plt.close(importance_fig)

# VISUALIZATIONS - RESULTS
print("GENERATING RESULT VISUALIZATIONS")

# Confusion Matrix Heatmap (uses `cm` from the evaluation cell)
if y_test.sum() > 0:
    class_names = ['Clean', 'Fraud']
    cm_fig, cm_ax = plt.subplots(figsize=(8, 6))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', cbar=True,
                xticklabels=class_names,
                yticklabels=class_names,
                ax=cm_ax)
    cm_ax.set_title('Confusion Matrix', fontweight='bold', fontsize=14)
    cm_ax.set_ylabel('Actual Label', fontweight='bold', fontsize=12)
    cm_ax.set_xlabel('Predicted Label', fontweight='bold', fontsize=12)
    cm_fig.tight_layout()
    cm_fig.savefig('confusion_matrix.png', dpi=300, bbox_inches='tight')
    print("Saved: confusion_matrix.png")
    plt.close(cm_fig)

# Fraud Probability Distribution: two side-by-side panels, each overlaying the
# score histogram of actual-clean rows with that of actual-fraud rows
fig, axes = plt.subplots(1, 2, figsize=(15, 5))

clean_mask = y_test == 0
fraud_mask = y_test == 1
panels = (
    (axes[0], y_pred_proba, 'Fraud Probability', 'Fraud Probability Distribution'),
    (axes[1], fraud_risk_rating, 'Fraud Risk Rating (1-10)', 'Fraud Risk Rating Distribution'),
)
for panel_ax, scores, x_label, panel_title in panels:
    panel_ax.hist(scores[clean_mask], bins=50, alpha=0.7, label='Clean', color='green')
    panel_ax.hist(scores[fraud_mask], bins=50, alpha=0.7, label='Fraud', color='red')
    panel_ax.set_xlabel(x_label, fontweight='bold')
    panel_ax.set_ylabel('Frequency', fontweight='bold')
    panel_ax.set_title(panel_title, fontweight='bold')
    panel_ax.legend()
    panel_ax.grid(alpha=0.3)

fig.tight_layout()
fig.savefig('fraud_probability_distribution.png', dpi=300, bbox_inches='tight')
print("✓ Saved: fraud_probability_distribution.png")
plt.close(fig)

# ROC Curve (skipped when the evaluation cell could not compute an AUC)
if roc_auc is not None:
    fpr, tpr, thresholds = roc_curve(y_test, y_pred_proba)

    roc_fig, roc_ax = plt.subplots(figsize=(10, 8))
    roc_ax.plot(fpr, tpr, color='darkorange', lw=2,
                label=f'XGBoost (AUC = {roc_auc:.4f})')
    # Diagonal reference line for a random classifier
    roc_ax.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--',
                label='Random Classifier (AUC = 0.50)')
    roc_ax.set_xlim([0.0, 1.0])
    roc_ax.set_ylim([0.0, 1.05])
    roc_ax.set_xlabel('False Positive Rate', fontweight='bold', fontsize=12)
    roc_ax.set_ylabel('True Positive Rate', fontweight='bold', fontsize=12)
    roc_ax.set_title('ROC Curve - XGBoost Fraud Detection', fontweight='bold', fontsize=14)
    roc_ax.legend(loc="lower right", fontsize=12)
    roc_ax.grid(alpha=0.3)
    roc_fig.tight_layout()
    roc_fig.savefig('roc_curve.png', dpi=300, bbox_inches='tight')
    print("Saved: roc_curve.png")
    plt.close(roc_fig)

# Precision-Recall Curve, with the area under it computed by auc()
if y_test.sum() > 0:
    precision_vals, recall_vals, _ = precision_recall_curve(y_test, y_pred_proba)
    pr_auc = auc(recall_vals, precision_vals)

    pr_fig, pr_ax = plt.subplots(figsize=(10, 8))
    pr_ax.plot(recall_vals, precision_vals, color='darkorange', lw=2,
               label=f'XGBoost (AUC = {pr_auc:.4f})')
    pr_ax.set_xlim([0.0, 1.0])
    pr_ax.set_ylim([0.0, 1.05])
    pr_ax.set_xlabel('Recall', fontweight='bold', fontsize=12)
    pr_ax.set_ylabel('Precision', fontweight='bold', fontsize=12)
    pr_ax.set_title('Precision-Recall Curve - XGBoost', fontweight='bold', fontsize=14)
    pr_ax.legend(loc="lower left", fontsize=12)
    pr_ax.grid(alpha=0.3)
    pr_fig.tight_layout()
    pr_fig.savefig('precision_recall_curve.png', dpi=300, bbox_inches='tight')
    print("Saved: precision_recall_curve.png")
    plt.close(pr_fig)

FEATURE IMPORTANCE ANALYSIS
Top 10 Most Important Features:
                                Feature  Importance
                             Unnamed: 0    0.452673
                       Total ERC20 tnxs    0.141565
              ERC20_most_rec_token_type    0.130724
Time Diff between first and last (Mins)    0.095096
                      ERC20 max val rec    0.056555
             ERC20 most sent token type    0.041905
                    total ether balance    0.020444
           Avg min between received tnx    0.016405
                           min val sent    0.014043
                     ERC20 avg val sent    0.010523

Saved: feature_importance.png
GENERATING RESULT VISUALIZATIONS
Saved: confusion_matrix.png
✓ Saved: fraud_probability_distribution.png
✓ Saved: roc_curve.png
✓ Saved: precision_recall_curve.png


In [None]:
# OUTPUT RESULTS
print("FINAL RESULTS OUTPUT")

# Human-readable class names for the predicted and the actual labels
predicted_names = ['Fraud' if pred == 1 else 'Clean' for pred in y_pred]
actual_names = ['Fraud' if actual == 1 else 'Clean' for actual in y_test]

# Create results DataFrame: one row per test transaction, ordered from the
# highest to the lowest predicted fraud probability
results_df = pd.DataFrame({
    'Transaction_Index': X_test.index,
    'Fraud_Probability': y_pred_proba,
    'Prediction': predicted_names,
    'Fraud_Risk_Rating_1_10': fraud_risk_rating,
    'Actual_Label': actual_names,
    'Correct_Prediction': y_pred == y_test
}).sort_values('Fraud_Probability', ascending=False)

print("\nSample Results (Top 10 Highest Risk):")
print(results_df.head(10).to_string(index=False))

print("\nSample Results (Top 10 Lowest Risk):")
print(results_df.tail(10).to_string(index=False))

# Save results
results_df.to_csv('fraud_detection_results_xgboost.csv', index=False)
print("\nFull results saved to: fraud_detection_results_xgboost.csv")

# Save feature importance
feature_importance.to_csv('feature_importance_xgboost.csv', index=False)
print("Feature importance saved to: feature_importance_xgboost.csv")

# SUMMARY
n_predictions = len(y_pred)
n_flagged_fraud = (y_pred == 1).sum()
n_flagged_clean = (y_pred == 0).sum()

print("SUMMARY")
print(f"Total transactions analyzed: {len(results_df):,}")
print(f"Flagged as fraud: {n_flagged_fraud:,} ({n_flagged_fraud / n_predictions * 100:.2f}%)")
print(f"Flagged as clean: {n_flagged_clean:,} ({n_flagged_clean / n_predictions * 100:.2f}%)")

# Metric recap; relies on values computed in the evaluation cell
if y_test.sum() > 0:
    print(f"\nModel Performance Summary:")
    print(f"   • Accuracy:  {accuracy:.2%} (overall correctness)")
    print(f"   • Precision: {precision:.2%} (of flagged frauds, how many were correct)")
    print(f"   • Recall:    {recall:.2%} (of actual frauds, how many were caught)")
    print(f"   • F1-Score:  {f1:.4f} (harmonic mean of precision and recall)")
    print(f"   • ROC-AUC:   {roc_auc:.4f} (overall discriminative ability)")

    # Calculate additional metrics from the flattened confusion matrix
    tn, fp, fn, tp = cm.ravel()
    specificity = tn / (tn + fp)
    print(f"   • Specificity: {specificity:.2%} (true negative rate)")

    n_actual_fraud = tp + fn
    if n_actual_fraud > 0:
        fraud_detection_rate = tp / n_actual_fraud
        print(f"   • Fraud Detection Rate: {fraud_detection_rate:.2%}")


print("FRAUD DETECTION ANALYSIS COMPLETE!")
print("Generated Files:")
generated_files = (
    "feature_distributions.png",
    "correlation_heatmap.png",
    "class_distribution.png",
    "feature_importance.png",
    "confusion_matrix.png",
    "fraud_probability_distribution.png",
    "roc_curve.png",
    "precision_recall_curve.png",
    "fraud_detection_results_xgboost.csv",
    "feature_importance_xgboost.csv",
)
for generated_file in generated_files:
    print(f"   • {generated_file}")

FINAL RESULTS OUTPUT

Sample Results (Top 10 Highest Risk):
 Transaction_Index  Fraud_Probability Prediction  Fraud_Risk_Rating_1_10 Actual_Label  Correct_Prediction
              9665            0.81414      Fraud                     2.7        Fraud                True
              9614            0.81414      Fraud                     2.7        Fraud                True
              9305            0.81414      Fraud                     2.7        Fraud                True
              9015            0.81414      Fraud                     2.7        Fraud                True
              8389            0.81414      Fraud                     2.7        Fraud                True
              9704            0.81414      Fraud                     2.7        Fraud                True
              8679            0.81414      Fraud                     2.7        Fraud                True
              9742            0.81414      Fraud                     2.7        Fraud       