# Risk and Return Prediction for E-commerce Products

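This notebook walks through an interactive analysis of the return and risk prediction models: loading the e-commerce returns dataset, exploring it, preprocessing, training and evaluating the models, and scoring sample rows.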


In [None]:
import sys
import os
# Make the project root (the parent of the kernel's working directory)
# importable so the `src.*` imports below resolve. The membership check keeps
# a re-run of this cell from appending the same entry to sys.path again.
PROJECT_ROOT = os.path.dirname(os.getcwd())
if PROJECT_ROOT not in sys.path:
    sys.path.append(PROJECT_ROOT)

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from src.kaggle_data_loader import load_kaggle_dataset
from src.preprocessing import preprocess_data
from src.models import train_multiple_models, RiskReturnPredictor
from src.evaluation import (plot_confusion_matrix, plot_roc_curve, 
                           plot_feature_importance, plot_model_comparison,
                           print_evaluation_summary)

# Plot styling: start from matplotlib's "default" style sheet, then switch the
# color palette to seaborn's "husl". Keep this order — applying a style sheet
# after set_palette could overwrite the palette's color cycle.
plt.style.use("default")
sns.set_palette(palette="husl")


## 1. Load or Generate Dataset


In [None]:
# Read the Kaggle returns CSV. The path is relative, so it is resolved against
# the kernel's working directory.
kaggle_path = '../data/raw/ecommerce_returns_kaggle.csv'
df = load_kaggle_dataset(kaggle_path)

# Quick sanity numbers: frame dimensions and the mean of the `returned` column
print(f"Dataset shape: {df.shape}")
overall_return_rate = df['returned'].mean()
print(f"Return rate: {overall_return_rate:.2%}")

# Last expression: preview of the first rows as the cell output
df.head()


## 2. Data Exploration


In [None]:
# Summary statistics table from DataFrame.describe(), shown as the cell output
summary_stats = df.describe()
summary_stats


In [None]:
# Mean of `returned` per product category, sorted from highest to lowest
return_by_category = (
    df.groupby('product_category')['returned']
    .mean()
    .sort_values(ascending=False)
)

# Horizontal bar chart: one bar per category, bar length = mean of `returned`
fig, ax = plt.subplots(figsize=(10, 6))
ax.barh(return_by_category.index, return_by_category.values)
ax.set_xlabel('Return Rate')
ax.set_title('Return Rate by Product Category')
ax.grid(True, alpha=0.3, axis='x')  # vertical guide lines only
fig.tight_layout()
plt.show()


## 3. Preprocess Data


In [None]:
# Build the train/test inputs for the return-prediction task.
# `returned` is the target; test_size=0.2 and random_state=42 are handed to
# preprocess_data, which returns the splits plus the feature names, scaler and
# label encoders it used.
(
    X_train,
    X_test,
    y_train,
    y_test,
    feature_names,
    scaler,
    label_encoders,
) = preprocess_data(df, target='returned', test_size=0.2, random_state=42)

# Report the size of each split and the number of features
for split_label, split_features in (("Training set", X_train), ("Test set", X_test)):
    print(f"{split_label}: {len(split_features)} samples")
print(f"Features: {len(feature_names)}")


## 4. Train Models


In [None]:
# Fit every candidate model on the return-prediction split.
# Later cells index `results` by model name and read the 'metrics', 'y_pred',
# 'y_pred_proba', 'feature_importance' and 'predictor' entries of each item.
results = train_multiple_models(
    X_train, X_test, y_train, y_test, feature_names
)


## 5. Model Evaluation


In [None]:
# Print the evaluation summary for the trained models in `results`
# (the output format is defined by src.evaluation.print_evaluation_summary).
print_evaluation_summary(results)


In [None]:
# Model comparison on accuracy.
# plot_model_comparison is called without any axes, so it is left to draw on
# its own (or the current) figure; creating a multi-panel subplot grid first
# would only add empty panels to the output.
# NOTE(review): if plot_model_comparison accepts an `ax` argument, a 1x3 grid
# comparing several metrics side by side may be what was intended — confirm
# against src.evaluation.
plot_model_comparison(results, metric='accuracy')
plt.show()


In [None]:
# Detailed analysis for the best model: the entry of `results` whose
# 'metrics' -> 'f1_score' value is highest.
best_model_name = max(results.keys(), key=lambda x: results[x]['metrics']['f1_score'])
best_result = results[best_model_name]

# One row, three panels: confusion matrix | ROC curve | feature importance
fig, axes = plt.subplots(1, 3, figsize=(18, 5))

# Confusion matrix
plot_confusion_matrix(y_test, best_result['y_pred'], best_model_name, ax=axes[0])

# ROC curve
plot_roc_curve(y_test, best_result['y_pred_proba'], best_model_name, ax=axes[1])

# Feature importance
feature_importance = best_result['feature_importance']
# Explicit None/length test instead of bare truthiness: a multi-element numpy
# array raises "truth value is ambiguous" under `if arr:`, while None and
# empty containers are still treated as "no importance available".
has_feature_importance = feature_importance is not None and len(feature_importance) > 0
if has_feature_importance:
    plot_feature_importance(feature_importance, best_model_name,
                          top_n=15, ax=axes[2])
else:
    # Replace the otherwise empty axes frame with an explanatory placeholder
    axes[2].set_axis_off()
    axes[2].text(0.5, 0.5, f"No feature importance available\nfor {best_model_name}",
                 ha='center', va='center', transform=axes[2].transAxes)

plt.tight_layout()
plt.show()


## 6. Risk Prediction


In [None]:
# Build a second train/test split, this time with `risk_level` as the target
# (same test_size and random_state arguments as the return-prediction split).
(
    X_train_risk,
    X_test_risk,
    y_train_risk,
    y_test_risk,
    feature_names_risk,
    scaler_risk,
    label_encoders_risk,
) = preprocess_data(df, target='risk_level', test_size=0.2, random_state=42)

# Fit an XGBoost-backed predictor on the risk split and score it on the test part
risk_predictor = RiskReturnPredictor(model_type='xgboost', random_state=42)
risk_predictor.train(X_train_risk, y_train_risk)
risk_metrics, y_pred_risk, y_pred_proba_risk = risk_predictor.evaluate(X_test_risk, y_test_risk)

# Metric report: (display label, key in risk_metrics), printed in this order
metric_rows = (
    ("Accuracy", 'accuracy'),
    ("Precision", 'precision'),
    ("Recall", 'recall'),
    ("F1-Score", 'f1_score'),
    ("ROC-AUC", 'roc_auc'),
)
print("Risk Prediction Results:")
for metric_label, metric_key in metric_rows:
    # Pad "<label>:" to 10 characters so the values line up in one column
    heading = f"{metric_label}:"
    print(f"  {heading:<10} {risk_metrics[metric_key]:.4f}")


## 7. Make Example Predictions on Test-Set Samples


In [None]:
# Example: score a handful of test-set rows with both models.
# A seeded Generator makes the chosen rows (and therefore the printed output)
# reproducible across Restart & Run All.
rng = np.random.default_rng(42)

# Sample only positions that exist in both test sets, and never ask for more
# rows than are available (choice(..., replace=False) raises otherwise).
n_available = min(len(X_test), len(X_test_risk))
n_samples = min(10, n_available)
sample_indices = rng.choice(n_available, size=n_samples, replace=False)

# NOTE(review): the same positional index is used for X_test and X_test_risk.
# That pairs up the same underlying row only if preprocess_data produces
# identical splits for both targets — confirm (e.g. no target-stratified split).
# NOTE(review): `.iloc` assumes both test sets are pandas objects — confirm
# against preprocess_data's return types.
for idx in sample_indices:
    # Column 1 of predict_proba is read as the probability of interest
    return_prob = results[best_model_name]['predictor'].predict_proba(X_test.iloc[[idx]])[0, 1]
    # NOTE(review): if risk_level has more than two classes, column 1 is just
    # one class's probability rather than an overall "risk" — confirm.
    risk_prob = risk_predictor.predict_proba(X_test_risk.iloc[[idx]])[0, 1]

    print(f"Sample {idx}: Return Probability = {return_prob:.2%}, Risk Probability = {risk_prob:.2%}")
