In [3]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import classification_report, confusion_matrix
import warnings
warnings.filterwarnings("ignore", category=UserWarning)
warnings.filterwarnings("ignore", category=RuntimeWarning)

In [4]:
# Load the returns dataset from a CSV in the current working directory.
df = pd.read_csv(filepath_or_buffer="mock_ecommerce_returns_v2.csv")

In [5]:
def estimate_refurb_cost(row):
    """Heuristic refurbishment cost for one returned item.

    The cost is ``original_price`` scaled by a per-category share and a
    per-return-reason multiplier, rounded to 2 decimals.

    Parameters
    ----------
    row : mapping
        Must provide ``'category'``, ``'return_reason'`` and
        ``'original_price'`` (a dict or a DataFrame row both work).

    Returns
    -------
    float
        ``round(original_price * share * multiplier, 2)``.
    """
    # Share of the original price attributed to refurbishing, by category.
    category_share = {
        'Electronics': 0.18,
        'Apparel': 0.07,
        'Furniture': 0.12,
        'Books': 0.03,
        'Appliances': 0.15,
        'Toys': 0.08,
        'Accessories': 0.1,
        'Home Decor': 0.09,
    }
    # Scaling applied on top of the category share, by return reason.
    reason_multiplier = {
        'Defective': 1.3,
        'Damaged': 1.2,
        'Size issue': 0.5,
        'Color issue': 0.4,
        'Customer error': 0.8,
        'Wrong item': 0.7,
    }
    # Unknown categories fall back to 0.1; unknown reasons leave the share unscaled.
    share = category_share.get(row['category'], 0.1)
    multiplier = reason_multiplier.get(row['return_reason'], 1)
    return round(row['original_price'] * (share * multiplier), 2)

In [6]:
def estimate_shipping_cost(row):
    """Heuristic return-shipping cost for one item.

    Parameters
    ----------
    row : mapping
        Must provide ``'weight_kg'`` and ``'return_distance_km'``.

    Returns
    -------
    float
        ``weight_kg * return_distance_km * 0.35`` rounded to 2 decimals.
    """
    per_km_kg = 0.35  # ₹ charged per km·kg shipped
    kg_km = row['weight_kg'] * row['return_distance_km']
    return round(kg_km * per_km_kg, 2)


In [7]:
def estimate_recycle_value(row):
    """Heuristic recycling value for one item.

    Each category maps to a pair of coefficients; the result is
    ``weight_kg * first * second`` rounded to 2 decimals.

    Parameters
    ----------
    row : mapping
        Must provide ``'category'`` and ``'weight_kg'``.

    Returns
    -------
    float
        The rounded recycling value.
    """
    # (fraction, value) per category — presumably the recoverable share of the
    # item's weight and the value per recovered kg; confirm with the data owner.
    coefficients = {
        'Electronics': (0.15, 60),
        'Furniture': (0.10, 25),
        'Books': (0.9, 5),
        'Apparel': (0.3, 8),
        'Appliances': (0.12, 50),
        'Toys': (0.2, 10),
        'Accessories': (0.25, 15),
        'Home Decor': (0.25, 12),
    }
    # Categories missing from the table use the (0.1, 10) fallback.
    fraction, unit_value = coefficients.get(row['category'], (0.1, 10))
    recoverable = row['weight_kg'] * fraction
    return round(recoverable * unit_value, 2)


In [8]:
def estimate_donation_value(row):
    """Heuristic donation value for one item.

    Parameters
    ----------
    row : mapping
        Must provide ``'category'`` and ``'resale_price'``.

    Returns
    -------
    float
        ``resale_price`` times the category's donation rate, rounded to
        2 decimals.
    """
    # Fraction of the resale price credited when the item is donated.
    rate_by_category = {
        'Apparel': 0.4,
        'Books': 0.5,
        'Furniture': 0.3,
        'Electronics': 0.2,
        'Appliances': 0.25,
        'Toys': 0.35,
        'Accessories': 0.3,
        'Home Decor': 0.3,
    }
    # Categories not listed above use a 0.3 rate.
    fraction = rate_by_category.get(row['category'], 0.3)
    return round(row['resale_price'] * fraction, 2)


In [9]:
# For each cost/value column, keep every non-zero entry as-is and replace
# entries equal to 0 with the corresponding heuristic estimate for that row.
# The columns are processed one after another, in the order listed.
for column, estimator in (
    ('refurb_cost', estimate_refurb_cost),
    ('return_shipping_cost', estimate_shipping_cost),
    ('recycle_value', estimate_recycle_value),
    ('donation_value', estimate_donation_value),
):
    df[column] = df.apply(
        lambda record: estimator(record) if record[column] == 0 else record[column],
        axis=1,
    )



In [10]:
def best_option(row):
    """Pick the rule-based disposition label for one returned item.

    Rules are checked in order and the first match wins:

    1. ``'Resell'``    - resale price exceeds refurb + shipping + inspection cost.
    2. ``'Refurbish'`` - resale price exceeds the refurb cost alone.
    3. ``'Donate'``    - donation value exceeds recycle value.
    4. ``'Refund'``    - none of the above.

    Parameters
    ----------
    row : mapping
        Must provide ``'resale_price'``, ``'refurb_cost'``,
        ``'return_shipping_cost'`` and ``'inspection_cost'``;
        ``'donation_value'`` and ``'recycle_value'`` are only read when the
        first two rules do not match.

    Returns
    -------
    str
        One of ``'Resell'``, ``'Refurbish'``, ``'Donate'``, ``'Refund'``.
    """
    resell_costs = row['refurb_cost'] + row['return_shipping_cost'] + row['inspection_cost']
    if row['resale_price'] - resell_costs > 0:
        return 'Resell'

    if row['resale_price'] > row['refurb_cost']:
        return 'Refurbish'

    if row['donation_value'] > row['recycle_value']:
        return 'Donate'

    return 'Refund'


In [11]:
# Label every row with the rule-based disposition; this column is the
# classification target used below.
df['best_option'] = df.apply(best_option, axis='columns')

In [12]:
# Numeric model inputs. 'category' and 'return_reason' are not included.
# NOTE(review): every column read by best_option() is in this list, so the
# target is a deterministic function of the inputs.
features = [
    'original_price',
    'resale_price',
    'refurb_cost',
    'return_shipping_cost',
    'inspection_cost',
    'handling_cost',
    'recycle_value',
    'donation_value',
    'weight_kg',
    'return_distance_km',
]

X = df.loc[:, features]        # feature matrix, columns in the order listed above
y = df.loc[:, 'best_option']   # rule-derived target label


In [13]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible across runs.
# NOTE(review): no `stratify` argument is passed, so rare classes may end up
# with very few test rows.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [14]:
# Depth-limited decision tree (at most 6 levels) with a fixed seed, fitted on
# the training split.
model = DecisionTreeClassifier(random_state=42, max_depth=6)
model.fit(X=X_train, y=y_train)

In [15]:
# Score the fitted tree on the held-out rows and print per-class metrics
# followed by the raw confusion matrix.
y_pred = model.predict(X_test)
print(
    "Classification Report:\n",
    classification_report(y_true=y_test, y_pred=y_pred),
)
print(
    "\nConfusion Matrix:\n",
    confusion_matrix(y_true=y_test, y_pred=y_pred),
)



Classification Report:
               precision    recall  f1-score   support

      Donate       0.80      1.00      0.89         8
      Refund       1.00      0.92      0.96        26
   Refurbish       0.00      0.00      0.00         2
      Resell       0.97      1.00      0.98        64

    accuracy                           0.96       100
   macro avg       0.69      0.73      0.71       100
weighted avg       0.94      0.96      0.95       100


Confusion Matrix:
 [[ 8  0  0  0]
 [ 2 24  0  0]
 [ 0  0  0  2]
 [ 0  0  0 64]]


In [16]:
def compute_trv(row, decision):
    """Total Recovered Value (TRV) of an item under the chosen action.

    Parameters
    ----------
    row : mapping
        Item record; only the keys needed for the given ``decision`` are read.
    decision : str
        ``'Resell'``, ``'Refurbish'``, ``'Donate'`` or ``'Refund'``.

    Returns
    -------
    number
        * ``'Resell'``    - resale price minus refurb, shipping and inspection cost.
        * ``'Refurbish'`` - resale price minus refurb cost.
        * ``'Donate'``    - the donation value.
        * ``'Refund'``    - the negated return shipping cost (a loss).
        * anything else   - ``0``.
    """
    if decision == 'Resell':
        total_cost = row['refurb_cost'] + row['return_shipping_cost'] + row['inspection_cost']
        return row['resale_price'] - total_cost

    if decision == 'Refurbish':
        return row['resale_price'] - row['refurb_cost']

    if decision == 'Donate':
        return row['donation_value']

    if decision == 'Refund':
        # Nothing is recovered; the shipping cost is counted as a loss.
        return -row['return_shipping_cost']

    # Unrecognised action: no value recovered.
    return 0


In [17]:
def estimate_co2_saved(row, decision):
    """Estimate the CO2 saved (kg) for an item under the chosen action.

    A baseline emission of ``weight_kg * return_distance_km * 0.005`` is
    computed first; the saving is a fixed fraction of that baseline which
    depends on the action.

    Parameters
    ----------
    row : mapping
        Must provide ``'weight_kg'`` and ``'return_distance_km'``.
    decision : str
        ``'Resell'``, ``'Refurbish'``, ``'Donate'`` or ``'Refund'``.

    Returns
    -------
    number
        The saving rounded to 3 decimals, or ``0`` for an unrecognised action.
    """
    # Emission if the item were shipped back, before any reduction.
    shipped_back_co2 = row['weight_kg'] * row['return_distance_km'] * 0.005

    # Fraction of the baseline avoided by each action.
    reduction_by_action = (
        ('Resell', 0.3),
        ('Refurbish', 0.5),
        ('Donate', 0.7),
        ('Refund', 0.9),  # returnless refund
    )
    for action, fraction in reduction_by_action:
        if decision == action:
            return round(shipped_back_co2 * fraction, 3)

    # Unrecognised action: nothing saved.
    return 0


In [18]:
# A single hand-written return used to demo the pipeline end to end.
# The four cost/value fields set to 0 are overwritten with heuristic
# estimates when the sample DataFrame is built.
sample = dict(
    original_price=2500,
    resale_price=1200,
    refurb_cost=0,
    return_shipping_cost=0,
    inspection_cost=50,
    handling_cost=30,
    recycle_value=0,
    donation_value=0,
    weight_kg=2.0,
    return_distance_km=600,
    category='Electronics',
    return_reason='Damaged',
)


In [19]:
# One-row frame for the demo item; each estimated column is then overwritten
# unconditionally with its heuristic value.
sample_df = pd.DataFrame([sample])
for col, func in (
    ('refurb_cost', estimate_refurb_cost),
    ('return_shipping_cost', estimate_shipping_cost),
    ('recycle_value', estimate_recycle_value),
    ('donation_value', estimate_donation_value),
):
    sample_df[col] = sample_df.apply(func, axis='columns')


In [20]:
# Select the model inputs in training column order and take the prediction
# for the single sample row.
X_sample = sample_df.loc[:, features]
predicted_option = model.predict(X=X_sample)[0]

In [21]:
# Attach the recovered value and CO2 saving implied by the predicted action;
# the action is forwarded to each helper as its second positional argument.
sample_df['TRV'] = sample_df.apply(compute_trv, axis=1, args=(predicted_option,))
sample_df['CO2_saved_kg'] = sample_df.apply(estimate_co2_saved, axis=1, args=(predicted_option,))

In [22]:
# Report the predicted action, then the estimated costs/values and the
# derived metrics for the sample item.
print("\nPredicted Best Option:", predicted_option)
print("\n Estimated Sustainability Metrics:")
print(
    sample_df.loc[
        :,
        [
            'refurb_cost',
            'return_shipping_cost',
            'recycle_value',
            'donation_value',
            'TRV',
            'CO2_saved_kg',
        ],
    ]
)


Predicted Best Option: Resell

 Estimated Sustainability Metrics:
   refurb_cost  return_shipping_cost  recycle_value  donation_value    TRV  \
0        540.0                 420.0           18.0           240.0  190.0   

   CO2_saved_kg  
0           1.8  
