In [1]:
"""
Baseline Model - Simple Logistic Regression
Use this as your starting point
"""

import pandas as pd
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import accuracy_score, classification_report
import joblib

# Load data
train = pd.read_csv('ecommerce_returns_train.csv')
test = pd.read_csv('ecommerce_returns_test.csv')

def preprocess(df):
    """Simple preprocessing pipeline"""
    df_processed = df.copy()

    # Encode categorical: product_category
    le_category = LabelEncoder()
    df_processed['product_category_encoded'] = le_category.fit_transform(
        df_processed['product_category']
    )

    # Handle missing sizes (Fashion items only have sizes)
    if df_processed['size_purchased'].notna().any():
        most_common_size = df_processed['size_purchased'].mode()[0]
        df_processed['size_purchased'].fillna(most_common_size, inplace=True)

        le_size = LabelEncoder()
        df_processed['size_encoded'] = le_size.fit_transform(
            df_processed['size_purchased']
        )

    # Feature selection
    feature_cols = [
        'customer_age', 'customer_tenure_days', 'product_category_encoded',
        'product_price', 'days_since_last_purchase', 'previous_returns',
        'product_rating', 'size_encoded', 'discount_applied'
    ]

    X = df_processed[feature_cols]
    y = df_processed['is_return']

    return X, y

# Prepare data
X_train, y_train = preprocess(train)
X_test, y_test = preprocess(test)

# Scale features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Train baseline model
baseline_model = LogisticRegression(random_state=42, max_iter=1000)
baseline_model.fit(X_train_scaled, y_train)

# Predictions
y_pred = baseline_model.predict(X_test_scaled)

# Basic evaluation
print("Baseline Model Performance")
print("=" * 50)
print(f"Accuracy: {accuracy_score(y_test, y_pred):.4f}")
print("\nClassification Report:")
print(classification_report(y_test, y_pred))

# Save artifacts
joblib.dump(baseline_model, 'baseline_model.pkl')
joblib.dump(scaler, 'scaler.pkl')

print("\n" + "=" * 50)
print("YOUR TASK: Evaluate thoroughly and improve this baseline")
print("=" * 50)

Baseline Model Performance
Accuracy: 0.7475

Classification Report:
              precision    recall  f1-score   support

           0       0.75      1.00      0.86      1495
           1       0.00      0.00      0.00       505

    accuracy                           0.75      2000
   macro avg       0.37      0.50      0.43      2000
weighted avg       0.56      0.75      0.64      2000


YOUR TASK: Evaluate thoroughly and improve this baseline


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df_processed['size_purchased'].fillna(most_common_size, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df_processed['size_purchased'].fillna(most_common_size, inplace=True)
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average

In [None]:
"""
The baseline show a terrible performance precision, recall,
F1 is 0 so this indicate is not predict anything the model is not working well,
the only thing is "good" is the accuracy because a good accuracy is above 70% and this have 75%
but this is not relevant because is not predicting the orders will be return
"""

In [None]:
"""
1. Define "success" in business terms
A/ the ideal bussiness is not doesn't returns because this cause the company doesn't have
    to pay 400k for month but this is very difficult for not said impossible because all the
    bussiness have a rate to return.
2. Recommend 2-3 metrics aligned with business goals
A/ count orders return vs all orders for day/month and cost split in return cost and intervencion cost
   as well the count of order with sucessful intervention
3. Analyze false positive vs. false negative trade-offs
  the threshold for this is 0.5 we should try to reduce this fixing the model for try to increase
  the FP because will spend less money
4. Calculate financial impact of predictions
currently the model loss money 9,090
5. Determine optimal threshold
0.14
"""

In [3]:
!pip3 install catboost

Collecting catboost
  Downloading catboost-1.2.8-cp312-cp312-manylinux2014_x86_64.whl.metadata (1.2 kB)
Downloading catboost-1.2.8-cp312-cp312-manylinux2014_x86_64.whl (99.2 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m99.2/99.2 MB[0m [31m6.0 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: catboost
Successfully installed catboost-1.2.8


In [16]:
import pandas as pd
from catboost import CatBoostClassifier, Pool
from sklearn.metrics import accuracy_score, classification_report

# Load data
train = pd.read_csv('ecommerce_returns_train.csv')
test = pd.read_csv('ecommerce_returns_test.csv')

def preprocess(df):
    """Simple preprocessing pipeline"""
    df_processed = df.copy()

    # Encode categorical: product_category
    le_category = LabelEncoder()
    df_processed['product_category_encoded'] = le_category.fit_transform(
        df_processed['product_category']
    )

    # Handle missing sizes (Fashion items only have sizes)
    if df_processed['size_purchased'].notna().any():
        most_common_size = df_processed['size_purchased'].mode()[0]
        df_processed['size_purchased'].fillna(most_common_size, inplace=True)

        le_size = LabelEncoder()
        df_processed['size_encoded'] = le_size.fit_transform(
            df_processed['size_purchased']
        )

    # Feature selection
    feature_cols = [
        'customer_age', 'customer_tenure_days', 'product_category_encoded',
        'product_price', 'days_since_last_purchase', 'previous_returns',
        'product_rating', 'size_encoded', 'discount_applied'
    ]

    X = df_processed[feature_cols]
    y = df_processed['is_return']

    return X, y

# Prepare data
X_train, y_train = preprocess(train)
X_test, y_test = preprocess(test)

# Scale features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# 3. Identify categorical variables
cat_features = X_train.select_dtypes(include=['object', 'category']).columns.tolist()

# 4. Convert into CatBoost Pools
train_pool = Pool(X_train, y_train, cat_features=cat_features)
test_pool = Pool(X_test, y_test, cat_features=cat_features)

# 5. Train CatBoost model
model = CatBoostClassifier(
    iterations=500,
    learning_rate=0.05,
    depth=6,
    loss_function='Logloss',
    eval_metric='F1',
    class_weights=[1, 6],  # <-- FIX
    random_seed=42,
    verbose=100

)

model.fit(train_pool, eval_set=test_pool)

# 6. Predict
y_pred = model.predict(X_test)



# 7. Basic evaluation (same format as your output)
print("Baseline Model Performance")
print("=" * 60)
print(f"Accuracy: {accuracy_score(y_test, y_pred):.4f}")
print("\nClassification Report:")
print(classification_report(y_test, y_pred))




The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df_processed['size_purchased'].fillna(most_common_size, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df_processed['size_purchased'].fillna(most_common_size, inplace=True)


0:	learn: 0.8022229	test: 0.8021178	best: 0.8021178 (0)	total: 7.65ms	remaining: 3.82s
100:	learn: 0.8134410	test: 0.7996749	best: 0.8034074 (60)	total: 1.1s	remaining: 4.36s
200:	learn: 0.8247961	test: 0.7864945	best: 0.8034074 (60)	total: 2.12s	remaining: 3.15s
300:	learn: 0.8427448	test: 0.7694335	best: 0.8034074 (60)	total: 2.96s	remaining: 1.96s
400:	learn: 0.8558744	test: 0.7527207	best: 0.8034074 (60)	total: 3.69s	remaining: 910ms
499:	learn: 0.8664386	test: 0.7479173	best: 0.8034074 (60)	total: 4.19s	remaining: 0us

bestTest = 0.8034074271
bestIteration = 60

Shrink model to first 61 iterations.
Baseline Model Performance
Accuracy: 0.2665

Classification Report:
              precision    recall  f1-score   support

           0       0.94      0.02      0.04      1495
           1       0.26      1.00      0.41       505

    accuracy                           0.27      2000
   macro avg       0.60      0.51      0.22      2000
weighted avg       0.77      0.27      0.13      

In [17]:
"""
Based in the previous

Shrink model to first 61 iterations.
Baseline Model Performance
============================================================
Accuracy: 0.2665

Classification Report:
              precision    recall  f1-score   support

           0       0.94      0.02      0.04      1495
           1       0.26      1.00      0.41       505

    accuracy                           0.27      2000
   macro avg       0.60      0.51      0.22      2000
weighted avg       0.77      0.27      0.13      2000


based in this results the model have a so much better behavior because is detecting
the people will return so this is very important for the company because they can try
to intervent the return but somethimes they will try to intervent people dont want to
return but this is so much better than doesn't detect this population.
"""

