# Imports and definitions

In [1]:
from pathlib import Path

import polars as pl

from sklearn.model_selection import StratifiedKFold, cross_val_score

from sklearn.svm import LinearSVC
from sklearn.ensemble import RandomForestClassifier, HistGradientBoostingClassifier
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
from catboost import CatBoostClassifier


_ = pl.Config.set_tbl_cols(None)
_ = pl.Config.set_fmt_str_lengths(500)
_ = pl.Config.set_fmt_float("full")

In [2]:
import warnings
warnings.filterwarnings('ignore', category=RuntimeWarning, module='sklearn')

In [3]:
base_dir = Path('/Users/danlab/code/magenta-task/')
code_dir = base_dir / 'notebooks'
data_dir = code_dir / "data"
features_dir = data_dir / 'features'
train_dir = data_dir / 'train'


# Load data

In [4]:
%%time

features = pl.read_parquet(features_dir / 'features_cleaned-v0.parquet')
train = pl.read_parquet(train_dir / 'data-v0-80.parquet')

CPU times: user 33.2 ms, sys: 14.7 ms, total: 47.9 ms
Wall time: 62.5 ms


In [5]:
features.head()

rating_account_id,customer_id,age,contract_lifetime_days,remaining_binding_days,has_special_offer,is_magenta1_customer,available_gb,gross_mrc,has_done_upselling,completion_rate,is_bounded,is_huawei,is_oneplus,is_samsung,is_xiaomi,is_iphone,n_contracts_per_customer,avg_monthly_usage_gb,total_usage_gb,max_monthly_usage_gb,months_with_roaming,ever_used_roaming,zero_usage_months,active_usage_months,max_delta_1mo_increase,max_delta_1mo_decrease,months_with_delta_1mo_increase,months_with_delta_1mo_decrease,months_with_no_delta_1mo_change,avg_delta_2mo,delta_2mo_volatility,max_delta_2mo_increase,max_delta_2mo_decrease,months_with_delta_2mo_increase,months_with_delta_2mo_decrease,months_with_no_delta_2mo_change,max_delta_3mo_increase,max_delta_3mo_decrease,months_with_delta_3mo_increase,months_with_delta_3mo_decrease,months_with_no_delta_3mo_change,last_1_delta_1mo,last_2_delta_1mo,last_3_delta_1mo,last_1_delta_2mo,last_2_delta_2mo,last_1_delta_3mo,n_rechnungsanfragen,n_produkte&services-tarifdetails,n_prolongation,n_produkte&services-tarifwechsel,days_since_last_rechnungsanfragen,days_since_last_produkte&services-tarifdetails,days_since_last_prolongation,days_since_last_produkte&services-tarifwechsel,times_in_p1,times_in_p2,times_in_p3,times_in_p4,times_in_p5
str,str,i64,i64,i64,bool,bool,i64,f64,bool,f64,bool,bool,bool,bool,bool,bool,u32,f64,f64,f64,u32,bool,u32,u32,f64,f64,u32,u32,u32,f64,f64,f64,f64,u32,u32,u32,f64,f64,u32,u32,u32,f64,f64,f64,f64,f64,f64,i64,i64,i64,i64,i64,i64,i64,i64,i32,i32,i32,i32,i32
"""289094""","""4.161115""",36,878,325,False,False,20,70.0,False,0.73,True,False,False,False,False,True,1,0.28,1.1,0.8,1,True,1,3,-0.1,-0.6,0,3,0,-0.45,0.35,-0.2,-0.7,0,2,0,-0.8,-0.8,0,1,0,-0.1,-0.1,-0.6,-0.2,-0.7,-0.8,0,0,0,0,-1,-1,-1,-1,4,0,0,0,0
"""677626""","""2.429976""",34,998,614,False,False,0,5.0,False,0.62,True,False,False,True,False,False,1,0.65,2.6,1.0,1,True,0,4,0.7,-0.3,1,2,0,0.0,0.71,0.5,-0.5,1,1,0,0.2,0.2,1,0,0,0.7,-0.2,-0.3,0.5,-0.5,0.2,0,0,1,1,-1,-1,87,118,0,0,0,0,4
"""769928""","""3.875044""",36,37,-26,False,True,50,16.94,False,3.36,False,False,False,True,False,False,2,0.6,2.4,1.0,0,False,0,4,0.4,-0.7,1,2,0,-0.1,0.28,0.1,-0.3,1,1,0,-0.6,-0.6,0,1,0,-0.3,0.4,-0.7,0.1,-0.3,-0.6,0,0,0,0,-1,-1,-1,-1,4,0,0,0,0
"""873260""","""4.649933""",50,503,-149,False,True,20,30.2,True,1.42,False,False,False,False,False,True,1,0.38,1.5,0.9,0,False,0,4,0.6,-0.7,2,1,0,0.35,0.64,0.8,-0.1,1,1,0,0.1,0.1,1,0,0,-0.7,0.6,0.2,-0.1,0.8,0.1,0,0,0,0,-1,-1,-1,-1,4,0,0,0,0
"""692379""","""4.382165""",46,80,-25,False,True,40,60.71,False,1.45,False,True,False,False,False,False,1,0.55,2.2,0.8,2,True,0,4,0.6,-0.4,1,2,0,-0.1,0.71,0.4,-0.6,1,1,0,0.0,0.0,0,0,1,0.6,-0.2,-0.4,0.4,-0.6,0.0,0,0,0,0,-1,-1,-1,-1,4,0,0,0,0


In [6]:
train.head()

age,contract_lifetime_days,remaining_binding_days,has_special_offer,is_magenta1_customer,available_gb,gross_mrc,completion_rate,is_bounded,is_huawei,is_oneplus,is_samsung,is_xiaomi,is_iphone,n_contracts_per_customer,avg_monthly_usage_gb,total_usage_gb,max_monthly_usage_gb,months_with_roaming,ever_used_roaming,zero_usage_months,active_usage_months,max_delta_1mo_increase,max_delta_1mo_decrease,months_with_delta_1mo_increase,months_with_delta_1mo_decrease,months_with_no_delta_1mo_change,avg_delta_2mo,delta_2mo_volatility,max_delta_2mo_increase,max_delta_2mo_decrease,months_with_delta_2mo_increase,months_with_delta_2mo_decrease,months_with_no_delta_2mo_change,max_delta_3mo_increase,max_delta_3mo_decrease,months_with_delta_3mo_increase,months_with_delta_3mo_decrease,months_with_no_delta_3mo_change,last_1_delta_1mo,last_2_delta_1mo,last_3_delta_1mo,last_1_delta_2mo,last_2_delta_2mo,last_1_delta_3mo,n_rechnungsanfragen,n_produkte&services-tarifdetails,n_prolongation,n_produkte&services-tarifwechsel,days_since_last_rechnungsanfragen,days_since_last_produkte&services-tarifdetails,days_since_last_prolongation,days_since_last_produkte&services-tarifwechsel,times_in_p1,times_in_p2,times_in_p3,times_in_p4,times_in_p5,has_done_upselling
i64,i64,i64,bool,bool,i64,f64,f64,bool,bool,bool,bool,bool,bool,u32,f64,f64,f64,u32,bool,u32,u32,f64,f64,u32,u32,u32,f64,f64,f64,f64,u32,u32,u32,f64,f64,u32,u32,u32,f64,f64,f64,f64,f64,f64,i64,i64,i64,i64,i64,i64,i64,i64,i32,i32,i32,i32,i32,bool
41,20,15,True,False,10,63.37,0.57,True,False,False,True,False,False,2,3.38,13.5,4.9,0,False,0,4,0.9,-2.3,1,2,0,-2.15,1.06,-1.4,-2.9,0,2,0,-2.0,-2.0,0,1,0,-0.6,-2.3,0.9,-2.9,-1.4,-2.0,1,3,0,2,11,108,-1,140,1,3,0,0,0,False
31,30,-23,False,False,50,16.94,4.29,False,False,False,False,False,True,3,8.9,35.6,10.9,1,True,0,4,2.9,-4.4,1,2,0,-3.3,2.55,-1.5,-5.1,0,2,0,-2.2,-2.2,0,1,0,2.9,-4.4,-0.7,-1.5,-5.1,-2.2,0,0,1,1,-1,-1,134,149,4,0,0,0,0,False
25,1794,182,True,True,10,48.78,0.91,True,False,False,False,False,True,4,48.32,193.3,68.9,0,False,0,4,23.7,-36.2,1,2,0,4.95,24.68,22.4,-12.5,1,1,0,-13.8,-13.8,0,1,0,-36.2,23.7,-1.3,-12.5,22.4,-13.8,0,2,0,0,-1,2,-1,-1,0,0,0,0,4,True
91,1375,-242,False,False,20,39.49,1.21,False,False,False,True,False,False,5,36.45,145.8,53.2,0,False,0,4,16.3,-21.8,1,2,0,-17.2,16.55,-5.5,-28.9,0,2,0,-12.6,-12.6,0,1,0,-7.1,-21.8,16.3,-28.9,-5.5,-12.6,0,0,0,0,-1,-1,-1,-1,0,0,0,0,4,False
44,292,-93,True,False,10,24.9,1.47,False,False,False,True,False,False,1,8.18,32.7,11.8,2,True,0,4,4.9,-1.8,2,1,0,2.35,4.74,5.7,-1.0,1,1,0,3.9,3.9,1,0,0,4.9,0.8,-1.8,5.7,-1.0,3.9,0,0,0,0,-1,-1,-1,-1,0,0,2,1,1,False


---

# Models

## Choosing evaluation metrics for model performance

When selecting metrics to evaluate the models, there are several options depending on the nature of the campaign and business objectives:

1. **Precision**
This is preferred when the campaign involves **human interactions and is time-constrained**. In this case, the goal is to **maximize the effective use of limited time** and minimize wasted agent effort.

2. **Recall** 
This is more suitable for **online or telematic campaigns**, where it is feasible to contact all potential customers at lower marginal cost. In this case, the priority is to **reach as many relevant targets as possible**, capturing maximum revenue opportunity even if it includes some false positives (this is may be a problem because false positives still have costs: customer annoyance, brand perception, potential churn).

3. **F1-Score**
The F1-score is ideal for **balancing optimization**, particularly in the following scenarios:
    - **Mixed deployment** where one model serves both human and automated channels  
    - **Customer experience considerations** where you must balance reach with relevance to avoid over-targeting

## Prepare the data

In [7]:

X = train.select(pl.exclude(['rating_account_id', 'customer_id', 'has_done_upselling']))
y = train.select('has_done_upselling')


skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

In [8]:
performance_dict  = {
    'Algorithm': list(),
    'Score': list(),
    'Metric': list()
}

## Support Vector Machine

In [9]:
%%time
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

metric = 'precision'

# Linear Support Vector Machine
svm = LinearSVC()
cv_scores = cross_val_score(svm, X_scaled, y.to_numpy().ravel(), cv=skf, scoring=metric)
print(f"Mean CV {metric}: {cv_scores.mean():.4f} (+/- {cv_scores.std() * 2:.4f})")

performance_dict['Algorithm'].append('Linear Support Vector Machine')
performance_dict['Score'].append(cv_scores.mean())
performance_dict['Metric'].append(metric)

  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])


Mean CV precision: 0.0000 (+/- 0.0000)
CPU times: user 978 ms, sys: 119 ms, total: 1.1 s
Wall time: 1.06 s


  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])


In [10]:
%%time
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

metric = 'accuracy'

# Linear Support Vector Machine
svm = LinearSVC()
cv_scores = cross_val_score(svm, X_scaled, y.to_numpy().ravel(), cv=skf, scoring=metric)
print(f"Mean CV {metric}: {cv_scores.mean():.4f} (+/- {cv_scores.std() * 2:.4f})")

performance_dict['Algorithm'].append('Linear Support Vector Machine')
performance_dict['Score'].append(cv_scores.mean())
performance_dict['Metric'].append(metric)

Mean CV accuracy: 0.9295 (+/- 0.0000)
CPU times: user 941 ms, sys: 106 ms, total: 1.05 s
Wall time: 998 ms


Model predicts always 0

## RandomForest

In [11]:
%%time

metric = 'f1'

# Random Forest Classifier
rf = RandomForestClassifier()

cv_scores = cross_val_score(rf, X, y.to_numpy().ravel(), cv=skf, scoring=metric)
print(f"Mean CV {metric}: {cv_scores.mean():.4f} (+/- {cv_scores.std() * 2:.4f})")

performance_dict['Algorithm'].append('Random Forest')
performance_dict['Score'].append(cv_scores.mean())
performance_dict['Metric'].append(metric)

Mean CV f1: 0.0000 (+/- 0.0000)
CPU times: user 1min, sys: 823 ms, total: 1min
Wall time: 1min 1s


In [12]:
%%time

metric = 'accuracy'

# Random Forest Classifier
rf = RandomForestClassifier()

cv_scores = cross_val_score(rf, X, y.to_numpy().ravel(), cv=skf, scoring=metric)
print(f"Mean CV {metric}: {cv_scores.mean():.4f} (+/- {cv_scores.std() * 2:.4f})")

performance_dict['Algorithm'].append('Random Forest')
performance_dict['Score'].append(cv_scores.mean())
performance_dict['Metric'].append(metric)

Mean CV accuracy: 0.9295 (+/- 0.0000)
CPU times: user 1min 2s, sys: 1.79 s, total: 1min 4s
Wall time: 1min 6s


In [13]:
%%time

metric = 'recall'

# Random Forest Classifier
rf = RandomForestClassifier()

cv_scores = cross_val_score(rf, X, y.to_numpy().ravel(), cv=skf, scoring=metric)
print(f"Mean CV {metric}: {cv_scores.mean():.4f} (+/- {cv_scores.std() * 2:.4f})")

performance_dict['Algorithm'].append('Random Forest')
performance_dict['Score'].append(cv_scores.mean())
performance_dict['Metric'].append(metric)

Mean CV recall: 0.0000 (+/- 0.0000)
CPU times: user 1min 1s, sys: 1.31 s, total: 1min 2s
Wall time: 1min 4s


Model predicts always 0

## HistGradientBoostingClassifier

In [14]:
%%time

metric = 'recall'

# Histogram-based Gradient Boosting
hist_boost = HistGradientBoostingClassifier()

cv_scores = cross_val_score(hist_boost, X, y.to_numpy().ravel(), cv=skf, scoring=metric)
print(f"Mean CV {metric}: {cv_scores.mean():.4f} (+/- {cv_scores.std() * 2:.4f})")

performance_dict['Algorithm'].append('HistGradientBoostingClassifier')
performance_dict['Score'].append(cv_scores.mean())
performance_dict['Metric'].append(metric)

Mean CV recall: 0.0000 (+/- 0.0000)
CPU times: user 11.2 s, sys: 4.97 s, total: 16.2 s
Wall time: 2.15 s


In [15]:
%%time

metric = 'accuracy'

# Histogram-based Gradient Boosting
hist_boost = HistGradientBoostingClassifier()

cv_scores = cross_val_score(hist_boost, X, y.to_numpy().ravel(), cv=skf, scoring=metric)
print(f"Mean CV {metric}: {cv_scores.mean():.4f} (+/- {cv_scores.std() * 2:.4f})")

performance_dict['Algorithm'].append('HistGradientBoostingClassifier')
performance_dict['Score'].append(cv_scores.mean())
performance_dict['Metric'].append(metric)

Mean CV accuracy: 0.9295 (+/- 0.0000)
CPU times: user 11.9 s, sys: 7.49 s, total: 19.4 s
Wall time: 2.83 s


In [16]:
%%time

metric = 'f1'

# Histogram-based Gradient Boosting
hist_boost = HistGradientBoostingClassifier()

cv_scores = cross_val_score(hist_boost, X, y.to_numpy().ravel(), cv=skf, scoring=metric)
print(f"Mean CV {metric}: {cv_scores.mean():.4f} (+/- {cv_scores.std() * 2:.4f})")

performance_dict['Algorithm'].append('HistGradientBoostingClassifier')
performance_dict['Score'].append(cv_scores.mean())
performance_dict['Metric'].append(metric)

Mean CV f1: 0.0000 (+/- 0.0000)
CPU times: user 12.1 s, sys: 6.82 s, total: 18.9 s
Wall time: 2.52 s


## xGBoost

In [17]:
%%time

metric = 'f1'

# XGBoost Classifier
xgb = XGBClassifier(eval_metric='logloss')

cv_scores = cross_val_score(xgb, X, y.to_numpy().ravel(), cv=skf, scoring=metric)
print(f"Mean CV {metric}: {cv_scores.mean():.4f} (+/- {cv_scores.std() * 2:.4f})")

performance_dict['Algorithm'].append('XGBoost')
performance_dict['Score'].append(cv_scores.mean())
performance_dict['Metric'].append(metric)

Mean CV f1: 0.0035 (+/- 0.0050)
CPU times: user 4.66 s, sys: 1.8 s, total: 6.47 s
Wall time: 1.81 s


In [18]:
%%time

metric = 'accuracy'

# XGBoost Classifier
xgb = XGBClassifier(eval_metric='logloss')

cv_scores = cross_val_score(xgb, X, y.to_numpy().ravel(), cv=skf, scoring=metric)
print(f"Mean CV {metric}: {cv_scores.mean():.4f} (+/- {cv_scores.std() * 2:.4f})")

performance_dict['Algorithm'].append('XGBoost')
performance_dict['Score'].append(cv_scores.mean())
performance_dict['Metric'].append(metric)

Mean CV accuracy: 0.9291 (+/- 0.0004)
CPU times: user 4.31 s, sys: 1.35 s, total: 5.66 s
Wall time: 1.57 s


In [19]:
%%time

metric = 'precision'

# XGBoost Classifier
xgb = XGBClassifier(eval_metric='logloss')

cv_scores = cross_val_score(xgb, X, y.to_numpy().ravel(), cv=skf, scoring=metric)
print(f"Mean CV {metric}: {cv_scores.mean():.4f} (+/- {cv_scores.std() * 2:.4f})")

performance_dict['Algorithm'].append('XGBoost')
performance_dict['Score'].append(cv_scores.mean())
performance_dict['Metric'].append(metric)

Mean CV precision: 0.1844 (+/- 0.2392)
CPU times: user 4.36 s, sys: 1.35 s, total: 5.71 s
Wall time: 1.67 s


## LightGBM

In [20]:
%%time

metric = 'precision'

# LightGBM Classifier
lgbm = LGBMClassifier(verbose=0)

cv_scores = cross_val_score(lgbm, X.to_numpy(), y.to_numpy().ravel(), cv=skf, scoring=metric)
print(f"Mean CV {metric}: {cv_scores.mean():.4f} (+/- {cv_scores.std() * 2:.4f})")

performance_dict['Algorithm'].append('LightGBM')
performance_dict['Score'].append(cv_scores.mean())
performance_dict['Metric'].append(metric)

  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])


Mean CV precision: 0.0000 (+/- 0.0000)
CPU times: user 4.61 s, sys: 4.58 s, total: 9.19 s
Wall time: 3.27 s


  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])


In [21]:
%%time

metric = 'f1'

# LightGBM Classifier
lgbm = LGBMClassifier(verbose=0)

cv_scores = cross_val_score(lgbm, X.to_numpy(), y.to_numpy().ravel(), cv=skf, scoring=metric)
print(f"Mean CV {metric}: {cv_scores.mean():.4f} (+/- {cv_scores.std() * 2:.4f})")

performance_dict['Algorithm'].append('LightGBM')
performance_dict['Score'].append(cv_scores.mean())
performance_dict['Metric'].append(metric)



Mean CV f1: 0.0000 (+/- 0.0000)
CPU times: user 4.61 s, sys: 4.57 s, total: 9.18 s
Wall time: 3.23 s




In [22]:
%%time

metric = 'accuracy'

# LightGBM Classifier
lgbm = LGBMClassifier(verbose=0)

cv_scores = cross_val_score(lgbm, X.to_numpy(), y.to_numpy().ravel(), cv=skf, scoring=metric)
print(f"Mean CV {metric}: {cv_scores.mean():.4f} (+/- {cv_scores.std() * 2:.4f})")

performance_dict['Algorithm'].append('LightGBM')
performance_dict['Score'].append(cv_scores.mean())
performance_dict['Metric'].append(metric)



Mean CV accuracy: 0.9295 (+/- 0.0001)
CPU times: user 4.47 s, sys: 4.39 s, total: 8.86 s
Wall time: 3.02 s




Model predicts always 0

## Catboost

In [23]:
%%time

metric = 'accuracy'

# CatBoost Classifier
catboost = CatBoostClassifier(verbose=0)

cv_scores = cross_val_score(catboost, X.to_numpy(), y.to_numpy().ravel(), cv=skf, scoring=metric)
print(f"Mean CV {metric}: {cv_scores.mean():.4f} (+/- {cv_scores.std() * 2:.4f})")

performance_dict['Algorithm'].append('CatBoost')
performance_dict['Score'].append(cv_scores.mean())
performance_dict['Metric'].append(metric)

Mean CV accuracy: 0.9295 (+/- 0.0001)
CPU times: user 3min 10s, sys: 27.5 s, total: 3min 38s
Wall time: 30.5 s


In [24]:
%%time

metric = 'precision'

# CatBoost Classifier
catboost = CatBoostClassifier(verbose=0)

cv_scores = cross_val_score(catboost, X.to_numpy(), y.to_numpy().ravel(), cv=skf, scoring=metric)
print(f"Mean CV {metric}: {cv_scores.mean():.4f} (+/- {cv_scores.std() * 2:.4f})")

performance_dict['Algorithm'].append('CatBoost')
performance_dict['Score'].append(cv_scores.mean())
performance_dict['Metric'].append(metric)

  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])


Mean CV precision: 0.1000 (+/- 0.4000)
CPU times: user 3min 12s, sys: 25.8 s, total: 3min 37s
Wall time: 29.2 s


Model predicts always 0

---

# Results

In [26]:
performance_df = pl.DataFrame(performance_dict)
performance_df

Algorithm,Score,Metric
str,f64,str
"""Linear Support Vector Machine""",0,"""precision"""
"""Linear Support Vector Machine""",0.9295124999999999,"""accuracy"""
"""Random Forest""",0,"""f1"""
"""Random Forest""",0.9295124999999999,"""accuracy"""
"""Random Forest""",0,"""recall"""
…,…,…
"""LightGBM""",0,"""precision"""
"""LightGBM""",0,"""f1"""
"""LightGBM""",0.9295,"""accuracy"""
"""CatBoost""",0.9294874999999999,"""accuracy"""


Looking at the initial performances of all the models, it seems that **no model** can really capture and generalize the pattern behind the target. Also the fact that the target column is really **imbalanced** contributes to this low performances