# In [9]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score, precision_score, recall_score, accuracy_score, confusion_matrix
from catboost import CatBoostClassifier

# Name of the label column; every other remaining column is used as a feature.
target_column = "target_variable"

# Load the dataset and discard the "id" column in a single step.
df = pd.read_csv("dataset.csv").drop(columns=["id"])

# Separate the label (y) from the feature matrix (X); df itself keeps the label.
y = df[target_column]
X = df.drop(columns=[target_column])

# Categorical features (the columns that are binary flags or categories),
# grouped by theme. The final list keeps the same order as before.
_product_columns = [
    "product_A_recommended",
    "product_A",
    "product_C",
    "product_D",
]
_competitor_columns = [
    "competitor_Z",
    "competitor_X",
    "competitor_Y",
]
categorical_features = [
    *_product_columns,
    *_competitor_columns,
    "cust_in_iberia",
]

# Stratified hold-out split: 20% of the rows form the final test set. The test
# set is scored exactly once, at the very end, so that the reported metrics are
# not biased by any modelling decision.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Carve a stratified validation set out of the training portion
# (25% of 80% = 20% of all rows). The decision threshold is tuned on this set;
# tuning it on the test set and then reporting the test F1 at that same
# threshold would leak test information and overstate performance.
X_fit, X_val, y_fit, y_val = train_test_split(
    X_train, y_train, test_size=0.25, random_state=42, stratify=y_train
)

# Create and train the CatBoost model
model = CatBoostClassifier(
    iterations=500,          # number of boosting iterations (trees)
    depth=6,                 # maximum tree depth
    learning_rate=0.05,      # learning rate
    loss_function="Logloss", # loss function being optimised
    eval_metric="F1",        # metric reported in the training log
    random_seed=42,
    verbose=100              # log progress every 100 iterations
)

# NOTE(review): the cat_features columns are presumably integer- or
# string-typed; confirm there are no float/NaN values in them before fitting.
model.fit(X_fit, y_fit, cat_features=categorical_features)

# Positive-class probabilities on the validation set (used only for tuning).
y_val_proba = model.predict_proba(X_val)[:, 1]

# Threshold tuning to maximise F1 on the validation set. np.argmax returns the
# first maximum, i.e. the lowest threshold among ties.
thresholds = np.linspace(0.1, 0.9, 81)
val_f1_scores = [
    f1_score(y_val, (y_val_proba >= t).astype(int)) for t in thresholds
]
best_idx = int(np.argmax(val_f1_scores))
best_t = thresholds[best_idx]
best_f1 = val_f1_scores[best_idx]

print("Best threshold:", best_t)
print("Validation F1 at best threshold:", best_f1)

# Final, unbiased evaluation: apply the tuned threshold to the untouched test set.
y_test_proba = model.predict_proba(X_test)[:, 1]
y_test_pred = (y_test_proba >= best_t).astype(int)
test_f1 = f1_score(y_test, y_test_pred)
print("Test F1 at best threshold:", test_f1)
print("Accuracy:", accuracy_score(y_test, y_test_pred))
print("Precision:", precision_score(y_test, y_test_pred))
print("Recall:", recall_score(y_test, y_test_pred))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_test_pred))


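# Cell output: ModuleNotFoundError: No module named 'catboost'
# The catboost package is not installed in this environment; install it
# (e.g. `pip install catboost`) and re-run the cell.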