In [24]:
# src/train_models.py

import os
import joblib
import numpy as np
import pandas as pd
from pathlib import Path
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.impute import SimpleImputer
from sklearn.compose import ColumnTransformer
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
from imblearn.over_sampling import SMOTE
from sklearn.metrics import confusion_matrix, make_scorer, roc_auc_score
import mlflow
import mlflow.sklearn
from sklearn.model_selection import train_test_split


In [25]:


# üìÇ Chargement du fichier complet
df_full = pd.read_csv("../data/processed/train_clean.csv") 

# üéØ S√©paration features / target
X = df_full.drop(columns=["TARGET"])
y = df_full["TARGET"]

# ‚úÇÔ∏è Split 80/20 (stratifi√©)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=42
)

# üíæ Sauvegarde
df_train_split = X_train.copy()
df_train_split["TARGET"] = y_train
df_test_split = X_test.copy()
df_test_split["TARGET"] = y_test

df_train_split.to_csv("../data/processed/train_split.csv", index=False)
df_test_split.to_csv("../data/processed/test_split.csv", index=False)


In [26]:
# Chargement des donn√©es depuis preprocessing sauvegard√©
print("üìÖ Chargement des donn√©es depuis train_clean.csv...")
data_path = Path.cwd() / "data" / "processed"
if not data_path.exists():
    data_path = Path.cwd().parent / "data" / "processed"
df = pd.read_csv(data_path / "train_split.csv")

X = df.drop(columns=["TARGET", "SK_ID_CURR"], errors='ignore')
y = df["TARGET"]


üìÖ Chargement des donn√©es depuis train_clean.csv...


In [28]:
# Conversion bool√©ens
def bool_to_int(df):
    df = df.copy()
    for col in df.select_dtypes(include='bool'):
        df[col] = df[col].astype(int)
    return df

def bool_to_str(df):
    df = df.copy()
    for col in df.select_dtypes(include='bool'):
        df[col] = df[col].astype(str)
    return df



In [29]:
# Imputation simple pour SMOTE
def fill_na_for_smote(X):
    X = X.copy()
    for col in X.select_dtypes(include=["number"]):
        X[col] = X[col].fillna(X[col].mean())
    for col in X.select_dtypes(include=["object", "category"]):
        X[col] = X[col].fillna(X[col].mode()[0])
    return X



In [30]:
# Score m√©tier

def business_score(y_true, y_pred):
    cm = confusion_matrix(y_true, y_pred)
    fn = cm[1][0]
    fp = cm[0][1]
    return 10 * fn + fp

scorer = make_scorer(business_score, greater_is_better=False)


In [31]:

# Pr√©processeur

def get_preprocessor(X):
    numeric_features = X.select_dtypes(include=['int64', 'float64']).columns.tolist()
    categorical_features = X.select_dtypes(include=['object', 'bool']).columns.tolist()

    numeric_transformer = Pipeline([
        ('imputer', SimpleImputer(strategy='mean')),
        ('scaler', StandardScaler())
    ])

    categorical_transformer = Pipeline([
        ('imputer', SimpleImputer(strategy='most_frequent')),
        ('encoder', OneHotEncoder(handle_unknown='ignore'))
    ])

    return ColumnTransformer([
        ('num', numeric_transformer, numeric_features),
        ('cat', categorical_transformer, categorical_features)
    ])



In [33]:
# Suppression outliers

def remove_outliers_iqr(X, y):
    X_numeric = X.select_dtypes(include=["number"])
    Q1 = X_numeric.quantile(0.25)
    Q3 = X_numeric.quantile(0.75)
    IQR = Q3 - Q1
    mask = ~((X_numeric < (Q1 - 1.5 * IQR)) | (X_numeric > (Q3 + 1.5 * IQR))).any(axis=1)
    return X[mask], y[mask]



In [34]:
# Mod√®les
models = {
    "RandomForest": RandomForestClassifier(random_state=42),
    "XGBoost": XGBClassifier(use_label_encoder=False, eval_metric='logloss'),
    "LightGBM": LGBMClassifier()
}



In [35]:
# Pr√©traitements
preprocessings = {
    "pipeline": lambda X, y: (get_preprocessor(bool_to_str(X)), bool_to_str(X), y),
    "smote": lambda X, y: (
        get_preprocessor(bool_to_int(X)),
        *SMOTE(random_state=42).fit_resample(bool_to_int(fill_na_for_smote(X)), y)
    ),
    "custom": lambda X, y: (get_preprocessor(bool_to_str(X)), *remove_outliers_iqr(bool_to_str(X), y))
}


In [37]:
#Cr√©ation dossier
os.makedirs("models", exist_ok=True)

# Entra√Ænement
for preproc_name, preproc_fn in preprocessings.items():
    for model_name, model in models.items():
        print(f"\nüöÄ Entra√Ænement: {model_name} + {preproc_name}")
        preprocessor, X_prep, y_prep = preproc_fn(X, y)

        pipeline = Pipeline([
            ('preprocessor', preprocessor),
            ('clf', model)
        ])

        param_grid = {
            'clf__n_estimators': [50, 100],
            'clf__max_depth': [5, 10]
        } if hasattr(model, 'n_estimators') else {}

        grid = GridSearchCV(pipeline, param_grid=param_grid, scoring=scorer, cv=3)

        with mlflow.start_run(run_name=f"{model_name}_{preproc_name}"):
            grid.fit(X_prep, y_prep)

            best_model = grid.best_estimator_
            cost = -grid.best_score_
            auc = roc_auc_score(y_prep, best_model.predict_proba(X_prep)[:, 1])

            mlflow.log_params(grid.best_params_)
            mlflow.log_metric("business_cost", cost)
            mlflow.log_metric("auc", auc)
            mlflow.sklearn.log_model(best_model, f"{model_name}_{preproc_name}")

            filename = f"models/{model_name}_{preproc_name}.pkl"
            joblib.dump(best_model, filename)
            print(f"‚úÖ Mod√®le enregistr√©: {filename} | Score m√©tier: {cost:.2f} | AUC: {auc:.4f}")



üöÄ Entra√Ænement: RandomForest + pipeline




‚úÖ Mod√®le enregistr√©: models/RandomForest_pipeline.pkl | Score m√©tier: 66200.00 | AUC: 0.7220

üöÄ Entra√Ænement: XGBoost + pipeline


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.


‚úÖ Mod√®le enregistr√©: models/XGBoost_pipeline.pkl | Score m√©tier: 63363.67 | AUC: 0.9862

üöÄ Entra√Ænement: LightGBM + pipeline
[LightGBM] [Info] Number of positive: 13240, number of negative: 150763
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.031372 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 12850
[LightGBM] [Info] Number of data points in the train set: 164003, number of used features: 361
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.080730 -> initscore=-2.432467
[LightGBM] [Info] Start training from score -2.432467




[LightGBM] [Info] Number of positive: 13240, number of negative: 150763
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.040682 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 12877
[LightGBM] [Info] Number of data points in the train set: 164003, number of used features: 361
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.080730 -> initscore=-2.432467
[LightGBM] [Info] Start training from score -2.432467




[LightGBM] [Info] Number of positive: 13240, number of negative: 150764
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.031470 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 13007
[LightGBM] [Info] Number of data points in the train set: 164004, number of used features: 361
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.080730 -> initscore=-2.432473
[LightGBM] [Info] Start training from score -2.432473




[LightGBM] [Info] Number of positive: 13240, number of negative: 150763
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.031116 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 12850
[LightGBM] [Info] Number of data points in the train set: 164003, number of used features: 361
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.080730 -> initscore=-2.432467
[LightGBM] [Info] Start training from score -2.432467




[LightGBM] [Info] Number of positive: 13240, number of negative: 150763
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.040026 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 12877
[LightGBM] [Info] Number of data points in the train set: 164003, number of used features: 361
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.080730 -> initscore=-2.432467
[LightGBM] [Info] Start training from score -2.432467




[LightGBM] [Info] Number of positive: 13240, number of negative: 150764
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.032062 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 13007
[LightGBM] [Info] Number of data points in the train set: 164004, number of used features: 361
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.080730 -> initscore=-2.432473
[LightGBM] [Info] Start training from score -2.432473




[LightGBM] [Info] Number of positive: 13240, number of negative: 150763
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.030374 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 12850
[LightGBM] [Info] Number of data points in the train set: 164003, number of used features: 361
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.080730 -> initscore=-2.432467
[LightGBM] [Info] Start training from score -2.432467




[LightGBM] [Info] Number of positive: 13240, number of negative: 150763
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.039661 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 12877
[LightGBM] [Info] Number of data points in the train set: 164003, number of used features: 361
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.080730 -> initscore=-2.432467
[LightGBM] [Info] Start training from score -2.432467




[LightGBM] [Info] Number of positive: 13240, number of negative: 150764
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.032543 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 13007
[LightGBM] [Info] Number of data points in the train set: 164004, number of used features: 361
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.080730 -> initscore=-2.432473
[LightGBM] [Info] Start training from score -2.432473




[LightGBM] [Info] Number of positive: 13240, number of negative: 150763
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.029477 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 12850
[LightGBM] [Info] Number of data points in the train set: 164003, number of used features: 361
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.080730 -> initscore=-2.432467
[LightGBM] [Info] Start training from score -2.432467




[LightGBM] [Info] Number of positive: 13240, number of negative: 150763
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.044153 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 12877
[LightGBM] [Info] Number of data points in the train set: 164003, number of used features: 361
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.080730 -> initscore=-2.432467
[LightGBM] [Info] Start training from score -2.432467




[LightGBM] [Info] Number of positive: 13240, number of negative: 150764
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.033643 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 13007
[LightGBM] [Info] Number of data points in the train set: 164004, number of used features: 361
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.080730 -> initscore=-2.432473
[LightGBM] [Info] Start training from score -2.432473




[LightGBM] [Info] Number of positive: 19860, number of negative: 226145
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.057901 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 12949
[LightGBM] [Info] Number of data points in the train set: 246005, number of used features: 363
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.080730 -> initscore=-2.432469
[LightGBM] [Info] Start training from score -2.432469




‚úÖ Mod√®le enregistr√©: models/LightGBM_pipeline.pkl | Score m√©tier: 65015.00 | AUC: 0.8042

üöÄ Entra√Ænement: RandomForest + smote




‚úÖ Mod√®le enregistr√©: models/RandomForest_smote.pkl | Score m√©tier: 95974.33 | AUC: 0.9706

üöÄ Entra√Ænement: XGBoost + smote


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.


‚úÖ Mod√®le enregistr√©: models/XGBoost_smote.pkl | Score m√©tier: 67232.00 | AUC: 0.9985

üöÄ Entra√Ænement: LightGBM + smote
[LightGBM] [Info] Number of positive: 150763, number of negative: 150763
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.058847 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19007
[LightGBM] [Info] Number of data points in the train set: 301526, number of used features: 234
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000




[LightGBM] [Info] Number of positive: 150764, number of negative: 150763
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.060232 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 18956
[LightGBM] [Info] Number of data points in the train set: 301527, number of used features: 234
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500002 -> initscore=0.000007
[LightGBM] [Info] Start training from score 0.000007




[LightGBM] [Info] Number of positive: 150763, number of negative: 150764
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.060834 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 18938
[LightGBM] [Info] Number of data points in the train set: 301527, number of used features: 233
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.499998 -> initscore=-0.000007
[LightGBM] [Info] Start training from score -0.000007




[LightGBM] [Info] Number of positive: 150763, number of negative: 150763
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.055985 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19007
[LightGBM] [Info] Number of data points in the train set: 301526, number of used features: 234
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000




[LightGBM] [Info] Number of positive: 150764, number of negative: 150763
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.060458 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 18956
[LightGBM] [Info] Number of data points in the train set: 301527, number of used features: 234
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500002 -> initscore=0.000007
[LightGBM] [Info] Start training from score 0.000007




[LightGBM] [Info] Number of positive: 150763, number of negative: 150764
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.060398 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 18938
[LightGBM] [Info] Number of data points in the train set: 301527, number of used features: 233
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.499998 -> initscore=-0.000007
[LightGBM] [Info] Start training from score -0.000007




[LightGBM] [Info] Number of positive: 150763, number of negative: 150763
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.057036 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19007
[LightGBM] [Info] Number of data points in the train set: 301526, number of used features: 234
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000




[LightGBM] [Info] Number of positive: 150764, number of negative: 150763
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.094694 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 18956
[LightGBM] [Info] Number of data points in the train set: 301527, number of used features: 234
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500002 -> initscore=0.000007
[LightGBM] [Info] Start training from score 0.000007




[LightGBM] [Info] Number of positive: 150763, number of negative: 150764
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.058242 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 18938
[LightGBM] [Info] Number of data points in the train set: 301527, number of used features: 233
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.499998 -> initscore=-0.000007
[LightGBM] [Info] Start training from score -0.000007




[LightGBM] [Info] Number of positive: 150763, number of negative: 150763
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.055892 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 19007
[LightGBM] [Info] Number of data points in the train set: 301526, number of used features: 234
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000




[LightGBM] [Info] Number of positive: 150764, number of negative: 150763
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.098445 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 18956
[LightGBM] [Info] Number of data points in the train set: 301527, number of used features: 234
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500002 -> initscore=0.000007
[LightGBM] [Info] Start training from score 0.000007




[LightGBM] [Info] Number of positive: 150763, number of negative: 150764
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.059861 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 18938
[LightGBM] [Info] Number of data points in the train set: 301527, number of used features: 233
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.499998 -> initscore=-0.000007
[LightGBM] [Info] Start training from score -0.000007




[LightGBM] [Info] Number of positive: 226145, number of negative: 226145
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.076962 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 18984
[LightGBM] [Info] Number of data points in the train set: 452290, number of used features: 236
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.500000 -> initscore=0.000000




‚úÖ Mod√®le enregistr√©: models/LightGBM_smote.pkl | Score m√©tier: 67314.33 | AUC: 0.9798

üöÄ Entra√Ænement: RandomForest + custom




‚úÖ Mod√®le enregistr√©: models/RandomForest_custom.pkl | Score m√©tier: 5633.33 | AUC: 0.7869

üöÄ Entra√Ænement: XGBoost + custom


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.


‚úÖ Mod√®le enregistr√©: models/XGBoost_custom.pkl | Score m√©tier: 5370.00 | AUC: 0.9755

üöÄ Entra√Ænement: LightGBM + custom
[LightGBM] [Info] Number of positive: 1127, number of negative: 12440
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.004364 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 10766
[LightGBM] [Info] Number of data points in the train set: 13567, number of used features: 293
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.083069 -> initscore=-2.401358
[LightGBM] [Info] Start training from score -2.401358




[LightGBM] [Info] Number of positive: 1126, number of negative: 12441
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.004814 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 10852
[LightGBM] [Info] Number of data points in the train set: 13567, number of used features: 295
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.082996 -> initscore=-2.402326
[LightGBM] [Info] Start training from score -2.402326




[LightGBM] [Info] Number of positive: 1127, number of negative: 12441
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.004431 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 10924
[LightGBM] [Info] Number of data points in the train set: 13568, number of used features: 295
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.083063 -> initscore=-2.401438
[LightGBM] [Info] Start training from score -2.401438




[LightGBM] [Info] Number of positive: 1127, number of negative: 12440
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.005011 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 10766
[LightGBM] [Info] Number of data points in the train set: 13567, number of used features: 293
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.083069 -> initscore=-2.401358
[LightGBM] [Info] Start training from score -2.401358




[LightGBM] [Info] Number of positive: 1126, number of negative: 12441
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.004487 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 10852
[LightGBM] [Info] Number of data points in the train set: 13567, number of used features: 295
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.082996 -> initscore=-2.402326
[LightGBM] [Info] Start training from score -2.402326




[LightGBM] [Info] Number of positive: 1127, number of negative: 12441
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.005286 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 10924
[LightGBM] [Info] Number of data points in the train set: 13568, number of used features: 295
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.083063 -> initscore=-2.401438
[LightGBM] [Info] Start training from score -2.401438




[LightGBM] [Info] Number of positive: 1127, number of negative: 12440
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.004513 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 10766
[LightGBM] [Info] Number of data points in the train set: 13567, number of used features: 293
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.083069 -> initscore=-2.401358
[LightGBM] [Info] Start training from score -2.401358




[LightGBM] [Info] Number of positive: 1126, number of negative: 12441
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.004681 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 10852
[LightGBM] [Info] Number of data points in the train set: 13567, number of used features: 295
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.082996 -> initscore=-2.402326
[LightGBM] [Info] Start training from score -2.402326




[LightGBM] [Info] Number of positive: 1127, number of negative: 12441
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.005000 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 10924
[LightGBM] [Info] Number of data points in the train set: 13568, number of used features: 295
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.083063 -> initscore=-2.401438
[LightGBM] [Info] Start training from score -2.401438




[LightGBM] [Info] Number of positive: 1127, number of negative: 12440
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.005825 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 10766
[LightGBM] [Info] Number of data points in the train set: 13567, number of used features: 293
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.083069 -> initscore=-2.401358
[LightGBM] [Info] Start training from score -2.401358




[LightGBM] [Info] Number of positive: 1126, number of negative: 12441
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.004200 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 10852
[LightGBM] [Info] Number of data points in the train set: 13567, number of used features: 295
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.082996 -> initscore=-2.402326
[LightGBM] [Info] Start training from score -2.402326




[LightGBM] [Info] Number of positive: 1127, number of negative: 12441
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.006175 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 10924
[LightGBM] [Info] Number of data points in the train set: 13568, number of used features: 295
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.083063 -> initscore=-2.401438
[LightGBM] [Info] Start training from score -2.401438




[LightGBM] [Info] Number of positive: 1690, number of negative: 18661
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.005706 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 11060
[LightGBM] [Info] Number of data points in the train set: 20351, number of used features: 301
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.083043 -> initscore=-2.401707
[LightGBM] [Info] Start training from score -2.401707




‚úÖ Mod√®le enregistr√©: models/LightGBM_custom.pkl | Score m√©tier: 5492.67 | AUC: 0.9654
