Pipeline

In [4]:
library(tidymodels)
library(tidyverse)
library(dplyr)
library(ranger)
library(xgboost)

── [1mAttaching packages[22m ────────────────────────────────────── tidymodels 1.1.1 ──

[32m✔[39m [34mbroom       [39m 1.0.5      [32m✔[39m [34mrecipes     [39m 1.0.10
[32m✔[39m [34mdials       [39m 1.2.1      [32m✔[39m [34mrsample     [39m 1.2.0 
[32m✔[39m [34mdplyr       [39m 1.1.4      [32m✔[39m [34mtibble      [39m 3.2.1 
[32m✔[39m [34mggplot2     [39m 3.4.4      [32m✔[39m [34mtidyr       [39m 1.3.1 
[32m✔[39m [34minfer       [39m 1.0.6      [32m✔[39m [34mtune        [39m 1.1.2 
[32m✔[39m [34mmodeldata   [39m 1.3.0      [32m✔[39m [34mworkflows   [39m 1.1.4 
[32m✔[39m [34mparsnip     [39m 1.2.0      [32m✔[39m [34mworkflowsets[39m 1.0.1 
[32m✔[39m [34mpurrr       [39m 1.0.2      [32m✔[39m [34myardstick   [39m 1.3.0 

── [1mConflicts[22m ───────────────────────────────────────── tidymodels_conflicts() ──
[31m✖[39m [34mpurrr[39m::[32mdiscard()[39m masks [34mscales[39m::discard()
[31m✖[39m [34mdplyr[39m::[

In [5]:
list.files()

In [6]:
dataini <- readRDS("readmission_avc.rds")
dim(dataini)
head(dataini)

modeEntree,modeSortie,duree,ghm2,dp,sexe,age,nbActe,nbRum,nbda,id,id_D
<int>,<int>,<int>,<chr>,<chr>,<int>,<int>,<int>,<int>,<int>,<chr>,<chr>
8,9,0,01M37E,I671,2.0,76.0,4,1,,l19,
8,8,3,01C061,I652,2.0,77.0,4,1,1.0,s7e,
8,7,13,01M303,I634,,,4,1,7.0,23f,
8,8,11,01M301,I639,1.0,83.0,4,2,2.0,8oi,
8,6,8,01M303,I635,1.0,71.0,4,1,9.0,otz,ld
8,9,18,01M303,I635,,,9,2,6.0,bof,


# Exploratory data analysis

* check missing values
* tabs and plots

In [12]:
# summary(dataini)

# colSums(is.na(dataini))
apply(is.na(dataini),2,sum)
#map(dataini, function(x) sum(is.na(x))) ## fonction de dplyr on calcule en colonne
#map(dataini, ~ sum(is.na(.))) ## une syntaxe simplifée



# 0- Recoding and data pre-processing

In [10]:
dataset <- dataini %>%
  filter(!is.na(id_D)) %>%
    mutate(target = as.factor(ifelse(id_D=="",0,1))) %>%
    mutate_at(c("modeEntree","modeSortie","sexe"), as.character)  %>%    # mutate_at : permet d'appliquer une fonction à une liste de colonne que l'on sélectionne
    mutate(nbda = ifelse(is.na(nbda),0,nbda))  %>%
    select(-c(id,id_D)) %>%
    filter(modeSortie != 9)

dim(dataset)




# 1 -  Resample library : train set, eval set et test set ✈

In [11]:
set.seed(24)  # pour assurer la reproductivité

data_split <- initial_split(dataset, strata = target, prop = 0.8)

class(data_split)

training <- training(data_split) # data frame qui permet de faire le premier découpage
test_set <- testing(data_split) # extraire le test set

training_split <- initial_split(training, strata = target, prop = 0.8)

train_set <- training(training_split)
eval_set <- testing(training_split)

dim(train_set)
dim(eval_set)
dim(test_set)


# 2 - recipes library : create a collection of input formulas



## 2.1 Basic recipe and design matrix

In [13]:
rec_basic <- recipe(data = train_set, target ~.)

class(rec_basic)

prep(rec_basic)  ### pour préparer
## juice(prep(rec_basic))  ### pour extraire la design matrix
formula(prep(prep(rec_basic)))



[36m──[39m [1mRecipe[22m [36m──────────────────────────────────────────────────────────────────────[39m



── Inputs 

Number of variables by role

outcome:    1
predictor: 10



── Training information 

Training data contained 845 data points and 6 incomplete rows.



target ~ modeEntree + modeSortie + duree + ghm2 + dp + sexe + 
    age + nbActe + nbRum + nbda
<environment: 0x578a16ee7388>

In [14]:
# pour montrer --> mais pas généralisable --> voir le code ci-dessous
rec_basic <- recipe(data = train_set, target ~ .) %>%
  step_impute_mean(age) %>%     # on impute par la moyenne
  step_impute_mode(sexe) %>%    # on impute par le mode (la valeur qui revient le plus)
  step_normalize(age) %>%
  step_dummy(modeEntree) %>%
  step_other(dp, threshold = 0.05) %>% ## si la proportion est inférieure à 5%, on place dans la catégorie other.
  step_dummy(dp)


juice(prep(rec_basic)) %>% head(10)

modeSortie,duree,ghm2,sexe,age,nbActe,nbRum,nbda,target,modeEntree_X7,modeEntree_X8,dp_I633,dp_I634,dp_I635,dp_I638,dp_I639,dp_I652,dp_other
<fct>,<int>,<fct>,<fct>,<dbl>,<int>,<int>,<dbl>,<fct>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
8,3,01C061,2,0.3570417,4,1,1,0,0,1,0,0,0,0,0,1,0
7,8,01M302,1,1.324857,7,2,4,0,0,1,0,0,0,0,1,0,0
8,8,01M301,2,1.0667729,8,2,5,0,0,1,0,0,0,1,0,0,0
8,0,01M30T,2,0.6796468,5,1,3,0,0,1,0,0,1,0,0,0,0
7,1,01M31T,2,-1.5785887,1,1,0,0,0,1,0,0,0,0,0,0,1
8,4,01M301,1,0.2925207,4,2,1,0,0,1,0,0,1,0,0,0,0
7,20,01M303,1,0.4215628,30,2,8,0,0,1,0,0,1,0,0,0,0
8,4,01M302,1,0.4215628,0,1,5,0,0,1,0,0,0,0,1,0,0
8,11,01M302,2,0.2279997,7,2,3,0,0,1,0,0,0,1,0,0,0
8,5,01M303,2,0.9377309,11,1,7,0,0,1,0,0,1,0,0,0,0


In [15]:
rec_basic <- recipe(train_set, target~.)%>%
  step_impute_mean(all_numeric_predictors())%>%
  step_impute_mode(all_nominal_predictors())%>%
  step_normalize(all_numeric_predictors())%>%
  step_other(dp, threshold =  .05)%>%
  step_other(ghm2, threshold = .02)%>%
  step_dummy(all_nominal_predictors())

colnames(juice(prep(rec_basic)))

juice(prep(rec_basic)) %>% head(10)
dim(juice(prep(rec_basic)))

duree,age,nbActe,nbRum,nbda,target,modeEntree_X7,modeEntree_X8,modeSortie_X7,modeSortie_X8,⋯,ghm2_X01M31T,ghm2_other,dp_I633,dp_I634,dp_I635,dp_I638,dp_I639,dp_I652,dp_other,sexe_X2
<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<fct>,<dbl>,<dbl>,<dbl>,<dbl>,⋯,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
-0.6890871,0.3570417,-0.28795077,-0.8933736,-1.00131507,0,0,1,0,1,⋯,0,0,0,0,0,0,0,1,0,1
-0.1550825,1.324857,-0.08970112,0.1928144,-0.21055691,0,0,1,1,0,⋯,0,0,0,0,0,0,1,0,0,0
-0.1550825,1.0667729,-0.02361791,0.1928144,0.05302915,0,0,1,0,1,⋯,0,0,0,0,0,1,0,0,0,1
-1.0094899,0.6796468,-0.22186756,-0.8933736,-0.47414296,0,0,1,0,1,⋯,0,0,0,0,1,0,0,0,0,1
-0.902689,-1.5785887,-0.48620042,-0.8933736,-1.26490112,0,0,1,1,0,⋯,1,0,0,0,0,0,0,0,1,1
-0.5822862,0.2925207,-0.28795077,0.1928144,-1.00131507,0,0,1,0,1,⋯,0,0,0,0,1,0,0,0,0,0
1.1265285,0.4215628,1.43021285,0.1928144,0.84378731,0,0,1,1,0,⋯,0,0,0,0,1,0,0,0,0,0
-0.5822862,0.4215628,-0.55228364,-0.8933736,0.05302915,0,0,1,0,1,⋯,0,0,0,0,0,0,1,0,0,0
0.1653202,0.2279997,-0.08970112,0.1928144,-0.47414296,0,0,1,0,1,⋯,0,0,0,0,0,1,0,0,0,1
-0.4754853,0.9377309,0.17463174,-0.8933736,0.58020125,0,0,1,0,1,⋯,0,0,0,0,1,0,0,0,0,1


## 2.2 Interacting recipe

In [16]:
rec_inter <- rec_basic %>%
  step_interact(~age : duree) %>%
  step_interact(~age : starts_with("dp_"))  # déconseiller de croiser l'ensemble des variables --> couter bcp trop

head(juice(prep(rec_inter)))

duree,age,nbActe,nbRum,nbda,target,modeEntree_X7,modeEntree_X8,modeSortie_X7,modeSortie_X8,⋯,dp_other,sexe_X2,age_x_duree,age_x_dp_I633,age_x_dp_I634,age_x_dp_I635,age_x_dp_I638,age_x_dp_I639,age_x_dp_I652,age_x_dp_other
<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<fct>,<dbl>,<dbl>,<dbl>,<dbl>,⋯,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
-0.6890871,0.3570417,-0.28795077,-0.8933736,-1.00131507,0,0,1,0,1,⋯,0,1,-0.2460329,0,0,0.0,0.0,0.0,0.3570417,0.0
-0.1550825,1.324857,-0.08970112,0.1928144,-0.21055691,0,0,1,1,0,⋯,0,0,-0.2054622,0,0,0.0,0.0,1.324857,0.0,0.0
-0.1550825,1.0667729,-0.02361791,0.1928144,0.05302915,0,0,1,0,1,⋯,0,1,-0.1654378,0,0,0.0,1.066773,0.0,0.0,0.0
-1.0094899,0.6796468,-0.22186756,-0.8933736,-0.47414296,0,0,1,0,1,⋯,0,1,-0.6860966,0,0,0.6796468,0.0,0.0,0.0,0.0
-0.902689,-1.5785887,-0.48620042,-0.8933736,-1.26490112,0,0,1,1,0,⋯,1,1,1.4249746,0,0,0.0,0.0,0.0,0.0,-1.578589
-0.5822862,0.2925207,-0.28795077,0.1928144,-1.00131507,0,0,1,0,1,⋯,0,0,-0.1703308,0,0,0.2925207,0.0,0.0,0.0,0.0


## 2.3 spline recipe

In [17]:
rec_spline <- rec_basic %>%
  step_ns(deg_free = tune())  # degré polynomial + nb noeuds

In [138]:
rec_spline



[36m──[39m [1mRecipe[22m [36m──────────────────────────────────────────────────────────────────────[39m



── Inputs 

Number of variables by role

outcome:    1
predictor: 10



── Operations 

[36m•[39m Mean imputation for: [34mall_numeric_predictors()[39m

[36m•[39m Mode imputation for: [34mall_nominal_predictors()[39m

[36m•[39m Centering and scaling for: [34mall_numeric_predictors()[39m

[36m•[39m Collapsing factor levels for: [34mdp[39m

[36m•[39m Collapsing factor levels for: [34mghm2[39m

[36m•[39m Dummy variables from: [34mall_nominal_predictors()[39m

[36m•[39m Natural splines on: [34m<none>[39m



## 2.4 Interaction + spline recipe

In [None]:
rec_spline_int <- rec_inter %>%


# 3 Parsnip librairy : creating and fitting a model

## 3.1 Regression logistic : model and workflow


In [19]:
log_mod <- logistic_reg() %>%
  set_engine("glm") %>%
    set_mode("classification")

In [20]:
log_wf <- workflow() %>%
  add_recipe(rec_basic) %>%
    add_model(log_mod)

fit(log_wf, train_set)
wf_fitted <- log_wf %>% fit(train_set)

# predict(wf_fitted, eval_set)
# predict(wf_fitted, eval_set, type="prob")

log_pred <- eval_set %>%
   select(target) %>%
   bind_cols(
      predict(wf_fitted, eval_set),
      predict(wf_fitted, eval_set, type="prob")
   )


log_pred %>% head(10)

══ Workflow [trained] ══════════════════════════════════════════════════════════
[3mPreprocessor:[23m Recipe
[3mModel:[23m logistic_reg()

── Preprocessor ────────────────────────────────────────────────────────────────
6 Recipe Steps

• step_impute_mean()
• step_impute_mode()
• step_normalize()
• step_other()
• step_other()
• step_dummy()

── Model ───────────────────────────────────────────────────────────────────────

Call:  stats::glm(formula = ..y ~ ., family = stats::binomial, data = data)

Coefficients:
  (Intercept)          duree            age         nbActe          nbRum  
     16.30483        0.05874       -0.04969       -0.05835       -0.26738  
         nbda  modeEntree_X7  modeEntree_X8  modeSortie_X7  modeSortie_X8  
      0.06444        0.58134        1.27976      -22.59611      -21.47789  
 ghm2_X01C062   ghm2_X01M301   ghm2_X01M302   ghm2_X01M303   ghm2_X01M304  
     -0.25437       -0.13896        0.84424        0.70689        2.22777  
 ghm2_X01M30T   ghm2_X01

target,.pred_class,.pred_0,.pred_1
<fct>,<fct>,<dbl>,<dbl>
0,0,0.9303006,0.069699372
0,0,0.9686659,0.031334074
0,0,0.9234047,0.076595316
0,0,0.8786154,0.121384646
0,0,0.9572263,0.042773713
0,0,0.9797216,0.020278446
0,0,0.9856902,0.014309809
0,0,0.992981,0.007018961
0,0,0.8968301,0.103169882
0,0,0.9819518,0.018048202


### 3-2 Random Forest : model and workflow


In [21]:
rf_mod <- rand_forest()%>%
  set_engine('ranger')%>%
  set_mode('classification')

In [22]:
rf_wf <- workflow() %>%
  add_recipe(rec_basic) %>%
  add_model(rf_mod)

rf_wf_fitted <- rf_wf %>%
  fit(train_set)

predict(rf_wf_fitted, eval_set, type = "prob")

rf_pred <- eval_set %>%
   select(target) %>%
   bind_cols(
      predict(rf_wf_fitted, eval_set, type = "prob"),
      predict(rf_wf_fitted, eval_set)
   )

.pred_0,.pred_1
<dbl>,<dbl>
0.9790661,0.020933947
0.9292339,0.070766066
0.8499196,0.150080384
0.9368707,0.063129253
0.9188882,0.081111769
0.9327362,0.067263828
0.9487314,0.051268601
0.9726738,0.027326190
0.9050082,0.094991783
0.9190888,0.080911212


### 3.3 - XGBOOST

In [23]:
xg_mod <- boost_tree() %>%
  set_engine("xgboost") %>%
  set_mode("classification")

In [24]:
xgboost_wf <- workflow() %>%
  add_recipe(rec_basic) %>%
  add_model(xg_mod)

xgboost_fitted <- xgboost_wf %>%
  fit(train_set)

predict(xgboost_fitted, eval_set, type = "prob")

xgboost_pred <- eval_set %>%
   select(target) %>%
   bind_cols(
      predict(xgboost_fitted, eval_set, type = "prob"),
      predict(xgboost_fitted, eval_set)
   )

.pred_0,.pred_1
<dbl>,<dbl>
0.9712305,0.02876949
0.8674424,0.13255757
0.8725805,0.12741947
0.9449753,0.05502468
0.9574106,0.04258937
0.9791465,0.02085346
0.9710613,0.02893871
0.9537461,0.04625392
0.9168373,0.08316267
0.9443317,0.05566829


In [None]:
#### On peut le fit tuner ...


xg_mod <- boost_tree(
  n_try = fit()
) %>%
  set_engine("xgboost") %>%
  set_mode("classification")

## 4 Yardstick library : evaluate wf performance

### 4.1. RL workflow

In [28]:
accuracy(log_pred, target, .pred_class)
roc_auc(log_pred, target, .pred_0)
roc_auc(log_pred, target, .pred_1, event_level = "second") # on constate le même montant


.metric,.estimator,.estimate
<chr>,<chr>,<dbl>
accuracy,binary,0.9103774


.metric,.estimator,.estimate
<chr>,<chr>,<dbl>
roc_auc,binary,0.7263923


.metric,.estimator,.estimate
<chr>,<chr>,<dbl>
roc_auc,binary,0.7263923


4.2 RandomForest workflow

In [29]:
accuracy(rf_pred, target, .pred_class) # 0.9103774
roc_auc(rf_pred, target, .pred_0) # 0.7761098


.metric,.estimator,.estimate
<chr>,<chr>,<dbl>
accuracy,binary,0.9103774


.metric,.estimator,.estimate
<chr>,<chr>,<dbl>
roc_auc,binary,0.7698144


# 4-3 - xgboost

In [30]:
accuracy(xgboost_pred, target, .pred_class) # 0.9103774
roc_auc(xgboost_pred, target, .pred_0) # 0.7728006

.metric,.estimator,.estimate
<chr>,<chr>,<dbl>
accuracy,binary,0.9103774


.metric,.estimator,.estimate
<chr>,<chr>,<dbl>
roc_auc,binary,0.7728006


## 5. Cross Validation


In [31]:
set.seed(24)
folds <- vfold_cv(training, v = 5)

xg_fitted_cv <- xgboost_wf %>%
  fit_resamples(                     # remplace fit et on va faire la validation croisée
    resamples = folds,
    metrics = metric_set(accuracy, roc_auc, f_meas)
    )

xg_fitted_cv %>% collect_metrics()


.metric,.estimator,mean,n,std_err,.config
<chr>,<chr>,<dbl>,<int>,<dbl>,<chr>
accuracy,binary,0.9375615,5,0.004066415,Preprocessor1_Model1
f_meas,binary,0.9637676,5,0.002610405,Preprocessor1_Model1
roc_auc,binary,0.8349435,5,0.018843252,Preprocessor1_Model1


# 6- Hyperparameters tuning

[Lien vers un lien écrit par les développeurs de tidymodels](https://www.tmwr.org/)

In [32]:
rf_mod_tune <- rand_forest(
  trees = tune(),
  min_n = tune()) %>% ### je ne prends pas le paramètre par défaut )
    set_engine('ranger')%>%
  set_mode('classification')

rf_tune_wf <- workflow() %>%
  add_recipe(rec_basic) %>%
  add_model(rf_mod_tune)

In [33]:
rf_tune_wf_fitted <- rf_tune_wf %>%
  tune_grid(                  # remplace fonction fit
    resamples= folds,
    metrics = metric_set(accuracy, roc_auc),
    grid = 20)               # space fitting

In [34]:
rf_tune_wf_fitted %>% collect_metrics() %>%
  filter(.metric == "roc_auc") %>%
  arrange(desc(mean)) # 0.8438979

trees,min_n,.metric,.estimator,mean,n,std_err,.config
<int>,<int>,<chr>,<chr>,<dbl>,<int>,<dbl>,<chr>
306,5,roc_auc,binary,0.8453997,5,0.01204367,Preprocessor1_Model12
782,2,roc_auc,binary,0.842707,5,0.00825248,Preprocessor1_Model10
1242,6,roc_auc,binary,0.8419934,5,0.0110889,Preprocessor1_Model18
1338,31,roc_auc,binary,0.8412789,5,0.01409576,Preprocessor1_Model08
1133,12,roc_auc,binary,0.8410152,5,0.01037097,Preprocessor1_Model11
1607,19,roc_auc,binary,0.8409385,5,0.0115824,Preprocessor1_Model15
1919,8,roc_auc,binary,0.8408621,5,0.01018292,Preprocessor1_Model04
860,14,roc_auc,binary,0.8406092,5,0.01036021,Preprocessor1_Model13
401,21,roc_auc,binary,0.8401357,5,0.01077837,Preprocessor1_Model16
641,28,roc_auc,binary,0.8390414,5,0.01290705,Preprocessor1_Model01


# 7. Workflowset

## 7.1 Create a workflow set

In [49]:
wf_set <- workflow_set(
  preproc = list(basic = rec_basic,
  inter = rec_inter,
  spline = rec_spline),
  models = list(log = log_mod,
                rf = rf_mod,
                tune_rf = rf_mod_tune,
                xgb = xg_mod
                )
)

In [52]:
set.seed(24)

wf_set_fitted <-
  wf_set %>%
  workflow_map(             # replace the fonction fit , fit_resamples,
    resamples = folds,
    metrics= metric_set(accuracy, roc_auc),
    grid = 20,
    fn= "tune_grid",
    verbose = TRUE)

[34mi[39m	[30mNo tuning parameters. `fit_resamples()` will be attempted[39m

[34mi[39m [30m 1 of 12 resampling: basic_log[39m


There were issues with some computations   [1m[33mA[39m[22m: x1

There were issues with some computations   [1m[33mA[39m[22m: x1



[32m✔[39m [30m 1 of 12 resampling: basic_log[39m[30m (1.3s)[39m

[34mi[39m	[30mNo tuning parameters. `fit_resamples()` will be attempted[39m

[34mi[39m [30m 2 of 12 resampling: basic_rf[39m

[32m✔[39m [30m 2 of 12 resampling: basic_rf[39m[30m (2.8s)[39m

[34mi[39m [30m 3 of 12 tuning:     basic_tune_rf[39m

[32m✔[39m [30m 3 of 12 tuning:     basic_tune_rf[39m[30m (1m 13.9s)[39m

[34mi[39m	[30mNo tuning parameters. `fit_resamples()` will be attempted[39m

[34mi[39m [30m 4 of 12 resampling: basic_xgb[39m

[32m✔[39m [30m 4 of 12 resampling: basic_xgb[39m[30m (1.4s)[39m

[34mi[39m	[30mNo tuning parameters. `fit_resamples()` will be attempted[39m

[34mi[39m [30m 5 of 12

#7.3 Evaluation of all workflow in wf set

In [57]:
rank_results(wf_set_fitted, rank_metric = 'roc_auc') # 0.8646417


wflow_id,.config,.metric,mean,std_err,n,preprocessor,model,rank
<chr>,<chr>,<chr>,<dbl>,<dbl>,<int>,<chr>,<chr>,<int>
inter_xgb,Preprocessor1_Model1,accuracy,0.9375615,0.004066415,5,recipe,boost_tree,1
inter_xgb,Preprocessor1_Model1,roc_auc,0.8646417,0.011435346,5,recipe,boost_tree,1
spline_rf,Preprocessor06_Model1,accuracy,0.9385049,0.004237095,5,recipe,rand_forest,2
spline_rf,Preprocessor06_Model1,roc_auc,0.8457869,0.010431249,5,recipe,rand_forest,2
spline_rf,Preprocessor03_Model1,accuracy,0.9385049,0.004237095,5,recipe,rand_forest,3
spline_rf,Preprocessor03_Model1,roc_auc,0.8457336,0.010753623,5,recipe,rand_forest,3
basic_tune_rf,Preprocessor1_Model12,accuracy,0.9385049,0.004237095,5,recipe,rand_forest,4
basic_tune_rf,Preprocessor1_Model12,roc_auc,0.8447027,0.010459633,5,recipe,rand_forest,4
spline_rf,Preprocessor08_Model1,accuracy,0.9375615,0.004066415,5,recipe,rand_forest,5
spline_rf,Preprocessor08_Model1,roc_auc,0.8446264,0.011269984,5,recipe,rand_forest,5


In [56]:
216 / 2 / 4

# 8. Last fit and final prediction

## 8.1. Extract the best wf

In [63]:
best_wf <- wf_set_fitted %>%
  extract_workflow_set_result("spline_xgb") %>%
  select_best(metric = "roc_auc")

In [62]:
best_wf

deg_free,.config
<int>,<chr>
4,Preprocessor01_Model1


## 8.2 last fit

In [67]:
last_fit <- wf_set_fitted %>%
  extract_workflow("spline_xgb") %>% #workflowid
  finalize_workflow(best_wf) %>%
  last_fit(split= data_split) # last fitting and prediction


last_fit %>% collect_metrics()
last_fit %>% collect_predictions()

.metric,.estimator,.estimate,.config
<chr>,<chr>,<dbl>,<chr>
accuracy,binary,0.9622642,Preprocessor1_Model1
roc_auc,binary,0.9037433,Preprocessor1_Model1


id,.pred_0,.pred_1,.row,.pred_class,target,.config
<chr>,<dbl>,<dbl>,<int>,<fct>,<fct>,<chr>
train/test split,0.95248568,0.04751432,2,0,0,Preprocessor1_Model1
train/test split,0.01885670,0.98114330,8,1,1,Preprocessor1_Model1
train/test split,0.96964169,0.03035831,44,0,0,Preprocessor1_Model1
train/test split,0.96954614,0.03045386,46,0,0,Preprocessor1_Model1
train/test split,0.93514043,0.06485957,47,0,0,Preprocessor1_Model1
train/test split,0.94388640,0.05611360,48,0,0,Preprocessor1_Model1
train/test split,0.98514128,0.01485872,56,0,0,Preprocessor1_Model1
train/test split,0.98789412,0.01210588,61,0,0,Preprocessor1_Model1
train/test split,0.96130115,0.03869885,65,0,0,Preprocessor1_Model1
train/test split,0.96841633,0.03158367,71,0,0,Preprocessor1_Model1


Uniquement les parties 1,2,3 (models), 7 et 8
