# Préparation des variables de santé

In [1]:
library(tidyverse)
library(factoextra)

# Load the prepared pilot dataset
df_pilot1_2022 <- readRDS(
  file = "../../../data/qc2022/preparation_donnees/01_pilot1_2022.rds"
)

# Columns kept for the health variables: the "id" column plus the meat,
# drink and smoking indicator columns.
variables_sante <- c(
  "id",
  # Meat consumption frequency
  "cons_meat_never", "cons_meat_almost_never", "cons_meat_once_month",
  "cons_meat_once_week", "cons_meat_few_week", "cons_meat_daily",
  "cons_meat_few_daily",
  # Drink indicators
  "cons_redWineDrink", "cons_whiteWineDrink", "cons_roseDrink",
  "cons_spiritDrink", "cons_bubbleDrink", "cons_beerDrink",
  "cons_microDrink", "cons_cocktailDrink", "cons_noDrink",
  # Smoking frequency
  "cons_Smoke_never", "cons_Smoke_few_times_year", "cons_Smoke_month",
  "cons_Smoke_once_week", "cons_Smoke_few_times_week",
  "cons_Smoke_once_day", "cons_Smoke_few_times_day"
)

# Keep only those columns; all_of() errors if a listed column is absent
df_sante <- select(df_pilot1_2022, all_of(variables_sante))

-- [1mAttaching core tidyverse packages[22m ------------------------ tidyverse 2.0.0 --
[32mv[39m [34mdplyr    [39m 1.1.4     [32mv[39m [34mreadr    [39m 2.1.5
[32mv[39m [34mforcats  [39m 1.0.0     [32mv[39m [34mstringr  [39m 1.5.1
[32mv[39m [34mggplot2  [39m 3.5.1     [32mv[39m [34mtibble   [39m 3.2.1
[32mv[39m [34mlubridate[39m 1.9.3     [32mv[39m [34mtidyr    [39m 1.3.1
[32mv[39m [34mpurrr    [39m 1.0.2     
-- [1mConflicts[22m ------------------------------------------ tidyverse_conflicts() --
[31mx[39m [34mdplyr[39m::[32mfilter()[39m masks [34mstats[39m::filter()
[31mx[39m [34mdplyr[39m::[32mlag()[39m    masks [34mstats[39m::lag()
[36mi[39m Use the conflicted package ([3m[34m<http://conflicted.r-lib.org/>[39m[23m) to force all conflicts to become errors
Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa



In [2]:
# Print the frequency table of every selected column except "id"
for (v in variables_sante) {
  # The identifier column has no meaningful frequency table
  if (v == "id") next

  cat("Table pour la variable:", v, "\n")
  print(table(df_sante[[v]]))
  cat("\n\n")
}

Table pour la variable: cons_meat_never 

   0    1 
1455   45 


Table pour la variable: cons_meat_almost_never 

   0    1 
1480   20 


Table pour la variable: cons_meat_once_month 

   0    1 
1459   41 


Table pour la variable: cons_meat_once_week 

   0    1 
1351  149 


Table pour la variable: cons_meat_few_week 

  0   1 
725 775 


Table pour la variable: cons_meat_daily 

   0    1 
1155  345 


Table pour la variable: cons_meat_few_daily 

   0    1 
1375  125 


Table pour la variable: cons_redWineDrink 

   0    1 
1186  314 


Table pour la variable: cons_whiteWineDrink 

   0    1 
1299  201 


Table pour la variable: cons_roseDrink 

   0    1 
1451   49 


Table pour la variable: cons_spiritDrink 

   0    1 
1401   99 


Table pour la variable: cons_noDrink 

   0    1 
1191  309 


Table pour la variable: cons_Smoke_never 

   0    1 
 251 1249 


Table pour la variable: cons_Smoke_few_times_year 

   0    1 
1463   37 


Table pour la variable: cons_Smoke_month 



### Transformations

- cons_meat : Devrait être une variable ordinale
  - 0 Au plus une fois par mois (never, almost_never, once_month)
  - 0.5 Chaque semaine (once_week, few_week)
  - 1 Chaque jour (daily, few_daily)

- cons_*Drink (p. ex. cons_redWineDrink) : On peut les conserver telles quelles

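- cons_Smoke : Devrait être une variable ordinale
  - 0 Jamais (never)
  - 0.5 Moins d'une fois par jour (few_times_year, month, once_week, few_times_week)
  - 1 Chaque jour (once_day, few_times_day)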

In [None]:
# Collapse the meat-consumption dummies into one ordinal score
#-------------------------------------------------
# cons_Meat coding:
#   0   = at most once a month (never / almost never / once a month)
#   0.5 = weekly (once a week / a few times a week)
#   1   = daily (daily / several times a day)
#   NA  = none of the seven dummies equals 1
#
# Every level is tested explicitly. The previous nested if_else() used 1
# as a catch-all, so a row with no dummy set was counted as a daily
# consumer. case_when() treats a condition that evaluates to NA as not
# matched and moves on to the next one.

df_sante <- df_sante %>%
  mutate(
    cons_Meat = case_when(
      cons_meat_never == 1 |
        cons_meat_almost_never == 1 |
        cons_meat_once_month == 1 ~ 0,
      cons_meat_once_week == 1 |
        cons_meat_few_week == 1 ~ 0.5,
      cons_meat_daily == 1 |
        cons_meat_few_daily == 1 ~ 1,
      .default = NA_real_
    )
  ) %>%
  # Drop the source dummies now that they are summarised by cons_Meat
  select(
    -all_of(c(
      "cons_meat_never",
      "cons_meat_almost_never",
      "cons_meat_once_month",
      "cons_meat_once_week",
      "cons_meat_few_week",
      "cons_meat_daily",
      "cons_meat_few_daily"
    ))
  )

In [5]:
# Collapse the smoking dummies into one ordinal score
#-------------------------------------------------
# cons_Smoke coding:
#   0   = never
#   0.5 = less than daily (few times a year / monthly / once a week /
#         a few times a week)
#   1   = daily (once a day / several times a day)
#   NA  = none of the seven dummies equals 1
#
# Every level is tested explicitly. The previous nested if_else() used 1
# as a catch-all, so a row with no dummy set was counted as a daily
# smoker. case_when() treats a condition that evaluates to NA as not
# matched and moves on to the next one.

df_sante <- df_sante %>%
  mutate(
    cons_Smoke = case_when(
      cons_Smoke_never == 1 ~ 0,
      cons_Smoke_few_times_year == 1 |
        cons_Smoke_month == 1 |
        cons_Smoke_once_week == 1 |
        cons_Smoke_few_times_week == 1 ~ 0.5,
      cons_Smoke_once_day == 1 |
        cons_Smoke_few_times_day == 1 ~ 1,
      .default = NA_real_
    )
  ) %>%
  # Drop the source dummies now that they are summarised by cons_Smoke
  select(
    -all_of(c(
      "cons_Smoke_never",
      "cons_Smoke_few_times_year",
      "cons_Smoke_month",
      "cons_Smoke_once_week",
      "cons_Smoke_few_times_week",
      "cons_Smoke_once_day",
      "cons_Smoke_few_times_day"
    ))
  )

In [7]:
# Columns inspected after recoding: "id", the two recoded scores and a
# subset of the drink indicators.
# NOTE(review): cons_bubbleDrink, cons_beerDrink, cons_microDrink and
# cons_cocktailDrink are selected into df_sante earlier but are not
# listed here -- confirm this omission is intentional.
variables_sante_clust <- c(
  "id", "cons_Meat",
  "cons_redWineDrink", "cons_whiteWineDrink", "cons_roseDrink",
  "cons_spiritDrink", "cons_noDrink",
  "cons_Smoke"
)

In [8]:
# Print the frequency table of every listed column except "id"
for (v in variables_sante_clust) {
  # The identifier column has no meaningful frequency table
  if (v == "id") next

  cat("Table pour la variable:", v, "\n")
  print(table(df_sante[[v]]))
  cat("\n\n")
}

Table pour la variable: cons_Meat 

  0 0.5   1 
106 924 470 


Table pour la variable: cons_redWineDrink 

   0    1 
1186  314 


Table pour la variable: cons_whiteWineDrink 

   0    1 
1299  201 


Table pour la variable: cons_roseDrink 

   0    1 
1451   49 


Table pour la variable: cons_spiritDrink 

   0    1 
1401   99 


Table pour la variable: cons_noDrink 

   0    1 
1191  309 


Table pour la variable: cons_Smoke 

   0  0.5    1 
1249   81  170 




In [9]:
# Write the prepared health variables to disk as an RDS file
saveRDS(
  object = df_sante,
  file = "../../../data/qc2022/preparation_donnees/02_pilot1_2022_sante.rds"
)