In [4]:
# Library ----------------------------------------------------------------
library(dplyr)
library(tidyr)
library(ggplot2)
library(sondr)
library(clessnize)
library(ggtext)
library(tidytext)

# 1. Load Data and Clustering Results ---------------------------
# All clustering inputs are read from the same private folder, so the
# directory is defined once and every file is resolved against it.
clustering_data_dir <- "../../../_PrivateFolder_datagotchi_federal_2025/data/clustering/qc2022"

kmeans_result <- readRDS(file.path(clustering_data_dir, "03_pilot1_kmeans.rds"))
df_pilot1_2022 <- readRDS(file.path(clustering_data_dir, "03_pilot1_2022.rds"))
cluster_means <- read.csv(
  file.path(clustering_data_dir, "03_pilot1_clusters_desc.csv")
)
df_pilot1_2022_with_issues <- readRDS(
  file.path(clustering_data_dir, "datagotchiQuebec2022_pilote1Clean.rds")
)


In [5]:
# Keep only the respondent id and the issue columns
Df_pilot1_issues <- select(
  df_pilot1_2022_with_issues,
  id,
  starts_with("issue_")
)

# Attach the issue columns to the pilot data frame loaded earlier,
# matching rows on `id`
Df_validation <- merge(
  x = df_pilot1_2022,
  y = Df_pilot1_issues,
  by = "id"
)

In [6]:
# Set up the inputs shared by the plotting functions below

# Directory where the charts are saved
output_path <- "../../../_PrivateFolder_datagotchi_federal_2025/graph/clustering/qc_2022"

# Names of the issue columns to plot (one chart per entry, in this order)
variables_issues <- c(
  "issue_systemicRacismExistQc", "issue_proImmigration",
  "issue_reduceDeficit", "issue_proPrivateHealth",
  "issue_protectFrench", "issue_proQcIndependance",
  "issue_proTeachersReligiousSymbols", "issue_proStrictEnviroRules",
  "issue_QcTooPolCorrect", "issue_abortionMoreAccessibleQc",
  "issue_begin3eLienImportant", "issue_respectGHGReductionImportant"
)

# Print the columns available after the merge
names(Df_validation)

 [1] "id"                                 "act_Gym"                           
 [3] "act_Walk"                           "act_Run"                           
 [5] "act_Yoga"                           "act_Other"                         
 [7] "act_None"                           "act_Fishing"                       
 [9] "act_Hunting"                        "act_VisitsMuseumsGaleries"         
[11] "act_MotorizedOutdoorActivities"     "act_Volunteering"                  
[13] "app_swag_Classique"                 "app_swag_Casual"                   
[15] "app_swag_Sport"                     "app_swag_Other"                    
[17] "animal_cat"                         "animal_dog"                        
[19] "animal_noPet"                       "app_withTattoo"                    
[21] "animal_other"                       "cons_redWineDrink"                 
[23] "cons_whiteWineDrink"                "cons_roseDrink"                    
[25] "cons_spiritDrink"                   "cons_bubb

In [7]:
# Plot the distribution of one issue variable within each cluster and save it
# as a PDF, so the issue charts do not have to be written one by one.
#
# Arguments:
#   data            Data frame holding a `cluster_name` column and the issue
#                   column to plot.
#   issue_variable  Issue column, captured with ensym(): pass a bare column
#                   name, a string literal, or an unquoted symbol such as
#                   `!!sym(name)` when the name is stored in a variable.
#   output_path     Directory in which the PDF is written; it is created if
#                   it does not exist yet.
#
# Returns the value of ggsave() for the file
#   <output_path>/validationCluster_<issue>.pdf
generate_graph <- function(data, issue_variable, output_path) {

  # Capture the column as a symbol and keep its printable name
  issue_sym <- ensym(issue_variable)
  issue_name <- as_label(issue_sym)

  # Fail early with a readable message instead of a tidy-eval error
  missing_cols <- setdiff(c("cluster_name", issue_name), names(data))
  if (length(missing_cols) > 0) {
    stop(
      "Column(s) not found in `data`: ",
      paste(missing_cols, collapse = ", "),
      call. = FALSE
    )
  }

  # Respondents per (cluster, answer), then the share of each answer inside
  # its cluster. Missing answers form their own group and therefore count
  # in the cluster total.
  proportions <- data |>
    group_by(cluster_name, !!issue_sym) |>
    summarise(nombre_de_repondants = n(), .groups = "drop") |>
    group_by(cluster_name) |>
    mutate(
      nombre_total_de_repondants = sum(nombre_de_repondants),
      proportion = nombre_de_repondants / nombre_total_de_repondants
    ) |>
    ungroup()

  # One panel per cluster
  graph <- ggplot(
    proportions,
    aes(x = !!issue_sym, y = proportion, fill = factor(cluster_name))
  ) +
    geom_col() +
    facet_wrap(~ cluster_name) +
    labs(
      x = issue_name,
      y = "Proportion",
      fill = "Cluster",
      title = paste("Distribution of", issue_name, "by Cluster")
    ) +
    theme_clean_light()

  # ggsave() does not reliably create missing folders, so make sure the
  # target directory exists before writing
  dir.create(output_path, recursive = TRUE, showWarnings = FALSE)

  ggsave(
    filename = file.path(
      output_path,
      paste0("validationCluster_", issue_name, ".pdf")
    ),
    plot = graph,
    width = 16,
    height = 12
  )
}

# Produce the per-cluster chart for every issue variable.
# Each name is turned into a symbol and unquoted so that generate_graph()
# captures the column itself rather than the loop variable.
lapply(
  X = variables_issues,
  FUN = \(issue_name) generate_graph(
    Df_validation,
    !!sym(issue_name),
    output_path
  )
)

[[1]]
[1] "../../../_PrivateFolder_datagotchi_federal_2025/graph/clustering/qc_2022/validationCluster_issue_systemicRacismExistQc.pdf"

[[2]]
[1] "../../../_PrivateFolder_datagotchi_federal_2025/graph/clustering/qc_2022/validationCluster_issue_proImmigration.pdf"

[[3]]
[1] "../../../_PrivateFolder_datagotchi_federal_2025/graph/clustering/qc_2022/validationCluster_issue_reduceDeficit.pdf"

[[4]]
[1] "../../../_PrivateFolder_datagotchi_federal_2025/graph/clustering/qc_2022/validationCluster_issue_proPrivateHealth.pdf"

[[5]]
[1] "../../../_PrivateFolder_datagotchi_federal_2025/graph/clustering/qc_2022/validationCluster_issue_protectFrench.pdf"

[[6]]
[1] "../../../_PrivateFolder_datagotchi_federal_2025/graph/clustering/qc_2022/validationCluster_issue_proQcIndependance.pdf"

[[7]]
[1] "../../../_PrivateFolder_datagotchi_federal_2025/graph/clustering/qc_2022/validationCluster_issue_proTeachersReligiousSymbols.pdf"

[[8]]
[1] "../../../_PrivateFolder_datagotchi_federal_2025/graph/clusterin

In [8]:
 
# Plot the distribution of one issue variable across all respondents and save
# it as a PDF, as a baseline to compare the per-cluster charts against. ----
#
# Arguments:
#   data            Data frame holding the issue column to plot.
#   issue_variable  Issue column, captured with ensym(): pass a bare column
#                   name, a string literal, or an unquoted symbol such as
#                   `!!sym(name)` when the name is stored in a variable.
#   output_path     Directory in which the PDF is written; it is created if
#                   it does not exist yet.
#
# Returns the value of ggsave() for the file
#   <output_path>/distribution_all_<issue>.pdf
generate_graph_all <- function(data, issue_variable, output_path) {

  # Capture the column as a symbol and keep its printable name
  issue_sym <- ensym(issue_variable)
  issue_name <- as_label(issue_sym)

  # Fail early with a readable message instead of a tidy-eval error
  if (!issue_name %in% names(data)) {
    stop("Column not found in `data`: ", issue_name, call. = FALSE)
  }

  # Respondents per answer, then the share of each answer in the whole
  # sample. Missing answers form their own group and therefore count in
  # the total.
  proportions <- data |>
    group_by(!!issue_sym) |>
    summarise(nombre_de_repondants = n(), .groups = "drop") |>
    mutate(
      nombre_total_de_repondants = sum(nombre_de_repondants),
      proportion = nombre_de_repondants / nombre_total_de_repondants
    )

  # Bars are coloured by answer; the legend would only repeat the x axis,
  # so it is hidden
  graph <- ggplot(
    proportions,
    aes(x = !!issue_sym, y = proportion, fill = factor(!!issue_sym))
  ) +
    geom_col(show.legend = FALSE) +
    labs(
      x = issue_name,
      y = "Proportion",
      title = paste("Distribution of", issue_name, "among all respondents")
    ) +
    theme_clean_light()

  # ggsave() does not reliably create missing folders, so make sure the
  # target directory exists before writing
  dir.create(output_path, recursive = TRUE, showWarnings = FALSE)

  ggsave(
    filename = file.path(
      output_path,
      paste0("distribution_all_", issue_name, ".pdf")
    ),
    plot = graph,
    width = 16,
    height = 12
  )
}

# Produce the all-respondents chart for every issue variable.
# Each name is turned into a symbol and unquoted so that
# generate_graph_all() captures the column itself rather than the loop
# variable.
lapply(
  X = variables_issues,
  FUN = \(issue_name) generate_graph_all(
    Df_validation,
    !!sym(issue_name),
    output_path
  )
)

[[1]]
[1] "../../../_PrivateFolder_datagotchi_federal_2025/graph/clustering/qc_2022/distribution_all_issue_systemicRacismExistQc.pdf"

[[2]]
[1] "../../../_PrivateFolder_datagotchi_federal_2025/graph/clustering/qc_2022/distribution_all_issue_proImmigration.pdf"

[[3]]
[1] "../../../_PrivateFolder_datagotchi_federal_2025/graph/clustering/qc_2022/distribution_all_issue_reduceDeficit.pdf"

[[4]]
[1] "../../../_PrivateFolder_datagotchi_federal_2025/graph/clustering/qc_2022/distribution_all_issue_proPrivateHealth.pdf"

[[5]]
[1] "../../../_PrivateFolder_datagotchi_federal_2025/graph/clustering/qc_2022/distribution_all_issue_protectFrench.pdf"

[[6]]
[1] "../../../_PrivateFolder_datagotchi_federal_2025/graph/clustering/qc_2022/distribution_all_issue_proQcIndependance.pdf"

[[7]]
[1] "../../../_PrivateFolder_datagotchi_federal_2025/graph/clustering/qc_2022/distribution_all_issue_proTeachersReligiousSymbols.pdf"

[[8]]
[1] "../../../_PrivateFolder_datagotchi_federal_2025/graph/clustering/qc_20