In [None]:
# Library ----------------------------------------------------------------
library(dplyr)
library(tidyr)
library(ggplot2)
library(lubridate)

# Data
# Load the 2022 app dataset from the data-preparation folder (relative path,
# so the working directory matters). The file name suggests cluster labels
# were already attached in an earlier step — TODO confirm.
app_data <- readRDS("../../../data/qc2022/preparation_donnees/05_app_2022_clustered.rds")

In [None]:
# 4. Tracking Each Day's Respondents -------------------------------------
# Coerce the date column to Date, then build a lookup table that numbers the
# distinct dates 1..n in chronological order.
app_data$date <- as.Date(app_data$date)
unique_dates <- sort(unique(app_data$date))
date_to_day_number <- data.frame(
  date = unique_dates,
  day  = seq_len(length(unique_dates))
)

# Attach the day index to every respondent row, then order rows by date.
app_data <- arrange(
  left_join(app_data, date_to_day_number, by = "date"),
  date
)

# Assign respondents to their nearest K-means centroid for each day, using a
# cumulative window (all respondents seen up to and including that day).
#
# Per-day results are collected in a preallocated list and bound once after
# the loop: calling bind_rows() on a growing data frame at every iteration
# recopies all previously accumulated rows each time.
day_numbers <- date_to_day_number$day
daily_assignments <- vector("list", length(day_numbers))

# Centroids of the K-means fit as a data frame (presumably on the same scaled
# feature space that process_app_data() produces — TODO confirm).
centroids <- as.data.frame(kmeans_result$centers)

for (day_index in seq_along(day_numbers)) {
  current_day <- day_numbers[[day_index]]

  # Keep every respondent up to the current day (cumulative approach).
  data_day <- app_data %>%
    filter(day <= current_day)

  if (nrow(data_day) == 0) next

  # Process and scale this window's data with the original centering/scaling
  # parameters (process_app_data() is defined outside this section).
  data_day_processed <- process_app_data(data_day, orig_center, orig_scale)
  if (nrow(data_day_processed) == 0) next

  # Exclude vote_intent from the distance computation.
  feature_cols <- setdiff(names(data_day_processed), "vote_intent")

  # Keep only columns present in both the data and the centroids so the two
  # tables are aligned column-for-column.
  feature_cols <- intersect(feature_cols, names(centroids))
  data_features <- data_day_processed[, feature_cols, drop = FALSE]
  centroids_features <- centroids[, feature_cols, drop = FALSE]

  # Assign each respondent to the nearest centroid.
  cluster_assignment <- assign_clusters(
    data_features,
    centroids_features
  )

  # Record the cluster and the day this cumulative snapshot corresponds to.
  data_day_processed$cluster <- cluster_assignment
  data_day_processed$day     <- current_day

  # Skipped days leave their slot as NULL; bind_rows() drops those.
  daily_assignments[[day_index]] <- data_day_processed
}

# Single bind at the end. The leading empty data.frame() keeps the result
# class the same as the original incremental accumulation (and yields an
# empty data frame when every day was skipped).
all_assignments <- bind_rows(data.frame(), daily_assignments)