In [None]:
# Library ----------------------------------------------------------------
library(dplyr)
library(tidyr)
library(ggplot2)
library(lubridate)

# Data --------------------------------------------------------------------
# Load the clustered 2022 app respondents saved by the previous preparation
# step; the path is resolved relative to the current working directory.
app_data <- readRDS(
  file = "../../../data/qc2022/preparation_donnees/05_app_2022_clustered.rds"
)

In [None]:
# Tracking Each Day's Respondents -------------------------------------
# For every observed day, take all respondents seen up to and including that
# day (cumulative window), assign each one to its nearest centroid, and stack
# the per-day snapshots into `all_assignments`.

# Map each distinct calendar date to a consecutive index (1 = earliest date).
app_data$date <- as.Date(app_data$date)
unique_dates <- sort(unique(app_data$date))
date_to_day_number <- data.frame(
  date = unique_dates,
  day  = seq_along(unique_dates)
)

app_data <- app_data %>%
  left_join(date_to_day_number, by = "date") %>%
  arrange(date)

# NOTE(review): `kmeans_result` and `assign_clusters()` are not defined in
# this script; they must already exist in the session -- confirm the source.
# Assuming the K-means centroids are already scaled like the features.
centroids <- as.data.frame(kmeans_result$centers)

# Loop-invariant setup: the features used for the distance computation are
# the columns shared by the data and the centroids, excluding vote_intent.
feature_cols <- intersect(
  setdiff(names(app_data), "vote_intent"),
  names(centroids)
)
centroids_features <- centroids[, feature_cols, drop = FALSE]

# One slot per day; filled in the loop and bound once at the end instead of
# growing a data frame on every iteration.
daily_assignments <- vector("list", nrow(date_to_day_number))

for (current_day in date_to_day_number$day) {

  # Cumulative window: everything observed up to the current day. The result
  # goes into its own object so that `app_data` is never overwritten.
  data_day_processed <- app_data %>%
    filter(day <= current_day)

  if (nrow(data_day_processed) == 0) next

  data_features <- data_day_processed[, feature_cols, drop = FALSE]

  # Assign each respondent to the nearest centroid
  cluster_assignment <- assign_clusters(
    data_features,
    centroids_features
  )

  # Store the cluster and tag every row with the snapshot day (this replaces
  # the respondent's own day index with the cumulative window's end day).
  data_day_processed$cluster <- cluster_assignment
  data_day_processed$day     <- current_day

  daily_assignments[[current_day]] <- data_day_processed
}

# Combine all daily snapshots; days skipped above stay NULL and are ignored.
all_assignments <- bind_rows(daily_assignments)

In [None]:
# Save the stacked per-day cluster assignments.
# Base `saveRDS()` is used because readr (which provides `write_rds()`) is
# never attached in this script; `readRDS()` reads the result the same way.
# NOTE(review): this path is relative to the working directory, whereas the
# input above is read through "../../../data/..." -- confirm the intended
# output location.
saveRDS(
  all_assignments,
  file = "data/qc2022/preparation_donnees/06_app2022_clustered_voteIntent.rds"
)