In [9]:
library(dplyr)
library(tidyr)

# Annotate the per-condition hotspot SUMMARY tables and attach RPM values.
#
# For every threshold directory under `base_path` this script:
#   1. reads Database_annotated.csv (the threshold is skipped if it is missing);
#   2. reads RPMCOmparison/FULLrpm_hgtgts.csv when it exists;
#   3. for every condition, reshapes Hotspot_<cond>_thresh<t>_SUMMARY.csv to
#      long format, joins it with the database on `hotspotID`, adds the three
#      RPM columns and writes HotspotSummary_<cond>_thresh<t>_ANNOTATED.csv
#      into the same threshold directory.
#
# Every situation that leaves an RPM column entirely NA (missing RPM file,
# missing or ambiguous RPM column) is reported on the console instead of
# happening silently.
thresholds <- c(5, 6, 7)
conditions <- c("DMSO", "Tazemetostat", "Valemetostat")
base_path <- "/sharedFolder/Results/Hotspot/Revision1/FULL/"

for (threshold in thresholds) {
  cat("▶ Processing threshold:", threshold, "\n")

  result_path <- file.path(base_path, as.character(threshold))
  db_path <- file.path(result_path, "Database_annotated.csv")
  rpm_path <- file.path(result_path, "RPMCOmparison", "FULLrpm_hgtgts.csv")

  if (!file.exists(db_path)) {
    cat("⚠️ Missing annotated DB:", db_path, "\n")
    next
  }

  db <- read.csv(db_path)

  # The RPM table is optional; without it the RPM columns stay NA.
  has_rpm <- file.exists(rpm_path)
  if (has_rpm) {
    # check.names = FALSE keeps column names such as "AID-WT_DMSO" verbatim.
    rpm <- read.csv(rpm_path, header = TRUE, check.names = FALSE)
    rpm_names <- colnames(rpm)
  } else {
    cat("⚠️ Missing RPM table (RPM columns will be NA):", rpm_path, "\n")
  }

  for (cond in conditions) {
    cat("  ↪︎ Annotating", cond, "\n")

    sum_file <- file.path(result_path, paste0("Hotspot_", cond, "_thresh", threshold, "_SUMMARY.csv"))
    if (!file.exists(sum_file)) {
      cat("   ❌ File missing:", sum_file, "\n")
      next
    }

    df <- read.csv(sum_file)

    # Long format: one row per (SUMMARY column, hotspot ID); NA cells dropped.
    df_long <- df %>%
      pivot_longer(cols = everything(), names_to = "HotspotCategory", values_to = "hotspotID") %>%
      filter(!is.na(hotspotID))

    # Join with the annotated database. A missing, empty or "NULL" annotation
    # is replaced by a "<Chromosome>:<ChrStart>-<ChrEnd>" label.
    # NOTE(review): a hotspotID that is absent from `db` has NA coordinates
    # after the left join and is therefore labelled "NA:NA-NA" - confirm that
    # every SUMMARY hotspot is present in the database.
    annotated <- df_long %>%
      left_join(db, by = "hotspotID") %>%
      mutate(annotation = ifelse(is.na(annotation) | annotation == "" | annotation == "NULL",
                           paste0(Chromosome, ":", ChrStart, "-", ChrEnd),
                           annotation))

    # RPM columns default to NA and are filled only when a source column exists.
    annotated$RPM_WT <- NA
    annotated$RPM_Idelalisib <- NA
    annotated$RPM_KO <- NA

    if (has_rpm) {
      # Row of `rpm` for each hotspot (NA when not found), keyed on the first
      # column of the RPM table. Computed once and reused for all lookups.
      rpm_rows <- match(annotated$hotspotID, rpm[, 1])

      # WT and Idelalisib columns are looked up by their exact name.
      col_wt  <- paste0("AID-WT_", cond)
      col_ide <- paste0("MEC-1_Idelalisib_", cond)

      # The KO column carries a numeric suffix, so it is located by substring;
      # fixed = TRUE matches the condition name literally, not as a regex.
      ko_col_idx <- grep(paste0("MEC-1_AID-KO_", cond), rpm_names, fixed = TRUE)

      if (col_wt %in% rpm_names) {
        annotated$RPM_WT <- rpm[rpm_rows, col_wt]
      } else {
        cat("   ⚠️ RPM column not found (RPM_WT set to NA):", col_wt, "\n")
      }

      if (col_ide %in% rpm_names) {
        annotated$RPM_Idelalisib <- rpm[rpm_rows, col_ide]
      } else {
        cat("   ⚠️ RPM column not found (RPM_Idelalisib set to NA):", col_ide, "\n")
      }

      # Only an unambiguous (single) KO match is used.
      if (length(ko_col_idx) == 1) {
        annotated$RPM_KO <- rpm[rpm_rows, ko_col_idx]
      } else {
        cat("   ⚠️ Expected exactly 1 KO RPM column for", cond, "but found",
            length(ko_col_idx), "(RPM_KO set to NA)\n")
      }
    }

    # Keep only the requested columns.
    final_df <- annotated %>%
      select(annotation, HotspotCategory, RPM_WT, RPM_Idelalisib, RPM_KO)

    # Write the de-duplicated table next to the SUMMARY input.
    out_file <- file.path(result_path, paste0("HotspotSummary_", cond, "_thresh", threshold, "_ANNOTATED.csv"))
    write.csv(unique(final_df), out_file, row.names = FALSE)
  }
}

cat("✅ Done: tutti i file annotati con RPM e annotation corretta.\n")


▶ Processing threshold: 5 
  ↪︎ Annotating DMSO 
  ↪︎ Annotating Tazemetostat 
  ↪︎ Annotating Valemetostat 
▶ Processing threshold: 6 
  ↪︎ Annotating DMSO 
  ↪︎ Annotating Tazemetostat 
  ↪︎ Annotating Valemetostat 
▶ Processing threshold: 7 
  ↪︎ Annotating DMSO 
  ↪︎ Annotating Tazemetostat 
  ↪︎ Annotating Valemetostat 
✅ Done: tutti i file annotati con RPM e annotation corretta.


In [13]:
library(dplyr)
library(tidyr)

# Annotate the pairwise-comparison hotspot SUMMARY tables.
#
# The annotated database is read from <base_path>/<threshold>/, whereas the
# comparison SUMMARY files are read from - and the annotated results written
# back to - <base_path>/Idelalisib/<threshold>/.
thresholds <- c(5, 6, 7)
comparisons <- c("DMSOvsTaze", "TazevsVale", "DMSOvsVale")
base_path <- "/sharedFolder/Results/Hotspot/Revision1/FULL/"

for (threshold in thresholds) {
  cat("▶ Processing threshold:", threshold, "\n")

  threshold_dir <- as.character(threshold)

  # Without the annotated database nothing can be done for this threshold.
  db_path <- file.path(base_path, threshold_dir, "Database_annotated.csv")
  if (!file.exists(db_path)) {
    cat("⚠️ Missing DB:", db_path, "\n")
    next
  }
  db <- read.csv(db_path)

  # Input and output share one folder: Idelalisib/<threshold>.
  summary_dir <- file.path(base_path, "Idelalisib", threshold_dir)

  for (comp in comparisons) {
    cat("  ↪︎ Annotating", comp, "\n")

    summary_file <- file.path(
      summary_dir,
      sprintf("Hotspot_%s_thresh%s_SUMMARY.csv", comp, threshold)
    )
    if (!file.exists(summary_file)) {
      cat("   ❌ File missing:", summary_file, "\n")
      next
    }

    # One pipeline: reshape to long format, drop NA IDs, join the database,
    # fill blank annotations with a "<Chromosome>:<ChrStart>-<ChrEnd>" label,
    # keep the two output columns, sort by category and de-duplicate.
    # NOTE(review): IDs missing from `db` end up labelled "NA:NA-NA" because
    # their coordinates are NA after the left join - confirm this cannot occur.
    final_df <- read.csv(summary_file) %>%
      pivot_longer(
        cols = everything(),
        names_to = "HotspotCategory",
        values_to = "hotspotID"
      ) %>%
      filter(!is.na(hotspotID)) %>%
      left_join(db, by = "hotspotID") %>%
      mutate(
        annotation = ifelse(
          annotation %in% c(NA, "", "NULL"),
          paste0(Chromosome, ":", ChrStart, "-", ChrEnd),
          annotation
        )
      ) %>%
      select(annotation, HotspotCategory) %>%
      arrange(HotspotCategory) %>%
      unique()

    # The result is written in the same folder as the SUMMARY input.
    out_file <- file.path(
      summary_dir,
      sprintf("HotspotSummary_%s_thresh%s_ANNOTATED.csv", comp, threshold)
    )
    write.csv(final_df, out_file, row.names = FALSE)
  }
}

cat("✅ Fatto! Tutti i confronti sono stati annotati e salvati correttamente nella cartella Idelalisib.\n")


▶ Processing threshold: 5 
  ↪︎ Annotating DMSOvsTaze 
  ↪︎ Annotating TazevsVale 
  ↪︎ Annotating DMSOvsVale 
▶ Processing threshold: 6 
  ↪︎ Annotating DMSOvsTaze 
  ↪︎ Annotating TazevsVale 
  ↪︎ Annotating DMSOvsVale 
▶ Processing threshold: 7 
  ↪︎ Annotating DMSOvsTaze 
  ↪︎ Annotating TazevsVale 
  ↪︎ Annotating DMSOvsVale 
✅ Fatto! Tutti i confronti sono stati annotati e salvati correttamente nella cartella Idelalisib.


In [None]:
# Bare string literal: evaluating it only echoes the path; nothing is read or
# written. The path has the same shape as the SUMMARY inputs used by the
# comparison-annotation loop
# (Idelalisib/<threshold>/Hotspot_<comparison>_thresh<threshold>_SUMMARY.csv),
# here with threshold 5 and comparison DMSOvsTaze.
"/sharedFolder/Results/Hotspot/Revision1/FULL/Idelalisib/5/Hotspot_DMSOvsTaze_thresh5_SUMMARY.csv"