In [None]:
# Install dependencies only when they are missing, so that re-running this
# cell does not contact the package repositories every time.
required_pkgs <- c("TCGAbiolinks", "SummarizedExperiment", "dplyr")

if (!requireNamespace("BiocManager", quietly = TRUE)) {
  install.packages("BiocManager")
}

# requireNamespace() returns FALSE (instead of erroring) for absent packages.
missing_pkgs <- required_pkgs[
  !vapply(required_pkgs, requireNamespace, logical(1), quietly = TRUE)
]
if (length(missing_pkgs) > 0) {
  BiocManager::install(missing_pkgs)
}

library(TCGAbiolinks)
library(SummarizedExperiment)
library(dplyr)

# Query the GDC for TCGA-THCA gene-level expression counts.
# The GDC retired its HTSeq pipelines (Data Release 32); gene expression
# quantification is now produced by the "STAR - Counts" workflow, and
# GDCquery() rejects the former "HTSeq - Counts" value.
query <- GDCquery(
  project = "TCGA-THCA",
  data.category = "Transcriptome Profiling",
  data.type = "Gene Expression Quantification",
  workflow.type = "STAR - Counts"
)

# Download the files matched by the query, then assemble them into a single
# object (requires network access).
GDCdownload(query)
se <- GDCprepare(query)   # SummarizedExperiment
# NOTE(review): STAR - Counts results presumably carry several assays
# (raw counts plus normalized values) -- check assayNames(se) before use.
se

In [None]:
# Count samples per short type code, then per full sample-type label.
table(colData(se)[["shortLetterCode"]])
table(colData(se)[["sample_type"]])

In [None]:
# Restrict the experiment to primary-tumor and solid-normal columns, then
# re-tabulate the sample types to confirm the subset.
tumor_normal_types <- c("Primary Tumor", "Solid Tissue Normal")
is_tumor_or_normal <- colData(se)[["sample_type"]] %in% tumor_normal_types
se_TN <- se[, is_tumor_or_normal]
table(colData(se_TN)[["sample_type"]])

In [None]:
# Fetch the clinical table for the TCGA-THCA project and preview its first rows.
clin <- GDCquery_clinic("TCGA-THCA", type = "clinical")
head(clin, n = 6L)

In [None]:
# Build per-sample metadata and attach the clinical table to it.
sample_barcodes <- colnames(se_TN)
# The first 12 characters of each column name are used as the patient
# identifier (presumably the TCGA-XX-XXXX participant prefix -- confirm).
patient_id <- substr(sample_barcodes, 1, 12)

meta <- as.data.frame(colData(se_TN))
meta$patient_id <- patient_id

# Show the clinical column names so the join key can be checked by eye.
names(clin)

# The patient identifier column is not named consistently across clinical
# tables, so resolve it from the known candidates rather than hard-coding
# one name; the first candidate present in `clin` is used.
clin_key_candidates <- c(
  "submitter_id", "case_submitter_id", "bcr_patient_barcode"
)
clin_key <- intersect(clin_key_candidates, names(clin))
if (length(clin_key) == 0) {
  stop(
    "No patient identifier column found in `clin`; expected one of: ",
    paste(clin_key_candidates, collapse = ", "),
    call. = FALSE
  )
}
clin_key <- clin_key[[1]]

# Left join keeps every sample row, including those without clinical data.
# NOTE(review): columns present in both tables get .x/.y suffixes.
meta2 <- meta %>%
  left_join(clin, by = setNames(clin_key, "patient_id"))

In [None]:
# Keep exactly one primary-tumor sample per patient and subset the experiment
# to those samples.
# Assumes the sample metadata carries a `barcode` column whose values match
# colnames(se_TN) -- verify against names(meta2).
stopifnot("barcode" %in% names(meta2))

meta_tumor <- meta2 %>%
  filter(sample_type == "Primary Tumor") %>%
  # Sort by the row's own barcode column so that the sample kept per patient
  # is deterministic. A free-standing vector of all barcodes would not line
  # up with the filtered rows.
  arrange(patient_id, barcode) %>%
  distinct(patient_id, .keep_all = TRUE)

# Select the retained samples by column name.
se_patient <- se_TN[, meta_tumor$barcode]