In [78]:
library(limma)
library(DESeq2)


In [79]:
# Charger le pseudo-bulk : 22sec
pb <- read.csv("pseudobulk_matrix.csv")


In [80]:
head(pb)

Unnamed: 0_level_0,class,donor_id,disease,ENSG00000000003,ENSG00000000005,ENSG00000000419,ENSG00000000457,ENSG00000000460,ENSG00000000938,ENSG00000000971,⋯,ENSG00000288600,ENSG00000288602,ENSG00000288605,ENSG00000288606,ENSG00000288611,ENSG00000288612,ENSG00000288622,ENSG00000288637,ENSG00000288642,ENSG00000288643
Unnamed: 0_level_1,<chr>,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,⋯,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
1,Astro,Donor_31,dementia || Alzheimer disease,71,7,242,173,139,1,4,⋯,0,98,37,6,0,21,4,3,15,13
2,Astro,Donor_228,dementia || Alzheimer disease,63,1,258,197,123,2,1,⋯,1,119,25,10,1,30,3,5,10,17
3,Astro,Donor_333,dementia || Alzheimer disease,110,12,140,102,101,1,5,⋯,0,69,13,7,1,18,1,3,39,16
4,Astro,Donor_545,dementia || Parkinson disease,19,3,154,112,69,1,1,⋯,0,91,21,7,1,19,2,6,13,18
5,Astro,Donor_609,normal,60,6,245,156,106,2,8,⋯,0,121,13,12,1,27,0,3,31,21
6,Astro,Donor_614,dementia || Alzheimer disease,30,4,107,73,56,2,0,⋯,1,70,5,8,0,10,0,0,8,5


In [81]:

# Séparer l’expression et les métadonnées
counts <- pb[, !(colnames(pb) %in% c("class", "donor_id", "disease"))]
counts <- as.matrix(counts)
# rownames(expr) <- paste(pb$class, pb$donor_id, pb$disease, sep="_")

counts

meta <- data.frame(
  class = pb$class,
  donor = pb$donor_id,
  disease = factor(pb$disease)
)


ENSG00000000003,ENSG00000000005,ENSG00000000419,ENSG00000000457,ENSG00000000460,ENSG00000000938,ENSG00000000971,ENSG00000001036,ENSG00000001084,ENSG00000001167,⋯,ENSG00000288600,ENSG00000288602,ENSG00000288605,ENSG00000288606,ENSG00000288611,ENSG00000288612,ENSG00000288622,ENSG00000288637,ENSG00000288642,ENSG00000288643
71,7,242,173,139,1,4,64,370,84,⋯,0,98,37,6,0,21,4,3,15,13
63,1,258,197,123,2,1,55,630,100,⋯,1,119,25,10,1,30,3,5,10,17
110,12,140,102,101,1,5,35,220,66,⋯,0,69,13,7,1,18,1,3,39,16
19,3,154,112,69,1,1,30,275,69,⋯,0,91,21,7,1,19,2,6,13,18
60,6,245,156,106,2,8,40,404,150,⋯,0,121,13,12,1,27,0,3,31,21
30,4,107,73,56,2,0,30,307,45,⋯,1,70,5,8,0,10,0,0,8,5
104,2,229,146,144,4,9,77,530,105,⋯,0,129,21,4,0,26,0,3,14,9
131,3,257,183,65,4,11,40,493,110,⋯,1,109,29,13,1,45,1,5,6,11
52,3,58,29,36,1,1,10,85,15,⋯,0,30,4,0,0,9,0,1,2,9
54,2,76,54,66,3,1,25,212,33,⋯,0,55,7,5,0,11,2,1,14,2


In [82]:
library(DESeq2)

# Boucle par cell type
for (ct in unique(meta$class)) {
  
  cat("\n=== Cell type :", ct, "===\n")
  
  idx <- meta$class == ct
  cts <- counts[idx, ]
  cts <- t(cts)  # DESeq2 = gènes × samples
  
  pheno <- meta[idx, ]
  
  # Vérifier au moins 2 conditions
  if (length(unique(pheno$disease)) < 2) {
    cat("Skip :", ct, "(not enough conditions)\n")
    next
  }
  
  # Création DESeq2
  dds <- DESeqDataSetFromMatrix(
    countData = cts,
    colData = pheno,
    design = ~ disease
  )
  
  dds <- DESeq(dds)
  
  ### --- 1) AD vs CTRL ---
  if (all(c("normal", "dementia || Alzheimer disease") %in% pheno$disease)) {
    
    res_ADvsCTRL <- results(
      dds,
      contrast = c("disease",
                   "dementia || Alzheimer disease",
                   "normal")
    )
    
    fname <- paste0("DE_results/DE_", gsub("/", "_", ct), "_ADvsCTRL.csv")
    write.csv(as.data.frame(res_ADvsCTRL), fname)
    cat("Saved:", fname, "\n")
  }
  
  ### --- 2) PD vs CTRL ---
  if (all(c("normal", "dementia || Parkinson disease") %in% pheno$disease)) {
    
    res_PDvsCTRL <- results(
      dds,
      contrast = c("disease",
                   "dementia || Parkinson disease",
                   "normal")
    )
    
    fname <- paste0("DE_results/DE_", gsub("/", "_", ct), "_PDvsCTRL.csv")
    write.csv(as.data.frame(res_PDvsCTRL), fname)
    cat("Saved:", fname, "\n")
  }
  
  ### --- 3) AD vs PD ---
  if (all(c("dementia || Alzheimer disease", 
            "dementia || Parkinson disease") %in% pheno$disease)) {
    
    res_ADvsPD <- results(
      dds,
      contrast = c("disease",
                   "dementia || Alzheimer disease",
                   "dementia || Parkinson disease")
    )
    
    fname <- paste0("DE_results/DE_", gsub("/", "_", ct), "_ADvsPD.csv")
    write.csv(as.data.frame(res_ADvsPD), fname)
    cat("Saved:", fname, "\n")
  }
}


=== Cell type : Astro ===


converting counts to integer mode

  Note: levels of factors in the design contain characters other than
  letters, numbers, '_' and '.'. It is recommended (but not required) to use
  only letters, numbers, and delimiters '_' or '.', as these are safe characters

estimating size factors

  Note: levels of factors in the design contain characters other than
  letters, numbers, '_' and '.'. It is recommended (but not required) to use
  only letters, numbers, and delimiters '_' or '.', as these are safe characters

estimating dispersions

gene-wise dispersion estimates

mean-dispersion relationship

  Note: levels of factors in the design contain characters other than
  letters, numbers, '_' and '.'. It is recommended (but not required) to use
  only letters, numbers, and delimiters '_' or '.', as these are safe characters

final dispersion estimates

fitting model and testing

  Note: levels of factors in the design contain characters other than
  letters, numbers, '_' and '.'. It is rec

Saved: DE_results/DE_Astro_ADvsCTRL.csv 
Saved: DE_results/DE_Astro_PDvsCTRL.csv 
Saved: DE_results/DE_Astro_ADvsPD.csv 

=== Cell type : EN ===


converting counts to integer mode

  Note: levels of factors in the design contain characters other than
  letters, numbers, '_' and '.'. It is recommended (but not required) to use
  only letters, numbers, and delimiters '_' or '.', as these are safe characters

estimating size factors

  Note: levels of factors in the design contain characters other than
  letters, numbers, '_' and '.'. It is recommended (but not required) to use
  only letters, numbers, and delimiters '_' or '.', as these are safe characters

estimating dispersions

gene-wise dispersion estimates

mean-dispersion relationship

  Note: levels of factors in the design contain characters other than
  letters, numbers, '_' and '.'. It is recommended (but not required) to use
  only letters, numbers, and delimiters '_' or '.', as these are safe characters

final dispersion estimates

fitting model and testing

  Note: levels of factors in the design contain characters other than
  letters, numbers, '_' and '.'. It is rec

Saved: DE_results/DE_EN_ADvsCTRL.csv 
Saved: DE_results/DE_EN_PDvsCTRL.csv 
Saved: DE_results/DE_EN_ADvsPD.csv 

=== Cell type : Endo ===


converting counts to integer mode

  Note: levels of factors in the design contain characters other than
  letters, numbers, '_' and '.'. It is recommended (but not required) to use
  only letters, numbers, and delimiters '_' or '.', as these are safe characters

estimating size factors

  Note: levels of factors in the design contain characters other than
  letters, numbers, '_' and '.'. It is recommended (but not required) to use
  only letters, numbers, and delimiters '_' or '.', as these are safe characters

estimating dispersions

gene-wise dispersion estimates

mean-dispersion relationship

  Note: levels of factors in the design contain characters other than
  letters, numbers, '_' and '.'. It is recommended (but not required) to use
  only letters, numbers, and delimiters '_' or '.', as these are safe characters

final dispersion estimates

fitting model and testing

  Note: levels of factors in the design contain characters other than
  letters, numbers, '_' and '.'. It is rec

Saved: DE_results/DE_Endo_ADvsCTRL.csv 
Saved: DE_results/DE_Endo_PDvsCTRL.csv 
Saved: DE_results/DE_Endo_ADvsPD.csv 

=== Cell type : IN ===


converting counts to integer mode

  Note: levels of factors in the design contain characters other than
  letters, numbers, '_' and '.'. It is recommended (but not required) to use
  only letters, numbers, and delimiters '_' or '.', as these are safe characters

estimating size factors

  Note: levels of factors in the design contain characters other than
  letters, numbers, '_' and '.'. It is recommended (but not required) to use
  only letters, numbers, and delimiters '_' or '.', as these are safe characters

estimating dispersions

gene-wise dispersion estimates

mean-dispersion relationship

  Note: levels of factors in the design contain characters other than
  letters, numbers, '_' and '.'. It is recommended (but not required) to use
  only letters, numbers, and delimiters '_' or '.', as these are safe characters

final dispersion estimates

fitting model and testing

  Note: levels of factors in the design contain characters other than
  letters, numbers, '_' and '.'. It is rec

Saved: DE_results/DE_IN_ADvsCTRL.csv 
Saved: DE_results/DE_IN_PDvsCTRL.csv 
Saved: DE_results/DE_IN_ADvsPD.csv 

=== Cell type : Immune ===


converting counts to integer mode

  Note: levels of factors in the design contain characters other than
  letters, numbers, '_' and '.'. It is recommended (but not required) to use
  only letters, numbers, and delimiters '_' or '.', as these are safe characters

estimating size factors

  Note: levels of factors in the design contain characters other than
  letters, numbers, '_' and '.'. It is recommended (but not required) to use
  only letters, numbers, and delimiters '_' or '.', as these are safe characters

estimating dispersions

gene-wise dispersion estimates

mean-dispersion relationship

  Note: levels of factors in the design contain characters other than
  letters, numbers, '_' and '.'. It is recommended (but not required) to use
  only letters, numbers, and delimiters '_' or '.', as these are safe characters

final dispersion estimates

fitting model and testing

  Note: levels of factors in the design contain characters other than
  letters, numbers, '_' and '.'. It is rec

Saved: DE_results/DE_Immune_ADvsCTRL.csv 
Saved: DE_results/DE_Immune_PDvsCTRL.csv 
Saved: DE_results/DE_Immune_ADvsPD.csv 

=== Cell type : Mural ===


converting counts to integer mode

  Note: levels of factors in the design contain characters other than
  letters, numbers, '_' and '.'. It is recommended (but not required) to use
  only letters, numbers, and delimiters '_' or '.', as these are safe characters

estimating size factors

  Note: levels of factors in the design contain characters other than
  letters, numbers, '_' and '.'. It is recommended (but not required) to use
  only letters, numbers, and delimiters '_' or '.', as these are safe characters

estimating dispersions

gene-wise dispersion estimates

mean-dispersion relationship

  Note: levels of factors in the design contain characters other than
  letters, numbers, '_' and '.'. It is recommended (but not required) to use
  only letters, numbers, and delimiters '_' or '.', as these are safe characters

final dispersion estimates

fitting model and testing

  Note: levels of factors in the design contain characters other than
  letters, numbers, '_' and '.'. It is rec

Saved: DE_results/DE_Mural_ADvsCTRL.csv 
Saved: DE_results/DE_Mural_PDvsCTRL.csv 
Saved: DE_results/DE_Mural_ADvsPD.csv 

=== Cell type : OPC ===


converting counts to integer mode

  Note: levels of factors in the design contain characters other than
  letters, numbers, '_' and '.'. It is recommended (but not required) to use
  only letters, numbers, and delimiters '_' or '.', as these are safe characters

estimating size factors

  Note: levels of factors in the design contain characters other than
  letters, numbers, '_' and '.'. It is recommended (but not required) to use
  only letters, numbers, and delimiters '_' or '.', as these are safe characters

estimating dispersions

gene-wise dispersion estimates

mean-dispersion relationship

  Note: levels of factors in the design contain characters other than
  letters, numbers, '_' and '.'. It is recommended (but not required) to use
  only letters, numbers, and delimiters '_' or '.', as these are safe characters

final dispersion estimates

fitting model and testing

  Note: levels of factors in the design contain characters other than
  letters, numbers, '_' and '.'. It is rec

Saved: DE_results/DE_OPC_ADvsCTRL.csv 
Saved: DE_results/DE_OPC_PDvsCTRL.csv 
Saved: DE_results/DE_OPC_ADvsPD.csv 

=== Cell type : Oligo ===


converting counts to integer mode

  Note: levels of factors in the design contain characters other than
  letters, numbers, '_' and '.'. It is recommended (but not required) to use
  only letters, numbers, and delimiters '_' or '.', as these are safe characters

estimating size factors

  Note: levels of factors in the design contain characters other than
  letters, numbers, '_' and '.'. It is recommended (but not required) to use
  only letters, numbers, and delimiters '_' or '.', as these are safe characters

estimating dispersions

gene-wise dispersion estimates

mean-dispersion relationship

  Note: levels of factors in the design contain characters other than
  letters, numbers, '_' and '.'. It is recommended (but not required) to use
  only letters, numbers, and delimiters '_' or '.', as these are safe characters

final dispersion estimates

fitting model and testing

  Note: levels of factors in the design contain characters other than
  letters, numbers, '_' and '.'. It is rec

Saved: DE_results/DE_Oligo_ADvsCTRL.csv 
Saved: DE_results/DE_Oligo_PDvsCTRL.csv 
Saved: DE_results/DE_Oligo_ADvsPD.csv 


In [86]:
df <- read.csv("DE_results/DE_Mural_ADvsPD.csv", row.names = 1)

In [87]:
df

Unnamed: 0_level_0,baseMean,log2FoldChange,lfcSE,stat,pvalue,padj
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<int>
ENSG00000000003,3.177798e+00,-0.46943648,0.9098093,-0.51597240,0.60587369,1
ENSG00000000005,2.328220e-01,0.35517483,2.4674346,0.14394498,0.88554392,1
ENSG00000000419,9.099047e+00,1.15441023,0.5862653,1.96909186,0.04894254,1
ENSG00000000457,6.347491e+00,0.38035269,0.6901966,0.55107873,0.58157971,1
ENSG00000000460,4.644989e+00,0.16541580,0.7395579,0.22366848,0.82301528,1
ENSG00000000938,1.807082e-01,-0.59607129,4.2465684,-0.14036540,,
ENSG00000000971,1.059941e+02,0.64767212,0.4858978,1.33293896,0.18255183,1
ENSG00000001036,3.798310e+00,1.24923938,0.9748424,1.28147824,0.20002574,1
ENSG00000001084,1.267879e+01,0.41003886,0.5116317,0.80143366,0.42288064,1
ENSG00000001167,5.972472e+00,-0.58320249,0.5713867,-1.02067913,0.30740648,1


In [None]:
dim(counts)

- aucun donneur n’a plusieurs diseases
- pas de duplication artificielle
- cellules présentes seulement pour certaines conditions
- distribution logique (tous les donneurs n’ont pas toutes les cell types)

Certains gènes ont une variance extrêmement faible entre donors dans un cell type donné.

C’est normal en pseudo-bulk log-normalisé
- encore plus normal quand :
- un cell type a peu de donors (ex : 2–3 en PD)
- les valeurs sont des moyennes log-normalisées
- beaucoup de gènes sont presque non exprimés

In [None]:
head(meta)
head(expr)
table(meta$class, meta$disease)
apply(expr, 1, sd)[1:20]  # 20 gènes au hasard

Unnamed: 0_level_0,class,donor,disease
Unnamed: 0_level_1,<chr>,<chr>,<fct>
1,Astro,Donor_31,dementia || Alzheimer disease
2,Astro,Donor_31,dementia || Parkinson disease
3,Astro,Donor_31,normal
4,Astro,Donor_228,dementia || Alzheimer disease
5,Astro,Donor_228,dementia || Parkinson disease
6,Astro,Donor_228,normal


Unnamed: 0,ENSG00000000003,ENSG00000000005,ENSG00000000419,ENSG00000000457,ENSG00000000460,ENSG00000000938,ENSG00000000971,ENSG00000001036,ENSG00000001084,ENSG00000001167,⋯,ENSG00000288600,ENSG00000288602,ENSG00000288605,ENSG00000288606,ENSG00000288611,ENSG00000288612,ENSG00000288622,ENSG00000288637,ENSG00000288642,ENSG00000288643
Astro_Donor_31_dementia || Alzheimer disease,71,7,242,173,139,1,4,64,370,84,⋯,0,98,37,6,0,21,4,3,15,13
Astro_Donor_31_dementia || Parkinson disease,0,0,0,0,0,0,0,0,0,0,⋯,0,0,0,0,0,0,0,0,0,0
Astro_Donor_31_normal,0,0,0,0,0,0,0,0,0,0,⋯,0,0,0,0,0,0,0,0,0,0
Astro_Donor_228_dementia || Alzheimer disease,63,1,258,197,123,2,1,55,630,100,⋯,1,119,25,10,1,30,3,5,10,17
Astro_Donor_228_dementia || Parkinson disease,0,0,0,0,0,0,0,0,0,0,⋯,0,0,0,0,0,0,0,0,0,0
Astro_Donor_228_normal,0,0,0,0,0,0,0,0,0,0,⋯,0,0,0,0,0,0,0,0,0,0


        
         dementia || Alzheimer disease dementia || Parkinson disease normal
  Astro                             17                            17     17
  EN                                17                            17     17
  Endo                              17                            17     17
  Immune                            17                            17     17
  IN                                17                            17     17
  Mural                             17                            17     17
  Oligo                             17                            17     17
  OPC                               17                            17     17

In [None]:
expr_ct.var

ERROR: Error: object 'expr_ct.var' not found
