In [15]:
library(limma)

In [16]:
# Charger le pseudo-bulk
pb <- read.csv("pseudobulk_matrix.csv")

# Séparer l’expression et les métadonnées
expr <- pb[, !(colnames(pb) %in% c("class", "donor_id", "disease"))]
expr <- as.matrix(expr)

meta <- data.frame(
  class = pb$class,
  donor = pb$donor_id,
  disease = factor(pb$disease)
)
rownames(expr) <- paste(pb$class, pb$donor_id, pb$disease, sep="_")


In [17]:
# Liste des cell-types
celltypes <- unique(meta$class)

# Création d’un dossier de sortie
dir.create("DE_results", showWarnings = FALSE)

meta$class <- gsub("[/ ]+", "_", meta$class)

for (ct in celltypes) {
  
  cat("\n=== Cell type :", ct, "===\n")
  
  # Sélection des donors de ce cell type
  idx <- meta$class == ct
  expr_ct <- expr[idx, ]
  meta_ct <- meta[idx, ]
  
  print(table(meta_ct$disease))

  # Relevel pour que CTRL = reference
  meta_ct$disease <- factor(meta_ct$disease, levels = c("normal", "dementia || Alzheimer disease",
                                                       "dementia || Parkinson disease"))
  
  # Design matrix
  design <- model.matrix(~ 0 + disease, data=meta_ct)
  colnames(design) <- c("CTRL", "AD", "PD")
  
  cat("Design matrix OK\n")
  
  # modèle limma-trend
  fit <- lmFit(t(expr_ct), design)

  contrasts <- makeContrasts(
    AD_vs_CTRL = AD - CTRL,
    PD_vs_CTRL = PD - CTRL,
    AD_vs_PD   = AD - PD,
    levels = design
  )
  
  fit2 <- contrasts.fit(fit, contrasts)
  fit2 <- eBayes(fit2, trend = TRUE)
  
  # cell type nettoyé
  ct_clean <- gsub("[/ ]+", "_", ct)

# Export
  write.csv(topTable(fit2, coef="AD_vs_CTRL", number=Inf),
            paste0("DE_results/DE_", ct_clean, "_ADvsCTRL.csv"))
  
  write.csv(topTable(fit2, coef="PD_vs_CTRL", number=Inf),
            paste0("DE_results/DE_", ct_clean, "_PDvsCTRL.csv"))
  
  write.csv(topTable(fit2, coef="AD_vs_PD", number=Inf),
            paste0("DE_results/DE_", ct_clean, "_ADvsPD.csv"))
}


=== Cell type : Astro ===

dementia || Alzheimer disease dementia || Parkinson disease 
                           17                             3 
                       normal 
                            9 
Design matrix OK


“Zero sample variances detected, have been offset away from zero”



=== Cell type : EN ===

dementia || Alzheimer disease dementia || Parkinson disease 
                           17                             3 
                       normal 
                            9 
Design matrix OK


“Zero sample variances detected, have been offset away from zero”



=== Cell type : Endo ===

dementia || Alzheimer disease dementia || Parkinson disease 
                           17                             3 
                       normal 
                            9 
Design matrix OK


“Zero sample variances detected, have been offset away from zero”



=== Cell type : IN ===

dementia || Alzheimer disease dementia || Parkinson disease 
                           17                             3 
                       normal 
                            9 
Design matrix OK


“Zero sample variances detected, have been offset away from zero”



=== Cell type : Immune ===

dementia || Alzheimer disease dementia || Parkinson disease 
                           17                             3 
                       normal 
                            9 
Design matrix OK


“Zero sample variances detected, have been offset away from zero”



=== Cell type : Mural ===

dementia || Alzheimer disease dementia || Parkinson disease 
                           17                             3 
                       normal 
                            9 
Design matrix OK


“Zero sample variances detected, have been offset away from zero”



=== Cell type : OPC ===

dementia || Alzheimer disease dementia || Parkinson disease 
                           17                             3 
                       normal 
                            9 
Design matrix OK


“Zero sample variances detected, have been offset away from zero”



=== Cell type : Oligo ===

dementia || Alzheimer disease dementia || Parkinson disease 
                           17                             3 
                       normal 
                            9 
Design matrix OK


“Zero sample variances detected, have been offset away from zero”


- aucun donneur n’a plusieurs diseases
- pas de duplication artificielle
- cellules présentes seulement pour certaines conditions
- distribution logique (tous les donneurs n’ont pas toutes les cell types)

Certains gènes ont une variance extrêmement faible entre donors dans un cell type donné.

C’est normal en pseudo-bulk log-normalisé
- encore plus normal quand :
- un cell type a peu de donors (ex : 2–3 en PD)
- les valeurs sont des moyennes log-normalisées
- beaucoup de gènes sont presque non exprimés

In [18]:
head(expr_ct[,1:5])

Unnamed: 0,ENSG00000000003,ENSG00000000005,ENSG00000000419,ENSG00000000457,ENSG00000000460
Oligo_Donor_31_dementia || Alzheimer disease,0.006143345,0.0,0.3296928,0.2191126,0.2511945
Oligo_Donor_82_dementia || Alzheimer disease,0.0,0.003773585,0.1735849,0.1471698,0.1773585
Oligo_Donor_137_dementia || Alzheimer disease,0.010225564,0.0003007519,0.2192481,0.1669173,0.3389474
Oligo_Donor_228_dementia || Alzheimer disease,0.0,0.0,0.2577031,0.1596639,0.2156863
Oligo_Donor_310_dementia || Alzheimer disease,0.003427366,0.0005272871,0.285526,0.1832323,0.2399156
Oligo_Donor_333_dementia || Alzheimer disease,0.006134969,0.0003408316,0.2471029,0.2007498,0.2597137


In [19]:
head(meta)
head(expr)
table(meta$cell_type, meta$disease)
apply(expr, 1, sd)[1:20]  # 20 gènes au hasard

Unnamed: 0_level_0,class,donor,disease
Unnamed: 0_level_1,<chr>,<chr>,<fct>
1,Astro,Donor_31,dementia || Alzheimer disease
2,Astro,Donor_82,dementia || Alzheimer disease
3,Astro,Donor_137,dementia || Alzheimer disease
4,Astro,Donor_228,dementia || Alzheimer disease
5,Astro,Donor_310,dementia || Alzheimer disease
6,Astro,Donor_333,dementia || Alzheimer disease


Unnamed: 0,ENSG00000000003,ENSG00000000005,ENSG00000000419,ENSG00000000457,ENSG00000000460,ENSG00000000938,ENSG00000000971,ENSG00000001036,ENSG00000001084,ENSG00000001167,⋯,ENSG00000288600,ENSG00000288602,ENSG00000288605,ENSG00000288606,ENSG00000288611,ENSG00000288612,ENSG00000288622,ENSG00000288637,ENSG00000288642,ENSG00000288643
Astro_Donor_31_dementia || Alzheimer disease,0.16511628,0.01627907,0.5627907,0.4023256,0.3232558,0.002325581,0.009302326,0.14883721,0.8604651,0.1953488,⋯,0.0,0.22790697,0.08604651,0.01395349,0.0,0.04883721,0.009302326,0.006976744,0.03488372,0.03023256
Astro_Donor_82_dementia || Alzheimer disease,0.02547771,0.0,0.2675159,0.1464968,0.07643312,0.012738854,0.025477707,0.07006369,0.3503185,0.133758,⋯,0.0,0.03821656,0.01273885,0.0,0.0,0.01910828,0.0,0.0,0.06369427,0.01273885
Astro_Donor_137_dementia || Alzheimer disease,0.2875,0.0075,0.415,0.35,0.2125,0.0025,0.0075,0.115,1.1625,0.2,⋯,0.005,0.3125,0.0775,0.0125,0.0,0.05,0.01,0.005,0.0425,0.04
Astro_Donor_228_dementia || Alzheimer disease,0.11130742,0.001766784,0.4558304,0.3480565,0.21731448,0.003533569,0.001766784,0.09717315,1.1130742,0.1766784,⋯,0.001766784,0.21024735,0.04416961,0.01766785,0.001766784,0.05300353,0.005300353,0.008833922,0.01766785,0.03003534
Astro_Donor_310_dementia || Alzheimer disease,0.08391608,0.002331002,0.4825175,0.2937063,0.22144522,0.002331002,0.009324009,0.13752913,0.7249417,0.1934732,⋯,0.0,0.23543124,0.07459208,0.01165501,0.0,0.04195804,0.0,0.013986014,0.03962704,0.02797203
Astro_Donor_333_dementia || Alzheimer disease,0.2763819,0.030150754,0.3517588,0.2562814,0.25376883,0.002512563,0.012562814,0.0879397,0.5527638,0.1658292,⋯,0.0,0.17336683,0.03266332,0.01758794,0.002512563,0.04522613,0.002512563,0.007537689,0.09798995,0.040201


ERROR: Error in table(meta$cell_type, meta$disease): all arguments must have the same length


In [24]:
ct <- "Astro"

idx     <- meta$class == ct
expr_ct <- expr[idx, ]
meta_ct <- meta[idx, ]

meta_ct$disease <- factor(
  meta_ct$disease,
  levels = c("normal",
             "dementia || Alzheimer disease",
             "dementia || Parkinson disease")
)

design <- model.matrix(~ 0 + disease, data = meta_ct)
colnames(design) <- c("CTRL", "AD", "PD")

fit  <- lmFit(t(expr_ct), design)
contr <- makeContrasts(
  AD_vs_CTRL = AD - CTRL,
  PD_vs_CTRL = PD - CTRL,
  AD_vs_PD   = AD - PD,
  levels = design
)
fit2 <- eBayes(contrasts.fit(fit, contr), trend = TRUE)

tt_AD <- topTable(fit2, coef = "AD_vs_CTRL", number = Inf)
summary(tt_AD$P.Value)
min(tt_AD$P.Value)
head(tt_AD[order(tt_AD$P.Value), ], 10)

“Zero sample variances detected, have been offset away from zero”


     Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
0.0001334 0.3606841 0.5795829 0.5708234 0.7952285 1.0000000 

Unnamed: 0_level_0,logFC,AveExpr,t,P.Value,adj.P.Val,B
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
ENSG00000271321,-0.002075941,0.0008840268,-4.438037,0.0001333589,1,-0.04349535
ENSG00000279070,-0.002481059,0.0014541369,-4.243051,0.0002254249,1,-0.56203089
ENSG00000162267,-0.001905226,0.0010269403,-4.20927,0.00024682,1,-0.65140463
ENSG00000168748,0.007137643,0.0083543832,4.028837,0.0003999052,1,-1.12602219
ENSG00000170955,-0.001231912,0.0004381844,-3.59859,0.001243693,1,-2.23390554
ENSG00000230432,-0.003737465,0.0022404842,-3.472763,0.0017232587,1,-2.54986018
ENSG00000250043,-0.002977097,0.0012386606,-3.400491,0.0020753112,1,-2.72936988
ENSG00000276298,-0.001471687,0.0005159791,-3.3296,0.0024876965,1,-2.90395322
ENSG00000286399,-0.003779452,0.004130584,-3.318302,0.0025603331,1,-2.93163395
ENSG00000248174,-0.004116567,0.0035191872,-3.316034,0.0025751573,1,-2.93718524
