In [1]:
library("data.table")
# Fix the RNG seed so the sample() call used for label shuffling further down is reproducible.
set.seed(2023)

In [2]:
# Input (data) and output (result) locations for the CREBBP RNA analysis.
data.dir <- file.path("../Data/RNA/CREBBP/")
res.dir  <- file.path("../Result/RNA/CREBBP/")

# Load the previously assembled data object. Notes on its contents:
#   - all X matrices have the same order of samples
#   - all DE genes / probes are unique and present in the corresponding X
CREBBP.dat <- readRDS(file.path(data.dir, "CREBBP.dat.rds"))

# Adding W.hat to CREBBP.dat

In [3]:
# Read the CIBERSORTx fraction table; the first column of the file supplies the
# row names, which are later matched against the sample ids of X.genes.
W.hat <- as.matrix(data.frame(
  fread(file.path(res.dir, "Cibersortx-W-hat", "CIBERSORTxGEP_Fractions.txt"), header = TRUE),
  row.names = 1
))

# Keep only the first 22 columns.
# NOTE(review): presumably these are the 22 fine-grained cell-type fractions and
# any trailing columns are per-sample fit statistics -- confirm against the file.
if (ncol(W.hat) < 22){
  stop("expected at least 22 fraction columns, found ", ncol(W.hat), call. = FALSE)
}
W.hat <- W.hat[, 1:22, drop = FALSE]

# Collapse to 4 cell types: the header row of the merged-classes file gives, in
# column order, the merged class each of the 22 columns belongs to.
merge.class <- colnames(fread(file.path(data.dir,"Cibersortx-fig3/Fig3b-f-LM4_merged_classes.txt")))
if (length(merge.class) != ncol(W.hat)){
  stop("merged-classes file provides ", length(merge.class),
       " labels for ", ncol(W.hat), " fraction columns", call. = FALSE)
}
colnames(W.hat) <- merge.class
# rowsum() sums rows sharing a group label, so transpose, sum per class, transpose back.
W.hat <- t(rowsum(t(W.hat), group = colnames(W.hat), na.rm = TRUE))

# Remove spaces in cell-type names.
colnames(W.hat) <- gsub(" ", "", colnames(W.hat))

# Reorder columns from most abundant to least abundant (by mean fraction).
W.hat <- W.hat[, order(-colMeans(W.hat)), drop = FALSE]

# Reorder rows to follow the sample order of X.genes; fail with a readable
# message if some samples have no estimated fractions.
if (!all(colnames(CREBBP.dat$X.genes) %in% rownames(W.hat))){
  stop("samples missing from the CIBERSORTx fractions: ",
       paste(setdiff(colnames(CREBBP.dat$X.genes), rownames(W.hat)), collapse = ", "),
       call. = FALSE)
}
W.hat <- W.hat[colnames(CREBBP.dat$X.genes), , drop = FALSE]

In [4]:
# Print the resulting W.hat matrix (samples in rows, merged cell types in columns).
W.hat

Unnamed: 0,Bcells,Remaining,TcellsCD4,TcellsCD8
FL_1004,0.4184603,0.20032060,0.28977389,0.09144518
FL_1005,0.6138367,0.18113672,0.12287782,0.08214880
FL_1006,0.5260250,0.18142659,0.13871346,0.15383494
FL_1008,0.2632071,0.31874740,0.28268386,0.13536166
FL_1009,0.7442994,0.11681008,0.06220044,0.07669013
FL_1010,0.5455849,0.07774806,0.24935193,0.12731510
FL_1012,0.6347613,0.13470168,0.12674105,0.10379596
FL_1014,0.3782999,0.26462257,0.11354515,0.24353240
FL_1016,0.6531219,0.14504347,0.09764175,0.10419287
FL_1017,0.7372400,0.11315550,0.08649665,0.06310784


In [5]:
# Store the estimated fractions on the data object so they are saved along with it.
CREBBP.dat$W.hat = W.hat 

# Construct the expm.dat list of (X, W, C1) triplets

In [6]:
# Logistics for writing the data CIBERSORTx needs for one experiment to disk.
# key:            character; identifies the experiment set-up and names its sub-directory
# expm:           list holding the experiment data; its X and W elements are written out
# cibersortx.dir: parent directory under which <key>/src and <key>/res are created
# Writes <key>/src/X.txt and <key>/res/W.txt as tab-separated text with row and
# column names; stops if a directory cannot be created or X / W is missing.
dumpCibersortx <- function(key, expm, cibersortx.dir){

  if (is.null(expm$X) || is.null(expm$W)){
    stop("expm must contain both X and W", call. = FALSE)
  }

  fp.src <- file.path(cibersortx.dir, key, "src")
  fp.res <- file.path(cibersortx.dir, key, "res")

  # dir.exists() rather than file.exists(): a regular file sitting at the target
  # path must not be mistaken for an already existing directory.
  for (fp in c(fp.src, fp.res)){
    if (!dir.exists(fp) && !dir.create(fp, recursive = TRUE)){
      stop("could not create directory: ", fp, call. = FALSE)
    }
  }
  print(paste0("cibersortx data saved here: ",fp.src))
  print(paste0("cibersortx res dir: ",fp.res))

  # cibersortx data dump
  fwrite(as.data.frame(expm$X),
         file = file.path(fp.src,"X.txt"),
         sep = "\t", quote = FALSE, row.names = TRUE, col.names = TRUE)

  # NOTE(review): W.txt is written to the res directory, not src, although the
  # message above announces src as the data location -- confirm this is intended.
  fwrite(as.data.frame(expm$W),
         file = file.path(fp.res,"W.txt"),
         sep = "\t", quote = FALSE, row.names = TRUE, col.names = TRUE)
}


In [7]:
CREBBP.dat[["expm.dat"]] <- list()


# configures: a list of configurations; each configuration is list(version, label, shuffle).
# version: "genes" or "probes" -- selects X.genes or X.probes and the matching inc/dec feature sets
# label:   logical -- if TRUE, keep only the mutant / wild-type samples and build the C1 indicator
# shuffle: logical -- if TRUE (and label is TRUE), randomly permute the mutation labels;
#          if FALSE, go with the real status
#
# Each configuration is a list(), not c(): c("genes", FALSE, FALSE) coerces the
# flags to the strings "FALSE", so `if (label)` would only work through implicit
# character-to-logical conversion.

configures <- list(list("genes", FALSE, FALSE))
#configures <- list(list("genes", FALSE, FALSE), list("genes", TRUE, FALSE), list("genes", TRUE, TRUE))

for (configure in configures){
  version <- configure[[1]]
  # as.logical() keeps the old character form ("TRUE"/"FALSE", "T"/"F") working.
  label   <- as.logical(configure[[2]])
  shuffle <- as.logical(configure[[3]])
  if (!(version %in% c("genes", "probes")) || is.na(label) || is.na(shuffle)){
    stop("invalid configuration: ", paste(unlist(configure), collapse = ", "), call. = FALSE)
  }

  key <- paste0(version, if (label) ".label" else "", if (shuffle) ".shuffle" else "")
  print(paste0("working on: ",key))


  expm <- list()
  # Subset X to the feature set of interest (increased followed by decreased features).
  # drop = FALSE keeps a matrix even when a single feature is selected.
  if (version == "genes"){
    feature.ids <- c(CREBBP.dat$inc.genes,  CREBBP.dat$dec.genes)
    expm$X <- CREBBP.dat$X.genes[feature.ids, , drop = FALSE]
  }else{
    feature.ids <- c(CREBBP.dat$inc.probes, CREBBP.dat$dec.probes)
    expm$X <- CREBBP.dat$X.probes[feature.ids, , drop = FALSE]
  }

  if (label){
    # Restrict to samples with a mutation status: mutants first, then wild types.
    sample.ids <- c(CREBBP.dat$samples.mt, CREBBP.dat$samples.wt)
    expm$W  <- CREBBP.dat$W.hat[sample.ids, , drop = FALSE]
    expm$X  <- expm$X[, sample.ids, drop = FALSE]
    # One-column indicator matrix: 1 for mutant samples, 0 for wild-type samples.
    expm$C1 <- as.matrix(c(rep(1, length(CREBBP.dat$samples.mt)), rep(0, length(CREBBP.dat$samples.wt))))
    colnames(expm$C1) <- "Mutation"
    rownames(expm$C1) <- sample.ids

    if (shuffle){
      # Permute the labels, then restore the row names so that samples keep their
      # order and only the label-to-sample assignment is randomized.
      expm$C1 <- expm$C1[sample(nrow(expm$C1)), , drop = FALSE]
      rownames(expm$C1) <- sample.ids
    }
  }else{
    sample.ids <- colnames(expm$X)
    expm$W  <- CREBBP.dat$W.hat[sample.ids, , drop = FALSE]
    # Assigning NULL adds no element to the list; expm$C1 simply reads back as NULL.
    expm$C1 <- NULL
  }

  CREBBP.dat[["expm.dat"]][[key]] <- expm
  dumpCibersortx(key, expm, file.path(res.dir,"Cibersortx"))
}

[1] "working on: genes"
[1] "cibersortx data saved here: ../Result/RNA/CREBBP//Cibersortx/genes/src"
[1] "cibersortx res dir: ../Result/RNA/CREBBP//Cibersortx/genes/res"


# save the object 

In [8]:
# Persist the augmented object; this overwrites the same CREBBP.dat.rds file that was read at the top of the notebook.
saveRDS(CREBBP.dat, file.path(data.dir, "CREBBP.dat.rds"))