In [2]:
library("MIND")
library("TCA")
source("TCAx.r")
source("analysis.utils.r")

set.seed(2023)

In [3]:
# Feature subset to analyse. The value is used as the data/result
# sub-directory name and inside the .rds file names read below.
# Alternative subset, kept for quick switching:
# feature.set <- "hvf.10k"
feature.set <- "random.10k"

In [4]:
# Input and output directories for the selected feature set.
# The doubled "/" in the literals is kept on purpose so the resulting path
# strings stay exactly as before.
data.dir <- paste0("../Data//Methylation/Purified-Reinius/", feature.set)
res.dir  <- paste0("../Result//Methylation/Purified-Reinius/", feature.set)

# Create the result directory (including missing parents) when it is absent.
# dir.exists() is used rather than file.exists() so that a regular file at
# this path is not mistaken for an existing directory.
if (!dir.exists(res.dir)) {
  dir.create(res.dir, recursive = TRUE)
}
print(data.dir)
print(res.dir)

[1] "../Data//Methylation/Purified-Reinius/hvf.10k"
[1] "../Result//Methylation/Purified-Reinius/hvf.10k"


# Load data

In [5]:
# Read the two input objects for the selected feature set:
#   <data.dir>/hannum.<feature.set>.rds
#   <data.dir>/reinius.<feature.set>.rds
hannum <- readRDS(
  file = file.path(data.dir, paste0("hannum.", feature.set, ".rds"))
)
reinius <- readRDS(
  file = file.path(data.dir, paste0("reinius.", feature.set, ".rds"))
)

In [6]:
# Print the structure of the loaded hannum object (interactive sanity check).
str(hannum)

List of 4
 $ X       : num [1:10000, 1:426] 0.899 0.037 0.8897 0.0352 0.7929 ...
  ..- attr(*, "dimnames")=List of 2
  .. ..$ : chr [1:10000] "cg12426467" "cg02113055" "cg00079898" "cg04131969" ...
  .. ..$ : chr [1:426] "GSM989827" "GSM989828" "GSM989829" "GSM989830" ...
 $ cov     : chr [1:426, 1:4] "47" "69" "46" "44" ...
  ..- attr(*, "dimnames")=List of 2
  .. ..$ : chr [1:426] "GSM989827" "GSM989828" "GSM989829" "GSM989830" ...
  .. ..$ : chr [1:4] "age" "gender" "plate" "ethnicity"
 $ W       : num [1:426, 1:6] 0.771 0.855 0.793 0.828 0.727 ...
  ..- attr(*, "dimnames")=List of 2
  .. ..$ : chr [1:426] "GSM989827" "GSM989828" "GSM989829" "GSM989830" ...
  .. ..$ : chr [1:6] "Gran" "CD8T" "CD4T" "Mono" ...
 $ ctrl_pcs: num [1:426, 1:20] -62.9 -63.1 -64.7 -83.2 -64.4 ...
  ..- attr(*, "dimnames")=List of 2
  .. ..$ : chr [1:426] "GSM989827" "GSM989828" "GSM989829" "GSM989830" ...
  .. ..$ : chr [1:20] "PC1" "PC2" "PC3" "PC4" ...


In [7]:
# Print the structure of the loaded reinius object (interactive sanity check).
str(reinius)

List of 5
 $ X     : num [1:10000, 1:6] 0.9149 0.0436 0.8372 0.8827 0.1134 ...
  ..- attr(*, "dimnames")=List of 2
  .. ..$ : chr [1:10000] "cg12426467" "cg02113055" "cg00079898" "cg04131969" ...
  .. ..$ : chr [1:6] "sample.1" "sample.2" "sample.3" "sample.4" ...
 $ cov   : chr [1:6, 1] "Male" "Male" "Male" "Male" ...
  ..- attr(*, "dimnames")=List of 2
  .. ..$ : chr [1:6] "GSM861635" "GSM861636" "GSM861637" "GSM861638" ...
  .. ..$ : chr "gender"
 $ W     : num [1:6, 1:6] 0.532 0.59 0.665 0.617 0.775 ...
  ..- attr(*, "dimnames")=List of 2
  .. ..$ : chr [1:6] "sample.1" "sample.2" "sample.3" "sample.4" ...
  .. ..$ : chr [1:6] "Gran" "CD8T" "CD4T" "Mono" ...
 $ Z     : num [1:6, 1:10000, 1:6] 0.912 0.922 0.919 0.923 0.919 ...
  ..- attr(*, "dimnames")=List of 3
  .. ..$ : chr [1:6] "Gran" "CD8T" "CD4T" "Mono" ...
  .. ..$ : chr [1:10000] "cg12426467" "cg02113055" "cg00079898" "cg04131969" ...
  .. ..$ : chr [1:6] "sample.1" "sample.2" "sample.3" "sample.4" ...
 $ params:List of 4
 

# TCAx

In [8]:
# algorithm settings handed to TCAx() in the next cell

# Penalty values.
mean_penalty  <- 0
var_penalty   <- 0.01
covar_penalty <- 0.01

# Value passed as max_stds; 3 is kept as a commented-out alternative.
max_stds <- 2
# max_stds <- 3

# Iteration limits and the nloptr algorithm name.
mean_max_iterations   <- 2
var_max_iterations    <- 3
nloptr_opts_algorithm <- "NLOPT_LN_COBYLA"

# Tag encoding the penalty setup; it is embedded in the TCAx log file name
# and in the saved model file name.
pen.config <- paste(
  "mp", mean_penalty,
  "vp", var_penalty,
  "cp", covar_penalty,
  "maxStds", max_stds,
  sep = "_"
)
print(pen.config)

[1] "mp_0_vp_0.01_cp_0.01_maxStds_2"


In [9]:
# Fit TCAx on the Hannum data (no C1/C2 covariates), cap the fitted sigmas,
# estimate the tensor for the Reinius samples from the fitted parameters,
# and store both in one list on disk.
tcax.mdl <- list()

tcax.mdl$params.hat <- TCAx(
  hannum$X, hannum$W,
  C1 = NULL, C2 = NULL,
  mean_max_iterations = mean_max_iterations,
  var_max_iterations = var_max_iterations,
  mean_penalty = mean_penalty,
  var_penalty = var_penalty,
  covar_penalty = covar_penalty,
  max_u = 1, max_v = 1,
  init_weight = "default",
  max_stds = max_stds,
  fit_tau = FALSE,
  nloptr_opts_algorithm = nloptr_opts_algorithm,
  config_file = NULL,
  parallel = TRUE, num_cores = NULL,
  log_file = file.path(res.dir, paste0("TCAx.", pen.config, ".log"))
)

# capping: cap_values() is called with bounds 10^(-4) and 10^4 on sigmas_hat
tcax.mdl$params.hat$sigmas_hat <- cap_values(
  tcax.mdl$params.hat$sigmas_hat,
  max.val = 10^(4), min.val = 10^(-4)
)

# tensor: uses the Reinius X and W together with the parameters fitted above
tcax.mdl$Z.hat <- tensor(
  X = reinius$X, W = reinius$W, C1 = NULL, C2 = NULL,
  tcax.mdl$params.hat,
  parallel = FALSE
)

# save
saveRDS(
  tcax.mdl,
  file = file.path(res.dir, paste0("tcax.mdl.", pen.config, ".rds"))
)

INFO [2023-07-23 20:08:51] Starting tcax...
INFO [2023-07-23 20:08:52] Starting parameter learning ...
INFO [2023-07-23 20:08:52] Initiate cluster...
INFO [2023-07-23 20:08:53] Parallel is on with 35 nodes.
INFO [2023-07-23 20:08:55] Packages were loaded into the cluster nodes.
  |++++++++++++++++++++++++++++++++++++++++++++++++++| 100% elapsed=06m 11s
INFO [2023-07-23 20:15:08] Stop cluster
INFO [2023-07-23 20:15:09] Formating results ...
INFO [2023-07-23 20:15:10] Finished parameter learning
[1] "there are extrmemely close to 0 values: 90"
INFO [2023-07-23 20:15:10] Starting tensor ...
  |++++++++++++++++++++++++++++++++++++++++++++++++++| 100% elapsed=03s  
INFO [2023-07-23 20:15:13] Formating tensor result...
INFO [2023-07-23 20:15:13] Finished tensor estimation


In [10]:
# Print the structure of the fitted TCAx result list (params.hat and Z.hat).
str(tcax.mdl)

List of 2
 $ params.hat:List of 14
  ..$ W              : num [1:426, 1:6] 0.771 0.855 0.793 0.828 0.727 ...
  .. ..- attr(*, "dimnames")=List of 2
  .. .. ..$ : chr [1:426] "GSM989827" "GSM989828" "GSM989829" "GSM989830" ...
  .. .. ..$ : chr [1:6] "Gran" "CD8T" "CD4T" "Mono" ...
  ..$ C1             : num[1:426, 0 ] 
  ..$ C2             : num[1:426, 0 ] 
  ..$ mus_hat        : num [1:10000, 1:6] 58.31 1.68 39.41 1.78 1.74 ...
  .. ..- attr(*, "dimnames")=List of 2
  .. .. ..$ : chr [1:10000] "cg12426467" "cg02113055" "cg00079898" "cg04131969" ...
  .. .. ..$ : chr [1:6] "Gran" "CD8T" "CD4T" "Mono" ...
  ..$ gammas_hat     : num[1:10000, 0 ] 
  .. ..- attr(*, "dimnames")=List of 2
  .. .. ..$ : chr [1:10000] "cg12426467" "cg02113055" "cg00079898" "cg04131969" ...
  .. .. ..$ : NULL
  ..$ betas_hat      : num[1:10000, 0 ] 
  .. ..- attr(*, "dimnames")=List of 2
  .. .. ..$ : chr [1:10000] "cg12426467" "cg02113055" "cg00079898" "cg04131969" ...
  .. .. ..$ : NULL
  ..$ Ls_hat         :

# TCA

In [11]:
# Fit TCA on the Hannum data, cap the fitted sigmas, then estimate the tensor
# for the Reinius samples and store everything in one list on disk.
tca.mdl <- list()
# The argument is spelled out as log_file instead of relying on partial
# matching of "log_fil".
tca.mdl$params.hat <- tca(hannum$X, hannum$W,
                          constrain_mu = TRUE,
                          log_file = file.path(res.dir, "TCA.log"))

# capping: cap_values() is called with bounds 10^(-4) and 10^4 on sigmas_hat
tca.mdl$params.hat$sigmas_hat <- cap_values(tca.mdl$params.hat$sigmas_hat,
                                            max.val = 10^(4), min.val = 10^(-4))

# mimic the structure to allow TCA to treat reinius as if trained on it:
# swap in the Reinius W and zero-column C1/C2 matrices with one row per
# Reinius sample.
tca.mdl.params.hat    <- copy(tca.mdl$params.hat)
tca.mdl.params.hat$W  <- reinius$W
tca.mdl.params.hat$C1 <- matrix(0, ncol(reinius$X), 0)
tca.mdl.params.hat$C2 <- matrix(0, ncol(reinius$X), 0)

# tensor
tca.mdl$Z.hat <- TCA::tensor(X = reinius$X, tca.mdl.params.hat,
                             log_file = file.path(res.dir, "TCA.log"))
# Convert the tensor() output with list_2_array(), passing the source names
# taken from the columns of reinius$W.
tca.mdl$Z.hat <- list_2_array(tca.mdl$Z.hat, colnames(reinius$W))

# save
saveRDS(tca.mdl, file.path(res.dir, "tca.mdl.rds"))

INFO [2023-07-23 20:15:16] Starting tca...
INFO [2023-07-23 20:15:16] Validating input...
INFO [2023-07-23 20:15:16] Fitting the TCA model...
INFO [2023-07-23 20:15:16] Fitting means and variances...
INFO [2023-07-23 20:15:16] Iteration 1 out of 10 internal iterations...
  |++++++++++++++++++++++++++++++++++++++++++++++++++| 100%
  |++++++++++++++++++++++++++++++++++++++++++++++++++| 100%
INFO [2023-07-23 20:15:21] Iteration 2 out of 10 internal iterations...
  |++++++++++++++++++++++++++++++++++++++++++++++++++| 100%
  |++++++++++++++++++++++++++++++++++++++++++++++++++| 100%
INFO [2023-07-23 20:15:28] Internal loop converged.
INFO [2023-07-23 20:15:28] Finished tca.
INFO [2023-07-23 20:15:28] Validating input...
INFO [2023-07-23 20:15:28] Starting tensor for estimating Z...
INFO [2023-07-23 20:15:28] Estimate tensor...
  |++++++++++++++++++++++++++++++++++++++++++++++++++| 100%
INFO [2023-07-23 20:15:30] Finished estimating tensor.


In [12]:
# Print the structure of the fitted TCA result list (params.hat and Z.hat).
str(tca.mdl)

List of 2
 $ params.hat:List of 11
  ..$ W                     : num [1:426, 1:6] 0.771 0.855 0.793 0.828 0.727 ...
  .. ..- attr(*, "dimnames")=List of 2
  .. .. ..$ : chr [1:426] "GSM989827" "GSM989828" "GSM989829" "GSM989830" ...
  .. .. ..$ : chr [1:6] "Gran" "CD8T" "CD4T" "Mono" ...
  ..$ mus_hat               : num [1:10000, 1:6] 0.924 0.492 0.909 0.524 0.529 ...
  .. ..- attr(*, "dimnames")=List of 2
  .. .. ..$ : chr [1:10000] "cg12426467" "cg02113055" "cg00079898" "cg04131969" ...
  .. .. ..$ : chr [1:6] "Gran" "CD8T" "CD4T" "Mono" ...
  ..$ sigmas_hat            : num [1:10000, 1:6] 0.01 0.305 0.01 0.1 0.215 ...
  .. ..- attr(*, "dimnames")=List of 2
  .. .. ..$ : chr [1:10000] "cg12426467" "cg02113055" "cg00079898" "cg04131969" ...
  .. .. ..$ : chr [1:6] "Gran" "CD8T" "CD4T" "Mono" ...
  ..$ tau_hat               : Named num 0.0397
  .. ..- attr(*, "names")= chr ""
  ..$ deltas_hat            : num[1:10000, 0 ] 
  .. ..- attr(*, "dimnames")=List of 2
  .. .. ..$ : chr [1:10

# Baseline

In [13]:
# Baseline: distribute each bulk value over the sources in proportion to W,
# i.e. Z.hat[h, j, i] = X[j, i] * W[i, h].
base.mdl <- list()

# Start from a copy of the TCA tensor so the array has the same dimensions
# and dimnames, then blank out the values.
base.mdl$Z.hat <- copy(tca.mdl$Z.hat)
base.mdl$Z.hat[, , ] <- 0

print("constructing simple Z by distribute X by W")
for (h in seq_len(dim(base.mdl$Z.hat)[1])) {
  # Multiply every column (sample) of X by that sample's proportion of
  # source h; sweep() does this without building a tiled copy of W[, h].
  base.mdl$Z.hat[h, , ] <- sweep(reinius$X, 2, reinius$W[, h], "*")
}

saveRDS(base.mdl, file.path(res.dir, "base.mdl.rds"))

[1] "constructing simple Z by distribute X by W"


In [14]:
# Print the structure of the baseline result list (Z.hat only).
str(base.mdl)

List of 1
 $ Z.hat: num [1:6, 1:10000, 1:6] 0.4866 0.1282 0.091 0.0657 0.1174 ...
  ..- attr(*, "dimnames")=List of 3
  .. ..$ : chr [1:6] "Gran" "CD8T" "CD4T" "Mono" ...
  .. ..$ : chr [1:10000] "cg12426467" "cg02113055" "cg00079898" "cg04131969" ...
  .. ..$ : chr [1:6] "sample.1" "sample.2" "sample.3" "sample.4" ...


# bMIND

In [20]:
# concat: bMIND is run on the Reinius and Hannum samples together, with the
# Reinius samples placed first.
# cbind()/rbind() do not check names, so make sure features and cell types
# line up before stacking.
stopifnot(
  identical(rownames(reinius$X), rownames(hannum$X)),
  identical(colnames(reinius$W), colnames(hannum$W))
)
bMIND.X <- cbind(reinius$X, hannum$X)
bMIND.W <- rbind(reinius$W, hannum$W)

# The renaming below is purely positional, so the sample order of X and W
# must already agree.
stopifnot(identical(colnames(bMIND.X), rownames(bMIND.W)))

# remove . in sample name
colnames(bMIND.X) <- paste0("sample", seq_len(ncol(bMIND.X)))
rownames(bMIND.W) <- paste0("sample", seq_len(nrow(bMIND.W)))


b <- bMIND(bMIND.X, frac = bMIND.W)

In [21]:
# Display the combined bulk matrix passed to bMIND (interactive check).
bMIND.X

Unnamed: 0,sample1,sample2,sample3,sample4,sample5,sample6,sample7,sample8,sample9,sample10,⋯,sample423,sample424,sample425,sample426,sample427,sample428,sample429,sample430,sample431,sample432
cg12426467,0.91488190,0.91502700,0.298458800,0.92545490,0.47111070,0.92490330,0.89903180,0.92590960,0.91826220,0.89542250,⋯,0.91677660,0.94909290,0.953203400,0.9441383,0.93431760,0.942943700,0.91122790,0.91990670,0.94029550,0.94140290
cg02113055,0.04359573,0.86727660,0.481182000,0.88701470,0.04739124,0.90216260,0.03696226,0.88626630,0.87836520,0.86849750,⋯,0.85667130,0.55613170,0.056367730,0.1602883,0.91131790,0.512325600,0.06927244,0.51098900,0.55314420,0.51264430
cg00079898,0.83720980,0.16572860,0.884827900,0.91761760,0.90303340,0.55037850,0.88965650,0.87792100,0.87355200,0.88947420,⋯,0.91399400,0.93064080,0.917025100,0.9474120,0.89777700,0.922170500,0.93349360,0.91266410,0.93180790,0.97369900
cg04131969,0.88267420,0.44322350,0.042791470,0.48802330,0.03919326,0.86526810,0.03517555,0.89573970,0.03912235,0.88520320,⋯,0.90145870,0.44297980,0.493683000,0.8712109,0.03813352,0.502080700,0.49681480,0.02858034,0.02005606,0.92677640
cg06193597,0.11335140,0.13321190,0.777912300,0.76927150,0.77576500,0.14171530,0.79285760,0.76399860,0.77512050,0.91288980,⋯,0.10826240,0.76009680,0.726922300,0.4375131,0.08330818,0.748719100,0.75239490,0.75578320,0.77504210,0.70793310
cg00944631,0.78169180,0.75133910,0.702896300,0.03279774,0.73345080,0.10013650,0.77236650,0.06270020,0.75015940,0.01761136,⋯,0.06192281,0.71917420,0.745676500,0.7872897,0.76998660,0.059989780,0.75329230,0.00000000,0.76322610,0.00000000
cg08052546,0.84931640,0.77197590,0.853178400,0.03577627,0.05302563,0.01409008,0.84367690,0.79821700,0.82636360,0.83914550,⋯,0.86342230,0.73099770,0.856025200,0.6954082,0.76166310,0.853151700,0.88345410,0.85488090,0.85722270,0.78840330
cg02872767,0.11741680,0.90533890,0.903081800,0.87765290,0.88003180,0.18805060,0.22498280,0.88494290,0.86235790,0.84168260,⋯,0.89437470,0.84773710,0.386308800,0.8809212,0.91423540,0.882175700,0.84962400,0.78137170,0.91984800,0.91630710
cg06064954,0.86447530,0.23592820,0.952818100,0.87522290,0.90577230,0.91595180,0.85868460,0.91547390,0.94021880,0.90071860,⋯,0.97464050,0.97107100,0.953414000,0.9427025,0.98371890,0.984435600,0.97103300,0.95863040,0.91690840,0.93205370
cg04245305,0.98974800,0.97138780,0.902184400,0.35239270,0.97728060,0.98217180,0.89680080,0.98446580,0.89322220,0.97481480,⋯,0.88997870,0.99542370,0.985056500,0.7053242,0.97959150,0.924927900,0.96354370,0.98571630,0.89370600,0.99481370


In [22]:
# Display the combined proportion matrix passed to bMIND as frac (interactive check).
bMIND.W 

Unnamed: 0,Gran,CD8T,CD4T,Mono,NK,B
sample1,0.5319253,0.14015393,0.099474009,0.07182887,0.128356633,0.0282612647
sample2,0.5895850,0.01399476,0.158510720,0.11339855,0.071217282,0.0532936886
sample3,0.6652398,0.11893223,0.106851067,0.06025880,0.021137221,0.0275808637
sample4,0.6165684,0.07321796,0.148882774,0.04857812,0.033879878,0.0788728976
sample5,0.7747228,0.04804910,0.082910108,0.06724337,0.000000000,0.0270746732
sample6,0.6511645,0.09518408,0.121815329,0.05959220,0.031145553,0.0410982946
sample7,0.7708958,0.01788965,0.148451813,0.04867780,0.000000000,0.0140849484
sample8,0.8549506,0.02464364,0.037986268,0.08158997,0.000000000,0.0008295391
sample9,0.7927175,0.02192523,0.114461255,0.03476014,0.007251677,0.0288841654
sample10,0.8275471,0.03376357,0.035015440,0.06949500,0.027238847,0.0069400686


In [28]:
# format result
bMIND.mdl <- list()

# params at original scale
bMIND.mdl$params.hat.orig <- list(mus_hat = b$mu,
                                  sigmas_hat = b$Sigma_c)

# tensor at original scale: source by feature by sample.
# b$A is indexed as [feature, source, sample]. The Reinius samples were
# placed first when the bMIND input was built, so they are the ones named
# "sample1" ... "sample<ncol(reinius$X)>". aperm() moves the source axis to
# the front in one step instead of copying one source at a time.
bMIND.mdl$Z.hat <- aperm(
  b$A[, seq_len(ncol(reinius$W)),
      paste0("sample", seq_len(ncol(reinius$X))),
      drop = FALSE],
  c(2, 1, 3)
)
dimnames(bMIND.mdl$Z.hat) <- list(
  colnames(reinius$W),
  rownames(reinius$X),
  paste0("sample.", seq_len(ncol(reinius$X)))
)

In [29]:
# Print the structure of the formatted bMIND result list.
str(bMIND.mdl)

List of 2
 $ params.hat.orig:List of 2
  ..$ mus_hat   : num [1:10000, 1:6] 0.91 0.505 0.913 0.53 0.545 ...
  .. ..- attr(*, "dimnames")=List of 2
  .. .. ..$ : chr [1:10000] "cg12426467" "cg02113055" "cg00079898" "cg04131969" ...
  .. .. ..$ : chr [1:6] "Gran" "CD8T" "CD4T" "Mono" ...
  ..$ sigmas_hat: num [1:10000, 1:6, 1:6] 0.0167 0.1509 0.00402 0.16347 0.18528 ...
  .. ..- attr(*, "dimnames")=List of 3
  .. .. ..$ : chr [1:10000] "cg12426467" "cg02113055" "cg00079898" "cg04131969" ...
  .. .. ..$ : chr [1:6] "Gran" "CD8T" "CD4T" "Mono" ...
  .. .. ..$ : chr [1:6] "Gran" "CD8T" "CD4T" "Mono" ...
 $ Z.hat          : num [1:6, 1:10000, 1:6] 0.89 0.977 0.86 0.747 0.999 ...
  ..- attr(*, "dimnames")=List of 3
  .. ..$ : chr [1:6] "Gran" "CD8T" "CD4T" "Mono" ...
  .. ..$ : chr [1:10000] "cg12426467" "cg02113055" "cg00079898" "cg04131969" ...
  .. ..$ : chr [1:6] "sample.1" "sample.2" "sample.3" "sample.4" ...


In [30]:
# Display the feature-by-sample slice of the bMIND tensor for the first
# source (the first column of reinius$W).
bMIND.mdl$Z.hat[1,,]

Unnamed: 0,sample.1,sample.2,sample.3,sample.4,sample.5,sample.6
cg12426467,0.88967980,0.919985469,0.15466892,0.92756059,0.42266602,0.92813567
cg02113055,0.00000000,0.957056926,0.42599600,0.89495261,0.00000000,0.94388923
cg00079898,0.88492681,0.792385650,0.89425581,0.91725800,0.91284599,0.69894204
cg04131969,0.99918120,0.433009129,0.04132583,0.52567192,0.02100926,0.96750653
cg06193597,0.02156051,0.000000000,0.79296766,0.78873618,0.76268709,0.03365710
cg00944631,0.77059426,0.738080070,0.71050627,0.02531227,0.73533543,0.07410457
cg08052546,0.85092586,0.820522477,0.85296709,0.00000000,0.01496729,0.00000000
cg02872767,0.00000000,0.839039463,0.88074915,0.82244127,0.84775632,0.00000000
cg06064954,0.85840223,0.012423219,0.95576624,0.84806584,0.89089337,0.90638151
cg04245305,0.99918120,0.999181200,0.89693413,0.28837868,0.98459550,0.99474404


In [31]:
# Display the feature-by-sample slice of the bMIND tensor for the fifth
# source (the fifth column of reinius$W).
bMIND.mdl$Z.hat[5,,]

Unnamed: 0,sample.1,sample.2,sample.3,sample.4,sample.5,sample.6
cg12426467,0.9991812,0.99918120,0.99918120,0.99918120,0.9991812,0.999181200
cg02113055,0.0000000,0.03328335,0.00000000,0.00000000,0.0000000,0.000000000
cg00079898,0.6956935,0.00000000,0.86635433,0.93447932,0.9026403,0.009838928
cg04131969,0.5581853,0.39349641,0.34144441,0.37649639,0.3892652,0.440117624
cg06193597,0.0000000,0.05816388,0.15222755,0.16046401,0.1691326,0.104517720
cg00944631,0.7486818,0.71905455,0.64687105,0.57392273,0.6691105,0.610323532
cg08052546,0.6980716,0.69254971,0.66626385,0.53373154,0.6165197,0.551506615
cg02872767,0.3693037,0.71204179,0.67780897,0.67432560,0.6893928,0.590796190
cg06064954,0.6864301,0.42320899,0.71210073,0.68759432,0.7091771,0.710783472
cg04245305,0.7473665,0.73810216,0.69422030,0.55184875,0.7202152,0.723313859


In [32]:
# Persist the formatted bMIND result in the result directory.
saveRDS(
  bMIND.mdl,
  file = file.path(res.dir, "bMIND.mdl.rds")
)