In [1]:
#library("EpiDISH")
require("data.table")
library("stringr")
library("testit")

source("Association.CellDMC.utils.r") # mask outliers, replace this file with Association.CellDMC.r 
set.seed(2023)

Loading required package: data.table



In [2]:
# args = commandArgs(trailingOnly=TRUE)
# data.version  = as.character(args[1])
# res.dir       = as.character(args[2])

data.version = "liu" # choose from "liu" "hannum" "hannon1" "hannon2"
res.dir = "/u/project/halperin/johnsonc/TCAx/TCAx2023/Result/Methylation/Consistency/Debug"

In [3]:
dir.version = "XY"
study.versions = c("age", "gender")
assoc_versions = list("parametric.full", "parametric.X2")

In [4]:
print(paste0("dir.version: ", dir.version))
print(paste0("data.version: ", data.version))
print(paste0("study.versions: ", study.versions))
print(paste0("res.dir: ", res.dir))
print(paste0("assoc_versions: ", assoc_versions))

[1] "dir.version: XY"
[1] "data.version: liu"
[1] "study.versions: age"    "study.versions: gender"
[1] "res.dir: /u/project/halperin/johnsonc/TCAx/TCAx2023/Result/Methylation/Consistency/Debug"
[1] "assoc_versions: parametric.full" "assoc_versions: parametricX2"   


In [5]:
data.dir = "/u/home/j/johnsonc/project-halperin/TCAx/TCAx2023/Data/Methylation/Consistency/"

file.paths = list()
file.paths[["liu"]]        = file.path(data.dir, "liu.processed.RData")
file.paths[["hannum"]]     = file.path(data.dir, "hannum.processed.RData")
file.paths[["hannon1"]]    = file.path(data.dir, "hannon1.processed.RData")
file.paths[["hannon2"]]    = file.path(data.dir, "hannon2.processed.RData")

In [6]:
res.dir  = file.path(res.dir, dir.version, data.version)
res.file = file.path(res.dir, paste0("CellDMC.mdl.rds"))
if (!file.exists(res.dir)){
    dir.create(res.dir, recursive = T)
}

In [7]:
res.dir

# Load Data

In [8]:
load(file.paths[[data.version]])

In [9]:
# by default: for XY direction all biological covars (including study.version y) are included as C1
if (data.version == "liu"){
    X  = liu$X; 
    W  = liu$W; 
    C1 = liu$cov[, c("age", "gender", "disease","smoking")];
    C2 = liu$ctrl_pcs;
}else if(data.version == "hannum"){
    X  = hannum$X; 
    W  = hannum$W; 
    C1 = hannum$cov[, c("age", "gender", "ethnicity", "smoking")];
    C2 = cbind(hannum$ctrl_pcs, hannum$cov[,"plate", drop = F])

}else if(data.version == "hannon1"){
    X  = hannon1$X; 
    W  = hannon1$W; 
    C1 = hannon1$cov[, c("age", "gender", "disease")];
    C2 = hannon1$ctrl_pcs

}else if(data.version == "hannon2"){
    X  = hannon2$X; 
    W  = hannon2$W; 
    C1 = hannon2$cov[, c("age", "gender", "disease")];
    C2 = hannon2$ctrl_pcs
}else{
    print("check your input")    
}

In [10]:
#reorder W by aboundance 
W = W[,order(-colMeans(W))]

# Debug

In [11]:
X = X[1:1000, ]

In [12]:
dim(X)

# Fitting 

In [13]:
start.t = Sys.time()
CellDMC.mdl = list()
for (study.version in study.versions){# age and gender
    
    print(study.version)
    study.res = list()
    y       = C1[, study.version]
    cov.mod = cbind(C1[, colnames(C1)!=study.version], C2)
    
    print(head(y))
    print(cov.mod[1:min(nrow(cov.mod), 5), 1:min(nrow(cov.mod), 5)])
    
    
    if ("parametric.full" %in% assoc_versions){                     
        print(paste0("fitting ", study.version, " parametric.full")) 
        study.res[["parametric.full"]]  = celldmc_w_joint(X = X, y = y, W = W, cov.mod = cov.mod, max_stds = Inf)
    } 
    
    if ("parametric.X2" %in% assoc_versions){           
        print(paste0("fitting ", study.version, " parametric.X2")) 
        study.res[["parametric.X2"]]     = celldmc_w_joint(X = X, y = y, W = W, cov.mod = cov.mod, max_stds = 2)
    }
    
    CellDMC.mdl[[study.version]] = study.res
} 
end.t = Sys.time()
print(start.t - end.t)

saveRDS(CellDMC.mdl, res.file)

[1] "age"
GSM1051525 GSM1051526 GSM1051527 GSM1051528 GSM1051529 GSM1051530 
        50         32         36         45         16         42 
           gender disease smoking       PC1        PC2
GSM1051525      1       2       2 -47.59335  -8.852559
GSM1051526      1       2       2 -53.21931  -9.013064
GSM1051527      1       2       1 -54.02958 -10.112155
GSM1051528      1       2       2 -83.31718  -2.852553
GSM1051529      1       2       0 -39.46948 -12.620472
[1] "fitting age parametric.full"


y is not factor or character. Treating as continuous variables.



[1] "fitting age parametricX2"


y is not factor or character. Treating as continuous variables.



[1] "gender"
GSM1051525 GSM1051526 GSM1051527 GSM1051528 GSM1051529 GSM1051530 
         1          1          1          1          1          1 
           age disease smoking       PC1        PC2
GSM1051525  50       2       2 -47.59335  -8.852559
GSM1051526  32       2       2 -53.21931  -9.013064
GSM1051527  36       2       1 -54.02958 -10.112155
GSM1051528  45       2       2 -83.31718  -2.852553
GSM1051529  16       2       0 -39.46948 -12.620472
[1] "fitting gender parametric.full"


Binary phenotype detected. Predicted change will be 1 - 0.



[1] "fitting gender parametricX2"


Binary phenotype detected. Predicted change will be 1 - 0.



Time difference of -1.827264 mins


In [15]:
str(CellDMC.mdl)

List of 2
 $ age   :List of 2
  ..$ parametric.full:List of 3
  .. ..$ marg.res :List of 6
  .. .. ..$ Gran: num [1:1000, 1:4] 0.000337 -0.000517 0.000302 0.000702 -0.000141 ...
  .. .. .. ..- attr(*, "dimnames")=List of 2
  .. .. .. .. ..$ : chr [1:1000] "cg00001349" "cg00002837" "cg00003287" "cg00008647" ...
  .. .. .. .. ..$ : chr [1:4] "Estimate" "SE" "t" "p"
  .. .. ..$ CD4T: num [1:1000, 1:4] 0.000962 0.001607 -0.001332 -0.002753 -0.000832 ...
  .. .. .. ..- attr(*, "dimnames")=List of 2
  .. .. .. .. ..$ : chr [1:1000] "cg00001349" "cg00002837" "cg00003287" "cg00008647" ...
  .. .. .. .. ..$ : chr [1:4] "Estimate" "SE" "t" "p"
  .. .. ..$ CD8T: num [1:1000, 1:4] 0.004197 -0.001663 -0.000503 -0.004631 -0.000147 ...
  .. .. .. ..- attr(*, "dimnames")=List of 2
  .. .. .. .. ..$ : chr [1:1000] "cg00001349" "cg00002837" "cg00003287" "cg00008647" ...
  .. .. .. .. ..$ : chr [1:4] "Estimate" "SE" "t" "p"
  .. .. ..$ Mono: num [1:1000, 1:4] 0.00493 0.00181 0.00559 0.00244 0.00416 ...
 

In [16]:
CellDMC.mdl$gender$parametric.full$ marg.res

Unnamed: 0,Estimate,SE,t,p
cg00001349,-0.0116745663,0.021683971,-0.53839614,0.590487339
cg00002837,-0.0155697471,0.012235403,-1.27251612,0.203643404
cg00003287,-0.0008907005,0.008790990,-0.10131971,0.919327822
cg00008647,0.0160086211,0.011806610,1.35590323,0.175599375
cg00016238,0.0065596323,0.008289601,0.79130860,0.429051695
cg00034101,0.0087221367,0.010795561,0.80793735,0.419421320
cg00038675,-0.0113615138,0.010384012,-1.09413531,0.274299684
cg00040446,-0.0060807090,0.003239221,-1.87721346,0.060935111
cg00044463,0.0047006078,0.009498956,0.49485522,0.620869032
cg00044796,-0.0075348787,0.018519301,-0.40686626,0.684239748

Unnamed: 0,Estimate,SE,t,p
cg00001349,0.042437145,0.10996361,0.3859199,0.699681881
cg00002837,-0.045043365,0.06204809,-0.7259428,0.468134348
cg00003287,-0.029455442,0.04458081,-0.6607202,0.509025145
cg00008647,0.002273982,0.05987360,0.0379797,0.969715493
cg00016238,0.033179702,0.04203817,0.7892757,0.430237832
cg00034101,0.012049654,0.05474638,0.2200996,0.825862496
cg00038675,-0.021710001,0.05265933,-0.4122726,0.680275117
cg00040446,0.038852491,0.01642672,2.3652015,0.018312044
cg00044463,-0.046721727,0.04817104,-0.9699132,0.332449435
cg00044796,0.063247476,0.09391496,0.6734548,0.500896659

Unnamed: 0,Estimate,SE,t,p
cg00001349,0.235777308,0.14181531,1.66256601,0.096879960
cg00002837,0.068645200,0.08002074,0.85784262,0.391294595
cg00003287,0.085880863,0.05749394,1.49373760,0.135727980
cg00008647,0.061744928,0.07721640,0.79963494,0.424213560
cg00016238,-0.133219413,0.05421481,-2.45725150,0.014259787
cg00034101,0.102338346,0.07060403,1.44946887,0.147687494
cg00038675,-0.178460546,0.06791246,-2.62780272,0.008796053
cg00040446,0.032800483,0.02118482,1.54830094,0.122035086
cg00044463,-0.075794254,0.06212411,-1.22004578,0.222888627
cg00044796,0.070760612,0.12111805,0.58422843,0.559268712

Unnamed: 0,Estimate,SE,t,p
cg00001349,-0.277815026,0.24593528,-1.12962657,0.25904902
cg00002837,-0.120134810,0.13877150,-0.86570232,0.38697185
cg00003287,-0.163392676,0.09970566,-1.63875027,0.10174770
cg00008647,-0.203421321,0.13390822,-1.51910999,0.12921956
cg00016238,-0.052088229,0.09401900,-0.55401811,0.57975641
cg00034101,0.052515232,0.12244110,0.42890199,0.66813622
cg00038675,0.159758095,0.11777339,1.35648716,0.17541373
cg00040446,0.002734231,0.03673860,0.07442392,0.94069590
cg00044463,-0.029809879,0.10773526,-0.27669565,0.78210151
cg00044796,-0.179392862,0.21004223,-0.85407999,0.39337436

Unnamed: 0,Estimate,SE,t,p
cg00001349,0.080741780,0.15200202,0.53118885,0.59546883
cg00002837,0.155517768,0.08576870,1.81322289,0.07025713
cg00003287,-0.095053657,0.06162378,-1.54248334,0.12344137
cg00008647,0.057429811,0.08276292,0.69390752,0.48798735
cg00016238,0.001640129,0.05810910,0.02822499,0.97749134
cg00034101,-0.024784504,0.07567558,-0.32750992,0.74338737
cg00038675,0.095452902,0.07279067,1.31133428,0.19020646
cg00040446,-0.032176589,0.02270655,-1.41706212,0.15694241
cg00044463,0.101616427,0.06658654,1.52608071,0.12747469
cg00044796,0.211705612,0.12981807,1.63078692,0.10341821

Unnamed: 0,Estimate,SE,t,p
cg00001349,-0.18994157,0.28625246,-0.6635456,0.5072157182
cg00002837,-0.14892816,0.16152088,-0.9220366,0.3568505484
cg00003287,0.21674027,0.11605082,1.8676324,0.0622617515
cg00008647,-0.04070356,0.15586035,-0.2611541,0.7940561752
cg00016238,0.09461608,0.10943192,0.8646114,0.3875701158
cg00034101,-0.30469621,0.14251337,-2.1380184,0.0328864010
cg00038675,-0.19358823,0.13708047,-1.4122233,0.1583613221
cg00040446,-0.02115246,0.04276131,-0.4946636,0.6210042309
cg00044463,-0.02026827,0.12539675,-0.1616331,0.8716448249
cg00044796,-0.31860884,0.24447531,-1.3032352,0.1929544926


In [17]:
CellDMC.mdl$gender$parametric.X2$ marg.res

Unnamed: 0,Estimate,SE,t,p
cg00001349,2.381637e-03,0.012405532,0.191981887,0.847817100
cg00002837,-9.662934e-03,0.011060359,-0.873654710,0.382643492
cg00003287,-3.375501e-03,0.007525899,-0.448518032,0.653934085
cg00008647,2.091343e-02,0.011204082,1.866590154,0.062426893
cg00016238,3.988617e-03,0.007176399,0.555796424,0.578551508
cg00034101,9.065911e-03,0.008045990,1.126761454,0.260271663
cg00038675,-9.554700e-03,0.010278377,-0.929592262,0.352942276
cg00040446,-5.555258e-03,0.003174624,-1.749894974,0.080635153
cg00044463,3.364208e-04,0.008535166,0.039415842,0.968571579
cg00044796,-1.417579e-02,0.016502842,-0.858990769,0.390677054

Unnamed: 0,Estimate,SE,t,p
cg00001349,-0.040749739,0.06279752,-0.64890678,0.516630687
cg00002837,-0.045937973,0.05601749,-0.82006487,0.412493269
cg00003287,-0.011770175,0.03828665,-0.30742246,0.758623758
cg00008647,-0.052388128,0.05729518,-0.91435496,0.360882969
cg00016238,0.034065564,0.03608138,0.94413137,0.345472072
cg00034101,-0.025348241,0.04338750,-0.58422915,0.559274953
cg00038675,-0.073934904,0.05361586,-1.37897445,0.168397498
cg00040446,0.048796067,0.01615906,3.01973502,0.002634849
cg00044463,-0.022959621,0.04282272,-0.53615509,0.592044294
cg00044796,0.090619466,0.08757633,1.03474838,0.301189208

Unnamed: 0,Estimate,SE,t,p
cg00001349,0.056608175,0.08105359,0.69840426,0.485177311
cg00002837,0.042706263,0.07119114,0.59988168,0.548803561
cg00003287,0.101215762,0.04933931,2.05142212,0.040640107
cg00008647,0.121155982,0.07925998,1.52858964,0.126872127
cg00016238,-0.122975140,0.04720152,-2.60532185,0.009400161
cg00034101,-0.001638057,0.05321113,-0.03078411,0.975451449
cg00038675,-0.233602375,0.07381779,-3.16458087,0.001628741
cg00040446,0.037634022,0.02105840,1.78712647,0.074409421
cg00044463,-0.053555536,0.05696272,-0.94018564,0.347489676
cg00044796,0.080625789,0.11291217,0.71405758,0.475459871

Unnamed: 0,Estimate,SE,t,p
cg00001349,-0.2595844991,0.14029495,-1.85027685,0.064732915
cg00002837,-0.1099982787,0.12530013,-0.87787843,0.380348662
cg00003287,-0.1319891387,0.08606634,-1.53357439,0.125637996
cg00008647,-0.3535896450,0.13111869,-2.69671422,0.007191488
cg00016238,-0.0255251717,0.08217088,-0.31063526,0.756182822
cg00034101,0.0096092367,0.09091091,0.10569948,0.915854382
cg00038675,0.1288012079,0.11856317,1.08635091,0.277743914
cg00040446,-0.0180780239,0.03627293,-0.49838884,0.618388187
cg00044463,0.0270937940,0.09781759,0.27698284,0.781885897
cg00044796,-0.1054135484,0.18803878,-0.56059472,0.575276045

Unnamed: 0,Estimate,SE,t,p
cg00001349,0.1414502804,0.08793113,1.60864844,0.10818485
cg00002837,0.1023204634,0.07698743,1.32905415,0.18431741
cg00003287,-0.0645674602,0.05366030,-1.20326306,0.22932794
cg00008647,0.1842002008,0.07992977,2.30452545,0.02152009
cg00016238,0.0058793440,0.04955496,0.11864289,0.90559690
cg00034101,0.0449278369,0.05605465,0.80150063,0.42314359
cg00038675,0.1008191468,0.07139601,1.41211170,0.15841636
cg00040446,-0.0371959405,0.02212950,-1.68083045,0.09330361
cg00044463,0.0262552237,0.06060786,0.43319834,0.66502184
cg00044796,0.1296807421,0.11619275,1.11608289,0.26481833

Unnamed: 0,Estimate,SE,t,p
cg00001349,0.0286755481,0.16324989,0.175654315,0.86062092
cg00002837,-0.1529045702,0.14399870,-1.061846874,0.28871731
cg00003287,0.0860889669,0.10268045,0.838416357,0.40211590
cg00008647,0.0837205251,0.14856805,0.563516350,0.57328545
cg00016238,0.0740151934,0.09318236,0.794304741,0.42732344
cg00034101,0.1020023933,0.15126533,0.674327654,0.50034999
cg00038675,0.3010334754,0.18777797,1.603135209,0.10941164
cg00040446,-0.0254891413,0.04147536,-0.614561061,0.53907193
cg00044463,-0.0450806689,0.11132792,-0.404935889,0.68566476
cg00044796,-0.4320080106,0.30188200,-1.431049274,0.15291912
