In [1]:
library("TCA")
source("analysis.utils.r")

source("Association.TCA.utils.r") # for adding pval
source("TCAx.r")# for adding pval
set.seed(2023)

“package ‘MASS’ was built under R version 4.1.3”

Attaching package: ‘config’


The following objects are masked from ‘package:base’:

    get, merge


Loading required package: nlme

This is mgcv 1.8-38. For overview type 'help("mgcv-package")'.


Attaching package: ‘mgcv’


The following object is masked from ‘package:pracma’:

    magic



Attaching package: ‘futile.logger’


The following object is masked from ‘package:mgcv’:

    scat




In [2]:
# args = commandArgs(trailingOnly=TRUE)

# dir.version   = as.character(args[1]) 
# data.version  = as.character(args[2])
# study.version = as.character(args[3])

# fit_model     = as.logical(args[4])
# fit_assoc     = as.logical(args[5])
# fit_tensor    = as.logical(args[6])
# res.dir       = as.character(args[7])

# if (length(args) >=8){
#     assoc_versions = as.list(args[8:length(args)])
# }else{
#     assoc_versions = list("parametric.full", "parametric.X2", "parametric.Q2", "parametric.XQ2")
# }

In [3]:
# ---- Run settings (notebook stand-in for the command-line arguments) ----
# dir.version  : "XY" or "YX"; selects the branch used when building res.dir
#                and when preparing the covariates.
# data.version : which processed dataset to load.
# study.version: covariate of interest; the *string* "NA" (not the NA value)
#                means "no covariate selected".
dir.version = "XY"
data.version = "liu" # choose from "liu" "hannum" "hannon1" "hannon2"
study.version = "NA" 

# Stage switches. Spelled TRUE/FALSE because T and F are ordinary variables
# that can be reassigned.
fit_model     = FALSE
fit_assoc     = TRUE
fit_tensor    = FALSE

# Base output directory; a run-specific sub-directory is appended further down.
res.dir = "/u/project/halperin/johnsonc/TCAx/TCAx2023/Result/Methylation/Consistency/Debug"
# Association variants to compute; each name becomes a result slot.
assoc_versions = list("parametric.full", "parametric.X2", "parametric.Q2", "parametric.XQ2")

In [4]:
# Echo every run setting as "<name>: <value>", one print() per setting.
# assoc_versions is a list, so paste0() yields one string per entry and the
# last print shows a character vector rather than a single line.
# Wrapped in local() so the helper list and loop variable stay out of the workspace.
invisible(local({
    run.settings = list(
        "dir.version"    = dir.version,
        "data.version"   = data.version,
        "study.version"  = study.version,
        "fit_model"      = fit_model,
        "fit_assoc"      = fit_assoc,
        "fit_tensor"     = fit_tensor,
        "res.dir"        = res.dir,
        "assoc_versions" = assoc_versions
    )
    for (label in names(run.settings)){
        print(paste0(label, ": ", run.settings[[label]]))
    }
}))

[1] "dir.version: XY"
[1] "data.version: liu"
[1] "study.version: NA"
[1] "fit_model: FALSE"
[1] "fit_assoc: TRUE"
[1] "fit_tensor: FALSE"
[1] "res.dir: /u/project/halperin/johnsonc/TCAx/TCAx2023/Result/Methylation/Consistency/Debug"
[1] "assoc_versions: parametric.full" "assoc_versions: parametric.X2"  
[3] "assoc_versions: parametric.Q2"   "assoc_versions: parametric.XQ2" 


In [5]:
# Directory holding the processed datasets.
data.dir = "/u/home/j/johnsonc/project-halperin/TCAx/TCAx2023/Data/Methylation/Consistency/"

# Map each supported data.version to its processed .RData file.
file.paths = list(
    "liu"     = file.path(data.dir, "liu.processed.RData"),
    "hannum"  = file.path(data.dir, "hannum.processed.RData"),
    "hannon1" = file.path(data.dir, "hannon1.processed.RData"),
    "hannon2" = file.path(data.dir, "hannon2.processed.RData")
)

In [6]:
# Resolve the run-specific output directory and the files written into it.
#   XY, study.version == "NA" : <res.dir>/XY/<data.version>
#   XY, study.version given   : <res.dir>/XY/<data.version>-<study.version>-shuffled
#   YX                        : <res.dir>/YX/<data.version>-<study.version>
# study.version uses the string "NA" (not the NA value) as its "not set" marker.
# Note: res.dir is extended in place, so re-running this cell without resetting
# res.dir appends the sub-directory a second time.
if (dir.version == "XY" && study.version == "NA"){
    res.dir = file.path(res.dir, dir.version, data.version)
}else if (dir.version == "XY" && study.version != "NA"){
    print("################################### WARNING #################################")
    print(paste0("in XY direction with study version provided, running the shuffled version on: ", study.version))
    res.dir = file.path(res.dir, dir.version, paste0(data.version, "-", study.version,  "-shuffled"))
    print("################################### WARNING #################################")
}else if (dir.version == "YX"){
    res.dir = file.path(res.dir, dir.version, paste0(data.version, "-", study.version))
}else{
    # Fail fast: continuing would write results into the un-suffixed base directory.
    stop("wrong dir.version: ", dir.version)
}

# Fitted-model file and TCA log file inside the run directory.
res.file = file.path(res.dir, "tca.mdl.rds")
log.file = file.path(res.dir, "tca.log")

# Create the run directory (and any missing parents) on first use.
if (!dir.exists(res.dir)){
    dir.create(res.dir, recursive = TRUE)
}

In [7]:
# Display the resolved output directory for this run.
res.dir

# Load Data

In [8]:
# Restore the objects saved in the selected dataset's .RData file into the workspace.
# The next cell reads an object named after data.version (e.g. `liu`) — presumably
# that is what the file contains; verify if a new dataset is added.
load(file.paths[[data.version]])

In [9]:
# Pick the model inputs for the selected dataset:
#   X  - the $X element of the dataset (passed to tca() as X)
#   W  - the $W element of the dataset (passed to tca() as W)
#   C1 - the biological covariate columns listed below for each dataset
#   C2 - the control PCs (plus the "plate" column for hannum)
# By default (XY direction) all biological covariates, including the
# study.version covariate, are kept in C1.
if (data.version == "liu"){
    X  = liu$X
    W  = liu$W
    C1 = liu$cov[, c("age", "gender", "disease","smoking")]
    C2 = liu$ctrl_pcs
}else if (data.version == "hannum"){
    X  = hannum$X
    W  = hannum$W
    C1 = hannum$cov[, c("age", "gender", "ethnicity", "smoking")]
    C2 = cbind(hannum$ctrl_pcs, hannum$cov[, "plate", drop = FALSE])
}else if (data.version == "hannon1"){
    X  = hannon1$X
    W  = hannon1$W
    C1 = hannon1$cov[, c("age", "gender", "disease")]
    C2 = hannon1$ctrl_pcs
}else if (data.version == "hannon2"){
    X  = hannon2$X
    W  = hannon2$W
    C1 = hannon2$cov[, c("age", "gender", "disease")]
    C2 = hannon2$ctrl_pcs
}else{
    # Fail fast: otherwise X/W/C1/C2 stay undefined (or stale from an earlier run).
    stop("check your input: unknown data.version '", data.version, "'")
}


if (dir.version == "YX"){
    # The phenotype must be one of the selected covariates (this also rejects "NA").
    if (!(study.version %in% colnames(C1))){
        stop("study.version '", study.version, "' is not a column of C1 for data.version '", data.version, "'")
    }

    # C1 excludes y, so each study.version gets its own result directory.
    y = C1[, study.version, drop = FALSE]
    # TODO add conversion to binary if needed
    C1 = C1[, setdiff(colnames(C1), study.version), drop = FALSE]

    # Recode y to 0/1. table(y) is printed explicitly: inside a nested `if` the
    # bare expression is not auto-printed.
    # NOTE(review): the `- 1` shifts assume the stored coding is 1/2 — confirm.
    if (study.version == "disease"){
        message("make the control 0 and case 1")
        y = y - 1
        print(table(y))
    }
    if (study.version == "smoking"){
        message("converting never smoke 0 and ever smoked 1")
        y[y>0,] = 1
        print(table(y))
    }
    if (study.version == "gender"){
        message("converting gender to 0 and 1")
        y = y - 1
        print(table(y))
    }
}

In [10]:
# Shuffled run: in the XY direction with a study.version selected, permute that
# covariate's values across rows of C1. The column is converted with
# as.numeric() before sampling. The first rows are printed before and after.
if (dir.version == "XY" && study.version != "NA"){
    print(paste0("shuffeling the pehnotype of interest:", study.version))
    print(paste0("before shuffeling "))
    # head() also copes with fewer than 10 rows.
    print(head(C1, 10))
    C1[,study.version] = sample(as.numeric(C1[,study.version]))
    print(paste0("after shuffeling "))
    print(head(C1, 10))
}

In [11]:
# Reorder the columns of W by abundance: largest column mean first.
W.col.means = colMeans(W)
W = W[, order(-W.col.means)]

# Debug

In [12]:
# Debug only: keep at most the first 1000 rows of X to shorten the run.
# seq_len(min(...)) avoids an out-of-bounds error when X has fewer than 1000
# rows; drop = FALSE keeps X two-dimensional even if a single row remains.
X = X[seq_len(min(1000, nrow(X))), , drop = FALSE]

In [13]:
# Check the dimensions of X after the debug subsetting above.
dim(X)

## Run TCA

In [14]:
# Fit the TCA model (only when fit_model is set), cap the estimated sigmas and
# save the result to res.file. The elapsed time is printed either way.
start.t = Sys.time()
if (fit_model){
    # Single-core fit; tca() is given log.file as its log_file argument.
    tca.mdl = list(
        params.hat = tca(X = X, W = W, C1 = C1, C2 = C2,
                         verbose = TRUE, parallel = FALSE, num_cores = 1,
                         log_file = log.file)
    )

    # Cap sigmas_hat to the range [10^-4, 10^4].
    tca.mdl$params.hat$sigmas_hat = cap_values(tca.mdl$params.hat$sigmas_hat,
                                               max.val = 10^4,
                                               min.val = 10^(-4))

    # Save the object with the estimated parameters.
    saveRDS(tca.mdl, res.file)
}
end.t = Sys.time()
print(end.t - start.t)

Time difference of 0.001693726 secs


In [15]:
# Add parametric p-values to the saved TCA model (XY direction only).
# For every association version requested in assoc_versions,
# add_C1_C2_pvals_parametric_TCA() is called with that version's *_max_stds
# thresholds and the model is re-saved to res.file after each version, so
# completed versions survive a later failure.
if (dir.version == "XY" && fit_assoc){
    message("running association")
    if (!file.exists(res.file)){
        stop("no fitted TCA model found at: ", res.file)
    }
    tca.mdl = readRDS(res.file)

    # Thresholds per association version (Inf = no threshold). Versions run in
    # this fixed order, regardless of their order in assoc_versions.
    assoc.thresholds = list(
        "parametric.full" = list(X_max_stds = Inf, Q_max_stds = Inf, XQ_max_stds = Inf),
        "parametric.X2"   = list(X_max_stds = 2,   Q_max_stds = Inf, XQ_max_stds = Inf),
        "parametric.Q2"   = list(X_max_stds = Inf, Q_max_stds = 2,   XQ_max_stds = Inf),
        "parametric.XQ2"  = list(X_max_stds = Inf, Q_max_stds = Inf, XQ_max_stds = 2),
        "parametric.X2Q2" = list(X_max_stds = 2,   Q_max_stds = 2,   XQ_max_stds = Inf)
    )

    for (assoc.version in names(assoc.thresholds)){
        if (!(assoc.version %in% assoc_versions)){
            next
        }
        max.stds = assoc.thresholds[[assoc.version]]

        # NOTE(review): the earlier inline comments read "sigmas are already in
        # diag format now" next to diag_only = F and "dont fit intercept" next to
        # intercept = T. The argument values are kept unchanged; confirm which
        # of comment or value reflects the intent.
        tca.mdl$params.hat = add_C1_C2_pvals_parametric_TCA(X = X,
                                                            TCA.mdl = tca.mdl$params.hat,
                                                            slot_name = assoc.version,
                                                            diag_only = FALSE,
                                                            intercept = TRUE,
                                                            X_max_stds = max.stds[["X_max_stds"]],
                                                            Q_max_stds = max.stds[["Q_max_stds"]],
                                                            XQ_max_stds = max.stds[["XQ_max_stds"]],
                                                            parallel = FALSE, num_cores = NULL,
                                                            config_file = NULL, log_file = NULL, debug = FALSE, verbose = TRUE)
        saveRDS(tca.mdl, res.file)
    }
}

running association



INFO [2023-07-08 15:46:26] Preparing weights for parametric pvals calculation ...
INFO [2023-07-08 15:46:26] Starting parametric pvals calculation: parametric.full
  |++++++++++++++++++++++++++++++++++++++++++++++++++| 100% elapsed=01m 04s
INFO [2023-07-08 15:47:31] Finished parametric pvals calculation: parametric.full
INFO [2023-07-08 15:47:31] Preparing weights for parametric pvals calculation ...
INFO [2023-07-08 15:47:31] Starting parametric pvals calculation: parametric.X2
  |++++++++++++++++++++++++++++++++++++++++++++++++++| 100% elapsed=01m 02s
INFO [2023-07-08 15:48:34] Finished parametric pvals calculation: parametric.X2
INFO [2023-07-08 15:48:35] Preparing weights for parametric pvals calculation ...
INFO [2023-07-08 15:48:35] Starting parametric pvals calculation: parametric.Q2
  |++++++++++++++++++++++++++++++++++++++++++++++++++| 100% elapsed=01m 03s
INFO [2023-07-08 15:49:37] Finished parametric pvals calculation: parametric.Q2
INFO [2023-07-08 15:49:39] Preparing weigh

In [16]:
# Inspect the structure of the model object after the association step.
str(tca.mdl)

List of 1
 $ params.hat:List of 15
  ..$ W                     : num [1:687, 1:6] 0.683 0.727 0.846 0.733 0.626 ...
  .. ..- attr(*, "dimnames")=List of 2
  .. .. ..$ : chr [1:687] "GSM1051525" "GSM1051526" "GSM1051527" "GSM1051528" ...
  .. .. ..$ : chr [1:6] "Gran" "CD4T" "CD8T" "Mono" ...
  ..$ mus_hat               : num [1:1000, 1:6] 0.7277 0.3121 0.1476 0.0798 0.9016 ...
  .. ..- attr(*, "dimnames")=List of 2
  .. .. ..$ : chr [1:1000] "cg00001349" "cg00002837" "cg00003287" "cg00008647" ...
  .. .. ..$ : chr [1:6] "Gran" "CD4T" "CD8T" "Mono" ...
  ..$ sigmas_hat            : num [1:1000, 1:6] 0.085 0.0134 0.0124 0.01 0.0256 ...
  .. ..- attr(*, "dimnames")=List of 2
  .. .. ..$ : chr [1:1000] "cg00001349" "cg00002837" "cg00003287" "cg00008647" ...
  .. .. ..$ : chr [1:6] "Gran" "CD4T" "CD8T" "Mono" ...
  ..$ tau_hat               : Named num 0.027
  .. ..- attr(*, "names")= chr ""
  ..$ deltas_hat            : num [1:1000, 1:20] 1.35e-04 9.20e-05 9.01e-05 -6.07e-05 -2.01e-04 ...


In [17]:
# First rows of the top-level gammas_hat_pvals element of params.hat.
head(tca.mdl$params.hat$gammas_hat_pvals)

Unnamed: 0,Gran.age,Gran.gender,Gran.disease,Gran.smoking,CD4T.age,CD4T.gender,CD4T.disease,CD4T.smoking,CD8T.age,CD8T.gender,⋯,Mono.disease,Mono.smoking,NK.age,NK.gender,NK.disease,NK.smoking,B.age,B.gender,B.disease,B.smoking
cg00001349,0.54336153,0.6658385,0.7821051,0.7067071,0.9388375,0.6323022,0.8734438068,0.9085293,0.8709128,0.219144055,⋯,0.5110823,0.489359,0.5389212,0.38698338,0.7145019,0.4804215,0.1993251,0.43674034,0.99213901,0.45829153
cg00002837,0.34683011,0.0985877,0.2776412,0.1477224,0.5691925,0.7106448,0.0671506926,0.98739029,0.2372861,0.332408375,⋯,0.6458872,0.1439277,0.4015235,0.10022742,0.55912111,0.9789613,0.2339649,0.57030326,0.009501754,0.65299736
cg00003287,0.42286668,0.8568804,0.1812482,0.8564867,0.6180686,0.458238,0.0006718904,0.41651626,0.4881405,0.147172449,⋯,0.2164603,0.7888529,0.8935937,0.14687561,0.91750039,0.7110477,0.7292421,0.03201514,0.042239818,0.42283266
cg00008647,0.07423226,0.1053725,0.4807461,0.8459535,0.6498994,0.3812989,0.7717248389,0.63368882,0.1113453,0.728842772,⋯,0.4314241,0.7894053,0.2390799,0.08004998,0.02503817,0.4344422,0.6561543,0.42432502,0.888585395,0.456448
cg00016238,0.46372517,0.4579304,0.6117024,0.2901219,0.6093697,0.3369869,0.3245285226,0.51424156,0.9128275,0.008551668,⋯,0.2104179,0.8712299,0.4770061,0.95632858,0.09877702,0.5359818,0.8890422,0.46300821,0.532341565,0.93399916
cg00034101,0.48322197,0.5234056,0.3811728,0.5661536,0.684254,0.7143409,0.065255214,0.06139222,0.9846984,0.166056386,⋯,0.9019417,0.2450838,0.2149125,0.75402997,0.19293764,0.8864497,0.8515513,0.33734323,0.061116083,0.01430649


In [18]:
# Same table from the "parametric.full" slot, for comparison with the top-level gammas_hat_pvals.
head(tca.mdl$params.hat$parametric.full$gammas_hat_pvals)

Unnamed: 0,Gran.age,Gran.gender,Gran.disease,Gran.smoking,CD4T.age,CD4T.gender,CD4T.disease,CD4T.smoking,CD8T.age,CD8T.gender,⋯,Mono.disease,Mono.smoking,NK.age,NK.gender,NK.disease,NK.smoking,B.age,B.gender,B.disease,B.smoking
cg00001349,0.54336153,0.6658385,0.7821051,0.7067071,0.9388375,0.6323022,0.8734438068,0.9085293,0.8709128,0.219144055,⋯,0.5110823,0.489359,0.5389212,0.38698338,0.7145019,0.4804215,0.1993251,0.43674034,0.99213901,0.45829153
cg00002837,0.34683011,0.0985877,0.2776412,0.1477224,0.5691925,0.7106448,0.0671506926,0.98739029,0.2372861,0.332408375,⋯,0.6458872,0.1439277,0.4015235,0.10022742,0.55912111,0.9789613,0.2339649,0.57030326,0.009501754,0.65299736
cg00003287,0.42286668,0.8568804,0.1812482,0.8564867,0.6180686,0.458238,0.0006718904,0.41651626,0.4881405,0.147172449,⋯,0.2164603,0.7888529,0.8935937,0.14687561,0.91750039,0.7110477,0.7292421,0.03201514,0.042239818,0.42283266
cg00008647,0.07423226,0.1053725,0.4807461,0.8459535,0.6498994,0.3812989,0.7717248389,0.63368882,0.1113453,0.728842772,⋯,0.4314241,0.7894053,0.2390799,0.08004998,0.02503817,0.4344422,0.6561543,0.42432502,0.888585395,0.456448
cg00016238,0.46372517,0.4579304,0.6117024,0.2901219,0.6093697,0.3369869,0.3245285226,0.51424156,0.9128275,0.008551668,⋯,0.2104179,0.8712299,0.4770061,0.95632858,0.09877702,0.5359818,0.8890422,0.46300821,0.532341565,0.93399916
cg00034101,0.48322197,0.5234056,0.3811728,0.5661536,0.684254,0.7143409,0.065255214,0.06139222,0.9846984,0.166056386,⋯,0.9019417,0.2450838,0.2149125,0.75402997,0.19293764,0.8864497,0.8515513,0.33734323,0.061116083,0.01430649


In [19]:
# First rows of the top-level gammas_hat_pvals.joint element of params.hat.
head(tca.mdl$params.hat$gammas_hat_pvals.joint)

Unnamed: 0,age,gender,disease,smoking
cg00001349,0.5816957236,0.5803122257,0.9932073,0.9146739
cg00002837,0.0073616362,0.0420130964,0.09064257,0.3447317
cg00003287,0.0242209586,0.0008338504,0.01735935,0.7740116
cg00008647,0.2641713986,0.2403709984,0.40218425,0.3405444
cg00016238,0.9070954891,0.087475842,0.54179986,0.7727619
cg00034101,0.0003338202,0.2877422503,0.30031088,0.251791


In [20]:
# Same table from the "parametric.full" slot, for comparison with the top-level gammas_hat_pvals.joint.
head(tca.mdl$params.hat$parametric.full$gammas_hat_pvals.joint)

Unnamed: 0,age,gender,disease,smoking
cg00001349,0.5816957236,0.5803122257,0.9932073,0.9146739
cg00002837,0.0073616362,0.0420130964,0.09064257,0.3447317
cg00003287,0.0242209586,0.0008338504,0.01735935,0.7740116
cg00008647,0.2641713986,0.2403709984,0.40218425,0.3405444
cg00016238,0.9070954891,0.087475842,0.54179986,0.7727619
cg00034101,0.0003338202,0.2877422503,0.30031088,0.251791
