In [1]:
library("testit")
library("matrixStats")

source("analysis.utils.r")
set.seed(2023)

“package ‘MASS’ was built under R version 4.1.3”


In [2]:
data.dir = file.path("../Data/RNA/CREBBP/")
res.dir  = file.path("../Result/RNA/CREBBP/")
CREBBP.dat = readRDS(file.path(data.dir, "CREBBP.dat.rds"))

# DE genes and mutation status

In [3]:
inc.genes  = CREBBP.dat$inc.genes
dec.genes  = CREBBP.dat$dec.genes
samples.mt = CREBBP.dat$samples.mt
samples.wt = CREBBP.dat$samples.wt

print(paste0("length inc.genes: ", length(inc.genes)))
print(paste0("length dec.genes: ", length(dec.genes)))
print(paste0("length samples.mt: ", length(samples.mt)))
print(paste0("length samples.wt: ", length(samples.wt)))

[1] "length inc.genes: 275"
[1] "length dec.genes: 219"
[1] "length samples.mt: 14"
[1] "length samples.wt: 10"


# Gene experiemnt (W, X, C1)

In [4]:
key     = "genes"

W = CREBBP.dat$expm.dat[[key]]$W
X = CREBBP.dat$expm.dat[[key]]$X
source.ids  = colnames(W)
feature.ids = rownames(X)
sample.ids  = rownames(W)

k = length(source.ids)
m = length(feature.ids)
n = length(sample.ids)

print(source.ids)

[1] "Bcells"    "Remaining" "TcellsCD4" "TcellsCD8"


# Load model results

In [5]:
max_stds = 2
mean_penalty  = 0
var_penalty   = 0.01
covar_penalty = 0.01


pen.config = paste("mp",mean_penalty,"vp", var_penalty,"cp", covar_penalty ,"max_stds", max_stds, sep =  "_")
print(pen.config)

base.mdl       = readRDS(file.path(res.dir, paste0("base.mdl.",       key, ".rds")))
bulk.mdl       = readRDS(file.path(res.dir, paste0("bulk.mdl.",       key, ".rds")))
cibersortx.mdl = readRDS(file.path(res.dir, paste0("cibersortx.mdl.", key, ".rds")))
tca.mdl        = readRDS(file.path(res.dir, paste0("tca.mdl.",        key, ".rds")))
tcax.mdl       = readRDS(file.path(res.dir, paste0("tcax.mdl.",       key, ".", pen.config, ".rds")))
bMIND.scale.mdl= readRDS(file.path(res.dir, paste0("bMIND.scale.mdl.",key,  ".rds")))
bMIND.log.mdl  = readRDS(file.path(res.dir, paste0("bMIND.log.mdl.",  key,  ".rds")))

[1] "mp_0_vp_0.01_cp_0.01_max_stds_2"


In [6]:
# now force none negativity for the log transformation later
base.mdl$Z.hat = none_neg_Z(base.mdl$Z.hat)
bulk.mdl$Z.hat = none_neg_Z(bulk.mdl$Z.hat)
cibersortx.mdl$Z.hat = none_neg_Z(cibersortx.mdl$Z.hat)
tca.mdl$Z.hat  = none_neg_Z(tca.mdl$Z.hat)
tcax.mdl$Z.hat = none_neg_Z(tcax.mdl$Z.hat)
bMIND.scale.mdl$Z.hat = none_neg_Z(bMIND.scale.mdl$Z.hat)
bMIND.log.mdl$Z.hat  = none_neg_Z(bMIND.log.mdl$Z.hat)

0 percent of the feature-source are shifted to be non negative

0 percent of the feature-source are shifted to be non negative

0 percent of the feature-source are shifted to be non negative

32.95 percent of the feature-source are shifted to be non negative

27.68 percent of the feature-source are shifted to be non negative

0 percent of the feature-source are shifted to be non negative

0 percent of the feature-source are shifted to be non negative



In [7]:
bMIND.scale.mdl$Z.hat[1,,]

Unnamed: 0,FL_1004,FL_1005,FL_1006,FL_1008,FL_1009,FL_1010,FL_1012,FL_1014,FL_1016,FL_1017,⋯,FL_983,FL_984,FL_986,FL_987,FL_989,FL_991,FL_993,FL_994,FL_996,FL_999
ZYG11B,364.41534,330.33318,237.92396,353.15243,346.41758,260.29337,464.81389,429.74061,292.298322,341.30454,⋯,351.68520,184.830239,315.92348,316.68162,264.06274,207.33486,250.89192,260.39953,327.51763,308.65157
PIAS1,808.18319,1361.40951,763.39523,833.46197,1116.16541,584.58882,949.17576,980.24511,735.502411,747.11689,⋯,763.76904,536.087369,778.76865,640.02624,641.89609,921.90932,735.75458,1080.99939,991.48570,850.90569
SMCHD1,565.64017,415.94043,376.81392,456.44355,656.60355,364.85277,588.14784,380.02090,312.901463,527.01359,⋯,677.60272,348.823802,431.17739,520.31735,370.49967,281.75274,554.75024,564.55204,748.69498,600.53929
CCNI,5093.76212,6260.66685,4143.25872,6126.48770,6366.80359,5299.78563,6331.27226,7863.90903,3950.182531,5451.27606,⋯,5408.11136,4395.334698,4673.39612,3909.87016,4846.69580,7218.10388,4161.66359,4604.17009,4730.66118,4634.53495
ORMDL3,84.71325,136.36260,75.49509,107.57449,62.21172,135.25417,87.90740,104.82302,143.602517,176.97157,⋯,57.96111,122.288885,144.69585,85.47629,87.65957,148.33248,93.73249,102.36072,68.71347,139.77655
TSPYL1,94.10984,81.99393,92.17781,105.27290,150.62015,73.29226,139.26054,116.48291,92.343101,64.00628,⋯,91.78551,54.904633,75.43238,83.66545,74.89610,93.71774,46.69489,96.94241,84.31524,82.96715
ORAI2,416.04907,1054.08501,279.18251,508.65640,221.84035,715.21829,453.12842,526.80676,265.390458,431.46148,⋯,325.80704,502.524069,446.16524,808.49392,351.94480,286.04690,466.32041,279.63222,453.65613,374.67110
CXorf38,395.13404,315.57864,416.09086,352.48366,527.70295,601.78298,389.99162,400.37827,202.194484,540.24351,⋯,517.03070,185.846791,241.25493,327.40680,315.51724,378.09655,439.03748,218.18768,278.97629,220.84200
TTN,37.58998,24.23853,35.71200,54.80772,46.16489,40.27872,32.78435,34.40341,26.882614,82.40280,⋯,56.86403,29.507958,35.52077,62.79610,44.84543,29.53182,44.57733,24.65534,55.50196,35.19811
CSDE1,1970.09919,2403.38789,2011.49813,1884.94334,2040.26010,1735.33943,2116.16574,2412.19313,1675.974998,1535.03422,⋯,2070.56574,689.626170,2015.02045,1954.08457,1625.23746,1567.03563,1545.58285,1713.46526,2109.84877,819.46305


In [8]:
bMIND.log.mdl$Z.hat[1,,]

Unnamed: 0,FL_1004,FL_1005,FL_1006,FL_1008,FL_1009,FL_1010,FL_1012,FL_1014,FL_1016,FL_1017,⋯,FL_983,FL_984,FL_986,FL_987,FL_989,FL_991,FL_993,FL_994,FL_996,FL_999
ZYG11B,369.91402,336.69625,241.97674,353.74203,352.53344,261.54109,494.27313,436.84163,296.57856,347.71092,⋯,360.23395,186.49504,319.72500,323.89527,270.963244,212.211931,256.91761,263.66639,334.76607,314.17070
PIAS1,825.58003,1467.31417,786.40972,835.23762,1167.49386,606.15909,1002.67139,1007.56044,750.21375,757.58523,⋯,785.67865,545.62563,781.69441,656.72833,645.791722,955.143017,754.52784,1151.19649,1040.88630,878.57523
SMCHD1,586.71470,420.06747,372.81250,412.02032,714.63616,341.25768,653.45024,359.34909,309.48784,558.27754,⋯,773.32491,350.66969,429.71572,556.28907,327.839523,264.895913,600.02680,620.83414,883.86333,662.74626
CCNI,6004.99682,7142.48719,4673.13191,7468.88140,6852.39924,5802.50210,7020.26976,9510.10320,4315.40282,5845.46846,⋯,6293.53123,4731.99289,5482.97377,4437.84719,5794.878420,8442.968512,4759.64135,5086.36518,5369.70355,5093.35219
ORMDL3,85.26584,134.71128,81.93068,109.47766,66.66967,153.45008,93.73776,105.12255,144.66770,177.89242,⋯,61.25100,127.17263,145.34454,89.07682,79.570209,149.431795,95.17935,103.95800,74.57973,141.23660
TSPYL1,94.23506,82.47941,93.58142,104.81405,159.00078,75.29860,146.19696,118.72780,93.81598,64.98725,⋯,92.92537,57.44150,75.51767,84.77751,73.800710,94.932499,48.71139,98.98036,85.70228,84.05895
ORAI2,396.85493,1418.41393,274.10943,490.73752,212.08002,865.56945,485.63938,580.20429,259.17784,445.66152,⋯,307.05119,552.34165,445.59632,1035.76686,239.904193,245.845788,478.16383,270.72904,496.07038,382.21673
CXorf38,376.06547,294.14635,427.46474,283.66900,554.16969,661.92389,396.06026,400.11418,194.79783,571.02325,⋯,552.62369,188.34074,205.66184,318.71181,257.287141,366.390573,441.24875,209.91313,273.59039,210.33138
TTN,75.78928,32.09200,35.86853,29.76544,50.07766,31.50509,36.74074,25.70935,34.18161,84.36038,⋯,97.43009,35.57153,40.26504,88.68632,39.412981,30.252775,51.34223,30.36740,63.98643,40.55381
CSDE1,1932.94984,2312.74750,2022.18061,1819.16386,2021.10248,1876.91436,2143.22144,2343.46895,1676.30414,1524.65760,⋯,2048.80667,809.25634,1915.00614,1959.52782,1585.562819,1535.781835,1539.65837,1723.73763,2126.74912,908.85796


In [9]:
test.source.id    = "Bcells"
main.method.names = c("CIBERSORTx", "TCA", "TCAx", "bMIND.scale", "bMIND.log")
method.names      = c("Bulk", "Baseline", "CIBERSORTx", "TCA", "TCAx", "bMIND.scale", "bMIND.log")

method.mdls       = list("Bulk"       = bulk.mdl, 
                         "Baseline"   = base.mdl, 
                         "CIBERSORTx" = cibersortx.mdl, 
                         "TCA"        = tca.mdl, 
                         "TCAx"       = tcax.mdl,
                         "bMIND.scale"= bMIND.scale.mdl,
                         "bMIND.log"  = bMIND.log.mdl)

# Tensor space DE in log scale

In [10]:
logDiff.df = matrix(NA, m, length(method.names))
rownames(logDiff.df) = feature.ids
colnames(logDiff.df) = method.names

for(method.name in method.names){
    Z.hat = method.mdls[[method.name]]$Z.hat

    log.mt.mean = log2(1 + rowMeans(Z.hat[test.source.id, feature.ids, samples.mt]))
    log.wt.mean = log2(1 + rowMeans(Z.hat[test.source.id, feature.ids, samples.wt])) 

    logDiff.df[, method.name] = log.mt.mean - log.wt.mean
}


# for(method.name in method.names){
#     Z.hat = method.mdls[[method.name]]$Z.hat

#     log.mt.mean = log2(1 + rowMedians(Z.hat[test.source.id, feature.ids, samples.mt]))
#     log.wt.mean = log2(1 + rowMedians(Z.hat[test.source.id, feature.ids, samples.wt]))
        
    
#     logDiff.df[, method.name] = log.mt.mean - log.wt.mean
# }


# for(method.name in method.names){
#     Z.hat = method.mdls[[method.name]]$Z.hat

#     log.mt.mean = rowMeans(log2(1 + Z.hat[test.source.id, feature.ids, samples.mt]))
#     log.wt.mean = rowMeans(log2(1 + Z.hat[test.source.id, feature.ids, samples.wt]))
    
#     logDiff.df[, method.name] = log.mt.mean - log.wt.mean
# }



# for(method.name in method.names){
#     Z.hat = method.mdls[[method.name]]$Z.hat
#     log.mt.mean = rowMedians(log2(1 + Z.hat[test.source.id, feature.ids, samples.mt]))
#     log.wt.mean = rowMedians(log2(1 + Z.hat[test.source.id, feature.ids, samples.wt]))
    
#     logDiff.df[, method.name] = log.mt.mean - log.wt.mean
# }

In [11]:
logDiff.df

Unnamed: 0,Bulk,Baseline,CIBERSORTx,TCA,TCAx,bMIND.scale,bMIND.log
ZYG11B,0.259998527,0.42545134,0.114951882,1.509812e-01,0.280688689,0.226870850,0.241712629
PIAS1,0.154892908,0.31600046,0.048369229,8.315048e-02,0.201243198,0.120715207,0.125709137
SMCHD1,0.199521192,0.35901529,0.122499180,1.366608e-01,0.164292859,0.120288638,0.161488969
CCNI,0.019681314,0.19702826,0.012256381,1.356358e-02,-0.003250459,0.016207778,0.007932538
ORMDL3,-0.181131173,0.02755066,0.000000000,-1.397701e-02,-0.187772565,-0.072509379,-0.101309202
TSPYL1,0.105909234,0.27805016,0.089784335,7.956507e-02,0.149018477,0.139425358,0.147624919
ORAI2,-0.210209110,0.03513845,-0.127742841,-1.043317e-01,-0.238831049,-0.104652612,-0.151972629
CXorf38,-0.090994380,0.14884076,0.004904049,6.948768e-04,-0.080086864,-0.010993158,-0.040140221
TTN,0.181786617,0.38153683,0.002616326,9.035688e-02,0.126817011,0.104829772,0.175664286
CSDE1,0.008013025,0.18704828,-0.015648094,-2.503676e-02,-0.003524940,-0.007805205,-0.026571580


In [12]:
# rowMedians(log)
colMedians(logDiff.df[inc.genes,])

In [13]:
colMedians(logDiff.df[dec.genes,])

In [14]:
# colMedians(logDiff.df[inc.genes,])
# #0.1269256665593370.3028772925980250.0463329835076820.04886978686126490.125834302823439

# colMedians(logDiff.df[dec.genes,])
# #-0.1486324257794390.0274109365413153-0.0680979556456673-0.0873118134544164-0.176040833756401

# Tensor space DE pval 

In [15]:
length(logDiff.df[c(inc.genes, dec.genes), "Bulk"])

In [16]:
df = data.frame(diff = c(logDiff.df[c(inc.genes, dec.genes), "Bulk"],
                         logDiff.df[c(inc.genes, dec.genes), "Baseline"],
                         logDiff.df[c(inc.genes, dec.genes), "CIBERSORTx"],
                         logDiff.df[c(inc.genes, dec.genes), "TCA"],
                         logDiff.df[c(inc.genes, dec.genes), "TCAx"], 
                         logDiff.df[c(inc.genes, dec.genes), "bMIND.scale"],
                         logDiff.df[c(inc.genes, dec.genes), "bMIND.log"]),

                models = c(rep("Bulk", m), rep("Baseline", m), rep("CIBERSORTx", m),
                           rep("TCA", m), rep("TCAx", m), 
                           rep("bMIND.scale", m), rep("bMIND.log", m)),
                
                DE.type = rep(c(rep("Up Regulated", length(inc.genes)), rep("Down Regulated", length(dec.genes))), 
                              length(method.names)))

df$models <- factor(df$models, # Change ordering manually
                    levels = c("Bulk", "Baseline", "CIBERSORTx", "TCA", "TCAx", "bMIND.scale", "bMIND.log"))

In [17]:
pvals.inc = matrix(1, length(method.names), length(method.names))
rownames(pvals.inc) = method.names
colnames(pvals.inc) = method.names
pvals.dec = matrix(1, length(method.names), length(method.names))
rownames(pvals.dec) = method.names
colnames(pvals.dec) = method.names

for (method.name1 in method.names){
    for (method.name2 in method.names){
        # does method2/col outperforms method1/row
        pvals.inc[method.name1, method.name2] = wilcox.test(logDiff.df[inc.genes, method.name1],   
                                                            logDiff.df[inc.genes, method.name2], paired = T, "greater")$p.value
        pvals.dec[method.name1, method.name2] = wilcox.test(logDiff.df[dec.genes, method.name1],   
                                                            logDiff.df[dec.genes, method.name2], paired = T, "less")$p.value
    }
}

“cannot compute exact p-value with zeroes”
“cannot compute exact p-value with zeroes”
“cannot compute exact p-value with zeroes”
“cannot compute exact p-value with zeroes”
“cannot compute exact p-value with zeroes”
“cannot compute exact p-value with zeroes”
“cannot compute exact p-value with zeroes”
“cannot compute exact p-value with zeroes”
“cannot compute exact p-value with zeroes”
“cannot compute exact p-value with zeroes”
“cannot compute exact p-value with zeroes”
“cannot compute exact p-value with zeroes”
“cannot compute exact p-value with zeroes”
“cannot compute exact p-value with zeroes”


In [18]:
pvals.inc 
pvals.dec

Unnamed: 0,Bulk,Baseline,CIBERSORTx,TCA,TCAx,bMIND.scale,bMIND.log
Bulk,1.0,1,4.913007e-17,2.798769e-13,0.002644328,3.015634e-05,0.07868692
Baseline,3.764587e-47,1,5.951814e-46,6.717527e-47,9.316539e-47,1.883306e-45,1.94494e-45
CIBERSORTx,1.0,1,1.0,0.6278345,1.0,1.0,1.0
TCA,1.0,1,0.3724521,1.0,1.0,1.0,1.0
TCAx,0.9973619,1,1.00396e-10,3.546434e-08,1.0,0.1125248,0.9036443
bMIND.scale,0.9999699,1,2.624104e-10,1.88098e-08,0.8876199,1.0,0.999939
bMIND.log,0.9214242,1,2.322626e-12,2.155062e-12,0.0964851,6.123304e-05,1.0


Unnamed: 0,Bulk,Baseline,CIBERSORTx,TCA,TCAx,bMIND.scale,bMIND.log
Bulk,1.0,5.563788999999999e-38,3.48092e-15,1.080503e-05,1.0,1.0,1.0
Baseline,1.0,1.0,1.0,1.0,1.0,1.0,1.0
CIBERSORTx,1.0,4.250825e-27,1.0,0.9659385,1.0,1.0,1.0
TCA,0.9999892,1.2611729999999999e-24,0.0341421,1.0,1.0,1.0,1.0
TCAx,3.1538660000000004e-17,4.0529609999999996e-36,2.83958e-21,2.97268e-14,1.0,0.8713641,0.03194455
bMIND.scale,2.031069e-16,5.87829e-38,2.359125e-25,2.074448e-18,0.1288597,1.0,0.009403205
bMIND.log,1.204052e-13,5.563788999999999e-38,2.2265739999999998e-21,3.422747e-15,0.9681317,0.9906237,1.0


# Regression with Bcell proportion as covariates

### sd(log(1 + estimated Z.Bcell)) ~ estiamted W.B cell proportion + mt_status. 
### Then check mt_status'coef

In [19]:
test.sample.ids = c(samples.mt, samples.wt)

mutation = matrix(0, length(test.sample.ids),1)
rownames(mutation) = test.sample.ids
colnames(mutation) = c("mutation.status")
mutation[samples.mt, ] = 1
mutation[samples.wt, ] = 0

In [20]:
method.beta.list = list()
method.log10p.list = list()

for (method.name in method.names){
    Z.hat = method.mdls[[method.name]]$Z.hat

    beta.mat = matrix(0, length(feature.ids), (3)) # 1 intercept + 1 celltype + 1 mutation status
    rownames(beta.mat) = feature.ids
    p.vals.mat = matrix(1, length(feature.ids), (3)) 
    rownames(p.vals.mat) = feature.ids

    for (feature.id in feature.ids){
        regression.df = data.frame(Z.hat         = log2(1 + Z.hat[test.source.id, feature.id, test.sample.ids]),
                                   W.test.source = W[test.sample.ids, test.source.id] ,
                                   mutation      = mutation)
        tryCatch({
            #normalize to unit variance 
            regression.df$Z.hat = regression.df$Z.hat/sd(regression.df$Z.hat)
            
            fit <- lm(Z.hat ~ ., data = regression.df)
            p.vals.mat[feature.id, ] = t(summary(fit)$coefficients)["Pr(>|t|)", , drop = F]
            colnames(p.vals.mat)     = rownames(summary(fit)$coefficients)
            
            beta.mat[feature.id, ] = t(summary(fit)$coefficients)["Estimate", , drop = F]
            colnames(beta.mat)     = rownames(summary(fit)$coefficients)
            
        }, error=function(cond){})
    }
   
    method.log10p.list[[method.name]] = -log10(p.vals.mat)
    method.beta.list[[method.name]] = beta.mat
}

In [21]:
regression.df

Unnamed: 0_level_0,Z.hat,W.test.source,mutation.status
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>
FL_581,14.3866,0.4017826,1
FL_598,14.58081,0.6149005,1
FL_616,14.13178,0.3398949,1
FL_666,14.74196,0.536164,1
FL_731,12.83397,0.6034532,1
FL_1116,12.73223,0.6137996,1
FL_706,13.84248,0.6515202,1
FL_948,14.09227,0.5428632,1
FL_1062,13.38745,0.6416373,1
FL_1063,14.12547,0.6242046,1


In [22]:
method.log10p.list$TCAx

Unnamed: 0,(Intercept),W.test.source,mutation.status
ZYG11B,16.986628,0.785102984,1.205281485
PIAS1,10.442116,0.628797400,0.468093269
SMCHD1,15.619321,0.405289997,0.531419196
CCNI,26.068523,1.167382682,0.213489161
ORMDL3,14.048567,0.159501091,0.500588491
TSPYL1,15.121404,0.487112414,0.416073329
ORAI2,13.792534,0.574317145,0.276591772
CXorf38,11.027506,0.053911394,0.077938020
TTN,9.265080,0.220330746,0.367617534
CSDE1,17.570351,0.616697643,0.013734717


In [23]:
method.beta.list$TCAx

Unnamed: 0,(Intercept),W.test.source,mutation.status
ZYG11B,23.263886,2.52902378,0.753487274
PIAS1,11.848549,2.34512013,0.408489729
SMCHD1,22.129855,1.69749864,0.457340723
CCNI,69.238495,3.66570220,-0.214313564
ORMDL3,18.917030,-0.79893984,-0.447101458
TSPYL1,20.973612,1.96590799,0.379415679
ORAI2,18.074549,-2.23970064,-0.274150534
CXorf38,13.696917,-0.30675183,0.094570931
TTN,10.811518,-1.07481699,0.357435527
CSDE1,28.252004,2.40259670,-0.017190206


In [24]:
reg.pvals.inc = matrix(1, length(method.names), length(method.names))
rownames(reg.pvals.inc) = method.names
colnames(reg.pvals.inc) = method.names
reg.pvals.dec = matrix(1, length(method.names), length(method.names))
rownames(reg.pvals.dec) = method.names
colnames(reg.pvals.dec) = method.names

for (method.name1 in method.names){
    for (method.name2 in method.names){
        # does method2/col outperforms method1/row
        reg.pvals.inc[method.name1, method.name2] = wilcox.test(method.beta.list[[method.name1]][inc.genes, "mutation.status"],   
                                                                method.beta.list[[method.name2]][inc.genes, "mutation.status"], paired = T, "greater")$p.value
        reg.pvals.dec[method.name1, method.name2] = wilcox.test(method.beta.list[[method.name1]][dec.genes, "mutation.status"],   
                                                                method.beta.list[[method.name2]][dec.genes, "mutation.status"], paired = T, "less")$p.value

    }
}

“cannot compute exact p-value with zeroes”
“cannot compute exact p-value with zeroes”
“cannot compute exact p-value with zeroes”
“cannot compute exact p-value with zeroes”
“cannot compute exact p-value with zeroes”
“cannot compute exact p-value with zeroes”
“cannot compute exact p-value with zeroes”
“cannot compute exact p-value with zeroes”
“cannot compute exact p-value with zeroes”
“cannot compute exact p-value with zeroes”
“cannot compute exact p-value with zeroes”
“cannot compute exact p-value with zeroes”
“cannot compute exact p-value with zeroes”
“cannot compute exact p-value with zeroes”


In [25]:
#showing bMIND after scaling s.t max is 50
#if directly divided by max(X). somehow the result look better on the up regulated set worse on the down regulated for bMIND

reg.pvals.inc
reg.pvals.dec

Unnamed: 0,Bulk,Baseline,CIBERSORTx,TCA,TCAx,bMIND.scale,bMIND.log
Bulk,1.0,4.4222479999999995e-20,0.007304103,0.8757747,0.8526956,0.9544579,0.4102566
Baseline,1.0,1.0,0.999999938,1.0,1.0,1.0,1.0
CIBERSORTx,0.99271121,6.207041e-08,1.0,0.9911055,0.9938481,0.9976602,0.9345962
TCA,0.12438059,3.555381e-12,0.00891271,1.0,0.2776793,0.7361152,0.01192986
TCAx,0.14747897,7.197694000000001e-17,0.006165062,0.7225746,1.0,0.8734304,0.1166309
bMIND.scale,0.04561465,3.673339e-13,0.002345358,0.2641324,0.1267269,1.0,1.592199e-10
bMIND.log,0.59003792,4.804825e-12,0.065500321,0.9880937,0.8835175,1.0,1.0


Unnamed: 0,Bulk,Baseline,CIBERSORTx,TCA,TCAx,bMIND.scale,bMIND.log
Bulk,1.0,0.0001919463,0.1592415,0.4853421,1.0,0.8344784,1.0
Baseline,0.9998088,1.0,0.9862919,0.9956841,1.0,0.9997911,1.0
CIBERSORTx,0.8410167,0.01374544,1.0,0.8033852,0.9999992,0.9484832,0.9999993
TCA,0.5150826,0.004329413,0.1969101,1.0,0.9999768,0.8916079,0.9999997
TCAx,5.129824e-10,1.466165e-12,8.527769e-07,2.332466e-05,1.0,2.528761e-07,0.5138085
bMIND.scale,0.1657867,0.0002097667,0.05162937,0.1085904,0.9999997,1.0,1.0
bMIND.log,9.032524e-11,5.937127e-12,6.781794e-07,3.189442e-07,0.4866162,5.431867e-15,1.0


# if dont stratify by up or down regulated

In [26]:
reg.pvals = matrix(1, length(method.names), length(method.names))
rownames(reg.pvals) = method.names
colnames(reg.pvals) = method.names

for (method.name1 in method.names){
    for (method.name2 in method.names){
        # does method2/col outperforms method1/row
        reg.pvals[method.name1, method.name2] = wilcox.test(c(method.beta.list[[method.name1]][inc.genes, "mutation.status"], -method.beta.list[[method.name1]][dec.genes, "mutation.status"]), 
                                                            c(method.beta.list[[method.name2]][inc.genes, "mutation.status"], -method.beta.list[[method.name2]][dec.genes, "mutation.status"]),
                                                            paired = T, "greater")$p.value
       
    }
}

“cannot compute exact p-value with zeroes”
“cannot compute exact p-value with zeroes”
“cannot compute exact p-value with zeroes”
“cannot compute exact p-value with zeroes”
“cannot compute exact p-value with zeroes”
“cannot compute exact p-value with zeroes”
“cannot compute exact p-value with zeroes”


In [27]:
reg.pvals

Unnamed: 0,Bulk,Baseline,CIBERSORTx,TCA,TCAx,bMIND.scale,bMIND.log
Bulk,1.0,8.754295000000001e-23,0.005457931,0.81138395,0.9999995,0.97496931,0.9999742
Baseline,1.0,1.0,1.0,1.0,1.0,1.0,1.0
CIBERSORTx,0.994547,2.194253e-08,1.0,0.99018565,0.9999998,0.99932107,0.9999927
TCA,0.1887012,8.595589e-13,0.009822611,1.0,0.9866065,0.90147756,0.976791
TCAx,4.547931e-07,7.309042000000001e-28,2.465001e-07,0.01340434,1.0,0.01040762,0.2132729
bMIND.scale,0.02504912,1.977555e-15,0.0006796717,0.09857714,0.9896011,1.0,0.8295308
bMIND.log,2.578971e-05,2.388882e-22,7.329105e-06,0.02322635,0.7868187,0.1705491,1.0


# Save Result

In [28]:
CREBBP.eval = list(tensor = list(logDiff.df = logDiff.df,
                                 pvals.inc = pvals.inc,
                                 pvals.dec = pvals.dec),
                   regression = list(method.log10p.list = method.log10p.list,
                                     method.beta.list   = method.beta.list,
                                     reg.pvals.inc = reg.pvals.inc,
                                     reg.pvals.dec = reg.pvals.dec))

In [29]:
#constant and failed the regression
method.beta.list$CIBERSORTx[method.beta.list$CIBERSORTx[, "mutation.status"] == 0 , ]

Unnamed: 0,(Intercept),W.test.source,mutation.status
ORMDL3,0,0,0
KLRC4,0,0,0
KIAA1324L,0,0,0
ORAI3,0,0,0
WASL,0,0,0
SRGN,0,0,0
HIVEP2,0,0,0
CTSS,0,0,0
EBAG9,0,0,0
ERP29,0,0,0


In [30]:
saveRDS(CREBBP.eval, file.path(res.dir, "CREBBP.eval.rds"))