In [1]:
# use R to change path to ~/cdai/SpliFi/analysis if current working path is not
setwd("~/cdai/SpliFi/analysis")

In [2]:
suppressMessages(library(tidyverse))
suppressMessages(library(glue))
suppressMessages(library(data.table))
suppressMessages(library(cowplot))
suppressMessages(library(ggrastr))


In [3]:
theme_set(theme_cowplot())


In [4]:
options(repr.plot.width = 10, repr.plot.height = 7, repr.plot.res = 180)


In [5]:
suppressMessages(library(furrr))
plan(multisession, workers = 6)


# Data prep

In [6]:
permStats = fread("../code/results/qtl/noisy/Geuvadis/EUR/separateNoise/cis_100000/perm/chr22.addQval.txt.gz")

In [7]:
nomStats = fread("../code/results/qtl/noisy/Geuvadis/EUR/separateNoise/cis_100000/nom/chr22.txt.gz")

In [8]:
dim(permStats)
head(permStats)

phenotype_id,phenotype_chr,phenotype_start,phenotype_end,phenotype_strand,num_variants,best_nom_dist,best_genotype_id,best_genotype_chr,best_genotype_start,⋯,dof_true,dof_est,beta_ml1,beta_ml2,pval_nom,pval_r2,slope,pval_emp,pval_adj,q
<chr>,<chr>,<int>,<int>,<chr>,<int>,<int>,<chr>,<chr>,<int>,⋯,<int>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
chr22:17045000:17046345:clu_60424_+:IN,chr22,17045001,17046345,+,463,-1688,22:17043313:T:C,chr22,17043313,⋯,358,322.969,0.97185,56.7786,1.42367e-08,0.0860162,0.615162,0.000999001,5.88156e-06,8.0306e-05
chr22:17045000:17047323:clu_60424_+:IN,chr22,17045001,17047323,+,463,0,22:17045642:G:A,chr22,17045642,⋯,358,309.002,0.964383,49.8923,1.72142e-31,0.316974,-1.25829,0.000999001,8.26831e-25,6.562e-23
chr22:17047426:17050160:clu_60425_+:IN,chr22,17047427,17050160,+,468,-36114,22:17011313:G:A,chr22,17011313,⋯,358,298.671,0.952559,47.6652,0.011816,0.0175732,-0.140319,0.653347,0.664944,0.67322
chr22:17047426:17051574:clu_60425_+:IN,chr22,17047427,17051574,+,477,-21048,22:17026379:C:A,chr22,17026379,⋯,358,327.497,0.985805,56.7487,1.08313e-05,0.0527103,0.371827,0.003996,0.00161235,0.013338
chr22:17047426:17055564:clu_60425_+:IN,chr22,17047427,17055564,+,501,0,22:17049480:A:G,chr22,17049480,⋯,358,294.389,0.927213,44.2685,1.40749e-19,0.204813,-0.690923,0.000999001,1.07851e-13,2.9772e-12
chr22:17047426:17057072:clu_60425_+:IN,chr22,17047427,17057072,+,508,0,22:17055054:G:A,chr22,17055054,⋯,358,292.029,1.07236,46.5298,2.01417e-12,0.129228,0.892701,0.000999001,2.53412e-09,4.8389e-08


In [9]:
dim(nomStats)
head(nomStats)

V1,V2,V3,V4,V5,V6,V7,V8,V9,V10,V11,V12,V13,V14,V15
<chr>,<chr>,<int>,<int>,<chr>,<int>,<int>,<chr>,<chr>,<int>,<int>,<dbl>,<dbl>,<dbl>,<int>
chr22:17045000:17047323:clu_60424_+:IN,chr22,17045001,17047323,+,463,-99791,22:16945210:A:G,chr22,16945210,16945210,0.434977,0.00170343,0.59922,0
chr22:17045000:17046345:clu_60424_+:IN,chr22,17045001,17046345,+,463,-99791,22:16945210:A:G,chr22,16945210,16945210,0.763478,0.000253257,-0.216224,0
chr22:17045000:17047323:clu_60424_+:IN,chr22,17045001,17047323,+,463,-98854,22:16946147:G:A,chr22,16946147,16946147,0.0757452,0.00878331,-0.242195,0
chr22:17045000:17046345:clu_60424_+:IN,chr22,17045001,17046345,+,463,-98854,22:16946147:G:A,chr22,16946147,16946147,0.839406,0.00011487,-0.0259201,0
chr22:17045000:17046345:clu_60424_+:IN,chr22,17045001,17046345,+,463,-98227,22:16946774:A:C,chr22,16946774,16946774,0.126557,0.00650804,0.127069,0
chr22:17045000:17047323:clu_60424_+:IN,chr22,17045001,17047323,+,463,-98227,22:16946774:A:C,chr22,16946774,16946774,0.235301,0.00393219,-0.105544,0


## step 1, select phenotypes

In [10]:
permStats[, `:=`(clu = str_extract(phenotype_id, "clu_\\d+_[\\+\\-]"), itype = str_extract(phenotype_id, "[A-Z]{2}$"))]

In [11]:
qualClu = permStats[, .(phenotype_id, itype, Nintron = uniqueN(phenotype_id)), by = .(clu)][Nintron > 1, clu] %>% unique

In [12]:
permStats = permStats[clu %in% qualClu]

In [13]:
dim(permStats)
head(permStats)

phenotype_id,phenotype_chr,phenotype_start,phenotype_end,phenotype_strand,num_variants,best_nom_dist,best_genotype_id,best_genotype_chr,best_genotype_start,⋯,beta_ml1,beta_ml2,pval_nom,pval_r2,slope,pval_emp,pval_adj,q,clu,itype
<chr>,<chr>,<int>,<int>,<chr>,<int>,<int>,<chr>,<chr>,<int>,⋯,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>,<chr>
chr22:17045000:17046345:clu_60424_+:IN,chr22,17045001,17046345,+,463,-1688,22:17043313:T:C,chr22,17043313,⋯,0.97185,56.7786,1.42367e-08,0.0860162,0.615162,0.000999001,5.88156e-06,8.0306e-05,clu_60424_+,IN
chr22:17045000:17047323:clu_60424_+:IN,chr22,17045001,17047323,+,463,0,22:17045642:G:A,chr22,17045642,⋯,0.964383,49.8923,1.72142e-31,0.316974,-1.25829,0.000999001,8.26831e-25,6.562e-23,clu_60424_+,IN
chr22:17047426:17050160:clu_60425_+:IN,chr22,17047427,17050160,+,468,-36114,22:17011313:G:A,chr22,17011313,⋯,0.952559,47.6652,0.011816,0.0175732,-0.140319,0.653347,0.664944,0.67322,clu_60425_+,IN
chr22:17047426:17051574:clu_60425_+:IN,chr22,17047427,17051574,+,477,-21048,22:17026379:C:A,chr22,17026379,⋯,0.985805,56.7487,1.08313e-05,0.0527103,0.371827,0.003996,0.00161235,0.013338,clu_60425_+,IN
chr22:17047426:17055564:clu_60425_+:IN,chr22,17047427,17055564,+,501,0,22:17049480:A:G,chr22,17049480,⋯,0.927213,44.2685,1.40749e-19,0.204813,-0.690923,0.000999001,1.07851e-13,2.9772e-12,clu_60425_+,IN
chr22:17047426:17057072:clu_60425_+:IN,chr22,17047427,17057072,+,508,0,22:17055054:G:A,chr22,17055054,⋯,1.07236,46.5298,2.01417e-12,0.129228,0.892701,0.000999001,2.53412e-09,4.8389e-08,clu_60425_+,IN


## Step2, use perm pass phenotypes to filter nominal pass phenotype's pvalues

In [14]:
dim(nomStats)

In [15]:
nomStats[1:2]

V1,V2,V3,V4,V5,V6,V7,V8,V9,V10,V11,V12,V13,V14,V15
<chr>,<chr>,<int>,<int>,<chr>,<int>,<int>,<chr>,<chr>,<int>,<int>,<dbl>,<dbl>,<dbl>,<int>
chr22:17045000:17047323:clu_60424_+:IN,chr22,17045001,17047323,+,463,-99791,22:16945210:A:G,chr22,16945210,16945210,0.434977,0.00170343,0.59922,0
chr22:17045000:17046345:clu_60424_+:IN,chr22,17045001,17046345,+,463,-99791,22:16945210:A:G,chr22,16945210,16945210,0.763478,0.000253257,-0.216224,0


In [16]:
nomStats = nomStats[V1 %in% permStats$phenotype_id]

In [17]:
dim(nomStats)

In [18]:
nomStats[1:2]

V1,V2,V3,V4,V5,V6,V7,V8,V9,V10,V11,V12,V13,V14,V15
<chr>,<chr>,<int>,<int>,<chr>,<int>,<int>,<chr>,<chr>,<int>,<int>,<dbl>,<dbl>,<dbl>,<int>
chr22:17045000:17047323:clu_60424_+:IN,chr22,17045001,17047323,+,463,-99791,22:16945210:A:G,chr22,16945210,16945210,0.434977,0.00170343,0.59922,0
chr22:17045000:17046345:clu_60424_+:IN,chr22,17045001,17046345,+,463,-99791,22:16945210:A:G,chr22,16945210,16945210,0.763478,0.000253257,-0.216224,0


## phenotype matrix

In [19]:
pheno = fread('../code/results/pheno/noisy//Geuvadis//EUR/leafcutter_perind.counts.noise_by_intron.gz')

### subset phenotypes using perm pass results

In [20]:
pheno = pheno[chrom %in% permStats$phenotype_id]

In [21]:
pheno[1:5, 1:5]

chrom,HG00096,HG00097,HG00099,HG00100
<chr>,<chr>,<chr>,<chr>,<chr>
chr22:17045000:17046345:clu_60424_+:IN,0/3,0/14,0/0,0/3
chr22:17045000:17047323:clu_60424_+:IN,3/3,12/14,0/0,3/3
chr22:17047426:17050160:clu_60425_+:IN,0/24,1/38,0/24,0/12
chr22:17047426:17051574:clu_60425_+:IN,0/24,1/38,0/24,3/12
chr22:17047426:17055564:clu_60425_+:IN,0/24,3/38,0/24,0/12


In [22]:
dim(pheno)

## Covariance matrix

In [23]:
covmx = fread("../code/results/pheno/noisy/Geuvadis/EUR/separateNoise/chr22_CovMatrix.txt", sep = "\t", header = T)

“Stopped early on line 12. Expected 361 fields but found 1. Consider fill=TRUE and comment.char=. First discarded non-empty line: <<results/geno/Geuvadis/EUR/chr22_1_1_svd_PC1 34.1111 3.51292 -5.51009 31.3804 -25.9536 0.0285756 -32.878 -13.3678 17.655 -31.8691 -25.3536 44.2846 -31.9805 17.0618 9.40957 -35.1204 3.40283 -15.6494 -18.8231 12.2061 14.8133 45.7659 37.9913 39.0765 21.4015 30.3026 -11.9687 -29.2904 -72.8368 13.7287 33.4088 23.4695 7.12361 -82.9163 -35.3239 -76.8404 4.99847 31.5853 21.725 40.4732 25.4272 33.6207 -88.6218 10.6636 13.2809 39.2139 -31.2517 -16.1241 27.6947 8.5363 16.5472 -55.5176 -27.2203 31.42 19.4819>>”


In [24]:
dim(covmx)

In [25]:
head(covmx)
tail(covmx)

id,HG00096,HG00097,HG00099,HG00100,HG00101,HG00102,HG00103,HG00105,HG00106,⋯,NA20809,NA20810,NA20811,NA20812,NA20813,NA20814,NA20815,NA20819,NA20826,NA20828
<int>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,⋯,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
1,0.052002532,0.03549744,-0.04058394,-0.059506099,0.05659947,-0.08367559,0.028197871,-0.059123241,0.028157609,⋯,0.027445648,-0.021767492,0.077721655,-0.0364216732,-0.007382174,-0.090896457,-0.049096492,-0.031754888,0.05744056,-0.102963819
2,0.031956302,0.11333963,0.03892355,0.025699965,0.01730131,-0.03031228,-0.107646208,0.018796886,-0.011472904,⋯,0.027333068,-0.037250882,-0.017823709,0.0790063192,0.041431736,0.02167875,0.049079922,-0.012251446,-0.0065807816,-0.027776086
3,-0.015324303,-0.03794721,0.04506368,0.011587173,0.02384883,0.09082147,-0.005677723,0.020886161,0.006835743,⋯,-0.090320732,-0.002160437,0.071765261,0.0003069108,-0.039408949,0.06266353,0.057752203,-0.028684991,-0.0008910074,0.015542673
4,-0.006351606,-0.02959577,0.04033815,-0.002787945,0.03800583,-0.02930297,-0.048555237,0.108755238,0.008147912,⋯,0.088831799,0.105649047,0.003233582,0.0646933099,0.045549885,0.059800425,0.092713141,-0.0154587,0.077065253,-0.089512007
5,-0.004501859,0.01323356,-0.00961876,-0.022923347,-0.08088199,-0.06006002,-0.058159624,0.002608612,-0.050560689,⋯,-0.031223067,-0.027364625,-0.019950411,-0.0003064266,-0.015568071,-0.049961947,-0.005414727,-0.040648685,0.0099870705,0.008180909
6,-0.023295145,-0.01468692,-0.01446826,0.003969463,-0.08092704,-0.07369791,0.129542842,0.030557664,0.117094034,⋯,-0.007720113,0.096013446,-0.034838134,0.0083657346,0.031140311,-0.001699893,0.004656594,0.005161383,-0.017631321,0.062005267


id,HG00096,HG00097,HG00099,HG00100,HG00101,HG00102,HG00103,HG00105,HG00106,⋯,NA20809,NA20810,NA20811,NA20812,NA20813,NA20814,NA20815,NA20819,NA20826,NA20828
<int>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,⋯,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
5,-0.004501859,0.01323356,-0.00961876,-0.022923347,-0.080881988,-0.06006002,-0.05815962,0.002608612,-0.05056069,⋯,-0.0312230671,-0.02736462,-0.019950411,-0.0003064266,-0.01556807,-0.049961947,-0.0054147273,-0.040648685,0.00998707,0.008180909
6,-0.023295145,-0.01468692,-0.01446826,0.003969463,-0.080927035,-0.07369791,0.12954284,0.030557664,0.11709403,⋯,-0.0077201132,0.09601345,-0.034838134,0.0083657346,0.03114031,-0.001699893,0.0046565938,0.005161383,-0.017631321,0.062005267
7,0.030611979,7.201404e-05,-0.03860097,0.028583303,0.066349438,-0.08569148,0.01617997,0.030159065,-0.048512,⋯,0.001066417,0.08533024,0.094449506,0.0230581257,0.02369542,0.043643273,-0.0416550231,0.041249599,0.028860497,0.022762574
8,0.043248845,0.07453086,-0.0185791,-0.052708963,0.031844244,0.14795226,0.03084668,0.059306137,0.02785151,⋯,-0.0183149664,-0.04103611,0.002088982,-0.0770301791,0.01501864,0.039670088,0.0153114058,-0.024755726,0.053973489,0.141984425
9,0.087997583,-0.01969591,0.08500863,0.020144099,0.035754699,-0.02311829,0.06595065,0.06553231,0.03085318,⋯,-0.0007569028,0.10543511,0.04270549,0.0804110737,0.07924265,0.038344618,0.076458226,0.040646186,0.068287711,0.094831351
10,0.017677869,-0.07410055,0.02013663,-0.008438359,0.001343738,-0.03030285,-0.02404158,0.048178229,0.0491398,⋯,0.0162267903,0.02413531,0.033408012,0.1006196151,-0.05623039,-0.045415865,-0.0003372282,0.002720623,0.008716437,-0.029952265
