In [1]:
library(data.table)
library(qvalue)

In [2]:
## CD (Crohn's disease) SMR (summary-data-based Mendelian Randomisation) results

In [5]:
# Matrix eQTL results
# Read the CD SMR results table from the Matrix eQTL results directory into a
# data.table, then preview its first two rows.
matrix_eqtl_smr_results_dir <- "/directflow/SCCGGroupShare/projects/angxue/proj/SAIGE-eQTL/SMR/"
matrix_cd_file <- paste(matrix_eqtl_smr_results_dir, "cd_all_14_celltypes_all_smr.txt", sep = "")
matrix_df <- fread(input = matrix_cd_file)
head(matrix_df, n = 2)

probeID,ProbeChr,Gene,Probe_bp,topSNP,topSNP_chr,topSNP_bp,A1,A2,Freq,⋯,p_GWAS,b_eQTL,se_eQTL,p_eQTL,b_SMR,se_SMR,p_SMR,p_HEIDI,nsnp_HEIDI,Cell_type
<chr>,<int>,<chr>,<int>,<chr>,<int>,<int>,<chr>,<chr>,<dbl>,⋯,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<int>,<chr>
FCRL5,1,FCRL5,157483167,rs12741984,1,157534818,G,A,0.787234,⋯,0.2133,0.11378,0.00486738,7.500497e-121,0.185382,0.148752,0.2126742,0.4619789,20,BimmNaive
RPS8,1,RPS8,45240923,rs12120833,1,45241285,C,G,0.850097,⋯,0.4571,-0.105432,0.00500701,1.980368e-98,0.147458,0.198581,0.4577494,0.3150508,20,BimmNaive


In [6]:
# Calculate number of unique genes, and combination of genes + celltypes.
# Each combination is keyed as "<Gene>_<Cell_type>" in a new `comb` column.
length(unique(matrix_df$Gene))
# `:=` adds the column by reference; `matrix_df$comb = ...` goes through
# data.table's `$<-` method, which copies the table before adding the column.
matrix_df[, comb := paste0(Gene, "_", Cell_type)]
length(unique(matrix_df$comb))
head(matrix_df, 2)

probeID,ProbeChr,Gene,Probe_bp,topSNP,topSNP_chr,topSNP_bp,A1,A2,Freq,⋯,b_eQTL,se_eQTL,p_eQTL,b_SMR,se_SMR,p_SMR,p_HEIDI,nsnp_HEIDI,Cell_type,comb
<chr>,<int>,<chr>,<int>,<chr>,<int>,<int>,<chr>,<chr>,<dbl>,⋯,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<int>,<chr>,<chr>
FCRL5,1,FCRL5,157483167,rs12741984,1,157534818,G,A,0.787234,⋯,0.11378,0.00486738,7.500497e-121,0.185382,0.148752,0.2126742,0.4619789,20,BimmNaive,FCRL5_BimmNaive
RPS8,1,RPS8,45240923,rs12120833,1,45241285,C,G,0.850097,⋯,-0.105432,0.00500701,1.980368e-98,0.147458,0.198581,0.4577494,0.3150508,20,BimmNaive,RPS8_BimmNaive


In [7]:
# Bonferroni threshold: p < 0.05/M, where M is the number of tests (number of genes, or number of gene-cell type combinations)

In [8]:
# Number of rows whose SMR p-value passes 0.05 / M, for two choices of M.
# `.N` is the row count of the subset selected in `i`.
matrix_df[p_SMR < 0.05 / length(unique(Gene)), .N] # M = number of genes
matrix_df[p_SMR < 0.05 / length(unique(comb)), .N] # M = number of genes-ct combinations

In [9]:
# significant results at FDR<5% instead (using qvalue)
# q-values are computed over every row of matrix_df and stored in column `qv`.
# `:=` adds the column by reference rather than copying the table via `$<-`.
matrix_df[, qv := qvalue(p_SMR)$qvalues]
nrow(matrix_df[qv < 0.05])

In [10]:
# SAIGE-QTL results
# Read the CD SMR results table from the SAIGE-QTL results directory into a
# data.table, then preview its first two rows.
saige_eqtl_smr_results_dir <- "/directflow/SCCGGroupShare/projects/angxue/proj/SAIGE-eQTL/SMR/saige_eQTL/"
saige_cd_file <- paste(saige_eqtl_smr_results_dir, "cd_all_14_celltypes_all_smr.txt", sep = "")
saige_df <- fread(input = saige_cd_file)
head(saige_df, n = 2)

probeID,ProbeChr,Gene,Probe_bp,topSNP,topSNP_chr,topSNP_bp,A1,A2,Freq,⋯,p_GWAS,b_eQTL,se_eQTL,p_eQTL,b_SMR,se_SMR,p_SMR,p_HEIDI,nsnp_HEIDI,Cell_type
<chr>,<int>,<chr>,<int>,<chr>,<int>,<int>,<chr>,<chr>,<dbl>,⋯,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<int>,<chr>
NOC2L,1,NOC2L,879584,rs3748595,1,887560,A,C,0.0672147,⋯,0.5676,-0.15302,0.0352961,1.4555e-05,-0.073327,0.129544,0.5713679,0.1533406,3,BimmNaive
C1orf86,1,C1orf86,2115903,rs11587831,1,2110848,T,G,0.7853,⋯,0.3485,-0.115809,0.0148762,6.980209e-15,0.134616,0.144356,0.3510651,0.6961858,8,BimmNaive


In [11]:
# Number of unique genes, and of gene + cell type combinations, in the
# SAIGE-QTL table. Each combination is keyed as "<Gene>_<Cell_type>" in `comb`.
length(unique(saige_df$Gene))
# `:=` adds the column by reference; `saige_df$comb = ...` goes through
# data.table's `$<-` method, which copies the table before adding the column.
saige_df[, comb := paste0(Gene, "_", Cell_type)]
length(unique(saige_df$comb))
head(saige_df, 2)

probeID,ProbeChr,Gene,Probe_bp,topSNP,topSNP_chr,topSNP_bp,A1,A2,Freq,⋯,b_eQTL,se_eQTL,p_eQTL,b_SMR,se_SMR,p_SMR,p_HEIDI,nsnp_HEIDI,Cell_type,comb
<chr>,<int>,<chr>,<int>,<chr>,<int>,<int>,<chr>,<chr>,<dbl>,⋯,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<int>,<chr>,<chr>
NOC2L,1,NOC2L,879584,rs3748595,1,887560,A,C,0.0672147,⋯,-0.15302,0.0352961,1.4555e-05,-0.073327,0.129544,0.5713679,0.1533406,3,BimmNaive,NOC2L_BimmNaive
C1orf86,1,C1orf86,2115903,rs11587831,1,2110848,T,G,0.7853,⋯,-0.115809,0.0148762,6.980209e-15,0.134616,0.144356,0.3510651,0.6961858,8,BimmNaive,C1orf86_BimmNaive


In [12]:
# significant results at p<0.05/M with M either # genes or # gene-celltype combinations
# `.N` is the row count of the subset selected in `i`.
saige_df[p_SMR < 0.05 / length(unique(Gene)), .N]
saige_df[p_SMR < 0.05 / length(unique(comb)), .N]

In [13]:
# significant results at FDR<5% instead (using qvalue)
# q-values are computed over every row of saige_df and stored in column `qv`.
# `:=` adds the column by reference rather than copying the table via `$<-`.
saige_df[, qv := qvalue(p_SMR)$qvalues]
nrow(saige_df[qv < 0.05])

In [14]:
# Overlap (everything tested)
# Uses the `qv` column already stored on each full table.

# gene-cell type keys with qv < 0.05 in the Matrix eQTL table
matrix_sign_combs <- matrix_df[qv < 0.05, comb]
length(matrix_sign_combs)

# gene-cell type keys with qv < 0.05 in the SAIGE-QTL table
saige_sign_combs <- saige_df[qv < 0.05, comb]
length(saige_sign_combs)

# how many of the SAIGE-QTL keys also appear among the Matrix eQTL keys
sum(saige_sign_combs %in% matrix_sign_combs)

In [19]:
# Overlap (common gene-cell type combinations only)
# Collect the distinct keys of each table, then keep the SAIGE-QTL keys that
# are also present in the Matrix eQTL table.
matrix_combs <- unique(matrix_df$comb)
saige_combs <- unique(saige_df$comb)
common_combs <- intersect(saige_combs, matrix_combs)
length(common_combs)

In [20]:
# Restrict each table to the rows whose key is in `common_combs`, then
# recompute q-values over that restricted set of rows only.
# The row subset is a new table, so the chained `:=` overwrites `qv` on the
# subset by reference (no second copy via `$<-`) and leaves the full tables'
# `qv` column untouched.
matrix_df_common <- matrix_df[matrix_df$comb %in% common_combs, ][, qv := qvalue(p_SMR)$qvalues]
saige_df_common <- saige_df[saige_df$comb %in% common_combs, ][, qv := qvalue(p_SMR)$qvalues]

In [21]:
# Same overlap as for the full tables, but using the tables restricted to the
# common combinations and their own `qv` column.
# Note: this reassigns matrix_sign_combs / saige_sign_combs.
matrix_sign_combs <- matrix_df_common[qv < 0.05, comb]
length(matrix_sign_combs)

saige_sign_combs <- saige_df_common[qv < 0.05, comb]
length(saige_sign_combs)

# how many of the SAIGE-QTL keys also appear among the Matrix eQTL keys
sum(saige_sign_combs %in% matrix_sign_combs)

In [22]:
## SAIGE-QTL SMR gene list
# Rows of the full SAIGE-QTL table (saige_df, not the common-combination
# subset) whose stored `qv` is below 0.05.
# NOTE(review): no p_HEIDI filter is applied to this selection - confirm that
# is intended before using the list downstream.
saige_df_sign <- saige_df[qv < 0.05]
saige_df_sign

probeID,ProbeChr,Gene,Probe_bp,topSNP,topSNP_chr,topSNP_bp,A1,A2,Freq,⋯,se_eQTL,p_eQTL,b_SMR,se_SMR,p_SMR,p_HEIDI,nsnp_HEIDI,Cell_type,comb,qv
<chr>,<int>,<chr>,<int>,<chr>,<int>,<int>,<chr>,<chr>,<dbl>,⋯,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<int>,<chr>,<chr>,<dbl>
RBM6,3,RBM6,49977440,rs7613875,3,49971514,C,A,0.448743,⋯,0.01473940,4.663713e-15,-0.503019,0.1361050,2.191745e-04,4.763586e-02,20,BimmNaive,RBM6_BimmNaive,2.559049e-02
ERAP2,5,ERAP2,96211643,rs27295,5,96358687,C,T,0.431335,⋯,0.02186180,9.580355e-47,-0.292410,0.0491637,2.719701e-09,7.152581e-01,20,BimmNaive,ERAP2_BimmNaive,3.028922e-06
CDC42SE2,5,CDC42SE2,130581186,rs31251,5,130833946,C,T,0.419729,⋯,0.01480940,1.523873e-06,1.065800,0.2961160,3.191008e-04,3.311225e-01,14,BimmNaive,CDC42SE2_BimmNaive,3.408073e-02
LST1,6,LST1,31553901,rs2256965,6,31555130,A,G,0.405706,⋯,0.02502590,4.159973e-16,-0.316840,0.0788990,5.925158e-05,2.730778e-03,14,BimmNaive,LST1_BimmNaive,1.059429e-02
TMEM258,11,TMEM258,61535973,rs174538,11,61560081,G,A,0.696325,⋯,0.01109410,1.172200e-11,-0.894723,0.2411910,2.075796e-04,6.515992e-01,20,BimmNaive,TMEM258_BimmNaive,2.445305e-02
AP003774.1,11,AP003774.1,64092522,rs646153,11,64089588,C,T,0.624275,⋯,0.03757460,6.251950e-24,0.168507,0.0412274,4.364900e-05,3.810215e-02,20,BimmNaive,AP003774.1_BimmNaive,8.843741e-03
CCDC88B,11,CCDC88B,64107695,rs510372,11,64115137,C,T,0.614120,⋯,0.02211490,1.260601e-07,-0.557565,0.1611960,5.423331e-04,1.837508e-01,16,BimmNaive,CCDC88B_BimmNaive,4.981332e-02
ZFP36L1,14,ZFP36L1,69254377,rs12435329,14,69250891,T,C,0.530948,⋯,0.01026990,4.299632e-10,0.970710,0.2663400,2.677812e-04,4.708427e-03,20,BimmNaive,ZFP36L1_BimmNaive,2.999478e-02
CCDC101,16,CCDC101,28565236,rs3743963,16,28604686,A,G,0.472437,⋯,0.02480000,3.211693e-11,0.416040,0.1049450,7.358785e-05,4.970654e-02,20,BimmNaive,CCDC101_BimmNaive,1.215504e-02
TUFM,16,TUFM,28853732,rs7187776,16,28857645,A,G,0.615571,⋯,0.01473370,3.420663e-67,-0.333696,0.0590836,1.624574e-08,7.104661e-02,20,BimmNaive,TUFM_BimmNaive,1.065837e-05


In [23]:
# Write the selected SAIGE-QTL rows to disk using fwrite's default settings.
fwrite(
    x = saige_df_sign,
    file = "/directflow/SCCGGroupShare/projects/anncuo/OneK1K/saige_eqtl/for_wei/smr/cd_saige_egenes.txt"
)