In [17]:
library(data.table)
library(qvalue)

In [18]:
## IBD (inflammatory bowel disease) SMR (summary-data-based Mendelian randomisation) results

In [19]:
# Matrix eQTL: read the IBD SMR results table (the file name indicates all
# 14 cell types combined) and preview the first two rows.
matrix_eqtl_smr_results_dir <- "/directflow/SCCGGroupShare/projects/angxue/proj/SAIGE-eQTL/SMR/"
matrix_ibd_file <- paste0(
    matrix_eqtl_smr_results_dir,
    "ibd_all_14_celltypes_all_smr.txt"
)
matrix_df <- fread(matrix_ibd_file)
head(matrix_df, n = 2)

probeID,ProbeChr,Gene,Probe_bp,topSNP,topSNP_chr,topSNP_bp,A1,A2,Freq,⋯,p_GWAS,b_eQTL,se_eQTL,p_eQTL,b_SMR,se_SMR,p_SMR,p_HEIDI,nsnp_HEIDI,Cell_type
<chr>,<int>,<chr>,<int>,<chr>,<int>,<int>,<chr>,<chr>,<dbl>,⋯,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<int>,<chr>
FCRL5,1,FCRL5,157483167,rs12741984,1,157534818,G,A,0.787234,⋯,0.03396,0.11378,0.00486738,7.500497e-121,0.244696,0.115784,0.03456732,0.76687,20,BimmNaive
RPS8,1,RPS8,45240923,rs12120833,1,45241285,C,G,0.850097,⋯,0.1348,-0.105432,0.00500701,1.980368e-98,0.228189,0.152891,0.1355682,0.9391271,20,BimmNaive


In [20]:
# Count the distinct genes, then tag every row with a "<Gene>_<Cell_type>"
# key (column `comb`) and count the distinct gene-cell type combinations.
uniqueN(matrix_df$Gene)
matrix_df$comb <- paste(matrix_df$Gene, matrix_df$Cell_type, sep = "_")
uniqueN(matrix_df$comb)
head(matrix_df, n = 2)

probeID,ProbeChr,Gene,Probe_bp,topSNP,topSNP_chr,topSNP_bp,A1,A2,Freq,⋯,b_eQTL,se_eQTL,p_eQTL,b_SMR,se_SMR,p_SMR,p_HEIDI,nsnp_HEIDI,Cell_type,comb
<chr>,<int>,<chr>,<int>,<chr>,<int>,<int>,<chr>,<chr>,<dbl>,⋯,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<int>,<chr>,<chr>
FCRL5,1,FCRL5,157483167,rs12741984,1,157534818,G,A,0.787234,⋯,0.11378,0.00486738,7.500497e-121,0.244696,0.115784,0.03456732,0.76687,20,BimmNaive,FCRL5_BimmNaive
RPS8,1,RPS8,45240923,rs12120833,1,45241285,C,G,0.850097,⋯,-0.105432,0.00500701,1.980368e-98,0.228189,0.152891,0.1355682,0.9391271,20,BimmNaive,RPS8_BimmNaive


In [21]:
# Bonferroni threshold: p < 0.05/M, where M is the number of tests (either the number of genes or the number of gene-cell type combinations)

In [22]:
# Number of Matrix eQTL SMR associations with p_SMR below 0.05 / M.
# Column names are resolved inside the data.table; `.N` is the number of
# rows that pass the filter.
matrix_df[p_SMR < 0.05 / uniqueN(Gene), .N]  # M = number of genes
matrix_df[p_SMR < 0.05 / uniqueN(comb), .N]  # M = number of gene-cell type combinations

In [23]:
# Alternative to Bonferroni: q-values (qvalue package) computed over all
# Matrix eQTL SMR p-values, then the number of associations at q < 0.05.
matrix_df$qv <- qvalue(p = matrix_df$p_SMR)$qvalues
matrix_df[qv < 0.05, .N]

In [24]:
# SAIGE-QTL: read the IBD SMR results table (same file name as the Matrix
# eQTL one, but under the saige_eQTL sub-directory) and preview two rows.
saige_eqtl_smr_results_dir <- "/directflow/SCCGGroupShare/projects/angxue/proj/SAIGE-eQTL/SMR/saige_eQTL/"
saige_ibd_file <- paste0(
    saige_eqtl_smr_results_dir,
    "ibd_all_14_celltypes_all_smr.txt"
)
saige_df <- fread(saige_ibd_file)
head(saige_df, n = 2)

probeID,ProbeChr,Gene,Probe_bp,topSNP,topSNP_chr,topSNP_bp,A1,A2,Freq,⋯,p_GWAS,b_eQTL,se_eQTL,p_eQTL,b_SMR,se_SMR,p_SMR,p_HEIDI,nsnp_HEIDI,Cell_type
<chr>,<int>,<chr>,<int>,<chr>,<int>,<int>,<chr>,<chr>,<dbl>,⋯,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<int>,<chr>
NOC2L,1,NOC2L,879584,rs3748595,1,887560,A,C,0.0672147,⋯,0.7672,-0.15302,0.0352961,1.4555e-05,0.0294649,0.0996539,0.76748,0.529493,3,BimmNaive
C1orf86,1,C1orf86,2115903,rs11587831,1,2110848,T,G,0.7853,⋯,0.7714,-0.115809,0.0148762,6.980209e-15,0.0320707,0.110813,0.7722658,0.5495551,8,BimmNaive


In [25]:
# Count the distinct genes, then tag every row with a "<Gene>_<Cell_type>"
# key (column `comb`) and count the distinct gene-cell type combinations.
uniqueN(saige_df$Gene)
saige_df$comb <- paste(saige_df$Gene, saige_df$Cell_type, sep = "_")
uniqueN(saige_df$comb)
head(saige_df, n = 2)

probeID,ProbeChr,Gene,Probe_bp,topSNP,topSNP_chr,topSNP_bp,A1,A2,Freq,⋯,b_eQTL,se_eQTL,p_eQTL,b_SMR,se_SMR,p_SMR,p_HEIDI,nsnp_HEIDI,Cell_type,comb
<chr>,<int>,<chr>,<int>,<chr>,<int>,<int>,<chr>,<chr>,<dbl>,⋯,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<int>,<chr>,<chr>
NOC2L,1,NOC2L,879584,rs3748595,1,887560,A,C,0.0672147,⋯,-0.15302,0.0352961,1.4555e-05,0.0294649,0.0996539,0.76748,0.529493,3,BimmNaive,NOC2L_BimmNaive
C1orf86,1,C1orf86,2115903,rs11587831,1,2110848,T,G,0.7853,⋯,-0.115809,0.0148762,6.980209e-15,0.0320707,0.110813,0.7722658,0.5495551,8,BimmNaive,C1orf86_BimmNaive


In [26]:
# Number of SAIGE-QTL SMR associations with p_SMR below 0.05 / M, where M is
# first the number of genes and then the number of gene-cell type combinations.
saige_df[p_SMR < 0.05 / uniqueN(Gene), .N]
saige_df[p_SMR < 0.05 / uniqueN(comb), .N]

In [27]:
# Alternative to Bonferroni: q-values (qvalue package) computed over all
# SAIGE-QTL SMR p-values, then the number of associations at q < 0.05.
saige_df$qv <- qvalue(p = saige_df$p_SMR)$qvalues
saige_df[qv < 0.05, .N]

In [28]:
# Overlap between the two methods, with q-values computed over everything
# each method tested.

# Gene-cell type keys significant (q < 0.05) in the Matrix eQTL results
matrix_sign_combs <- matrix_df[qv < 0.05, comb]
length(matrix_sign_combs)

# Gene-cell type keys significant (q < 0.05) in the SAIGE-QTL results
saige_sign_combs <- saige_df[qv < 0.05, comb]
length(saige_sign_combs)

# How many of the SAIGE-QTL hits are also Matrix eQTL hits
sum(saige_sign_combs %in% matrix_sign_combs)

In [29]:
# Gene-cell type combinations present in both result sets; used to restrict
# the overlap comparison to combinations both methods report.
matrix_combs <- unique(matrix_df$comb)
saige_combs <- unique(saige_df$comb)
common_combs <- intersect(saige_combs, matrix_combs)
length(common_combs)

In [30]:
# Keep only the rows whose gene-cell type key is in `common_combs` and
# recompute q-values on that reduced set of p-values.
restrict_to_common <- function(smr_dt) {
    shared_dt <- smr_dt[smr_dt$comb %in% common_combs, ]
    shared_dt$qv <- qvalue(shared_dt$p_SMR)$qvalues
    shared_dt
}

matrix_df_common <- restrict_to_common(matrix_df)
saige_df_common <- restrict_to_common(saige_df)

In [31]:
# Overlap between the two methods, restricted to shared gene-cell type
# combinations (q-values recomputed on the shared subset).
# Note: this reassigns matrix_sign_combs / saige_sign_combs.

# Significant (q < 0.05) keys in the restricted Matrix eQTL table
matrix_sign_combs <- matrix_df_common[qv < 0.05, comb]
length(matrix_sign_combs)

# Significant (q < 0.05) keys in the restricted SAIGE-QTL table
saige_sign_combs <- saige_df_common[qv < 0.05, comb]
length(saige_sign_combs)

# How many of the SAIGE-QTL hits are also Matrix eQTL hits
sum(saige_sign_combs %in% matrix_sign_combs)

In [32]:
## SAIGE-QTL SMR gene list
# All SAIGE-QTL associations with q < 0.05, using the q-values computed on
# the full SAIGE-QTL table (not the shared-combination subset).
# NOTE(review): no p_HEIDI filter is applied here, and some displayed rows
# have p_HEIDI < 0.05 -- confirm whether a HEIDI threshold is intended.
saige_df_sign <- saige_df[qv < 0.05]
saige_df_sign

probeID,ProbeChr,Gene,Probe_bp,topSNP,topSNP_chr,topSNP_bp,A1,A2,Freq,⋯,se_eQTL,p_eQTL,b_SMR,se_SMR,p_SMR,p_HEIDI,nsnp_HEIDI,Cell_type,comb,qv
<chr>,<int>,<chr>,<int>,<chr>,<int>,<int>,<chr>,<chr>,<dbl>,⋯,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<int>,<chr>,<chr>,<dbl>
LY9,1,LY9,160765864,rs7412696,1,160828976,G,T,0.792070,⋯,0.0343060,1.121082e-07,-0.3047730,0.09295680,1.043041e-03,4.127029e-01,16,BimmNaive,LY9_BimmNaive,4.640058e-02
FCGR2B,1,FCGR2B,161551101,rs140315354,1,161617673,G,A,0.934720,⋯,0.0349359,8.175889e-07,-0.5566150,0.15881300,4.568856e-04,9.639709e-02,18,BimmNaive,FCGR2B_BimmNaive,2.575662e-02
AC010883.5,2,AC010883.5,43456712,rs4372955,2,43566146,A,G,0.869923,⋯,0.0902127,4.507045e-09,-0.1179020,0.03626890,1.150947e-03,2.060657e-02,20,BimmNaive,AC010883.5_BimmNaive,4.951664e-02
KIAA1841,2,KIAA1841,61293006,rs6727926,2,61240536,A,T,0.796422,⋯,0.0890220,6.535369e-12,-0.1099680,0.02628160,2.861393e-05,8.850827e-02,20,BimmNaive,KIAA1841_BimmNaive,2.563268e-03
RBM6,3,RBM6,49977440,rs7613875,3,49971514,C,A,0.448743,⋯,0.0147394,4.663713e-15,-0.4799370,0.11146500,1.664278e-05,5.805934e-03,20,BimmNaive,RBM6_BimmNaive,1.588820e-03
ERAP2,5,ERAP2,96211643,rs27295,5,96358687,C,T,0.431335,⋯,0.0218618,9.580355e-47,-0.1840280,0.03696720,6.419558e-07,6.248309e-02,20,BimmNaive,ERAP2_BimmNaive,9.650621e-05
LST1,6,LST1,31553901,rs2256965,6,31555130,A,G,0.405706,⋯,0.0250259,4.159973e-16,-0.3190970,0.06625470,1.463083e-06,9.578130e-02,14,BimmNaive,LST1_BimmNaive,1.993275e-04
HCG23,6,HCG23,32358287,rs9271548,6,32590234,A,T,0.338008,⋯,0.1046600,4.461682e-06,0.2119130,0.05182630,4.334293e-05,3.220945e-01,16,BimmNaive,HCG23_BimmNaive,3.587819e-03
HLA-DRB5,6,HLA-DRB5,32485120,rs9271699,6,32593179,A,G,0.524178,⋯,0.0559168,1.162295e-96,0.0414326,0.00942401,1.100153e-05,8.616021e-13,20,BimmNaive,HLA-DRB5_BimmNaive,1.151098e-03
HLA-DQA1,6,HLA-DQA1,32595956,rs9272546,6,32606941,T,C,0.336074,⋯,0.0139083,2.408243e-45,0.4690740,0.06514700,6.010586e-13,3.898305e-10,20,BimmNaive,HLA-DQA1_BimmNaive,3.417892e-10


In [33]:
# Write the significant SAIGE-QTL SMR associations to disk
# (fwrite defaults, i.e. comma-separated with a header row).
saige_egenes_file <- "/directflow/SCCGGroupShare/projects/anncuo/OneK1K/saige_eqtl/for_wei/smr/ibd_saige_egenes.txt"
fwrite(x = saige_df_sign, file = saige_egenes_file)