In [17]:
library(data.table)
library(qvalue)

In [18]:
## RA (rheumatoid arthritis) SMR (summary-data-based Mendelian randomisation) results

In [19]:
# Matrix eQTL results: read the RA SMR table into a data.table.
# The directory string already ends in "/", so the file name is appended as-is.
matrix_eqtl_smr_results_dir <- "/directflow/SCCGGroupShare/projects/angxue/proj/SAIGE-eQTL/SMR/"
matrix_ra_file <- paste(matrix_eqtl_smr_results_dir, "ra_all_14_celltypes_all_smr.txt", sep = "")
matrix_df <- fread(matrix_ra_file)
# Peek at the first two rows to check what was read.
head(matrix_df, n = 2)

probeID,ProbeChr,Gene,Probe_bp,topSNP,topSNP_chr,topSNP_bp,A1,A2,Freq,⋯,p_GWAS,b_eQTL,se_eQTL,p_eQTL,b_SMR,se_SMR,p_SMR,p_HEIDI,nsnp_HEIDI,Cell_type
<chr>,<int>,<chr>,<int>,<chr>,<int>,<int>,<chr>,<chr>,<dbl>,⋯,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<int>,<chr>
FCRL5,1,FCRL5,157483167,rs12741984,1,157534818,G,A,0.787234,⋯,0.1312,0.11378,0.00486738,7.500497e-121,-0.236422,0.156769,0.1315321,0.0344273,20,BimmNaive
RPS8,1,RPS8,45240923,rs12120833,1,45241285,C,G,0.850097,⋯,0.1065,-0.105432,0.00500701,1.980368e-98,-0.385083,0.23877,0.1067927,0.6779076,20,BimmNaive


In [20]:
# Number of distinct genes in the Matrix eQTL-based SMR table.
uniqueN(matrix_df$Gene)
# Key every row by "<Gene>_<Cell_type>" so gene-cell type pairs can be counted and compared.
matrix_df$comb <- paste(matrix_df$Gene, matrix_df$Cell_type, sep = "_")
# Number of distinct gene-cell type combinations.
uniqueN(matrix_df$comb)
head(matrix_df, n = 2)

probeID,ProbeChr,Gene,Probe_bp,topSNP,topSNP_chr,topSNP_bp,A1,A2,Freq,⋯,b_eQTL,se_eQTL,p_eQTL,b_SMR,se_SMR,p_SMR,p_HEIDI,nsnp_HEIDI,Cell_type,comb
<chr>,<int>,<chr>,<int>,<chr>,<int>,<int>,<chr>,<chr>,<dbl>,⋯,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<int>,<chr>,<chr>
FCRL5,1,FCRL5,157483167,rs12741984,1,157534818,G,A,0.787234,⋯,0.11378,0.00486738,7.500497e-121,-0.236422,0.156769,0.1315321,0.0344273,20,BimmNaive,FCRL5_BimmNaive
RPS8,1,RPS8,45240923,rs12120833,1,45241285,C,G,0.850097,⋯,-0.105432,0.00500701,1.980368e-98,-0.385083,0.23877,0.1067927,0.6779076,20,BimmNaive,RPS8_BimmNaive


In [21]:
# Significance threshold: p < 0.05/M, where M is the number of tests (number of genes, or number of gene-cell type combinations)

In [22]:
# Count rows whose SMR p-value passes 0.05 / M (Bonferroni-style threshold).
# na.rm = TRUE: rows with a missing p-value are simply not counted.
sum(matrix_df$p_SMR < 0.05 / uniqueN(matrix_df$Gene), na.rm = TRUE) # M = number of genes
sum(matrix_df$p_SMR < 0.05 / uniqueN(matrix_df$comb), na.rm = TRUE) # M = number of gene-cell type combinations

In [23]:
# FDR-based alternative: estimate q-values from the SMR p-values (qvalue package)
# and count the rows with q < 0.05, i.e. significant at FDR < 5%.
matrix_df$qv <- qvalue(p = matrix_df$p_SMR)[["qvalues"]]
sum(matrix_df$qv < 0.05, na.rm = TRUE)

In [24]:
# SAIGE-QTL results: read the RA SMR table into a data.table.
# The directory string already ends in "/", so the file name is appended as-is.
saige_eqtl_smr_results_dir <- "/directflow/SCCGGroupShare/projects/angxue/proj/SAIGE-eQTL/SMR/saige_eQTL/"
saige_ra_file <- paste(saige_eqtl_smr_results_dir, "ra_all_14_celltypes_all_smr.txt", sep = "")
saige_df <- fread(saige_ra_file)
# Peek at the first two rows to check what was read.
head(saige_df, n = 2)

probeID,ProbeChr,Gene,Probe_bp,topSNP,topSNP_chr,topSNP_bp,A1,A2,Freq,⋯,p_GWAS,b_eQTL,se_eQTL,p_eQTL,b_SMR,se_SMR,p_SMR,p_HEIDI,nsnp_HEIDI,Cell_type
<chr>,<int>,<chr>,<int>,<chr>,<int>,<int>,<chr>,<chr>,<dbl>,⋯,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<int>,<chr>
NOC2L,1,NOC2L,879584,rs3748595,1,887560,A,C,0.0672147,⋯,0.4313,-0.15302,0.0352961,1.4555e-05,0.276434,0.35668,0.4383276,0.3042242,3,BimmNaive
C1orf86,1,C1orf86,2115903,rs11587831,1,2110848,T,G,0.7853,⋯,0.63,-0.115809,0.0148762,6.980209e-15,-0.115708,0.24051,0.6304503,0.2562137,8,BimmNaive


In [25]:
# Number of distinct genes in the SAIGE-QTL-based SMR table.
uniqueN(saige_df$Gene)
# Key every row by "<Gene>_<Cell_type>" so gene-cell type pairs can be counted and compared.
saige_df$comb <- paste(saige_df$Gene, saige_df$Cell_type, sep = "_")
# Number of distinct gene-cell type combinations.
uniqueN(saige_df$comb)
head(saige_df, n = 2)

probeID,ProbeChr,Gene,Probe_bp,topSNP,topSNP_chr,topSNP_bp,A1,A2,Freq,⋯,b_eQTL,se_eQTL,p_eQTL,b_SMR,se_SMR,p_SMR,p_HEIDI,nsnp_HEIDI,Cell_type,comb
<chr>,<int>,<chr>,<int>,<chr>,<int>,<int>,<chr>,<chr>,<dbl>,⋯,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<int>,<chr>,<chr>
NOC2L,1,NOC2L,879584,rs3748595,1,887560,A,C,0.0672147,⋯,-0.15302,0.0352961,1.4555e-05,0.276434,0.35668,0.4383276,0.3042242,3,BimmNaive,NOC2L_BimmNaive
C1orf86,1,C1orf86,2115903,rs11587831,1,2110848,T,G,0.7853,⋯,-0.115809,0.0148762,6.980209e-15,-0.115708,0.24051,0.6304503,0.2562137,8,BimmNaive,C1orf86_BimmNaive


In [26]:
# Count rows whose SMR p-value passes 0.05 / M, with M taken first as the number
# of genes and then as the number of gene-cell type combinations.
# na.rm = TRUE: rows with a missing p-value are simply not counted.
sum(saige_df$p_SMR < 0.05 / uniqueN(saige_df$Gene), na.rm = TRUE)
sum(saige_df$p_SMR < 0.05 / uniqueN(saige_df$comb), na.rm = TRUE)

In [27]:
# FDR-based alternative: estimate q-values from the SMR p-values (qvalue package)
# and count the rows with q < 0.05, i.e. significant at FDR < 5%.
saige_df$qv <- qvalue(p = saige_df$p_SMR)[["qvalues"]]
sum(saige_df$qv < 0.05, na.rm = TRUE)

In [28]:
# Overlap (everything tested): compare the gene-cell type keys with q < 0.05
# in each table, using the q-values stored in the `qv` column of the full tables.

# Significant keys in the Matrix eQTL-based table (which() skips missing q-values).
matrix_sign_combs <- matrix_df$comb[which(matrix_df$qv < 0.05)]
length(matrix_sign_combs)

# Significant keys in the SAIGE-QTL-based table.
saige_sign_combs <- saige_df$comb[which(saige_df$qv < 0.05)]
length(saige_sign_combs)

# How many significant SAIGE-QTL keys are also significant in the Matrix eQTL table.
sum(saige_sign_combs %in% matrix_sign_combs)

In [29]:
# Overlap (common gene-cell type combinations only):
# collect the distinct keys of each table and keep the SAIGE-QTL keys that also
# occur in the Matrix eQTL table.
matrix_combs <- unique(matrix_df$comb)
saige_combs <- unique(saige_df$comb)
common_combs <- saige_combs[is.element(saige_combs, matrix_combs)]
# Number of gene-cell type combinations present in both tables.
length(common_combs)

In [30]:
# Restrict each table to the rows whose key is in common_combs, then re-estimate
# the q-values on that subset, overwriting the `qv` column of the copy only.
matrix_df_common <- matrix_df[is.element(matrix_df$comb, common_combs), ]
matrix_df_common$qv <- qvalue(p = matrix_df_common$p_SMR)[["qvalues"]]
saige_df_common <- saige_df[is.element(saige_df$comb, common_combs), ]
saige_df_common$qv <- qvalue(p = saige_df_common$p_SMR)[["qvalues"]]

In [31]:
# Same overlap as before, but on the tables restricted to common combinations.
# NOTE: this reassigns matrix_sign_combs / saige_sign_combs.

# Significant keys (q < 0.05) in the restricted Matrix eQTL table.
matrix_sign_combs <- matrix_df_common$comb[which(matrix_df_common$qv < 0.05)]
length(matrix_sign_combs)

# Significant keys (q < 0.05) in the restricted SAIGE-QTL table.
saige_sign_combs <- saige_df_common$comb[which(saige_df_common$qv < 0.05)]
length(saige_sign_combs)

# How many significant SAIGE-QTL keys are also significant in the Matrix eQTL table.
sum(saige_sign_combs %in% matrix_sign_combs)

In [32]:
## SAIGE-QTL SMR gene list
# Rows of the full SAIGE-QTL table with q < 0.05; which() skips missing q-values.
# Uses saige_df$qv (all tested rows), not the q-values in saige_df_common.
saige_df_sign <- saige_df[which(saige_df$qv < 0.05), ]
saige_df_sign

probeID,ProbeChr,Gene,Probe_bp,topSNP,topSNP_chr,topSNP_bp,A1,A2,Freq,⋯,se_eQTL,p_eQTL,b_SMR,se_SMR,p_SMR,p_HEIDI,nsnp_HEIDI,Cell_type,comb,qv
<chr>,<int>,<chr>,<int>,<chr>,<int>,<int>,<chr>,<chr>,<dbl>,⋯,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<int>,<chr>,<chr>,<dbl>
PTPN22,1,PTPN22,114356433,rs2636008,1,114325914,A,G,0.643617,⋯,0.02671320,1.412721e-07,-1.4053400,0.2842150,7.629313e-07,3.762805e-06,12,BimmNaive,PTPN22_BimmNaive,5.474196e-05
FCRL3,1,FCRL3,157644111,rs7528684,1,157670816,A,G,0.530948,⋯,0.02532830,3.384601e-29,-0.2150690,0.0496142,1.458784e-05,3.113276e-01,20,BimmNaive,FCRL3_BimmNaive,7.095667e-04
RPS7,2,RPS7,3622795,rs62106040,2,3627302,A,T,0.932785,⋯,0.00517966,2.978748e-47,1.9843700,0.5199350,1.353200e-04,6.066024e-01,13,BimmNaive,RPS7_BimmNaive,4.775597e-03
REL,2,REL,61108656,rs12989427,2,61137506,G,A,0.830754,⋯,0.01614030,2.274546e-26,-0.5621190,0.1128770,6.361624e-07,5.273667e-03,20,BimmNaive,REL_BimmNaive,4.615603e-05
PSD4,2,PSD4,113914902,rs2241976,2,113943470,A,G,0.576886,⋯,0.02753300,7.402550e-09,0.4152450,0.1235370,7.757658e-04,2.411041e-01,20,BimmNaive,PSD4_BimmNaive,2.085917e-02
DAP,5,DAP,10679342,rs267948,5,10744197,C,G,0.278530,⋯,0.02472750,2.996078e-09,0.5113210,0.1276010,6.144669e-05,1.776416e-01,20,BimmNaive,DAP_BimmNaive,2.478313e-03
ZFP57,6,ZFP57,29640169,rs2747429,6,29648377,T,C,0.771760,⋯,0.21004300,5.252485e-11,0.0876951,0.0174152,4.765002e-07,4.882440e-06,20,BimmNaive,ZFP57_BimmNaive,3.536214e-05
HLA-A,6,HLA-A,29909037,rs2844810,6,29923580,G,C,0.413926,⋯,0.01122630,1.379438e-44,-0.6021640,0.0937355,1.326654e-10,4.206965e-09,20,BimmNaive,HLA-A_BimmNaive,1.776231e-08
XXbac-BPG299F13.17,6,XXbac-BPG299F13.17,31162977,rs6901685,6,31163796,T,C,0.786750,⋯,0.04203120,4.937758e-84,-0.1465020,0.0194096,4.422318e-14,6.000319e-18,20,BimmNaive,XXbac-BPG299F13.17_BimmNaive,9.116390e-12
HLA-C,6,HLA-C,31236526,rs9264664,6,31239227,C,T,0.348162,⋯,0.01381500,9.766027e-59,-0.3538780,0.0647373,4.593516e-08,3.616243e-33,20,BimmNaive,HLA-C_BimmNaive,4.142821e-06


In [33]:
# Save the significant SAIGE-QTL SMR rows to disk (fwrite's default settings are used).
fwrite(
    x = saige_df_sign,
    file = "/directflow/SCCGGroupShare/projects/anncuo/OneK1K/saige_eqtl/for_wei/smr/ra_saige_egenes.txt"
)