In [1]:
library(data.table)
library(qvalue)

In [2]:
## SLE (systemic lupus erythematosus) SMR (summary-data-based Mendelian Randomisation) results

In [3]:
# Matrix eQTL-based SMR results for SLE: build the path to the combined
# results table (all cell types in one file, per the file name) and read it.
matrix_eqtl_smr_results_dir <- "/directflow/SCCGGroupShare/projects/angxue/proj/SAIGE-eQTL/SMR/"
matrix_sle_file <- paste(matrix_eqtl_smr_results_dir, "sle_all_14_celltypes_all_smr.txt", sep = "")
matrix_df <- fread(matrix_sle_file)
# Preview the first two rows to check the columns were parsed as expected
head(matrix_df, n = 2)

probeID,ProbeChr,Gene,Probe_bp,topSNP,topSNP_chr,topSNP_bp,A1,A2,Freq,⋯,p_GWAS,b_eQTL,se_eQTL,p_eQTL,b_SMR,se_SMR,p_SMR,p_HEIDI,nsnp_HEIDI,Cell_type
<chr>,<int>,<chr>,<int>,<chr>,<int>,<int>,<chr>,<chr>,<dbl>,⋯,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<int>,<chr>
FCRL5,1,FCRL5,157483167,rs12741984,1,157534818,G,A,0.787234,⋯,0.9282,0.11378,0.00486738,7.500497e-121,-0.0500968,0.554584,0.9280233,0.7014684,20,BimmNaive
RPS8,1,RPS8,45240923,rs12120833,1,45241285,C,G,0.850097,⋯,0.4884,-0.105432,0.00500701,1.980368e-98,-0.514077,0.742114,0.4884856,0.909178,20,BimmNaive


In [4]:
# Number of distinct genes in the Matrix eQTL-based results
uniqueN(matrix_df$Gene)
# Tag every row with a "<Gene>_<Cell_type>" key and count the distinct
# gene-cell type combinations
matrix_df$comb <- paste(matrix_df$Gene, matrix_df$Cell_type, sep = "_")
uniqueN(matrix_df$comb)
head(matrix_df, n = 2)

probeID,ProbeChr,Gene,Probe_bp,topSNP,topSNP_chr,topSNP_bp,A1,A2,Freq,⋯,b_eQTL,se_eQTL,p_eQTL,b_SMR,se_SMR,p_SMR,p_HEIDI,nsnp_HEIDI,Cell_type,comb
<chr>,<int>,<chr>,<int>,<chr>,<int>,<int>,<chr>,<chr>,<dbl>,⋯,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<int>,<chr>,<chr>
FCRL5,1,FCRL5,157483167,rs12741984,1,157534818,G,A,0.787234,⋯,0.11378,0.00486738,7.500497e-121,-0.0500968,0.554584,0.9280233,0.7014684,20,BimmNaive,FCRL5_BimmNaive
RPS8,1,RPS8,45240923,rs12120833,1,45241285,C,G,0.850097,⋯,-0.105432,0.00500701,1.980368e-98,-0.514077,0.742114,0.4884856,0.909178,20,BimmNaive,RPS8_BimmNaive


In [5]:
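# Bonferroni-corrected significance: p < 0.05/M, where M is the number of tests (either the number of unique genes or the number of unique gene-cell type combinations)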

In [6]:
# Count SMR associations passing a Bonferroni threshold of 0.05 / M.
# Rows with a missing p_SMR are not counted (data.table treats NA in `i` as FALSE).
matrix_df[p_SMR < 0.05 / uniqueN(Gene), .N]  # M = number of genes
matrix_df[p_SMR < 0.05 / uniqueN(comb), .N]  # M = number of gene-cell type combinations

In [7]:
# FDR < 5% as an alternative to Bonferroni: convert the SMR p-values of all
# tested rows into q-values (qvalue package), store them in `qv`, and count
# the rows with q < 0.05.
matrix_df$qv <- qvalue(p = matrix_df$p_SMR)[["qvalues"]]
matrix_df[qv < 0.05, .N]

In [8]:
# SAIGE-QTL-based SMR results for SLE: same file name as the Matrix eQTL
# table, read from the saige_eQTL sub-directory.
saige_eqtl_smr_results_dir <- "/directflow/SCCGGroupShare/projects/angxue/proj/SAIGE-eQTL/SMR/saige_eQTL/"
saige_sle_file <- paste(saige_eqtl_smr_results_dir, "sle_all_14_celltypes_all_smr.txt", sep = "")
saige_df <- fread(saige_sle_file)
# Preview the first two rows to check the columns were parsed as expected
head(saige_df, n = 2)

probeID,ProbeChr,Gene,Probe_bp,topSNP,topSNP_chr,topSNP_bp,A1,A2,Freq,⋯,p_GWAS,b_eQTL,se_eQTL,p_eQTL,b_SMR,se_SMR,p_SMR,p_HEIDI,nsnp_HEIDI,Cell_type
<chr>,<int>,<chr>,<int>,<chr>,<int>,<int>,<chr>,<chr>,<dbl>,⋯,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<int>,<chr>
NOC2L,1,NOC2L,879584,rs3748595,1,887560,A,C,0.0672147,⋯,0.141,-0.15302,0.0352961,1.4555e-05,0.992681,0.712232,0.1633899,0.4676291,3,BimmNaive
C1orf86,1,C1orf86,2115903,rs11587831,1,2110848,T,G,0.7853,⋯,0.06286,-0.115809,0.0148762,6.980209e-15,-1.08195,0.597518,0.07017984,0.9955862,8,BimmNaive


In [9]:
# Number of distinct genes in the SAIGE-QTL-based results
uniqueN(saige_df$Gene)
# Tag every row with a "<Gene>_<Cell_type>" key and count the distinct
# gene-cell type combinations
saige_df$comb <- paste(saige_df$Gene, saige_df$Cell_type, sep = "_")
uniqueN(saige_df$comb)
head(saige_df, n = 2)

probeID,ProbeChr,Gene,Probe_bp,topSNP,topSNP_chr,topSNP_bp,A1,A2,Freq,⋯,b_eQTL,se_eQTL,p_eQTL,b_SMR,se_SMR,p_SMR,p_HEIDI,nsnp_HEIDI,Cell_type,comb
<chr>,<int>,<chr>,<int>,<chr>,<int>,<int>,<chr>,<chr>,<dbl>,⋯,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<int>,<chr>,<chr>
NOC2L,1,NOC2L,879584,rs3748595,1,887560,A,C,0.0672147,⋯,-0.15302,0.0352961,1.4555e-05,0.992681,0.712232,0.1633899,0.4676291,3,BimmNaive,NOC2L_BimmNaive
C1orf86,1,C1orf86,2115903,rs11587831,1,2110848,T,G,0.7853,⋯,-0.115809,0.0148762,6.980209e-15,-1.08195,0.597518,0.07017984,0.9955862,8,BimmNaive,C1orf86_BimmNaive


In [10]:
# Count SAIGE-QTL SMR associations passing a Bonferroni threshold of 0.05 / M.
# Rows with a missing p_SMR are not counted (data.table treats NA in `i` as FALSE).
saige_df[p_SMR < 0.05 / uniqueN(Gene), .N]  # M = number of genes
saige_df[p_SMR < 0.05 / uniqueN(comb), .N]  # M = number of gene-cell type combinations

In [16]:
# Show the SAIGE-QTL rows passing the gene-level Bonferroni threshold
# (0.05 / number of unique genes)
saige_df[p_SMR < 0.05 / uniqueN(Gene)]

probeID,ProbeChr,Gene,Probe_bp,topSNP,topSNP_chr,topSNP_bp,A1,A2,Freq,⋯,se_eQTL,p_eQTL,b_SMR,se_SMR,p_SMR,p_HEIDI,nsnp_HEIDI,Cell_type,comb,qv
<chr>,<int>,<chr>,<int>,<chr>,<int>,<int>,<chr>,<chr>,<dbl>,⋯,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<int>,<chr>,<chr>,<dbl>
HLA-DRB1,6,HLA-DRB1,32546546,rs74927567,6,32597064,G,A,0.394101,⋯,0.0392904,5.966581e-13,-1.05605,0.228554,3.826358e-06,0.005936424,20,NKact,HLA-DRB1_NKact,0.05481379


In [11]:
# FDR < 5% as an alternative to Bonferroni: convert the SMR p-values of all
# tested rows into q-values (qvalue package), store them in `qv`, and count
# the rows with q < 0.05.
saige_df$qv <- qvalue(p = saige_df$p_SMR)[["qvalues"]]
saige_df[qv < 0.05, .N]

In [12]:
# Overlap of FDR-significant hits, with q-values computed on everything
# tested by each method.

# Gene-cell type keys with q < 0.05 in the Matrix eQTL-based results
matrix_sign_combs <- matrix_df[qv < 0.05, comb]
length(matrix_sign_combs)

# Gene-cell type keys with q < 0.05 in the SAIGE-QTL-based results
saige_sign_combs <- saige_df[qv < 0.05, comb]
length(saige_sign_combs)

# Number of SAIGE-QTL hits that are also Matrix eQTL hits
sum(saige_sign_combs %in% matrix_sign_combs)

In [13]:
# Overlap restricted to gene-cell type combinations present in both result
# tables: collect the distinct keys of each table and keep the SAIGE-QTL keys
# that also occur in the Matrix eQTL table (order follows saige_combs).
matrix_combs <- unique(matrix_df$comb)
saige_combs <- unique(saige_df$comb)
common_combs <- intersect(saige_combs, matrix_combs)
length(common_combs)

In [14]:
# Keep only the rows whose gene-cell type key is shared by both methods, then
# recompute the q-values within each subset (this replaces the `qv` values
# carried over from the full tables).
matrix_df_common <- matrix_df[is.element(matrix_df$comb, common_combs), ]
matrix_df_common$qv <- qvalue(p = matrix_df_common$p_SMR)[["qvalues"]]

saige_df_common <- saige_df[is.element(saige_df$comb, common_combs), ]
saige_df_common$qv <- qvalue(p = saige_df_common$p_SMR)[["qvalues"]]

In [15]:
# Overlap of FDR-significant hits within the shared gene-cell type
# combinations. Note: this overwrites matrix_sign_combs / saige_sign_combs
# from the "everything tested" comparison.

# Keys with q < 0.05 in the Matrix eQTL-based subset
matrix_sign_combs <- matrix_df_common[qv < 0.05, comb]
length(matrix_sign_combs)

# Keys with q < 0.05 in the SAIGE-QTL-based subset
saige_sign_combs <- saige_df_common[qv < 0.05, comb]
length(saige_sign_combs)

# Number of SAIGE-QTL hits that are also Matrix eQTL hits
sum(saige_sign_combs %in% matrix_sign_combs)

In [17]:
# Show the SAIGE-QTL rows with q < 0.05 within the shared gene-cell type subset
saige_df_common[qv < 0.05]

probeID,ProbeChr,Gene,Probe_bp,topSNP,topSNP_chr,topSNP_bp,A1,A2,Freq,⋯,se_eQTL,p_eQTL,b_SMR,se_SMR,p_SMR,p_HEIDI,nsnp_HEIDI,Cell_type,comb,qv
<chr>,<int>,<chr>,<int>,<chr>,<int>,<int>,<chr>,<chr>,<dbl>,⋯,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<int>,<chr>,<chr>,<dbl>
HLA-A,6,HLA-A,29909037,rs2844810,6,29923580,G,C,0.413926,⋯,0.0112263,1.3794379999999999e-44,1.39382,0.358714,0.0001020795,9.597639e-06,20,BimmNaive,HLA-A_BimmNaive,0.0498162
FAM167A,8,FAM167A,11278972,rs4840568,8,11351019,G,A,0.728723,⋯,0.124259,3.978588e-18,-0.210294,0.0536299,8.810483e-05,0.7108002,20,BimmNaive,FAM167A_BimmNaive,0.0498162
BLK,8,BLK,11351510,rs2736338,8,11341883,A,C,0.739845,⋯,0.0137612,1.817536e-38,1.23096,0.30962,7.016493e-05,0.5842275,20,Bmem,BLK_Bmem,0.0498162
HLA-B,6,HLA-B,31321649,rs9264579,6,31235746,G,A,0.453578,⋯,0.0128697,4.068664e-81,-0.831398,0.201122,3.567982e-05,1.187305e-07,20,CD4all,HLA-B_CD4all,0.0498162
RNF5,6,RNF5,32146131,rs3131297,6,32141005,A,T,0.773694,⋯,0.0172581,1.089559e-09,-4.09221,0.955984,1.863741e-05,2.158111e-05,19,CD4all,RNF5_CD4all,0.0498162
HLA-DMA,6,HLA-DMA,32916390,rs76088152,6,32946322,G,A,0.925532,⋯,0.0310661,1.182231e-14,-2.12332,0.493735,1.703934e-05,0.005374339,20,CD4all,HLA-DMA_CD4all,0.0498162
RPS10,6,RPS10,34385231,rs9380433,6,34377626,C,T,0.181335,⋯,0.00941291,3.944415e-92,-1.47212,0.37494,8.626438e-05,0.000742854,20,CD4all,RPS10_CD4all,0.0498162
HLA-B,6,HLA-B,31321649,rs9264579,6,31235746,G,A,0.453578,⋯,0.0133946,6.45731e-77,-0.820872,0.198836,3.653178e-05,1.179006e-06,20,CD4effCM,HLA-B_CD4effCM,0.0498162
RPS10,6,RPS10,34385231,rs9380433,6,34377626,C,T,0.181335,⋯,0.0100703,1.23194e-75,-1.52217,0.389295,9.227231e-05,0.0006065366,20,CD4effCM,RPS10_CD4effCM,0.0498162
HLA-B,6,HLA-B,31321649,rs9264579,6,31235746,G,A,0.453578,⋯,0.01252,1.838895e-80,-0.858176,0.20764,3.58057e-05,4.446047e-07,20,CD8all,HLA-B_CD8all,0.0498162


In [19]:
## SAIGE-QTL SMR gene list
# Significant (q < 0.05) SAIGE-QTL SMR associations, taken from the table
# restricted to gene-cell type combinations shared with the Matrix eQTL
# analysis (saige_df_common), whose q-values were recomputed on that subset.
#
# NOTE(review): this cell originally filtered `saige_df`. The output saved
# with it, however, matches the `saige_df_common` hits row for row (same
# q-value, 0.0498162) and does not contain HLA-DRB1_NKact, whose q-value in the
# full `saige_df` is displayed as 0.0548. The saved result therefore depended
# on kernel state that is no longer in the notebook (execution count 18 is
# missing); on a fresh top-to-bottom run the original filter would not
# reproduce the exported list. Filtering `saige_df_common` explicitly makes the
# cell reproduce the saved output -- please confirm this is the intended list.
saige_df_sign = saige_df_common[saige_df_common$qv < 0.05,]
saige_df_sign

probeID,ProbeChr,Gene,Probe_bp,topSNP,topSNP_chr,topSNP_bp,A1,A2,Freq,⋯,se_eQTL,p_eQTL,b_SMR,se_SMR,p_SMR,p_HEIDI,nsnp_HEIDI,Cell_type,comb,qv
<chr>,<int>,<chr>,<int>,<chr>,<int>,<int>,<chr>,<chr>,<dbl>,⋯,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<int>,<chr>,<chr>,<dbl>
HLA-A,6,HLA-A,29909037,rs2844810,6,29923580,G,C,0.413926,⋯,0.0112263,1.3794379999999999e-44,1.39382,0.358714,0.0001020795,9.597639e-06,20,BimmNaive,HLA-A_BimmNaive,0.0498162
FAM167A,8,FAM167A,11278972,rs4840568,8,11351019,G,A,0.728723,⋯,0.124259,3.978588e-18,-0.210294,0.0536299,8.810483e-05,0.7108002,20,BimmNaive,FAM167A_BimmNaive,0.0498162
BLK,8,BLK,11351510,rs2736338,8,11341883,A,C,0.739845,⋯,0.0137612,1.817536e-38,1.23096,0.30962,7.016493e-05,0.5842275,20,Bmem,BLK_Bmem,0.0498162
HLA-B,6,HLA-B,31321649,rs9264579,6,31235746,G,A,0.453578,⋯,0.0128697,4.068664e-81,-0.831398,0.201122,3.567982e-05,1.187305e-07,20,CD4all,HLA-B_CD4all,0.0498162
RNF5,6,RNF5,32146131,rs3131297,6,32141005,A,T,0.773694,⋯,0.0172581,1.089559e-09,-4.09221,0.955984,1.863741e-05,2.158111e-05,19,CD4all,RNF5_CD4all,0.0498162
HLA-DMA,6,HLA-DMA,32916390,rs76088152,6,32946322,G,A,0.925532,⋯,0.0310661,1.182231e-14,-2.12332,0.493735,1.703934e-05,0.005374339,20,CD4all,HLA-DMA_CD4all,0.0498162
RPS10,6,RPS10,34385231,rs9380433,6,34377626,C,T,0.181335,⋯,0.00941291,3.944415e-92,-1.47212,0.37494,8.626438e-05,0.000742854,20,CD4all,RPS10_CD4all,0.0498162
HLA-B,6,HLA-B,31321649,rs9264579,6,31235746,G,A,0.453578,⋯,0.0133946,6.45731e-77,-0.820872,0.198836,3.653178e-05,1.179006e-06,20,CD4effCM,HLA-B_CD4effCM,0.0498162
RPS10,6,RPS10,34385231,rs9380433,6,34377626,C,T,0.181335,⋯,0.0100703,1.23194e-75,-1.52217,0.389295,9.227231e-05,0.0006065366,20,CD4effCM,RPS10_CD4effCM,0.0498162
HLA-B,6,HLA-B,31321649,rs9264579,6,31235746,G,A,0.453578,⋯,0.01252,1.838895e-80,-0.858176,0.20764,3.58057e-05,4.446047e-07,20,CD8all,HLA-B_CD8all,0.0498162


In [20]:
# Export the significant SAIGE-QTL SMR table using fwrite's default settings
fwrite(
    x = saige_df_sign,
    file = "/directflow/SCCGGroupShare/projects/anncuo/OneK1K/saige_eqtl/for_wei/smr/sle_saige_egenes.txt"
)