In [1]:
setwd("/frazer01/projects/PPC/analysis/ppc_eqtls")

source("scripts/packages.R"  )
source("scripts/input_data.R")
source("scripts/functions.R" )
source("scripts/coloc_functions.R")
source("scripts/3.4.coloc_adult/functions.R")

suppressMessages(library(coloc))

“[1m[22mThe `size` argument of `element_line()` is deprecated as of ggplot2 3.4.0.
[36mℹ[39m Please use the `linewidth` argument instead.”
“[1m[22mThe `size` argument of `element_rect()` is deprecated as of ggplot2 3.4.0.
[36mℹ[39m Please use the `linewidth` argument instead.”


# Objective: Check numbers in manuscript

## Abstract

In [6]:
table = fread("reviews/tables/Table_S11_eQTL_Annotations.txt", data.table = F)
a = table %>% filter(category_annotation %in% c("ipsc_ppc-unique", "ipsc_ppc singleton"))
message(paste("Total iPSC-PPC-unique:", nrow(a)))


Total iPSC-PPC-unique: 1805



In [8]:
a = table %>% filter(tissue == "ipsc-ppc" & category_annotation %in% c("fetal-adult", "fetal-islet", "fetal-pancreas") &
                            (islet_egene_overlap %in% c("partial", "different") | pancreas_egene_overlap %in% c("partial", "different")))  

table(a$islet_egene_overlap, a$pancreas_egene_overlap)

table(a$eqtl_phenotype)

message(paste("Total shared with adult:", nrow(a), 
              signif(nrow(a) / nrow(table[table$tissue == "ipsc-ppc",]) * 100, 3), 
              nrow(table[table$tissue == "ipsc-ppc",])))


           
            different partial same zero
  different       109      68   45   46
  partial          57     259  125   30
  same             28     114    0    0
  zero             76      86    0    0


alternative_splicing      gene_expression 
                 386                  657 

Total shared with adult: 1043 12.9 8108



## Identification and characterization of gene and isoform eQTLs in fetal-like iPSC-PPCs

In [56]:
table = fread("reviews/tables/Table_S7_LeadSNP.txt", data.table = F) %>% distinct()

# Number of eGenes and eIsoforms
a = table %>% filter(discovery_order == 0 & egene == T)
table(a$eqtl_phenotype)

# Number of egQTls and eiQTLs
a = table %>% filter(egene == T)
table(a$eqtl_phenotype)

# Number of genes affected by either gene expression or isoform usage
a = table %>% filter(egene == T)
message(length(unique(a$gene_id)))
all = length(unique(a$gene_id))

b = suppressWarnings(dcast(data.frame(table(a$gene_id, a$eqtl_phenotype)), Var1 ~ Var2))

message(paste("Both:", length(unique(b[b$gene_expression != 0 & b$isoform_usage != 0,]$Var1)) ))
length(unique(b[b$gene_expression != 0 & b$isoform_usage != 0,]$Var1)) / all * 100

message(paste("Only gene exp:", length(unique(b[b$gene_expression != 0 & b$isoform_usage == 0,]$Var1)) ))
length(unique(b[b$gene_expression != 0 & b$isoform_usage == 0,]$Var1)) / all * 100

message(paste("Only isof use:", length(unique(b[b$gene_expression == 0 & b$isoform_usage != 0,]$Var1)) ))
length(unique(b[b$gene_expression == 0 & b$isoform_usage != 0,]$Var1)) / all * 100




gene_expression   isoform_usage 
           4065            4016 


gene_expression   isoform_usage 
           4433            4232 

5619

Using 'Freq' as value column. Use 'value.var' to override

Both: 1008



Only gene exp: 3057



Only isof use: 1554



In [63]:
table = fread("reviews/tables/Table_S10_PPC_Gene_Isoform_Colocalization.txt", data.table = F)

a = table %>% filter(max_model_pp >= 0.8 & likely_model %in% c("H3", "H4"))
table(a$likely_model)
summary(a$max_model_pp)

message(paste("# genes with H3 or H4 with their isoforms:", length(unique(a$gene_id.1)) ))
length(unique(a$gene_id.1)) / 1008 * 100

b = data.frame(table(a$gene_id.1, a$likely_model))
b = suppressMessages(suppressWarnings(dcast(b, Var1 ~ Var2)))

all = length(unique(a$gene_id.1))

message(paste("# genes with only H3:", length(unique(b[b$H3 == 0 & b$H4 != 0,]$Var1)) ))
length(unique(b[b$H3 == 0 & b$H4 != 0,]$Var1)) / all * 100

message(paste("# genes with only H3:", length(unique(b[b$H3 != 0 & b$H4 == 0,]$Var1)) ))
length(unique(b[b$H3 != 0 & b$H4 == 0,]$Var1)) / all * 100

message(paste("# genes with both H3 and H4:", length(unique(b[b$H3 != 0 & b$H4 != 0,]$Var1)) ))
length(unique(b[b$H3 != 0 & b$H4 != 0,]$Var1)) / all * 100


 H3  H4 
147 671 

   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
 0.8001  0.8998  0.9565  0.9373  0.9866  1.0000 

# genes with H3 or H4 with their isoforms: 410



# genes with only H3: 333



# genes with only H3: 38



# genes with both H3 and H4: 39



# Most fetal-like and adult islet eGenes show developmental stage specificity

In [75]:
table = fread("reviews/tables/Table_S10_PPC_Islets_Colocalization_eGene.txt", data.table = F)
head(table,2)

# Genes with at least one H4
message(paste("H4:", length(unique(table[table$likely_model == "H4",]$gene_id.1))))

# Genes with only H3 and no H4
a = table[table$likely_model == "H4",]
message(paste("H3:", length(unique(table[table$likely_model == "H3" & !table$gene_id.1 %in% a$gene_id.1,]$gene_id.1))))




Unnamed: 0_level_0,eqtl_id.1,eqtl_id.2,transcript_id.1,transcript_id.2,gene_id.1,gene_id.2,gene_name.1,gene_name.2,eqtl_phenotype.1,eqtl_phenotype.2,⋯,nsnps,PP.H0.abf,PP.H1.abf,PP.H2.abf,PP.H3.abf,PP.H4.abf,likely_model,max_model_pp,topsnp,topsnp_pp
Unnamed: 0_level_1,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,⋯,<int>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>,<dbl>,<chr>,<dbl>
1,ipsc-ppc_0_ENSG00000003249,islet_0_ENSG00000003249,ENSG00000003249,ENSG00000003249,ENSG00000003249,ENSG00000003249,DBNDD1,DBNDD1,gene_expression,gene_expression,⋯,1847,6.764578e-05,4.95235e-05,0.07052776,0.0507548,0.8786003,H4,0.8786003,VAR_16_90081660_A_G,0.78119063
2,ipsc-ppc_0_ENSG00000004534,islet_0_ENSG00000004534,ENSG00000004534,ENSG00000004534,ENSG00000004534,ENSG00000004534,RBM6,RBM6,gene_expression,gene_expression,⋯,1152,1.484825e-07,6.369398e-07,0.0230944,0.09818851,0.8787163,H4,0.8787163,VAR_3_50174848_G_A,0.07459922


H4: 701

H3: 94



# Developmental stage-unique and shared egQTLs 

In [79]:
table = fread("reviews/tables/Table_S11_eQTL_Annotations.txt", data.table = F) 

a = table %>% filter(eqtl_phenotype %like% "gene" & category_annotation %like% "singleton")
table(a$category_annotation)



ipsc_ppc singleton    islet singleton pancreas singleton 
               887                703               1927 

In [92]:
table = fread("reviews/tables/Table_S12_ModuleSummary_Gene.txt", data.table = F)
table2 = fread("reviews/tables/Table_S11_eQTL_Annotations.txt")
str(table)

a = table %>% filter(module_pass == T & !category_annotation %like% "ambi")
message(paste("# modules passed criteria:", length(unique(a$module_id)) ))

b = a %>% filter(number_assocs == 2)
message(paste("# mod, 2 egQTLs:", length(unique(b$module_id)), signif(length(unique(b$module_id)) / length(unique(a$module_id)) * 100, 4)))

b = a %>% filter(number_assocs > 2)
message(paste("# mod, > 2 egQTLs:", length(unique(b$module_id)), signif(length(unique(b$module_id)) / length(unique(a$module_id)) * 100, 4)))

summary(a[a$number_assocs > 2,]$number_assocs)

'data.frame':	2720 obs. of  12 variables:
 $ module_id             : chr  "GE_1_1" "GE_1_10" "GE_1_100" "GE_1_101" ...
 $ associations          : chr  "ipsc-ppc_0_ENSG00000116874,ipsc-ppc_0_ENSG00000231365,islet_0_ENSG00000116874,pancreas_0_ENSG00000231365,pancre"| __truncated__ "islet_0_ENSG00000007341,islet_0_ENSG00000134245,pancreas_0_ENSG00000007341,pancreas_0_ENSG00000116489,pancreas_"| __truncated__ "pancreas_0_ENSG00000225217,pancreas_0_ENSG00000244682" "pancreas_0_ENSG00000226026,pancreas_0_ENSG00000228852,ipsc-ppc_0_ENSG00000235501" ...
 $ number_assocs         : int  5 6 2 3 2 2 3 2 2 2 ...
 $ number_ipsc_ppc_assocs: int  2 1 0 1 0 0 0 0 0 0 ...
 $ number_islet_assocs   : int  1 2 0 0 0 0 0 0 0 0 ...
 $ number_pancreas_assocs: int  2 3 2 2 2 2 3 2 2 2 ...
 $ islet_egene_overlap   : chr  "partial" "partial" "" "zero" ...
 $ pancreas_egene_overlap: chr  "same" "partial" "" "different" ...
 $ egene_overlap_category: chr  "C" "C" "" "D" ...
 $ module_pass           : logi  FALSE 

# modules passed criteria: 1852

# mod, 2 egQTLs: 939 50.7

# mod, > 2 egQTLs: 913 49.3



   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
  3.000   3.000   3.000   3.954   4.000  20.000 

In [93]:
b = a[a$category_annotation %like% "unique",] 
message(paste("# mod tissue-unique:", nrow(b), signif(nrow(b) / nrow(a) * 100, 3), nrow(a)))
table(a$category_annotation)

b = table2 %>% filter(category_annotation %like% "unique" & eqtl_phenotype == "gene_expression") %>% select(module_id, tissue) %>% distinct()
table(b$tissue)

b = table2 %>% filter(category_annotation %like% "unique" & eqtl_phenotype == "gene_expression") %>% distinct()
table(b$tissue)

# mod tissue-unique: 199 10.7 1852




   adult-shared     fetal-adult     fetal-islet  fetal-pancreas ipsc_ppc-unique 
            670             652              53             278              10 
   islet-unique pancreas-unique 
             30             159 


ipsc-ppc    islet pancreas 
      10       30      159 


ipsc-ppc    islet pancreas 
      21       62      354 

In [96]:
b = a[!a$category_annotation %like% "unique" & a$category_annotation != "ambiguous",] 
message(paste("# mod tissue-sharing:", nrow(b), signif(nrow(b) / nrow(a) * 100, 4), nrow(a)))
table(b$category_annotation)

# mod tissue-sharing: 1653 89.25 1852




  adult-shared    fetal-adult    fetal-islet fetal-pancreas 
           670            652             53            278 

In [97]:
a = table2 %>% 
    filter(!category_annotation %like% "failed" & 
           !category_annotation %like% "singleton" & 
           !category_annotation %like% "unique" & 
           category_annotation != "ambiguous" & 
           eqtl_phenotype == "gene_expression") %>% 
    select(module_id, category_annotation) %>% distinct()
table(a$category_annotation)

message(paste("# total fetal-adult sharing:", nrow(a[a$category_annotation %like% "fetal",])))



  adult-shared    fetal-adult    fetal-islet fetal-pancreas 
           670            652             53            278 

# total fetal-adult sharing: 983



In [99]:
a = table[!table$category_annotation %like% "failed" & 
        table$category_annotation != "adult-shared" & 
        !table$category_annotation %like% "unique" & 
        table$category_annotation != "ambiguous",] 
table(a$category_annotation)

message(paste("# mod fetal-adult sharing:", nrow(a), signif(nrow(a) / nrow(table) * 100, 4), nrow(table)))

a = table2 %>% filter(category_annotation %in% c("fetal-adult", "fetal-islets", "fetal-pancreas") & eqtl_phenotype == "gene_expression") %>% 
    select(eqtl_id, tissue) %>% distinct()
message(paste("how many eQTLs:"))
table(a$tissue)


   fetal-adult    fetal-islet fetal-pancreas 
           652             53            278 

# mod fetal-adult sharing: 983 36.14 2720

how many eQTLs:




ipsc-ppc    islet pancreas 
    1122      870     1394 

### conclusion

In [104]:
a = table2 %>% filter(category_annotation %in% c("ipsc_ppc singleton" , "ipsc_ppc-unique"))
message(paste("# total ppc unique:", length(unique(a$eqtl_id))))


table(a$eqtl_phenotype, a$eqtl_type)

table(a$eqtl_type)

# total ppc unique: 1805



                      
                       combinatorial singleton
  alternative_splicing           266       631
  gene_expression                 21       887


combinatorial     singleton 
          287          1518 

In [108]:
# iPSC-PPC eQTLs shared with adult
a = table2 %>% filter(category_annotation %in% c("fetal-adult", "fetal-islet", "fetal-pancreas") & tissue == "ipsc-ppc")
message(paste("# total ppc shared with adult:", nrow(a)))
table(a$eqtl_phenotype)

# total ppc shared with adult: 1977




alternative_splicing      gene_expression 
                 802                 1175 

In [110]:
a = table2 %>% filter(category_annotation %in% c("module_failed", "ambiguous") & tissue == "ipsc-ppc")
message(paste("# failed:", nrow(a)))
table(a$eqtl_phenotype)

# failed: 4326




alternative_splicing      gene_expression 
                2260                 2066 

# Regulatory plasticity in combinatorial egQTLs shared between fetal-like and adult pancreatic tissues

In [128]:
table = fread("reviews/tables/Table_S12_ModuleSummary_Gene.txt", data.table = F) %>% distinct()
table2 = fread("reviews/tables/Table_S11_eQTL_Annotations.txt", data.table = F)

a = table %>% filter(category_annotation %in% c("fetal-adult", "fetal-islet", "fetal-pancreas"))
message(paste("# total modules shared b/t ppc and adult:", nrow(a)))
table(a$category_annotation)
table(a$egene_overlap_category)

signif(table(a$egene_overlap_category) / nrow(a) * 100, 3)

b = a %>% filter(egene_overlap_category %in% c("A", "B"))
message(paste("# same genes:", nrow(b)))
nrow(b) / nrow(a) * 100

b = a %>% filter(egene_overlap_category %in% c("C", "D", "E"))
message(paste("# same genes:", nrow(b)))
nrow(b) / nrow(a) * 100

# total modules shared b/t ppc and adult: 983




   fetal-adult    fetal-islet fetal-pancreas 
           652             53            278 


  A   B   C   D   E 
200 305 350  88  40 


    A     B     C     D     E 
20.30 31.00 35.60  8.95  4.07 

# same genes: 505



# same genes: 478



### Another way to get category modules (sanity-check)

In [129]:
b = a %>% filter( (islet_egene_overlap == "zero" & pancreas_egene_overlap == "same") | (islet_egene_overlap == "same" & pancreas_egene_overlap == "zero") )
message(paste("# mod in A:", nrow(b), signif(nrow(b) / nrow(a) * 100, 4)))

b = table2 %>% filter(eqtl_phenotype == "gene_expression" & module_id %in% b$module_id) %>% select(module_id, gene_id) %>% distinct()
summary(data.frame(table(b$module_id))$Freq)

# mod in A: 200 20.35



   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
   1.00    1.00    1.00    1.01    1.00    2.00 

In [131]:
b = a %>% filter( (islet_egene_overlap == "same" & pancreas_egene_overlap == "same") | 
                  (islet_egene_overlap == "same" & pancreas_egene_overlap == "same") )
message(paste("# mod in B:", nrow(b), signif(nrow(b) / nrow(a) * 100, 4)))

b = table2 %>% filter(eqtl_phenotype == "gene_expression" & module_id %in% b$module_id) %>% select(module_id, gene_id) %>% distinct()
summary(data.frame(table(b$module_id))$Freq)

# mod in B: 305 31.03



   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
   1.00    1.00    1.00    1.01    1.00    2.00 

In [132]:
b = a %>% filter(islet_egene_overlap %like% "partial" | pancreas_egene_overlap %like% "partial")
message(paste("# mod in C:", nrow(b), signif(nrow(b) / nrow(a) * 100, 4)))

b = table2 %>% filter(eqtl_phenotype == "gene_expression" & module_id %in% b$module_id) %>% select(module_id, gene_id) %>% distinct()
summary(data.frame(table(b$module_id))$Freq)

# mod in C: 350 35.61



   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
  2.000   2.000   2.000   2.831   3.000  12.000 

In [133]:
b = a %>% filter( (islet_egene_overlap == "different" & pancreas_egene_overlap %in% c("zero", "same")) | 
                  (islet_egene_overlap %in% c("zero", "same") & pancreas_egene_overlap == "different") )
message(paste("# mod in D:", nrow(b), signif(nrow(b) / nrow(a) * 100, 2)))

b = table2 %>% filter(eqtl_phenotype == "gene_expression" & module_id %in% b$module_id) %>% select(module_id, gene_id) %>% distinct()
summary(data.frame(table(b$module_id))$Freq)


# mod in D: 88 9



   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
  2.000   2.000   2.000   2.284   2.000   5.000 

In [135]:
b = a %>% filter( islet_egene_overlap == "different" & pancreas_egene_overlap == "different" )
message(paste("# mod in E:", nrow(b), signif(nrow(b) / nrow(a) * 100, 2)))

b = table2 %>% filter(eqtl_phenotype == "gene_expression" & module_id %in% b$module_id) %>% select(module_id, gene_id) %>% distinct()
summary(data.frame(table(b$module_id))$Freq)

# mod in E: 40 4.1



   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
   2.00    2.00    3.00    3.15    4.00    7.00 

In [140]:
mod1 = fread("reviews/tables/Table_S12_ModuleSummary_Gene.txt", data.table = F) %>% filter(!category_annotation %like% "module_fail") %>% distinct()
mod2 = fread("reviews/tables/Table_S12_ModuleSummary_AltSplicing.txt", data.table = F) %>% filter(!category_annotation %like% "module_fail") %>% distinct()
mod = rbind(mod1, mod2) %>% filter(category_annotation %in% c("fetal-adult", "fetal-islet", "fetal-pancreas") & egene_overlap_category %in% c("C", "D", "E")) 

message(paste("total modules shared b/t ppc and adult:", nrow(mod)))

table(table2[table2$module_id %in% mod$module_id,]$tissue)

total modules shared b/t ppc and adult: 655




ipsc-ppc    islet pancreas 
    1043      934     1111 

# Associations of developmental stage-unique eQTLs with pancreatic traits and disease phenotypes

In [141]:
gwas_coloc = fread("reviews/tables/Table_S14_GWAS_Colocalization.txt", data.table = F)
eqtl_annot = fread("reviews/tables/Table_S11_eQTL_Annotations.txt", data.table = F)

## Singleton

In [145]:
a = eqtl_annot %>% filter(category_annotation %like% "singleton")
message(paste("# total singletons:", length(unique(a$eqtl_id))))
table(a$eqtl_phenotype)

# total singletons: 6101




alternative_splicing      gene_expression 
                2584                 3517 

In [148]:
b = gwas_coloc[gwas_coloc$eqtl_id %in% a$eqtl_id,]
message(paste("# singletons with GWAS coloc:", length(unique(b$eqtl_id)), signif(length(unique(b$eqtl_id)) / length(unique(a$eqtl_id)) * 100, 4) ))
message(paste("# ppc-singleton:", length(unique(b[b$tissue == "ipsc-ppc",]$eqtl_id)), length(unique(a[a$tissue == "ipsc-ppc",]$eqtl_id)), signif(length(unique(b[b$tissue == "ipsc-ppc",]$eqtl_id)) / length(unique(a[a$tissue == "ipsc-ppc",]$eqtl_id)) * 100, 4) ))
message(paste("# islet-singleton:", length(unique(b[b$tissue == "islet",]$eqtl_id)), length(unique(a[a$tissue == "islet",]$eqtl_id)), signif(length(unique(b[b$tissue == "islet",]$eqtl_id)) / length(unique(a[a$tissue == "islet",]$eqtl_id)) * 100, 4) ))
message(paste("# panc-singleton:", length(unique(b[b$tissue == "pancreas",]$eqtl_id)), length(unique(a[a$tissue == "pancreas",]$eqtl_id)), signif(length(unique(b[b$tissue == "pancreas",]$eqtl_id)) / length(unique(a[a$tissue == "pancreas",]$eqtl_id)) * 100, 4) ))

table(unique(b[,c("eqtl_id", "tissue")])$tissue)


# singletons with GWAS coloc: 118 1.934

# ppc-singleton: 21 1518 1.383

# islet-singleton: 57 2225 2.562

# panc-singleton: 40 2358 1.696




ipsc-ppc    islet pancreas 
      21       57       40 

In [154]:
b = gwas_coloc[gwas_coloc$eqtl_id %in% a$eqtl_id,] %>% select(eqtl_id, trait_id) %>% distinct()
b = data.frame(table(b$eqtl_id)) %>% filter(Freq != 1)
message(paste("eQTls colocalized with > 1 traits:", length(unique(b$Var1))))
summary(b$Freq)


eQTls colocalized with > 1 traits: 38



   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
  2.000   2.000   2.000   2.711   3.000   6.000 

In [161]:
b = gwas_coloc[gwas_coloc$eqtl_id %in% a$eqtl_id,]
message(paste("# total loci:", length(unique(b$gwas_locus_id))))
summary(b$cs_size)
message(paste("# loci with 1 SNP:", length(unique(b[b$cs_size == 1,]$gwas_locus_id))))
message(paste("# loci 2-10 SNP:", length(unique(b[b$cs_size >= 2 & b$cs_size <= 10,]$gwas_locus_id))))
message(paste("# loci >10 SNP:", length(unique(b[b$cs_size >= 10,]$gwas_locus_id))))

# Mean of SNPs per CS with > 10 SNPs
summary(b[b$cs_size >= 10,]$cs_size)

# total loci: 183



   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
   1.00    4.00   12.00   26.88   35.00  227.00 

# loci with 1 SNP: 21

# loci 2-10 SNP: 63

# loci >10 SNP: 99



   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
   11.0    18.0    33.0    46.3    55.0   227.0 

## Combinatorial

In [280]:
gwas_coloc = fread("reviews/tables/Table_S14_GWAS_Colocalization.txt", data.table = F)
eqtl_annot = fread("reviews/tables/Table_S11_eQTL_Annotations.txt", data.table = F)

In [164]:
a = eqtl_annot %>% filter(!category_annotation %like% "singleton" & !category_annotation %like% "failed" & category_annotation != "ambiguous")
b = a %>% select(module_id, eqtl_phenotype, category_annotation) %>% distinct()
message(paste("# total modules:", length(unique(b$module_id))))
table(b$eqtl_phenotype)


# total modules: 2832




alternative_splicing      gene_expression 
                 980                 1852 

In [173]:
b = gwas_coloc[gwas_coloc$module_id %in% a$module_id,] %>% select(module_id, eqtl_phenotype, category_annotation) %>% distinct()
message(paste("# modules with GWAS:", length(unique(b$module_id)), signif(length(unique(b$module_id)) / length(unique(a$module_id)) * 100, 4), length(unique(a$module_id))))
length(unique(b$module_id))
table(b$eqtl_phenotype)

b = gwas_coloc[gwas_coloc$module_id %in% a$module_id,]
message(paste("Total GWAS loci colocalized with module:", length(unique(b$gwas_locus_id))))

# modules with GWAS: 89 3.143 2832




alternative_splicing      gene_expression 
                  32                   57 

Total GWAS loci colocalized with module: 129



In [179]:
b = gwas_coloc[gwas_coloc$module_id %in% a$module_id,] %>% select(module_id, eqtl_phenotype, category_annotation) %>% distinct()
length(unique(b$module_id))
message(paste("# ppc-unique modules:", nrow(b[b$category_annotation == "ipsc_ppc-unique",])))
message(paste("# fetal-adult modules:", nrow(b[b$category_annotation %in% c("fetal-adult", "fetal-islet", "fetal-whole-pancreas"),])))
message(paste("# adult-only modules:", nrow(b[b$category_annotation %in% c("adult-shared", "islet-unique", "whole pancreas-unique"),])))
table(b$category_annotation)

# ppc-unique modules: 5

# fetal-adult modules: 36

# adult-only modules: 48




         adult-shared           fetal-adult           fetal-islet 
                   21                    21                     4 
 fetal-whole-pancreas       ipsc_ppc-unique          islet-unique 
                   11                     5                    22 
whole pancreas-unique 
                    5 

In [181]:
table(b[b$category_annotation == "ipsc_ppc-unique",]$eqtl_phenotype)


alternative_splicing 
                   5 

In [190]:
b = gwas_coloc[gwas_coloc$module_id %in% a$module_id,]
message(paste("# GWAS loci:", length(unique(b$gwas_locus_id))), appendLF = F)
message(paste("# modules:", length(unique(b[b$eqtl_type %like% "comb",]$module_id))))

c = eqtl_annot %>% filter(module_id %in% b$module_id)

message(paste("# total module:", length(unique(c$module_id))))
message(paste("# ppc eqtls / genes:", length(unique(c[c$tissue == "ipsc-ppc",]$eqtl_id)), length(unique(c[c$tissue == "ipsc-ppc",]$gene_id))))
message(paste("# islet eqtls / genes:", length(unique(c[c$tissue == "islet",]$eqtl_id)), length(unique(c[c$tissue == "islet",]$gene_id))))
message(paste("# pancreas eqtls/ genes:", length(unique(c[c$tissue == "pancreas",]$eqtl_id)), length(unique(c[c$tissue == "pancreas",]$gene_id))))



# GWAS loci: 129
# modules: 89

# total module: 89

# ppc eqtls / genes: 49 41

# islet eqtls / genes: 98 75

# pancreas eqtls/ genes: 71 69



In [193]:
b = gwas_coloc[gwas_coloc$module_id %in% a$module_id & gwas_coloc$used_to_finemap == T,]
message(paste("# GWAS loci:", length(unique(b$gwas_locus_id))))
message(paste("# GWAS loci with 1 SNP:", length(unique(b[b$cs_size == 1,]$gwas_locus_id))))
message(paste("# GWAS loci with 2-10 SNP:", length(unique(b[b$cs_size >= 2 & b$cs_size <= 10,]$gwas_locus_id))))
message(paste("# GWAS loci with >10 SNP:", length(unique(b[b$cs_size > 10,]$gwas_locus_id))))

# Avg snps for CS with > 10 snps
summary(b[b$cs_size > 10,]$cs_size)

# GWAS loci: 129

# GWAS loci with 1 SNP: 15

# GWAS loci with 2-10 SNP: 54

# GWAS loci with >10 SNP: 60



   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
  11.00   16.75   22.50   31.77   42.00  106.00 

# Spatiotemporally informed eQTL resource provides novel insights into GWAS signals

In [297]:
a = gwas_coloc %>% filter(eqtl_type == "combinatorial" & category_annotation != "ambiguous" & !category_annotation %like% "fail")
a = a %>% filter(category_annotation %in% c("fetal-islet", "fetal-whole-pancreas", "fetal-adult"))

mod1 = fread("reviews/tables/Table_S12_ModuleSummary_Gene.txt", data.table = F) 
mod2 = fread("reviews/tables/Table_S12_ModuleSummary_AltSplicing.txt", data.table = F)
mod = rbind(mod1, mod2)

b1 = mod %>% filter(module_id %in% a$module_id)
table(b1$category_annotation)

message(paste("Total:", length(unique(b1$module_id))))
message(paste("Same:", length(unique(b1[b1$egene_overlap_category %in% c("A", "B"),]$module_id)), length(unique(b1[b1$egene_overlap_category %in% c("A", "B"),]$module_id)) / length(unique(b$module_id)) * 100))
message(paste("Partial:", length(unique(b1[b1$egene_overlap_category %in% c("C"),]$module_id)), length(unique(b1[b1$egene_overlap_category %in% c("C"),]$module_id)) / length(unique(b1$module_id)) * 100))
message(paste("Different:", length(unique(b1[b1$egene_overlap_category %in% c("D", "E"),]$module_id)), length(unique(b1[b1$egene_overlap_category %in% c("D", "E"),]$module_id)) / length(unique(b1$module_id)) * 100))

table(b1$egene_overlap_category)


   fetal-adult    fetal-islet fetal-pancreas 
            21              4             11 

Total: 36

Same: 30 83.3333333333333

Partial: 1 2.77777777777778

Different: 5 13.8888888888889




 A  B  C  D  E 
11 19  1  4  1 

# eASQTLs

In [215]:
eqtl_annot = fread("reviews/tables/Table_S11_eQTL_Annotations.txt", data.table = F) 
eqtl_annot_as = eqtl_annot %>% filter(eqtl_phenotype %like% "alt")

message("# easQTLs per tissue:")
table(eqtl_annot_as$tissue)

# easQTLs per tissue:




ipsc-ppc    islet pancreas 
    3959     4939     2077 

In [222]:
pairs = fread("reviews/tables/Table_S10_GraphInput_AltSplicing.txt", data.table = F)
message("# easQTL pairs")
nrow(pairs)

# easQTL pairs



In [213]:
message("# easQTL singletons")
data.frame(table(eqtl_annot_as[eqtl_annot_as$category_annotation %like% "single",]$category_annotation))

# easQTL singletons



Var1,Freq
<fct>,<int>
ipsc_ppc singleton,631
islet singleton,1522
pancreas singleton,431


In [231]:
eqtl_annot_as_pass = eqtl_annot_as %>% filter(!category_annotation %like% "ambi" & !category_annotation %like% "failed")

message("# easQTL modules")
length(unique(eqtl_annot_as_pass[!eqtl_annot_as_pass$category_annotation %like% "single",]$module_id))
length(unique(eqtl_annot_as_pass[!eqtl_annot_as_pass$eqtl_type %like% "single",]$module_id))

mod = fread("reviews/tables/Table_S12_ModuleSummary_AltSplicing.txt", data.table = F)  %>% filter(!category_annotation %like% "module_failed" & !category_annotation %like% "ambi")
nrow(mod)


# easQTL modules



In [236]:
message("# tissue-unique easQTL modules")
length(unique(eqtl_annot_as_pass[eqtl_annot_as_pass$category_annotation %like% "unique",]$module_id))
table(mod[mod$category_annotation %like% "unique",]$category_annotation)

message("# easQTL in tissue-unique easQTL modules")
a = eqtl_annot_as_pass[eqtl_annot_as_pass$category_annotation %like% "unique",]
table(a$tissue)

# tissue-unique easQTL modules




ipsc_ppc-unique    islet-unique pancreas-unique 
            124             203              17 

# easQTL in tissue-unique easQTL modules




ipsc-ppc    islet pancreas 
     266      452       37 

In [241]:
message("# adult-shared easQTL modules")
length(unique(eqtl_annot_as_pass[eqtl_annot_as_pass$category_annotation == "adult-shared",]$module_id))
length(unique(mod[mod$category_annotation == "adult-shared",]$module_id))

# adult-shared easQTL modules



In [260]:
message("# modules shared between iPSC-PPC and adult")
a = eqtl_annot_as_pass[eqtl_annot_as_pass$category_annotation %in% c("fetal-islet", "fetal-pancreas", "fetal-adult"),] %>% select(category_annotation, module_id) %>% distinct()
table(a$category_annotation)
length(unique(a$module_id))


a = mod[mod$category_annotation %in% c("fetal-islet", "fetal-pancreas", "fetal-adult"),]
table(a$category_annotation)
nrow(a)

message("# easQTLs per tissue in the 411 modules")
a = eqtl_annot_as_pass[eqtl_annot_as_pass$category_annotation %in% c("fetal-islet", "fetal-pancreas", "fetal-adult"),]
table(a$tissue)



# modules shared between iPSC-PPC and adult




   fetal-adult    fetal-islet fetal-pancreas 
           214            139             58 


   fetal-adult    fetal-islet fetal-pancreas 
           214            139             58 

# easQTLs per tissue in the 411 modules




ipsc-ppc    islet pancreas 
     802      561      318 

In [267]:
message("# iPSC-PPC-unique easQTLs")
a = eqtl_annot_as_pass %>% filter(category_annotation %like% "single" | category_annotation %like% "unique") %>% filter(tissue == "ipsc-ppc")
nrow(a)

table(a$eqtl_type)

message("# iPSC-PPC-unique easQTLs shared with adult")
a = eqtl_annot_as_pass %>% filter(!category_annotation %like% "single"  & !category_annotation %like% "unique") %>% filter(tissue == "ipsc-ppc")
nrow(a)

message("# iPSC-PPC failed criteria")
a = eqtl_annot_as %>% filter(category_annotation %like% "ambi" | category_annotation %like% "failed") %>% filter(tissue == "ipsc-ppc")
nrow(a)

# iPSC-PPC-unique easQTLs




combinatorial     singleton 
          266           631 

# iPSC-PPC-unique easQTLs shared with adult



# iPSC-PPC failed criteria



## Characterization of fetal-adult-shared eASQTL modules in iPSC-PPC

In [279]:
a = mod[mod$category_annotation %in% c("fetal-islet", "fetal-pancreas", "fetal-adult"),]
table(a$category_annotation)
nrow(a)
table(a$egene_overlap_category)

nrow(a[a$egene_overlap_category %in% c("C", "D", "E"),])

a = eqtl_annot_as_pass %>% filter(module_id %in% a[a$egene_overlap_category %in% c("C", "D", "E"),]$module_id)
table(a$tissue)




   fetal-adult    fetal-islet fetal-pancreas 
           214            139             58 


  A   B   C   D   E 
149  85  93  57  27 


ipsc-ppc    islet pancreas 
     386      361      184 

# Methods

In [307]:
gwas_coloc = fread("reviews/tables/Table_S14_GWAS_Colocalization.txt", data.table = F) %>% filter(!category_annotation %like% "ambiguous" & !category_annotation %like% "failed")

message("# total gwas loci that colocalized with eQTL")
length(unique(gwas_coloc$gwas_locus_id))

message("# loci by eqtl type")
a = gwas_coloc %>% select(gwas_locus_id, eqtl_type) %>% distinct()
table(a$eqtl_type)

# total gwas loci that colocalized with eQTL



# loci by eqtl type




combinatorial     singleton 
          129           183 

In [315]:
message("# gwas loci in cred sets")
cred_sets = fread("reviews/tables/Table_S15_99CredibleSet_GWAS.txt", data.table = F)
length(unique(cred_sets$gwas_locus_id))

gwas_coloc = fread("reviews/tables/Table_S14_GWAS_Colocalization.txt", data.table = F) %>% filter(!category_annotation %like% "ambiguous" & !category_annotation %like% "failed")

message("check if there are non-overlapping loci")
cred_sets %>% filter(!gwas_locus_id %in% gwas_coloc$gwas_locus_id)
gwas_coloc %>% filter(!gwas_locus_id %in% cred_sets$gwas_locus_id)



# gwas loci in cred sets



check if there are non-overlapping loci

“number of rows of result is not a multiple of vector length (arg 2)”
“number of rows of result is not a multiple of vector length (arg 2)”
“number of rows of result is not a multiple of vector length (arg 2)”
“number of rows of result is not a multiple of vector length (arg 2)”


gwas_locus_id,eqtl_id,trait_id,description,snp,pvalues.df1,MAF.df1,N.df1,V.df1,z.df1,⋯,lABF.df1,pvalues.df2,MAF.df2,N.df2,V.df2,z.df2,r.df2,lABF.df2,internal.sum.lABF,SNP.PP.H4
<chr>,<chr>,<chr>,<chr>,<chr>,<dbl>,<dbl>,<int>,<dbl>,<dbl>,⋯,<dbl>,<dbl>,<dbl>,<int>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>


“number of rows of result is not a multiple of vector length (arg 2)”
“number of rows of result is not a multiple of vector length (arg 2)”
“number of rows of result is not a multiple of vector length (arg 2)”
“number of rows of result is not a multiple of vector length (arg 2)”


gwas_locus_id,eqtl_id,transcript_id,gene_id,gene_name,eqtl_phenotype,tissue,eqtl_type,module_id,trait_id,⋯,likely_model,topsnp,topsnp_pp,topsnp_gwas_pval,topsnp_eqtl_pval,islet_egene_overlap,pancreas_egene_overlap,category_annotation,used_to_finemap,cs_size
<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,⋯,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<chr>,<chr>,<chr>,<lgl>,<int>
