In [15]:
source("~/software/notebook_assist/packages.R")
source("~/software/notebook_assist/functions.R")
setwd("~/projects/Resource/iPSCORE_Multi-QTL_Resource/SourceData/")

### load SourceData 

a_data = fread("SOURCEDATA.FIGURE3A.txt",sep="\t",data.table=F)
b_data = fread("SOURCEDATA.FIGURE3B.txt",sep="\t",data.table=F)
c_data = fread("SOURCEDATA.FIGURE3C.txt",sep="\t",data.table=F)

## plot graphs

theme_bw2 = theme_bw() +
    theme(
        axis.text = element_text(size = 8),
        strip.text = element_text(size = 8),
        axis.title = element_text(size = 8),
        legend.text = element_text(size = 8),
        legend.title = element_text(size = 8),
        legend.position = "top"
    )

a_data$`iPSCORE Tissue` = ifelse(a_data$Tissue1 %in% c("iPSC","CVPC","PPC"), a_data$Tissue1,
                                  ifelse(a_data$Tissue2 %in% c("iPSC","CVPC","PPC"), a_data$Tissue2, NA))
a_data$`iPSCORE Tissue` = factor(a_data$`iPSCORE Tissue`, levels = c("iPSC","CVPC","PPC"))
a_data$`QTL Specificity` = factor(a_data$Type, levels = c("EDev","Adult","Shared"))
a_data %>% group_by(`QTL Specificity`) %>% summarise(mean_r = mean(r2))
    
a = ggplot(a_data, aes(`QTL Specificity`, r2, group=`QTL Specificity`)) + theme_bw2 + 
        ylim(c(0,0.4)) + xlab("Specificity") +
        ylab(bquote('Correlation'~(r^2))) +  geom_jitter(size=0.75) 
    
t.test(a_data$r2[ a_data$`QTL Specificity` == "EDev"],
      a_data$r2[ a_data$`QTL Specificity` == "Adult"])$p.value

t.test(a_data$r2[ a_data$`QTL Specificity` == "EDev"],
      a_data$r2[ a_data$`QTL Specificity` == "Shared"])$p.value

t.test(a_data$r2[ a_data$`QTL Specificity` == "Adult"],
      a_data$r2[ a_data$`QTL Specificity` == "Shared"])$p.value

b_data$Tissue = factor(b_data$Tissue, levels = c("iPSC","CVPC","PPC"))
b_data$Stage = factor(b_data$Stage, levels = c("EDev","Shared","No Association"))
b_data[ b_data$Stage == "EDev",]
b = ggplot(b_data[ b_data$Stage == "EDev",], aes(Tissue, Percent, group=Tissue)) + theme_bw2 + 
        ylab("Percent\nEDev-specific") + ylim(c(0,20)) +
        geom_bar(stat="identity") +
        geom_text(aes(label = Freq), vjust = 0, size=3)

c_data$Tissue = factor(c_data$Tissue, levels = c("iPSC","CVPC","PPC"))
c_data$Stage = factor(c_data$Stage_Specificity, levels = c("EDev","Shared"))


c = ggplot(c_data, aes(Tissue, abs(Effect_Size), fill = Stage)) + theme_bw2 + 
            theme(legend.title = element_text(size = 0)) + 
            ylab("Absolute\nEffect Size") + xlab("Tissue") +
            ylim(c(0,3)) +
            geom_boxplot(outlier.shape =  NA)



QTL Specificity,mean_r
<fct>,<dbl>
EDev,0.001222009
Adult,0.006931362
Shared,0.278946015


Unnamed: 0_level_0,Freq,Tissue,Stage,Total_eGenes,Percent
Unnamed: 0_level_1,<int>,<fct>,<fct>,<int>,<dbl>
1,855,CVPC,EDev,4837,17.676246
4,951,iPSC,EDev,9012,10.552597
7,240,PPC,EDev,5456,4.398827


In [20]:
psize(2,5)
png("~/projects/Resource/figures/Revisions/Figure3.png",units="in",res=300,height=5,width=2)
plot_grid(a,b,c,labels=c("a","b","c"),nrow=3)
dev.off()

“[1m[22mRemoved 32 rows containing non-finite outside the scale range
(`stat_boxplot()`).”
