In [1]:
suppressMessages(library(tidyverse))
suppressMessages(library(data.table))
suppressMessages(library(glue))
suppressMessages(library(ggpointdensity))
suppressMessages(library(magrittr))

In [2]:
# Set the repr plot width, height and resolution options used for later plots.
options(
  repr.plot.width = 5,
  repr.plot.height = 5,
  repr.plot.res = 180
)

In [3]:
suppressMessages(library(cowplot))
# Make the cowplot theme (font_size = 14) the default theme for later plots.
theme_set(
  theme_cowplot(font_size = 14)
)

In [4]:
suppressMessages(library(patchwork))
suppressMessages(library(ggrepel))

In [5]:
# Show the current working directory before it is changed in the next cell.
getwd()

In [6]:
# Switch to the analysis directory; the relative paths used in later cells
# ('../data/...', '../code/...') are resolved from here.
# NOTE(review): hard-coded, user-specific path -- confirm it exists on the
# machine running this notebook.
setwd("~/cdai/SpliFi/analysis")

In [7]:
# Contrast to analyse, and the path of the RDS file that holds its results.
contr <- "Brain-Cortex_v_Muscle-Skeletal"
rds <- paste0("../data/ds_v_dge/", contr, "_data.rds")

In [8]:
# Load the saved object for the selected contrast from the path in `rds`.
data <- readRDS(file = rds)

In [9]:
# List the names of the elements stored in the loaded object.
data %>% names()

In [10]:
# Pull the two tables used in the following cells out of the loaded object.
ds <- data[["ds"]]
dge <- data[["dge"]]

In [11]:
# Preview rows 1 to 5 of each table.
ds[seq_len(5L)]
dge[seq_len(5L)]

intron,cluster,itype,ctype,gene_name,gene_id,logef,Muscle-Skeletal,Brain-Cortex,deltapsi,status,loglr,df,p,p.adjust
<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>,<dbl>,<int>,<dbl>,<dbl>
chr1:930336:931038:clu_11_+,chr1:clu_11_+,PR,PR,SAMD11,ENSG00000187634.11,0.001370763,0.5276685,0.5283518,0.0006832305,Success,0.0001028596,1,0.9885564,0.9923067
chr1:931089:935771:clu_11_+,chr1:clu_11_+,PR,PR,SAMD11,ENSG00000187634.11,-0.001370763,0.4723315,0.4716482,-0.0006832305,Success,0.0001028596,1,0.9885564,0.9923067
chr1:939129:939271:clu_12_+,chr1:clu_12_+,PR,PR,SAMD11,ENSG00000187634.11,0.033612392,0.3507898,0.3662486,0.0154587644,Success,0.0288890045,1,0.810043,0.846778
chr1:939129:939274:clu_12_+,chr1:clu_12_+,PR,PR,SAMD11,ENSG00000187634.11,-0.033612392,0.6492102,0.6337514,-0.0154587644,Success,0.0288890045,1,0.810043,0.846778
chr1:939412:941143:clu_13_+,chr1:clu_13_+,PR,PR,SAMD11,ENSG00000187634.11,-0.022419411,0.2376936,0.2324409,-0.0052526823,Success,1.3652655309,2,0.2553129,0.3191507


gene_id,baseMean,log2FoldChange,lfcSE,stat,pvalue,padj
<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
ENSG00000168530.15,105416.002,-13.50923,0.2121247,-63.68533,0,0
ENSG00000109061.9,245404.863,-13.502,0.2338176,-57.74586,0,0
ENSG00000125414.18,184805.529,-13.45354,0.2456384,-54.76969,0,0
ENSG00000240045.1,18618.845,-13.419,0.1741738,-77.04373,0,0
ENSG00000250978.5,2387.255,-13.38848,0.3289347,-40.70254,0,0


In [12]:
# Preview rows 1 to 5 of the `dge` table.
dge[seq_len(5L)]

gene_id,baseMean,log2FoldChange,lfcSE,stat,pvalue,padj
<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
ENSG00000168530.15,105416.002,-13.50923,0.2121247,-63.68533,0,0
ENSG00000109061.9,245404.863,-13.502,0.2338176,-57.74586,0,0
ENSG00000125414.18,184805.529,-13.45354,0.2456384,-54.76969,0,0
ENSG00000240045.1,18618.845,-13.419,0.1741738,-77.04373,0,0
ENSG00000250978.5,2387.255,-13.38848,0.3289347,-40.70254,0,0


In [13]:
# Show the rows of `ds` that belong to cluster chr1:clu_261_+.
ds[cluster %in% "chr1:clu_261_+"]

intron,cluster,itype,ctype,gene_name,gene_id,logef,Muscle-Skeletal,Brain-Cortex,deltapsi,status,loglr,df,p,p.adjust
<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>,<dbl>,<int>,<dbl>,<dbl>
chr1:15651895:15656420:clu_261_+,chr1:clu_261_+,UP,"PR,UP",DDI2,ENSG00000197312.11,-2.090066,0.513989,0.01591951,-0.4980695,Success,34.69655,1,8.067068e-17,3.546202e-16
chr1:15651895:15656616:clu_261_+,chr1:clu_261_+,PR,"PR,UP",DDI2,ENSG00000197312.11,2.090066,0.486011,0.98408049,0.4980695,Success,34.69655,1,8.067068e-17,3.546202e-16


In [15]:
# Join the `ds` rows with itype "UP" and ctype "PR,UP" to the `dge` results by
# gene_id, then keep rows passing both the `ds` thresholds (p.adjust, deltapsi)
# and the `dge` thresholds (log2FoldChange, dge_padj).
#
# The `ds` columns are selected by name instead of by position
# (previously -c(7:9, 11:13)), so the selection does not silently pick the
# wrong columns if the column order of `ds` ever changes.
ds_cols <- c(
  "intron", "cluster", "itype", "ctype", "gene_name", "gene_id",
  "deltapsi", "p", "p.adjust"
)
inner_join(
  x = ds[ctype == "PR,UP" & itype == "UP", .SD, .SDcols = ds_cols],
  y = dge[, .(gene_id, log2FoldChange, dge_p = pvalue, dge_padj = padj)],
  by = "gene_id"
) %>%
  .[
    p.adjust < 1e-5 & abs(deltapsi) > 0.3 &
      abs(log2FoldChange) > 1 & dge_padj < 1e-3
  ]

intron,cluster,itype,ctype,gene_name,gene_id,deltapsi,p,p.adjust,log2FoldChange,dge_p,dge_padj
<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
chr1:6212903:6213065:clu_111_+,chr1:clu_111_+,UP,"PR,UP",RNF207,ENSG00000158286.12,0.7185528,1.446719e-43,1.414418e-42,1.766427,1.430305e-50,3.664874e-50
chr1:15651895:15656420:clu_261_+,chr1:clu_261_+,UP,"PR,UP",DDI2,ENSG00000197312.11,-0.4980695,8.067068e-17,3.546202e-16,-2.509378,1.466086e-259,1.525591e-258
chr1:43583093:43588730:clu_690_+,chr1:clu_690_+,UP,"PR,UP",PTPRF,ENSG00000142949.16,-0.3574384,1.787567e-24,1.032042e-23,3.701421,5.925958e-171,3.860454e-170
chr1:109485509:109486056:clu_1179_+,chr1:clu_1179_+,UP,"PR,UP",ATXN7L2,ENSG00000162650.16,-0.3559785,5.613616e-20,2.780204e-19,-1.245866,3.844560e-38,8.511333e-38
chr1:109621266:109625661:clu_1187_+,chr1:clu_1187_+,UP,"PR,UP",AMPD2,ENSG00000116337.15,0.3965847,6.430093e-114,2.265287e-112,3.732341,0.000000e+00,0.000000e+00
chr1:151728463:151728765:clu_1455_+,chr1:clu_1455_+,UP,"PR,UP",RIIAD1,ENSG00000178796.12,-0.4157512,1.047935e-90,2.596041e-89,3.300104,4.750008e-91,1.803052e-90
chr1:180073908:180075021:clu_1855_+,chr1:clu_1855_+,UP,"PR,UP",CEP350,ENSG00000135837.15,-0.7107416,2.178390e-57,2.830769e-56,-1.862379,9.326018e-129,4.643086e-128
chr1:31655502:31656097:clu_2854_-,chr1:clu_2854_-,UP,"PR,UP",COL16A1,ENSG00000084636.17,0.3513873,1.337340e-48,1.458874e-47,2.060926,1.897270e-78,6.426671e-78
chr1:66921401:66924661:clu_3262_-,chr1:clu_3262_-,UP,"PR,UP",WDR78,ENSG00000152763.16,-0.5675298,1.084076e-20,5.498192e-20,1.619790,6.253355e-117,2.880485e-116
chr1:113700243:113700793:clu_3531_-,chr1:clu_3531_-,UP,"PR,UP",PHTF1,ENSG00000116793.15,-0.3926263,7.493384e-29,4.979307e-28,1.430844,3.361694e-54,8.969951e-54


In [17]:
# Apply the same join and thresholds as the cell above, then write the `intron`
# column of the surviving rows to a text file, one value per line.
#
# The `ds` columns are selected by name instead of by position
# (previously -c(7:9, 11:13)), so the selection does not silently pick the
# wrong columns if the column order of `ds` ever changes.
ds_cols <- c(
  "intron", "cluster", "itype", "ctype", "gene_name", "gene_id",
  "deltapsi", "p", "p.adjust"
)
inner_join(
  x = ds[ctype == "PR,UP" & itype == "UP", .SD, .SDcols = ds_cols],
  y = dge[, .(gene_id, log2FoldChange, dge_p = pvalue, dge_padj = padj)],
  by = "gene_id"
) %>%
  .[
    p.adjust < 1e-5 & abs(deltapsi) > 0.3 &
      abs(log2FoldChange) > 1 & dge_padj < 1e-3
  ] %>%
  .[, intron] %>%
  writeLines("2024-04-09-prep-sashimi-intronsList-toPlot.txt")

## BAM / BigWig coverage files

In [22]:
# List the contents of the GTEx BigWig resource directory.
dir("../code/resources/GTEx/BigWig")

# `pattern` is a regular expression, not a shell glob: "*.bw" would also match
# names that merely contain "bw" after any character. Anchor on the extension
# instead, and list the directory once rather than once per summary.
cortex_bw <- dir(
  "../code/resources/GTEx/BigWig/Brain-Cortex",
  pattern = "\\.bw$"
)
cortex_bw %>% head()
cortex_bw %>% length()

## Count / PSI files

In [40]:
# List the files in the Brain-Cortex results directory whose names match the
# leafcutter "perind.counts.noise_by_intron" pattern.
# No trailing `*` on the pattern: in a regular expression it would quantify the
# final "n" rather than act as a wildcard.
dir("../code/results/pheno/noisy/GTEx/Brain-Cortex") %>%
  .[str_detect(., "leafcutter.+perind\\.counts\\.noise_by_intron")]

# Peek at the gzipped counts file through the shell: `head` keeps the first
# lines and `cut` keeps the first five space-delimited fields.
glue(
  'zcat ../code/results/pheno/noisy/GTEx/Brain-Cortex/',
  'leafcutter_perind.counts.noise_by_intron.gz',
  ' | head | cut -d " " -f 1-5 '
) %>%
  system(intern = TRUE) %>%
  print()

 [1] "chrom GTEX-1117F.tsv.gz GTEX-111FC.tsv.gz GTEX-1128S.tsv.gz GTEX-117XS.tsv.gz"
 [2] "chr1:779092:803918:clu_1_+:IN 0/0 1/1 0/0 1/2"                                
 [3] "chr1:804222:804775:clu_2_+:IN 1/2 0/0 1/1 0/0"                                
 [4] "chr1:804222:807216:clu_2_+:IN 0/2 0/0 0/1 0/0"                                
 [5] "chr1:804966:807216:clu_2_+:IN 1/2 0/0 0/1 0/0"                                
 [6] "chr1:807323:809657:clu_3_+:IN 0/0 1/1 0/0 1/1"                                
 [7] "chr1:829104:841199:clu_4_+:IN 1/12 0/28 1/36 3/37"                            
 [8] "chr1:829104:847653:clu_4_+:IN 2/12 7/28 9/36 10/37"                           
 [9] "chr1:829104:851926:clu_4_+:IN 4/12 7/28 2/36 3/37"                            
[10] "chr1:847806:849483:clu_4_+:IN 1/12 1/28 6/36 2/37"                            
