In [17]:
library(data.table)
library(dplyr)
library(ggpubr)
library(VennDiagram)
# NOTE(review): hard-coded absolute working directory; the relative file names
# passed to load()/save() later in this notebook resolve against it.
setwd("/gpfs/commons/home/mschertzer/for_Karin")

In [18]:
# List the .rda files in the working directory. `pattern` is a regular
# expression, so the dot is escaped and the extension is anchored at the end
# of the file name (an unanchored ".rda" also matches names such as "xrda.txt").
list.files(pattern = "\\.rda$")

In [19]:
# RBFOX2 minicutter output for the WT (empty vector) sample.
# The .rda is expected to provide an object named `juncs_recluster`.
load(file = "rb2_empty_hesc_all_lfc_junctions.rda")
wt <- juncs_recluster
wt[["experiment"]] <- "WT"

In [20]:
# RBFOX2 minicutter output for the total knockdown sample.
# The .rda is expected to provide an object named `juncs_recluster`.
load(file = "rb2_g1_hesc_all_lfc_junctions.rda")
total_rbfox2 <- juncs_recluster
total_rbfox2[["experiment"]] <- "total_rbfox2"

In [21]:
# RBFOX2 minicutter output for the 208 knockdown sample.
# The .rda is expected to provide an object named `juncs_recluster`.
load(file = "rb2_g3_hesc_all_lfc_junctions.rda")
g3_208_rbfox2 <- juncs_recluster
g3_208_rbfox2[["experiment"]] <- "g3_208_rbfox2"

In [22]:
# RBFOX2 minicutter output for the 209 knockdown sample.
# The .rda is expected to provide an object named `juncs_recluster`.
# NOTE(review): the file name says "g5" while the variable and label say "g3" -
# confirm the naming is intended.
load(file = "rb2_g5_hesc_all_lfc_junctions.rda")
g3_209_rbfox2 <- juncs_recluster
g3_209_rbfox2[["experiment"]] <- "g3_209_rbfox2"

In [23]:
# Stack the four per-experiment junction tables into one long table
cas13_kd <- do.call(
  rbind,
  list(wt, total_rbfox2, g3_208_rbfox2, g3_209_rbfox2)
)

In [24]:
# Build a coordinate-based junction ID: chrom_start_end_strand
cas13_kd$junction_id <- paste(
  cas13_kd[["chrom"]],
  cas13_kd[["start"]],
  cas13_kd[["end"]],
  cas13_kd[["strand"]],
  sep = "_"
)
# Retain only junctions supported by at least 10 reads in the WT experiment
juncs_keep <- cas13_kd %>%
  dplyr::filter(experiment == "WT", readcount >= 10) %>%
  dplyr::pull(junction_id) %>%
  unique()
cas13_kd <- cas13_kd %>%
  dplyr::filter(junction_id %in% juncs_keep)
# the original `name` column is dropped; junction_id is used from here on
cas13_kd[["name"]] <- NULL
head(cas13_kd)

chrom,start,end,readcount,strand,cluster_idx,usage_ratio,experiment,junction_id
<chr>,<dbl>,<dbl>,<dbl>,<chr>,<dbl>,<dbl>,<chr>,<chr>
chr1,14829,14970,16,-,10413,1.0,WT,chr1_14829_14970_-
chr1,136610,136709,17,-,10424,0.4473684,WT,chr1_136610_136709_-
chr1,137329,492191,30,+,9,1.0,WT,chr1_137329_492191_+
chr1,146509,146642,13,-,10429,1.0,WT,chr1_146509_146642_-
chr1,187577,187755,10,-,10435,0.6666667,WT,chr1_187577_187755_-
chr1,497299,498399,37,-,10446,0.925,WT,chr1_497299_498399_-


In [25]:
# Tally how many rows each junction ID has in the combined table
tt <- as.data.table(table(cas13_kd$junction_id))
tt <- tt[order(-N)]
tt <- dplyr::filter(tt, N == 4)
head(tt)
# Keep only junctions with exactly four rows (intended: one per condition),
# so that their change across conditions can be evaluated
cas13_kd <- dplyr::filter(cas13_kd, junction_id %in% tt[["V1"]])

V1,N
<chr>,<int>
chr1_100007156_100011365_+,4
chr1_100011533_100015302_+,4
chr1_100015420_100017682_+,4
chr1_100017815_100022386_+,4
chr1_100038316_100049909_+,4
chr1_100043229_100049909_+,4


In [26]:
# Working copy of the filtered junction table used by the cells below
minicutter_output <- cas13_kd
head(minicutter_output, n = 6L)

chrom,start,end,readcount,strand,cluster_idx,usage_ratio,experiment,junction_id
<chr>,<dbl>,<dbl>,<dbl>,<chr>,<dbl>,<dbl>,<chr>,<chr>
chr1,14829,14970,16,-,10413,1.0,WT,chr1_14829_14970_-
chr1,136610,136709,17,-,10424,0.4473684,WT,chr1_136610_136709_-
chr1,137329,492191,30,+,9,1.0,WT,chr1_137329_492191_+
chr1,146509,146642,13,-,10429,1.0,WT,chr1_146509_146642_-
chr1,187577,187755,10,-,10435,0.6666667,WT,chr1_187577_187755_-
chr1,497299,498399,37,-,10446,0.925,WT,chr1_497299_498399_-


In [27]:
# Collect the distinct junction IDs in the dataset and report how many there are
all_juncs <- unique(minicutter_output[["junction_id"]])
length(all_juncs)

In [28]:
# Differential splicing (DS) per junction: within each junction_id, divide every
# experiment's usage_ratio by the usage_ratio of that junction's WT row.
# The result is explicitly ungrouped so later verbs on DS_juncs do not silently
# run per junction.
DS_juncs <- minicutter_output %>%
  dplyr::group_by(junction_id) %>%
  dplyr::mutate(
    usage_ratio_comparison = usage_ratio / usage_ratio[experiment == "WT"]
  ) %>%
  dplyr::ungroup() %>%
  dplyr::select(experiment, usage_ratio_comparison, junction_id)

In [29]:
# preview the first rows of the per-junction comparison table
head(DS_juncs, n = 6L)

experiment,usage_ratio_comparison,junction_id
<chr>,<dbl>,<chr>
WT,1,chr1_14829_14970_-
WT,1,chr1_136610_136709_-
WT,1,chr1_137329_492191_+
WT,1,chr1_146509_146642_-
WT,1,chr1_187577_187755_-
WT,1,chr1_497299_498399_-


In [30]:
# preview the junction table before merging in the comparison column
head(minicutter_output, n = 6L)

chrom,start,end,readcount,strand,cluster_idx,usage_ratio,experiment,junction_id
<chr>,<dbl>,<dbl>,<dbl>,<chr>,<dbl>,<dbl>,<chr>,<chr>
chr1,14829,14970,16,-,10413,1.0,WT,chr1_14829_14970_-
chr1,136610,136709,17,-,10424,0.4473684,WT,chr1_136610_136709_-
chr1,137329,492191,30,+,9,1.0,WT,chr1_137329_492191_+
chr1,146509,146642,13,-,10429,1.0,WT,chr1_146509_146642_-
chr1,187577,187755,10,-,10435,0.6666667,WT,chr1_187577_187755_-
chr1,497299,498399,37,-,10446,0.925,WT,chr1_497299_498399_-


In [31]:
# Attach usage_ratio_comparison to the full minicutter table, matching rows on
# experiment and junction_id
minicutter_output <- merge(
  x = minicutter_output,
  y = DS_juncs,
  by = c("experiment", "junction_id")
)
head(minicutter_output, n = 6L)

Unnamed: 0_level_0,experiment,junction_id,chrom,start,end,readcount,strand,cluster_idx,usage_ratio,usage_ratio_comparison
Unnamed: 0_level_1,<chr>,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<chr>,<dbl>,<dbl>,<dbl>
1,g3_208_rbfox2,chr1_100007156_100011365_+,chr1,100007156,100011365,36,+,4241,0.3103448,2.2489372
2,g3_208_rbfox2,chr1_100011533_100015302_+,chr1,100011533,100015302,43,+,4243,0.5890411,0.5890411
3,g3_208_rbfox2,chr1_100015420_100017682_+,chr1,100015420,100017682,13,+,4244,1.0,20.3461538
4,g3_208_rbfox2,chr1_100017815_100022386_+,chr1,100017815,100022386,26,+,4243,0.3561644,5.0921881
5,g3_208_rbfox2,chr1_100038316_100049909_+,chr1,100038316,100049909,78,+,4246,0.8297872,2.4118541
6,g3_208_rbfox2,chr1_100043229_100049909_+,chr1,100043229,100049909,11,+,4246,0.1170213,3.869016


In [35]:
# For each (experiment, cluster_idx) pair, sum the read counts of the junctions
# in that cluster, then attach the total to every row of the junction table.
# `.groups = "drop"` returns an ungrouped summary and silences the
# "summarise() has grouped output" message.
# NOTE(review): the sum runs over the junctions still present in
# minicutter_output after the earlier filters - confirm that is the intended
# cluster total.
cluster_counts <- minicutter_output %>%
  dplyr::group_by(experiment, cluster_idx) %>%
  dplyr::summarize(cluster_counts = sum(readcount), .groups = "drop")
minicutter_output <- merge(
  minicutter_output,
  cluster_counts,
  by = c("experiment", "cluster_idx")
)

[1m[22m`summarise()` has grouped output by 'experiment'. You can override using the
`.groups` argument.


In [36]:
# preview the WT rows only
minicutter_output %>%
  dplyr::filter(experiment == "WT") %>%
  head()

Unnamed: 0_level_0,experiment,cluster_idx,junction_id,chrom,start,end,readcount,strand,usage_ratio,usage_ratio_comparison,cluster_counts
Unnamed: 0_level_1,<chr>,<dbl>,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<chr>,<dbl>,<dbl>,<dbl>
1,WT,100,chr1_1051369_1051453_+,chr1,1051369,1051453,76,+,0.3179916,1,76
2,WT,1000,chr1_12253826_12256333_+,chr1,12253826,12256333,63,+,0.4012739,1,63
3,WT,100002,chr19_48446192_48446385_+,chr19,48446192,48446385,295,+,0.9966216,1,295
4,WT,100003,chr19_48446502_48446681_+,chr19,48446502,48446681,305,+,1.0,1,305
5,WT,100004,chr19_48446843_48450313_+,chr19,48446843,48450313,363,+,1.0,1,363
6,WT,100007,chr19_48450526_48450666_+,chr19,48450526,48450666,263,+,1.0,1,263


In [37]:
# preview the table with the cluster_counts column attached
head(minicutter_output, n = 6L)

Unnamed: 0_level_0,experiment,cluster_idx,junction_id,chrom,start,end,readcount,strand,usage_ratio,usage_ratio_comparison,cluster_counts
Unnamed: 0_level_1,<chr>,<dbl>,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<chr>,<dbl>,<dbl>,<dbl>
1,g3_208_rbfox2,10000,chr1_19144091_19144786_-,chr1,19144091,19144786,166,-,1.0,3.1272085,166
2,g3_208_rbfox2,100004,chr2_102719207_102723700_-,chr2,102719207,102723700,17,-,1.0,4.3076923,17
3,g3_208_rbfox2,100006,chr2_102723878_102726845_-,chr2,102723878,102726845,4,-,0.16,1.4933333,25
4,g3_208_rbfox2,100006,chr2_102723909_102726845_-,chr2,102723909,102726845,21,-,0.84,3.2441379,25
5,g3_208_rbfox2,100007,chr2_102726974_102731025_-,chr2,102726974,102731025,19,-,1.0,2.8717949,19
6,g3_208_rbfox2,100008,chr2_102731068_102732322_-,chr2,102731068,102732322,5,-,0.2380952,0.4095238,13


In [39]:
# Write the annotated junction table to an .rda file in the working directory
save(list = "minicutter_output", file = "RBFOX2_KD_DS_analysis.rda")

In [40]:
# print the current working directory, i.e. where relative paths such as the
# saved .rda file resolve
getwd()

In [None]:
# Drop the WT rows so only the knockdown experiments remain
DS_juncs <- DS_juncs %>%
  dplyr::filter(experiment != "WT")

In [None]:
# Split junctions by their usage ratio relative to WT: more than 2x (up) and
# less than 0.5x (down). One Venn diagram is drawn for each set to see which
# junctions consistently go up or down across knockdowns.
# DS_juncs only carries `usage_ratio_comparison` (not `usage_ratio`), so the
# thresholds are applied to that column.
DS_up <- dplyr::filter(DS_juncs, usage_ratio_comparison > 2)
DS_down <- dplyr::filter(DS_juncs, usage_ratio_comparison < 0.5)

In [None]:
# inspect all rows for a single junction of interest
minicutter_output %>%
  dplyr::filter(junction_id == "chr19_48638144_48638888_-")

In [None]:
# Unique junction IDs present in DS_up for each knockdown condition
junction_ids_condition1 <- DS_up %>%
  dplyr::filter(experiment == "g3_208_rbfox2") %>%
  dplyr::pull(junction_id) %>%
  unique()
junction_ids_condition2 <- DS_up %>%
  dplyr::filter(experiment == "g3_209_rbfox2") %>%
  dplyr::pull(junction_id) %>%
  unique()
junction_ids_condition3 <- DS_up %>%
  dplyr::filter(experiment == "total_rbfox2") %>%
  dplyr::pull(junction_id) %>%
  unique()

# Build the three-set Venn diagram as a grid object (filename = NULL means no
# image file is written) and draw it
venn_result <- venn.diagram(
  x = list(
    g3_208_rbfox2 = junction_ids_condition1,
    g3_209_rbfox2 = junction_ids_condition2,
    total_rbfox2 = junction_ids_condition3
  ),
  filename = NULL,
  fill = c("dodgerblue", "goldenrod1", "darkorange1"),
  alpha = 0.25,
  main = "Venn diagram for junctions with usage_ratios > 2 compared to WT"
)

grid.draw(venn_result)

In [None]:
# Unique junction IDs present in DS_down for each knockdown condition
junction_ids_condition1 <- DS_down %>%
  dplyr::filter(experiment == "g3_208_rbfox2") %>%
  dplyr::pull(junction_id) %>%
  unique()
junction_ids_condition2 <- DS_down %>%
  dplyr::filter(experiment == "g3_209_rbfox2") %>%
  dplyr::pull(junction_id) %>%
  unique()
junction_ids_condition3 <- DS_down %>%
  dplyr::filter(experiment == "total_rbfox2") %>%
  dplyr::pull(junction_id) %>%
  unique()

# Build the three-set Venn diagram as a grid object (filename = NULL means no
# image file is written) and draw it
venn_result <- venn.diagram(
  x = list(
    g3_208_rbfox2 = junction_ids_condition1,
    g3_209_rbfox2 = junction_ids_condition2,
    total_rbfox2 = junction_ids_condition3
  ),
  filename = NULL,
  fill = c("dodgerblue", "goldenrod1", "darkorange1"),
  alpha = 0.25,
  main = "Venn diagram for junctions with usage_ratios < 0.5 compared to WT"
)

grid.draw(venn_result)