In [1]:
%%javascript
require(["codemirror/keymap/sublime", "notebook/js/cell", "base/js/namespace"],
    function(sublime_keymap, cell, IPython) {
        cell.Cell.options_default.cm_config.keyMap = 'sublime';
        var cells = IPython.notebook.get_cells();
        for(var cl=0; cl< cells.length ; cl++){
            cells[cl].code_mirror.setOption('keyMap', 'sublime');
        }
    }
);

<IPython.core.display.Javascript object>

In [2]:
# change the cell width
# `display` and `HTML` come from the public `IPython.display` module; importing
# them from the internal `IPython.core.display` module is a deprecated path.
from IPython.display import display, HTML

# Inject a CSS rule that makes the notebook `.container` element 85% wide.
display(HTML("<style>.container { width:85% !important; }</style>"))

In [3]:
%load_ext autoreload
%autoreload 2
%matplotlib inline
%config InlineBackend.print_figure_kwargs={'facecolor' : "w"}
from copy import copy
import warnings
warnings.filterwarnings('ignore')
# make matplotlib pdf-s text recognizable by evil-Adobe
import matplotlib
matplotlib.rcParams['pdf.fonttype'] = 42
matplotlib.rcParams['ps.fonttype'] = 42
from more_itertools import chunked
# import all relevant func and modules from a companion .py file
from supp_lib import *
# import all samples as dictionary ...
from samples import *

#### Get the gene annotation the same way we used it in the stackups (the RefSeq-based one, from HiGlass ...)

In [4]:
# Load the gene annotation table: a tab-separated file without a header row,
# so the column names are supplied explicitly, in file order.
genes_df = pd.read_csv(
    "./gene_annot_data/hg19/geneAnnotationsExonUnions.bed",
    sep="\t",
    header=None,
    names=[
        "chr",
        "txStart",
        "txEnd",
        "geneName",
        "citationCount",
        "strand",
        "union_geneId",  # "refseqId",
        "geneId",
        "geneType",
        "geneDesc",
        "cdsStart",
        "cdsEnd",
        "exonStarts",
        "exonEnds",
    ],
)

### we'll be removing duplicated genes because they're likely unmappable and/or harder to deal with anyways ...
### there are of course exceptions, but will do it anyways:
geneId_grp = genes_df.groupby("geneId")
# Keep only the geneId groups that contain exactly one row; for such a group
# `.first()` is simply that row. `reset_index()` turns "geneId" back into a column.
genes_df = geneId_grp.first()[geneId_grp.size() == 1].reset_index()

#### Get expression data measured in TPMs

In [5]:
# #RSEM for siRNA (and 44 and 442)
# !scp ghpcc:/nl/umw_job_dekker/users/av90w/RNAseq/data/siRNA/report2546/rsem/mergedRSEM/merged_TPM_genes.tsv ./merged_TPM_genes_si.tsv
# #RSEM for clones (and 44 and 442)
# !scp ghpcc:/nl/umw_job_dekker/users/av90w/RNAseq/data/siRNA/report2547/rsem/mergedRSEM/merged_TPM_genes.tsv ./merged_TPM_genes_mut.tsv

In [10]:
# Directories that hold the per-comparison splicing coordinate tables.
rmats_dir_070921 = "./Rmats_AS_coordinates_070921"
rmats_dir_072221 = "./Rmats_AS_coordinates_072221"

# Maps a comparison label to the path of its tab-separated coordinate table.
# Insertion order is the order in which the comparisons are processed later.
splice_samples = {
    # mutants splicing events ...
    "mutCtrl-IAA_mutCtrl-NT": f"{rmats_dir_070921}/AAVS1_sg24_NT_AAVS1_sg24_IAA.output_gene_coords_071321.txt",
    "mutDDX55-IAA_mutCtrl-IAA": f"{rmats_dir_070921}/AAVS1_sg24_IAA_DDX55_sg2B_IAA+DDX55_sg27_IAA.output_gene_coords_070921.txt",
    "mutTAF5L-IAA_mutCtrl-IAA": f"{rmats_dir_070921}/AAVS1_sg24_IAA_TAF5L_sg23_IAA+TAF5L_sg27_IAA.output_gene_coords_070921.txt",
    "mutDDX55-IAA_mutCtrl-NT": f"{rmats_dir_070921}/AAVS1_sg24_NT_DDX55_sg2B_IAA+DDX55_sg27_IAA.output_gene_coords_070921.txt",
    "mutDDX55-NT_mutCtrl-NT": f"{rmats_dir_070921}/AAVS1_sg24_NT_DDX55_sg2B_NT+DDX55_sg27_NT.output_gene_coords_070921.txt",
    "mutTAF5L-IAA_mutCtrl-NT": f"{rmats_dir_070921}/AAVS1_sg24_NT_TAF5L_sg23_IAA+TAF5L_sg27_IAA.output_gene_coords_070921.txt",
    "mutTAF5L-NT_mutCtrl-NT": f"{rmats_dir_070921}/AAVS1_sg24_NT_TAF5L_sg23_NT+TAF5L_sg27_NT.output_gene_coords_070921.txt",
    # siRNA splicing events ...
    "siDDX55-IAA_siCtrl-IAA": f"{rmats_dir_072221}/S442_siCtrl_IAA_S442_siDDX55_IAA.output_gene_coords_072221.txt",
    "siTAF5L-IAA_siCtrl-IAA": f"{rmats_dir_072221}/S442_siCtrl_IAA_S442_siTAF5L_IAA.output_gene_coords_072221.txt",
    "siCtrl-IAA_siCtrl-NT": f"{rmats_dir_072221}/S442_siCtrl_NT_S442_siCtrl_IAA.output_gene_coords_072221.txt",
    "siDDX55-IAA_siCtrl-NT": f"{rmats_dir_072221}/S442_siCtrl_NT_S442_siDDX55_IAA.output_gene_coords_072221.txt",
    # NOTE(review): this is the only key carrying an extra "S442_" in its label
    # (every sibling reads "<si...>_siCtrl-..."); looks like a copy-paste leftover,
    # kept as-is because other cells/files may rely on the name - TODO confirm.
    "siDDX55-NT_S442_siCtrl-NT": f"{rmats_dir_072221}/S442_siCtrl_NT_S442_siDDX55_NT.output_gene_coords_072221.txt",
    "siTAF5L-IAA_siCtrl-NT": f"{rmats_dir_072221}/S442_siCtrl_NT_S442_siTAF5L_IAA.output_gene_coords_072221.txt",
    "siTAF5L-NT_siCtrl-NT": f"{rmats_dir_072221}/S442_siCtrl_NT_S442_siTAF5L_NT.output_gene_coords_072221.txt",
    # main CTCF related libraries ...
    "noCTCF_CTCF": f"{rmats_dir_070921}/S442_NT_S442_IAA.output_gene_coords_070921.txt",
    "noCTCF_CTCF-noTIR1": f"{rmats_dir_070921}/S44_NT_S442_IAA.output_gene_coords_070921.txt",
    # ultimate control - shouldn't have any changes ...
    "CTCF-noTIR1-IAA_CTCF-noTIR1": f"{rmats_dir_070921}/S44_NT_S44_IAA.output_gene_coords_071321.txt",
}

In [11]:
for k,v in splice_samples.items():
    df = pd.read_csv(v,sep="\t")
    print(f" saving bed and bigbed for {k} splicing events ...")
    df[df["sig"] == "significant"][["chrom","start","end"]].to_csv(f"splicing_{k}.bed", index=False, header=None, sep="\t")
    ! bedSort splicing_{k}.bed splicing_{k}_sort.bed
    ! bedToBigBed -tab -type=bed3 splicing_{k}_sort.bed hg19.sizes splicing_{k}.bb

 saving bed and bigbed for mutCtrl-IAA_mutCtrl-NT splicing events ...
pass1 - making usageList (23 chroms): 0 millis
pass2 - checking and writing primary data (1303 records, 3 fields): 5 millis
 saving bed and bigbed for mutDDX55-IAA_mutCtrl-IAA splicing events ...
pass1 - making usageList (23 chroms): 0 millis
pass2 - checking and writing primary data (2163 records, 3 fields): 3 millis
 saving bed and bigbed for mutTAF5L-IAA_mutCtrl-IAA splicing events ...
pass1 - making usageList (23 chroms): 0 millis
pass2 - checking and writing primary data (2721 records, 3 fields): 4 millis
 saving bed and bigbed for mutDDX55-IAA_mutCtrl-NT splicing events ...
pass1 - making usageList (23 chroms): 0 millis
pass2 - checking and writing primary data (2510 records, 3 fields): 4 millis
 saving bed and bigbed for mutDDX55-NT_mutCtrl-NT splicing events ...
pass1 - making usageList (23 chroms): 0 millis
pass2 - checking and writing primary data (2329 records, 3 fields): 5 millis
 saving bed and bigbed fo