In [1]:
# main imports
import gc
import os
import ast
import sys
import glob
import math
import json
import shutil
import random
import importlib
import subprocess

import itertools

import numpy as np
import pandas as pd
import seaborn as sns

import matplotlib.pyplot as plt
from matplotlib import animation
from matplotlib.lines import Line2D
from matplotlib.patches import Patch
from matplotlib.ticker import (MultipleLocator, FormatStrFormatter,
                               AutoMinorLocator)

from mpl_toolkits.axes_grid1.inset_locator import zoomed_inset_axes
from mpl_toolkits.axes_grid1.inset_locator import mark_inset

# import squarify
import seaborn as sns
# Notebook-wide matplotlib defaults: large figures with a serif font.
plt.rcParams.update({
    'figure.figsize': (20.0, 10.0),
    'font.family': "serif",
    'font.size': 24,
})
%matplotlib inline

In [2]:
# Enable IPython's autoreload extension. Mode 1 re-imports, before each cell
# execution, only the modules that were registered with %aimport.
%load_ext autoreload
%autoreload 1

# Put ./soft at the front of the module search path before importing from it
# (presumably where definitions.py lives -- confirm), then import `definitions`
# and register it for autoreload.
sys.path.insert(0, "./soft")
%aimport definitions

In [16]:
# paths
base_dir = "./"

# Every file this notebook produces goes under <base_dir>/sashimi/.
# exist_ok=True makes the cell safe to re-run and avoids the check-then-create
# race of testing os.path.exists() first.
outdir = base_dir+"sashimi/"
os.makedirs(outdir, exist_ok=True)

# external tools, resolved through PATH
igvtools_bin = "igvtools"
sashimi_bin = "sashimi.py"

# reference genome and annotation (paths relative to the working directory)
ref_fa_fname = "hg38_p12_ucsc.no_alts.no_fixs.fa"
chess3_gtf_fname = "chess3.0.1.primary.gtf"

# coverage and junction tracks that are subset to the selected genes below
all_tb_coverage_fname = "all.def.coverage.bedgraph"
all_tb_junctions_fname = "all.def.junctions.bed"

In [4]:
# arguments
# NOTE(review): no visible cell reads num_threads; presumably it is meant for
# whatever runs the generated command file in parallel -- confirm.
num_threads = 24

In [6]:
# Loci to plot, keyed by gene ID as it appears in the CHESS GTF.
# Each value is a two-item list:
#   [0] label used as the plot title (--title)
#   [1] transcript ID handed to sashimi.py via --compare
genes = {
    "CHS.21604": ["TMEM11", "CHS.21604.1"],
    "CHS.27581": ["GP6", "CHS.27581.3"],
}

In [10]:
# Extract the coordinates of all selected genes into one BED file
# (for use with samtools later).
genes_gtf_fname = outdir + "genes.gtf"
genes_bed_fname = outdir + "genes.bed"

# Step 1: subset the CHESS annotation to the selected gene IDs.
# The trailing False is a flag whose meaning is defined in
# definitions.subset_gtf and is not visible from this notebook.
definitions.subset_gtf(chess3_gtf_fname, genes_gtf_fname, set(genes), False)

# Step 2: convert that subset GTF to BED.
definitions.gtf_to_gene_bed(genes_gtf_fname, genes_bed_fname)

In [11]:
# Subset the full coverage track to the intervals overlapping a selected gene.
#   -wa : emit the original -a record
#   -u  : report each -a record at most once, however many -b records it overlaps
cmd = ["bedtools", "intersect",
       "-a", all_tb_coverage_fname,
       "-b", outdir+"genes.bed",
       "-wa", "-u"]
# The handle is only written to, so plain "w" is sufficient.
with open(outdir+"genes.cov.bedgraph", "w") as outFP:
    # check=True raises CalledProcessError on a non-zero exit, so a failed
    # bedtools run cannot silently leave an empty or truncated bedgraph that
    # later cells would consume.
    subprocess.run(cmd, stdout=outFP, check=True)

In [12]:
# Subset the full junction track to the junctions overlapping a selected gene.
#   -wa : emit the original -a record
#   -u  : report each -a record at most once, however many -b records it overlaps
cmd = ["bedtools", "intersect",
       "-a", all_tb_junctions_fname,
       "-b", outdir+"genes.bed",
       "-wa", "-u"]
# The handle is only written to, so plain "w" is sufficient.
with open(outdir+"genes.junctions.bed", "w") as outFP:
    # check=True raises CalledProcessError on a non-zero exit, so a failed
    # bedtools run cannot silently leave an empty or truncated junction file
    # that later cells would consume.
    subprocess.run(cmd, stdout=outFP, check=True)

In [13]:
# Give every selected gene its own directory holding a GTF and a BED of just
# that locus. Only the gene ID is needed here, so iterate over the keys.
for gid in genes:
    # setup gene directory (exist_ok makes re-running the cell a no-op)
    gdir = outdir+gid+"/"
    os.makedirs(gdir, exist_ok=True)

    # extract GTF of just that locus in CHESS
    definitions.subset_gtf(chess3_gtf_fname, gdir+gid+".gtf", {gid}, False)
    # now convert to bed
    definitions.gtf_to_gene_bed(gdir+gid+".gtf", gdir+gid+".bed")

In [17]:
# Write one sashimi.py command line per gene to a command file.
# This cell only writes the commands; nothing here executes them.
sashimi_fname = outdir+"sashimi.parallel"

# NOTE(review): arguments are joined with single spaces and are not
# shell-quoted, so a title or path containing whitespace would be split by
# whatever runs this file -- confirm inputs stay whitespace-free.
#
# The context manager closes the file even if building a command raises;
# the handle is only written to, so plain "w" is sufficient.
with open(sashimi_fname, "w") as sashimiFP:
    for gid, v in genes.items():
        # v is [plot title, transcript ID to compare against]
        mane_tid = v[1]

        # setup gene directory (exist_ok makes re-running the cell a no-op)
        gdir = outdir+gid+"/"
        os.makedirs(gdir, exist_ok=True)

        # All genes share the combined junction/coverage subsets; the
        # annotation and the output SVG are per gene.
        sashimi_cmd = [sashimi_bin,
                       "--compare", mane_tid,
                       "--sj", outdir+"genes.junctions.bed",
                       "--cov", outdir+"genes.cov.bedgraph",
                       "--title", v[0],
                       "--gtf", gdir+gid+".gtf",
                       "-o", gdir+gid+".svg"]
        sashimiFP.write(" ".join(sashimi_cmd)+"\n")