# Wrapper notebook for submitting the RBP maps script to TSCC

In [9]:
import pandas as pd
import os
import json
import yaml
import glob
# import rethinkdb as r
from collections import defaultdict
from qtools import Submitter
from encode import manifest_helpers as m

from tqdm import tnrange, tqdm_notebook
# Allow up to 10000 characters per column when pandas displays a DataFrame,
# so long values (the manifests hold file paths) are not truncated.
pd.set_option("display.max_colwidth", 10000)


# Define programs

In [15]:
# Python script that every generated command line invokes.
density_runner = '/home/bay001/projects/codebase/rbp-maps/maps/plot_density.py'

# Settings for each kind of run, looked up via the `read_type` chosen in the
# submission cells further down:
#   output_dir    - directory the output images are written to
#   clip_manifest - tab-delimited manifest listing the CLIP datasets
#   prefix        - label used in the bash script / job names
#   peak_dir      - ('peak' only) directory globbed for *.bb peak files
params = dict(
    whole_read=dict(
        output_dir='/home/bay001/projects/maps_20160420/analysis/meta_utr_tpm1',
        clip_manifest='/home/bay001/projects/maps_20160420/permanent_data/ALLDATASETS_submittedonly.txt',
        prefix='whole_read',
    ),
    peak=dict(
        peak_dir='/projects/ps-yeolab3/bay001/maps/current_annotations/se_peak_bigbeds/',
        output_dir='/home/bay001/projects/maps_20160420/analysis/peak_meta_utr/',
        clip_manifest='/home/bay001/projects/maps_20160420/permanent_data/ALLDATASETS_submittedonly.txt',
        prefix='peak_bb',
    ),
)

# Define static annotations

In [18]:
# Date stamp used to name the directory that holds this run's bash scripts.
current_date = '11-30-2017'

# 5' and 3' UTR annotations in BED format, one file per cell line
# (HepG2 and K562, going by the file names).
annotation_dir = '/projects/ps-yeolab3/bay001/maps/current_annotations/'
utr5_hepg2 = os.path.join(annotation_dir, 'hg19_v19_five_prime_utrs.HepG2_tpm1.bed')
utr5_k562 = os.path.join(annotation_dir, 'hg19_v19_five_prime_utrs.K562_tpm1.bed')

utr3_hepg2 = os.path.join(annotation_dir, 'hg19_v19_three_prime_utrs.HepG2_tpm1.bed')
utr3_k562 = os.path.join(annotation_dir, 'hg19_v19_three_prime_utrs.K562_tpm1.bed')

# Directory the submission cells write their generated bash scripts into.
bash_scripts_dir = '/projects/ps-yeolab3/bay001/maps/bash_scripts/{}'.format(current_date)

# Create the directory from Python instead of the `! mkdir` shell escape:
# os.makedirs also creates missing parent directories, needs no shell quoting
# of the path, and keeps the cell valid outside IPython.
if not os.path.exists(bash_scripts_dir):
    os.makedirs(bash_scripts_dir)

# Plot metagene density over 5' and 3' UTRs

In [20]:
# Build one plot_density.py command per IP replicate BAM in the CLIP manifest
# (metagene event over the 5'/3' UTR annotations) and submit them all as a
# single array job.
img_extension = ['png']

read_type = 'whole_read'
normalization_levels = [2]

### Force override of maps: when True, a command is queued even if its
### output file already exists.
force = True

# (5' UTR, 3' UTR) annotation files for each supported cell line.
utr_annotations_by_cell = {
    'HepG2': (utr5_hepg2, utr3_hepg2),
    'K562': (utr5_k562, utr3_k562),
}

clip_df = pd.read_table(params[read_type]['clip_manifest'])
cmds = []

for normalization_level in normalization_levels:
    for ext in img_extension:
        for uid in clip_df['uID']:

            # From the way the values are used below: two IP replicate BAMs,
            # the matching input BAM, then the RBP name and cell line.
            # NOTE(review): confirm against encode.manifest_helpers.
            rep1_bam, rep2_bam, input_bam, rbp, cell = m.get_clip_file_from_uid(clip_df, uid)

            if cell not in utr_annotations_by_cell:
                # Previously this branch only printed the cell line and then
                # fell through, silently reusing the annotations left over
                # from the previous uID (or raising NameError on the first
                # one). Report and skip the dataset instead.
                print('Skipping uID {}: unsupported cell line {}'.format(uid, cell))
                continue
            utr5_annotations, utr3_annotations = utr_annotations_by_cell[cell]

            ### For each replicate, build the command used to call the python script.
            for ip_bam in [rep1_bam, rep2_bam]:
                name = os.path.basename(ip_bam).replace(
                    '.bam', '.{}.{}'.format(normalization_level, ext)
                )
                output_filename = os.path.join(
                    params[read_type]['output_dir'],
                    name
                )

                # Build the cmd line (fragments are joined with single spaces).
                cmd = ' '.join([
                    "python " + density_runner,
                    "--event {}".format('metagene'),
                    "--ipbam {}".format(ip_bam),
                    "--inputbam {}".format(input_bam),
                    "--output {}".format(output_filename),
                    "--annotations {} {}".format(utr5_annotations, utr3_annotations),
                    "--annotation_type {} {}".format('bed', 'bed'),
                    "--scale",
                    "--exon_offset 0",
                    "--intron_offset 0",
                    "--normalization_level {}".format(normalization_level),
                ])
                if force or not os.path.exists(output_filename):
                    cmds.append(cmd)


# NOTE(review): `ext` and `normalization_level` here are whatever the loops
# above left behind, i.e. the last entries of img_extension and
# normalization_levels. All queued commands go into this one script, so with
# more than one extension or level the file/job name only reflects the last.
bash_script_sh = os.path.join(
    bash_scripts_dir,
    '{}-utrs_{}.{}.sh'.format(
        params[read_type]['prefix'],
        ext,
        normalization_level
    )
)

Submitter(
    cmds,
    "{}-utrs_{}".format(params[read_type]['prefix'], ext),
    sh=bash_script_sh,
    submit=True,
    array=True,
    walltime='8:00:00',
    queue='home-yeo'
)

Writing 362 tasks as an array-job.
Wrote commands to /projects/ps-yeolab3/bay001/maps/bash_scripts/11-30-2017/whole_read-utrs_png.2.sh.
Submitted script to queue home-yeo.
 Job ID: 10312130


<qtools.submitter.Submitter at 0x2b930c8ae2d0>

In [13]:
# Build one plot_density.py command per peak bigBed file (metagene event over
# the 5'/3' UTR annotations) and write them to a bash script as an array job.
# submit=False below, so the script is passed to Submitter without the submit
# flag set (the whole_read cell uses submit=True).
img_extension = ['png']

read_type = 'peak'
all_peaks = glob.glob(os.path.join(params[read_type]['peak_dir'], '*.bb'))

### Force override of maps: when True, a command is queued even if its
### output file already exists.
force = True

# (5' UTR, 3' UTR) annotation files for each supported cell line.
# NOTE(review): this cell used to read `utr5` / `utr3`, which are not defined
# anywhere in this notebook, so it raised NameError on a fresh kernel. The
# per-cell-line annotations that the notebook does define are used instead;
# confirm these are the files intended for the peak maps.
utr_annotations_by_cell = {
    'HepG2': (utr5_hepg2, utr3_hepg2),
    'K562': (utr5_k562, utr3_k562),
}

clip_df = pd.read_table(params[read_type]['clip_manifest'])
cmds = []

for ext in img_extension:
    for peak in all_peaks:

        # The part of the file name before the first '.' is split into the
        # dataset uID plus two further fields that are not needed here.
        uid, _, _ = m.split_uid_and_rep(os.path.basename(peak).split('.')[0])
        # Only the cell line (last value) is used from the manifest lookup.
        _, _, _, _, cell = m.get_clip_file_from_uid(
            clip_df, uid
        )

        if cell not in utr_annotations_by_cell:
            # Previously this branch only printed the cell line and then fell
            # through with stale annotations; report and skip the file instead.
            print('Skipping {}: unsupported cell line {}'.format(peak, cell))
            continue
        utr5_annotations, utr3_annotations = utr_annotations_by_cell[cell]

        ### Build the command used to call the python script.
        output_filename = os.path.join(
            params[read_type]['output_dir'],
            os.path.basename(peak).replace('.bb', '.bb.{}'.format(ext))
        )

        # Build the cmd line (fragments are joined with single spaces).
        cmd = ' '.join([
            "python " + density_runner,
            "--event {}".format('metagene'),
            "--peak {}".format(peak),
            "--output {}".format(output_filename),
            "--annotations {} {}".format(utr5_annotations, utr3_annotations),
            "--annotation_type {} {}".format('bed', 'bed'),
            "--scale",
            "--exon_offset 0",
            "--intron_offset 0",
            "--normalization_level {}".format(0),
        ])
        if force or not os.path.exists(output_filename):
            cmds.append(cmd)


# NOTE(review): `ext` is whatever the loop above left behind, i.e. the last
# entry of img_extension; all queued commands share this one script.
bash_script_sh = os.path.join(
    bash_scripts_dir,
    '{}-utrs_{}.{}.sh'.format(
        params[read_type]['prefix'],
        ext,
        0
    )
)

Submitter(
    cmds,
    "{}-utrs_{}".format(params[read_type]['prefix'], ext),
    sh=bash_script_sh,
    submit=False,
    array=True,
    walltime='8:00:00',
    queue='condo'
)

Writing 362 tasks as an array-job.
Wrote commands to /projects/ps-yeolab3/bay001/maps/bash_scripts/11-30-2017/peak_bb-utrs_png.0.sh.


<qtools.submitter.Submitter at 0x2b933987b450>