# Generates and submits a qsub array job that runs `score_edits_total_coverage.py`, which:
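- filters out ApoControl background edits (only when a background edits file is supplied; `bg_edits_file` is `None` in this notebook, so this step is skipped here)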
- opens a window of `flank*2 + 1` nt around each edit site and computes the edit/C metric: the sum of edit coverage over every edit candidate in the window, divided by the sum of total coverage over those same candidates.

In [1]:
import glob
import os
from qtools import Submitter
from tqdm import tnrange, tqdm_notebook

In [2]:
# Common root shared by the input and output directories.
project_dir = '/home/bay001/projects/kris_apobec_20200121/permanent_data2/04_scRNA_RBFOX2_TIA'

# Directory searched for the *.annotated input files.
input_dir = os.path.join(project_dir, 'sailor_outputs_groups', 'combined_outputs_w_cov_info')
# Directory the per-window score files are written under.
output_dir = os.path.join(project_dir, 'sailor_outputs_groups_merged_scores')

### We need the annotated outputs from notebook 03

In [3]:
# Gather every *.annotated file in input_dir, in sorted order so the job list
# is reproducible between runs.
annotated_pattern = os.path.join(input_dir, '*.annotated')
all_annotated = glob.glob(annotated_pattern)
all_annotated.sort()
print(len(all_annotated))
all_annotated  # bare expression: the notebook echoes the list below the cell

19


['/home/bay001/projects/kris_apobec_20200121/permanent_data2/04_scRNA_RBFOX2_TIA/sailor_outputs_groups/combined_outputs_w_cov_info/RBFOX2_ORFS.barcodes_RBFOX2-TIA1_subset.fx.annotated',
 '/home/bay001/projects/kris_apobec_20200121/permanent_data2/04_scRNA_RBFOX2_TIA/sailor_outputs_groups/combined_outputs_w_cov_info/RBFOX2_ORFS.barcodes_RBFOX2-TIA1_subset_more.fx.annotated',
 '/home/bay001/projects/kris_apobec_20200121/permanent_data2/04_scRNA_RBFOX2_TIA/sailor_outputs_groups/combined_outputs_w_cov_info/RBFOX2_ORFS.barcodes_RBFOX2-TIA1_subset_noFeatureCells.fx.annotated',
 '/home/bay001/projects/kris_apobec_20200121/permanent_data2/04_scRNA_RBFOX2_TIA/sailor_outputs_groups/combined_outputs_w_cov_info/TIA1_ORFS.barcodes_RBFOX2-TIA1_subset.fx.annotated',
 '/home/bay001/projects/kris_apobec_20200121/permanent_data2/04_scRNA_RBFOX2_TIA/sailor_outputs_groups/combined_outputs_w_cov_info/TIA1_ORFS.barcodes_RBFOX2-TIA1_subset_more.fx.annotated',
 '/home/bay001/projects/kris_apobec_20200121/perm

In [4]:
# Number of nt taken upstream and downstream of each edit site.
# The scored window is flank*2 + 1 nt wide, so flank = 25 gives 25 + 1 + 25 = 51.
# NOTE(review): 24 (window 24 + 1 + 24 = 49) is reportedly the default,
# presumably that of score_edits_total_coverage.py - confirm.
flank = 25

### Write the score commands and submit to TSCC

In [5]:
import shlex  # cell-local import: shell-quotes every argument placed in the generated commands

# Optional ApoControl background-edits file. When set, it is forwarded to the
# scoring script via --bg_edits_file; it is None here, so the flag is omitted.
# bg_edits_file = '/home/bay001/projects/kris_apobec_20200121/permanent_data/final_analysis/01_SAILOR_bulk_rnaseq/outputs/combined_outputs_w_cov_info/ApoControl-1000_S21_L002_R1_001.fastqTr.sorted.STARUnmapped.out.sorted.STARAligned.out.sorted_a0_b0_e0.01.bed'
bg_edits_file = None
chrom_sizes_file = '/projects/ps-yeolab3/bay001/annotations/hg19/hg19.chrom.sizes'
gtfdb_file = '/projects/ps-yeolab3/bay001/annotations/hg19/gencode_v19/gencode.v19.annotation.gtf.db'
genome_fa = '/projects/ps-yeolab3/bay001/annotations/hg19/hg19.fa'

score_script = '/home/bay001/projects/kris_apobec_20200121/permanent_data2/04_scRNA_RBFOX2_TIA/score_edits_total_coverage.py'
annotated_suffix = '.fx.annotated'

# Build one scoring command per annotated file whose output does not exist yet.
cmds = []
for annotated in all_annotated:
    output_file = os.path.join(
        output_dir,
        os.path.basename(annotated) + '.windows_{}.txt'.format(flank*2+1)
    )

    # Swap only the trailing suffix to find the matching bigwigs. A blanket
    # str.replace() leaves a non-matching name untouched, so the existence
    # check would pass on the annotated file itself and that file would be
    # submitted as both the forward and reverse bigwig.
    if not annotated.endswith(annotated_suffix):
        raise ValueError(
            'Expected a file ending in {!r}, got: {}'.format(annotated_suffix, annotated)
        )
    stem = annotated[:-len(annotated_suffix)]
    pos_bw = stem + '.fwd.sorted.rmdup.readfiltered.sorted.bw'
    neg_bw = stem + '.rev.sorted.rmdup.readfiltered.sorted.bw'

    # Explicit check instead of assert: it survives `python -O` and reports
    # exactly which bigwig is absent.
    missing = [bw for bw in (pos_bw, neg_bw) if not os.path.exists(bw)]
    if missing:
        raise FileNotFoundError(
            'Missing bigwig file(s) for {}: {}'.format(annotated, ', '.join(missing))
        )

    if os.path.exists(output_file):
        continue  # already scored; do not resubmit

    args = [
        score_script,
        '--flank', str(flank),
        '--conf', '0.0',
        '--gtfdb', gtfdb_file,
        '--chrom_sizes_file', chrom_sizes_file,
        '--pos_bw', pos_bw,
        '--neg_bw', neg_bw,
        '--annotated_edits_file', annotated,
    ]
    if bg_edits_file is not None:
        args += ['--bg_edits_file', bg_edits_file]
    args += [
        '--genome_fa', genome_fa,
        '--output_file', output_file,
    ]
    # Quote each argument so a path containing spaces or shell metacharacters
    # cannot break the command; plain paths come out unchanged.
    cmds.append('module load python3essential;' + ' '.join(shlex.quote(arg) for arg in args))
print(len(cmds))

4


In [6]:
# Submit all scoring commands as one array job. Per the recorded output below,
# with submit=True this writes the commands to score_edits.sh and queues them
# immediately. Kept as a bare expression so the notebook echoes the Submitter.
Submitter(
    commands=cmds,
    job_name='score_edits',
    array=True,
    nodes=1,
    ppn=1,
    submit=True,
    walltime='24:00:00',
)

Writing 4 tasks as an array-job.
Wrote commands to score_edits.sh.
Submitted script to queue home.
 Job ID: 21389769


<qtools.submitter.Submitter at 0x2b641a4f8390>

In [7]:
# Echo the generated commands so the exact submitted command lines are recorded
# in the notebook output.
cmds

['module load python3essential;/home/bay001/projects/kris_apobec_20200121/permanent_data2/04_scRNA_RBFOX2_TIA/score_edits_total_coverage.py --flank 25 --conf 0.0 --gtfdb /projects/ps-yeolab3/bay001/annotations/hg19/gencode_v19/gencode.v19.annotation.gtf.db --chrom_sizes_file /projects/ps-yeolab3/bay001/annotations/hg19/hg19.chrom.sizes --pos_bw /home/bay001/projects/kris_apobec_20200121/permanent_data2/04_scRNA_RBFOX2_TIA/sailor_outputs_groups/combined_outputs_w_cov_info/RBFOX2_ORFS.barcodes_RBFOX2-TIA1_subset.fwd.sorted.rmdup.readfiltered.sorted.bw --neg_bw /home/bay001/projects/kris_apobec_20200121/permanent_data2/04_scRNA_RBFOX2_TIA/sailor_outputs_groups/combined_outputs_w_cov_info/RBFOX2_ORFS.barcodes_RBFOX2-TIA1_subset.rev.sorted.rmdup.readfiltered.sorted.bw --annotated_edits_file /home/bay001/projects/kris_apobec_20200121/permanent_data2/04_scRNA_RBFOX2_TIA/sailor_outputs_groups/combined_outputs_w_cov_info/RBFOX2_ORFS.barcodes_RBFOX2-TIA1_subset.fx.annotated --genome_fa /projects