# Runs the script to generate windows across all exons (CDS, UTRs)

In [1]:
import glob
import os
import shlex

from qtools import Submitter
from tqdm import tnrange, tqdm_notebook

In [2]:
# Both directories live under the same project folder.
_groups_root = '/home/bay001/projects/kris_apobec_20200121/permanent_data2/07_scRNA_groups'
# Directory that is globbed for the annotated edit file(s).
input_dir = os.path.join(_groups_root, 'sailor_outputs_groups_deep', 'combined_outputs_w_cov_info')
# Directory that receives the window tables and their .log files;
# the empty last component keeps the trailing slash on the path string.
output_dir = os.path.join(_groups_root, 'window_outputs_deep_cds_only', '')

In [3]:
# Collect the annotated edit file(s) to score. The pattern contains no
# wildcard characters, so it matches at most one path.
annotated_pattern = os.path.join(
    input_dir,
    'RPS2-STAMP_possorted_genome_bam_MD-RPS2_STAMP_Apo_filtered_lenti_common_expression_barcodes.txt.fx.annotated',
)
all_annotated = glob.glob(annotated_pattern)
all_annotated.sort()
print(len(all_annotated))
all_annotated[:3]

1


['/home/bay001/projects/kris_apobec_20200121/permanent_data2/07_scRNA_groups/sailor_outputs_groups_deep/combined_outputs_w_cov_info/RPS2-STAMP_possorted_genome_bam_MD-RPS2_STAMP_Apo_filtered_lenti_common_expression_barcodes.txt.fx.annotated']

# Write the commands to score all edits.

In [4]:
flank = 25 # number of nt upstream and downstream of the edit site; window size = flank + 1 + flank = 51 (an earlier note cited default=24, i.e. a 49-nt window -- TODO confirm the script's default)

In [5]:
# Reference inputs shared by every scoring command.
# Alternative background-edits input, kept for reference; while bg_edits_file
# is None the --bg_edits_file option is left out of the command.
# bg_edits_file = '/home/bay001/projects/kris_apobec_20200121/permanent_data/final_analysis/01_SAILOR_bulk_rnaseq/outputs/combined_outputs_w_cov_info/ApoControl-1000_S21_L002_R1_001.fastqTr.sorted.STARUnmapped.out.sorted.STARAligned.out.sorted_a0_b0_e0.01.bed'
bg_edits_file = None
chrom_sizes_file = '/projects/ps-yeolab3/bay001/annotations/hg19/hg19.chrom.sizes'
gtfdb_file = '/projects/ps-yeolab3/bay001/annotations/hg19/gencode_v19/gencode.v19.annotation.gtf.db'
genome_fa = '/projects/ps-yeolab3/bay001/annotations/hg19/hg19.fa'

# NOTE(review): output_dir must already exist when the jobs run -- each
# command redirects its log into that directory, and the shell refuses to
# start a command whose redirect target cannot be opened.

score_script = '/home/bay001/projects/kris_apobec_20200121/scripts/score_edits_total_coverage_sc.py'
fx_suffix = '.fx'

# Build one shell command per annotated file that still needs scoring.
cmds = []
for annotated in all_annotated:
    output_file = os.path.join(
        output_dir,
        os.path.basename(annotated) + '.windows_{}.txt'.format(flank * 2 + 1),
    )

    # The coverage bigwigs share the annotated file's path minus its
    # extension and the trailing '.fx'. Strip only that trailing suffix:
    # a blanket str.replace would also rewrite any '.fx' that happens to
    # occur in a directory name or earlier in the file name.
    bw_prefix = os.path.splitext(annotated)[0]
    if bw_prefix.endswith(fx_suffix):
        bw_prefix = bw_prefix[:-len(fx_suffix)]
    pos_bw = bw_prefix + '.fwd.sorted.rmdup.readfiltered.sorted.bw'
    neg_bw = bw_prefix + '.rev.sorted.rmdup.readfiltered.sorted.bw'
    has_pos_bw = os.path.exists(pos_bw)
    has_neg_bw = os.path.exists(neg_bw)

    if not (has_pos_bw and has_neg_bw):
        # Cannot score without both strand-specific coverage tracks.
        print("pos exists? {}. neg exists? {}".format(has_pos_bw, has_neg_bw))
        continue
    if os.path.exists(output_file):
        # Output already present: do not queue the file again.
        continue

    # shlex.quote leaves plain paths untouched and protects paths that
    # contain spaces or shell metacharacters.
    parts = [
        'module load python3essential;' + score_script,
        '--flank {}'.format(flank),
        '--conf 0.0',
        '--gtfdb {}'.format(shlex.quote(gtfdb_file)),
        '--chrom_sizes_file {}'.format(shlex.quote(chrom_sizes_file)),
        '--pos_bw {}'.format(shlex.quote(pos_bw)),
        '--neg_bw {}'.format(shlex.quote(neg_bw)),
        '--annotated_edits_file {}'.format(shlex.quote(annotated)),
    ]
    if bg_edits_file is not None:
        parts.append('--bg_edits_file {}'.format(shlex.quote(bg_edits_file)))
    parts.append('--genome_fa {}'.format(shlex.quote(genome_fa)))
    # stdout and stderr of the job go to "<output_file>.log".
    parts.append('--output_file {} > {} 2>&1'.format(
        shlex.quote(output_file), shlex.quote(output_file + '.log')))
    cmds.append(' '.join(parts))
print(len(cmds))

1


In [None]:
# Hand the generated commands to qtools; submit=True and array=True are
# passed, so running this cell presumably queues them as one array job
# (confirm against the qtools Submitter documentation).
Submitter(
    commands=cmds,
    job_name='score_edits',
    array=True,
    nodes=1,
    ppn=3,
    walltime='80:00:00',
    submit=True,
)

In [6]:
# Show the generated command strings so they can be checked by eye.
cmds

['module load python3essential;/home/bay001/projects/kris_apobec_20200121/scripts/score_edits_total_coverage_sc.py --flank 25 --conf 0.0 --gtfdb /projects/ps-yeolab3/bay001/annotations/hg19/gencode_v19/gencode.v19.annotation.gtf.db --chrom_sizes_file /projects/ps-yeolab3/bay001/annotations/hg19/hg19.chrom.sizes --pos_bw /home/bay001/projects/kris_apobec_20200121/permanent_data2/07_scRNA_groups/sailor_outputs_groups_deep/combined_outputs_w_cov_info/RPS2-STAMP_possorted_genome_bam_MD-RPS2_STAMP_Apo_filtered_lenti_common_expression_barcodes.txt.fwd.sorted.rmdup.readfiltered.sorted.bw --neg_bw /home/bay001/projects/kris_apobec_20200121/permanent_data2/07_scRNA_groups/sailor_outputs_groups_deep/combined_outputs_w_cov_info/RPS2-STAMP_possorted_genome_bam_MD-RPS2_STAMP_Apo_filtered_lenti_common_expression_barcodes.txt.rev.sorted.rmdup.readfiltered.sorted.bw --annotated_edits_file /home/bay001/projects/kris_apobec_20200121/permanent_data2/07_scRNA_groups/sailor_outputs_groups_deep/combined_out