# We need bigwig files for fast access to coverage (used by the score_edits_* scripts)
- Used when computing the edit/C metric.
- This notebook assumes a reverse-stranded single-end (SE) library. For other library types you may need to swap the `-strand +` / `-strand -` parameters passed to `bedtools genomecov` below.

In [1]:
import glob
import os
from qtools import Submitter
from tqdm import tnrange, tqdm_notebook

In [2]:
# Root of the per-group SAILOR outputs (one sub-directory per group).
input_dir = '/home/bay001/projects/kris_apobec_20200121/permanent_data2/04_scRNA_RBFOX2_TIA/sailor_outputs_groups/'
# Destination for the bedGraph/bigWig files; it lives directly under input_dir.
output_dir = os.path.join(input_dir, 'combined_outputs_w_cov_info')

### Inputs should be the two `*.sorted.rmdup.readfiltered.bam` files (`fwd` and `rev`) that are intermediates from SAILOR. These are BAM files that have been split by strand and filtered to remove low-quality or unexpected (non-C/T) reads.

In [3]:
# Forward-strand SAILOR intermediates: <input_dir>/<group>/results/<group>.fwd.sorted.rmdup.readfiltered.bam
fwd_bam_pattern = os.path.join(input_dir, '*/results/*.fwd.sorted.rmdup.readfiltered.bam')
all_fwd_bams = glob.glob(fwd_bam_pattern)
print(len(all_fwd_bams))
all_fwd_bams

19


['/home/bay001/projects/kris_apobec_20200121/permanent_data2/04_scRNA_RBFOX2_TIA/sailor_outputs_groups/sampled_200_RBFOX2_ORFS.barcodes_RBFOX2-TIA1_subset_more_noFeatureCells/results/sampled_200_RBFOX2_ORFS.barcodes_RBFOX2-TIA1_subset_more_noFeatureCells.fwd.sorted.rmdup.readfiltered.bam',
 '/home/bay001/projects/kris_apobec_20200121/permanent_data2/04_scRNA_RBFOX2_TIA/sailor_outputs_groups/sampled_200_RBFOX2_ORFS.barcodes_RBFOX2-TIA1_subset/results/sampled_200_RBFOX2_ORFS.barcodes_RBFOX2-TIA1_subset.fwd.sorted.rmdup.readfiltered.bam',
 '/home/bay001/projects/kris_apobec_20200121/permanent_data2/04_scRNA_RBFOX2_TIA/sailor_outputs_groups/RBFOX2_ORFS.barcodes_RBFOX2-TIA1_subset/results/RBFOX2_ORFS.barcodes_RBFOX2-TIA1_subset.fwd.sorted.rmdup.readfiltered.bam',
 '/home/bay001/projects/kris_apobec_20200121/permanent_data2/04_scRNA_RBFOX2_TIA/sailor_outputs_groups/sampled_500_TIA1_ORFS/results/sampled_500_TIA1_ORFS.fwd.sorted.rmdup.readfiltered.bam',
 '/home/bay001/projects/kris_apobec_2020

# Write the BAM-to-bigWig commands

In [4]:
chrom_sizes_file = '/home/bay001/projects/kris_apobec_20200121/permanent_data2/04_scRNA_RBFOX2_TIA/inputs/refdata-cellranger-hg19_lenti_common-3.0.0/star/chrNameLength.txt'


def _bam_to_bigwig_cmd(bam, strand):
    """Build the shell command that turns one strand-split BAM into a bigWig.

    The command runs three steps: `bedtools genomecov` (BAM -> bedGraph),
    `bedtools sort` (bedGraph -> sorted bedGraph) and `bedGraphToBigWig`
    (sorted bedGraph -> bigWig). All outputs are written to `output_dir` and
    are named after the BAM's basename without its `.bam` extension.

    Parameters
    ----------
    bam : str
        Path to the strand-split BAM file.
    strand : str
        Value passed to `bedtools genomecov -strand` ('+' or '-').

    Returns
    -------
    str or None
        The command string, or None when the final `.sorted.bw` already
        exists, so that re-running the cell only queues missing files.
    """
    prefix = os.path.join(output_dir, os.path.splitext(os.path.basename(bam))[0])
    bedgraph = prefix + ".bg"
    sorted_bedgraph = prefix + ".sorted.bg"
    bigwig = prefix + ".sorted.bw"
    if os.path.exists(bigwig):
        return None
    steps = [
        'bedtools genomecov -split -strand {} -g {} -bg -ibam {} > {}'.format(
            strand, chrom_sizes_file, bam, bedgraph
        ),
        'bedtools sort -i {} > {}'.format(bedgraph, sorted_bedgraph),
        'bedGraphToBigWig {} {} {}'.format(sorted_bedgraph, chrom_sizes_file, bigwig),
    ]
    # Chain the steps with '&&' so a failed step stops the job instead of
    # feeding a broken intermediate file into the next tool.
    return 'module load makebigwigfiles;' + ' && '.join(steps) + ';'


# The shell redirections in the commands fail if the output directory is missing.
if not os.path.isdir(output_dir):
    os.makedirs(output_dir)

# The reverse-strand BAM sits next to each forward-strand BAM; only the
# 'fwd'/'rev' tag in the file name differs.
all_rev_bams = [
    fwd_bam.replace('.fwd.sorted.rmdup.readfiltered.bam', '.rev.sorted.rmdup.readfiltered.bam')
    for fwd_bam in all_fwd_bams
]

# Queue all forward-strand commands first, then all reverse-strand commands.
cmds = []
for strand_bams, strand in ((all_fwd_bams, '+'), (all_rev_bams, '-')):
    for bam in strand_bams:
        cmd = _bam_to_bigwig_cmd(bam, strand)
        if cmd is not None:
            cmds.append(cmd)
print(len(cmds))

4


In [5]:
# Submit every command in `cmds` as one task of a single array job.
# submit=True sends the generated script to the queue as soon as this cell runs.
Submitter(
    commands=cmds,
    job_name='bamToBigWig',
    array=True,
    nodes=1,
    ppn=1,
    walltime='8:00:00',
    submit=True,
)

Writing 4 tasks as an array-job.
Wrote commands to bamToBigWig.sh.
Submitted script to queue home.
 Job ID: 21389169


<qtools.submitter.Submitter at 0x2b94f7e487d0>