In [1]:
# cnv_realign - using detected cnv areas, realign cram file using jump aligner around cnv edges

In [126]:
# argument processing + debug environment files
import sys
import os
# Input selection: when run as the cnv_realign script the three inputs come
# from the command line; otherwise (e.g. interactively in the notebook) fall
# back to the local debug data set.
if "cnv_realign" in sys.argv[0]:
    # commandline invocation: cnv_realign <in.cram> <cnv.bed> <ref.fasta>
    if len(sys.argv) < 4:
        # fail with a usage message instead of a bare IndexError
        sys.exit("usage: " + sys.argv[0] + " <in.cram> <cnv.bed> <ref.fasta>")
    IN_CRAM, CNV_BED, REF_FASTA = sys.argv[1:4]
else:
    IN_CRAM = "data/chr22_1M.cram"
    CNV_BED = "data/chr22.bed"
    REF_FASTA = os.path.expanduser("~/tmp/ref/Homo_sapiens_assembly38.fasta")

# when True, process_cnv prints its arguments, read counts and temp file path
debug = True

# number of bases fetched on each side of a CNV edge, for reads and for the
# reference sequence respectively
FETCH_READ_PADDING = 500
FETCH_REF_PADDING = 500

# jump aligner invocation (inside the jump_align_dev docker image); the input
# is piped through stdin, hence "-i".
# NOTE(review): the meaning of the six numeric arguments is not visible in
# this file - document them here once confirmed against jump_align itself.
JUMP_ALIGN_CMD = ["docker", "run", "-i", "jump_align_dev", "/bin/bash", "-c", "jump_align 10 -25 -50 -10 -5 0"]

CNV_BED

'data/chr22.bed'

In [127]:
# open files
import pysam
# Open the alignment file in CRAM read mode ("rc") and hand pysam the
# reference FASTA explicitly, so decoding does not depend on htslib's implicit
# reference lookup.
# NOTE(review): assumes REF_FASTA is the reference the CRAM was written
# against - confirm.
reads_file = pysam.AlignmentFile(IN_CRAM, "rc", reference_filename=REF_FASTA)
# Indexed random access to the reference sequence.
fasta_file = pysam.FastaFile(REF_FASTA)

In [128]:
import subprocess

def run_process(command, input_path):
    """Run ``command`` with the contents of ``input_path`` piped to its stdin.

    The child's stderr is printed once the process has finished.

    Args:
        command: argument list passed to ``subprocess.Popen``.
        input_path: path of a text file whose whole content is fed to the
            child's standard input.

    Returns:
        A ``(stdout, returncode)`` tuple: the child's complete standard
        output as text and its exit status.

    Raises:
        OSError: if ``input_path`` cannot be read or the command cannot be
            started (e.g. executable not found).
    """
    with open(input_path, 'r') as file:
        input_data = file.read()

    process = subprocess.Popen(command,
                               stdin=subprocess.PIPE,
                               stdout=subprocess.PIPE,
                               stderr=subprocess.PIPE,
                               text=True)

    # Hand the input to communicate() rather than writing to stdin up front:
    # communicate() feeds stdin while draining stdout/stderr, so a child that
    # produces output while still reading cannot deadlock on full pipes.
    stdout, stderr = process.communicate(input=input_data)
    print(stderr)

    return stdout, process.returncode

In [171]:
import tempfile

# process a single cnv
def process_cnv(chrom, start, end, type):
    """Re-align the reads around both edges of one CNV with the jump aligner.

    For each of the two edges (``start`` and ``end``) a reference window of
    +/- FETCH_REF_PADDING bases and all reads overlapping a window of
    +/- FETCH_READ_PADDING bases are fetched. The reads and the two reference
    windows are written to a temporary file, which is piped through
    JUMP_ALIGN_CMD; each output line is paired with its input read.

    Args:
        chrom: contig name of the CNV.
        start: CNV start coordinate (int or numeric string).
        end: CNV end coordinate (int or numeric string).
        type: CNV type label from the BED file; not used by this function
            (the name shadows the builtin but is kept for the callers).

    Returns:
        A list with one ``[read_name, ref1_start, ref2_start, fields]`` entry
        per aligned read, where ``ref1_start``/``ref2_start`` are the start
        coordinates of the two reference windows and ``fields`` is the
        tab-split output line of the aligner for that read.
    """
    if debug: print("process_cnv: ", locals())

    # get all reads that cross the two cnv edges
    reads = dict()
    refs = []
    for loc in [int(start), int(end)]:
        # clamp window starts at 0 so an edge closer than the padding to the
        # contig start does not produce a negative fetch coordinate
        ref_start = max(0, loc - FETCH_REF_PADDING)
        ref = fasta_file.fetch(chrom, ref_start, loc + FETCH_REF_PADDING)
        refs.append([ref_start, ref])
        read_start = max(0, loc - FETCH_READ_PADDING)
        for read in reads_file.fetch(chrom, read_start, loc + FETCH_READ_PADDING):
            # records without a stored sequence cannot be re-aligned
            if read.seq is None:
                continue
            # keyed by name: a read overlapping both windows is kept once
            reads[read.qname] = read
    if debug: print("found", len(reads), "reads")

    # create input file for jump aligner: the first line carries the read and
    # both reference windows, every following line the read and "=".
    # NOTE(review): presumably "=" tells jump_align to reuse the references of
    # the first line - confirm against the jump_align input format.
    reads_in_order = list(reads.values())
    with tempfile.NamedTemporaryFile(mode="w", prefix="jump_align_input.", delete=False) as f:
        tmp = f.name
        for index, read in enumerate(reads_in_order):
            if index == 0:
                line = read.seq + "\t" + refs[0][1] + "\t" + refs[1][1] + "\n"
            else:
                line = read.seq + "\t=\n"
            f.write(line)
    if debug: print("tmp", tmp)

    # run jump_align
    try:
        stdout, returncode = run_process(JUMP_ALIGN_CMD, tmp)
    finally:
        # in debug mode the input file is left behind for inspection
        if not debug:
            os.unlink(tmp)
    if returncode != 0:
        print("jump_align exited with status", returncode)

    # The first output line is treated as a header; the remaining lines are
    # paired with the input reads by position.
    output_lines = (stdout or "").split("\n")
    realignments = []
    for alignment, read in zip(output_lines[1:], reads_in_order):
        if not alignment:
            # blank line (e.g. the trailing one after truncated output)
            continue
        a = alignment.split("\t")
        realignments.append([read.qname, refs[0][0], refs[1][0], a])
    return realignments


In [173]:
# loop on bed file: realign every CNV and collect the results of all of them
# in all_realignments (realignments holds the result of the last CNV only)
all_realignments = []
realignments = []
with open(CNV_BED) as f:
    for line in f:
        bed_line = line.strip().split()
        # skip blank lines and BED comment / header lines
        if not bed_line or bed_line[0].startswith("#") or bed_line[0] in ("track", "browser"):
            continue
        if len(bed_line) < 3:
            raise ValueError("malformed BED line (need chrom, start, end): " + line.rstrip("\n"))
        # only the first four columns are used; the type column is optional
        chrom, start, end = bed_line[:3]
        cnv_type = bed_line[3] if len(bed_line) > 3 else None
        realignments = process_cnv(chrom, start, end, cnv_type)
        all_realignments.extend(realignments)

process_cnv:  {'chrom': 'chr22', 'start': '10510000', 'end': '10512000', 'type': 'CN1'}
found 42 reads
tmp /tmp/jump_align_input.78608

process_cnv:  {'chrom': 'chr22', 'start': '10513000', 'end': '10514000', 'type': 'CN0'}
found 59 reads
tmp /tmp/jump_align_input.78608

process_cnv:  {'chrom': 'chr22', 'start': '10514500', 'end': '10517000', 'type': 'CN1'}
found 88 reads
tmp /tmp/jump_align_input.78608

process_cnv:  {'chrom': 'chr22', 'start': '10517500', 'end': '10518500', 'type': 'CN0'}
found 125 reads
tmp /tmp/jump_align_input.78608

process_cnv:  {'chrom': 'chr22', 'start': '10522500', 'end': '10524500', 'type': 'CN0'}
found 158 reads
tmp /tmp/jump_align_input.78608

process_cnv:  {'chrom': 'chr22', 'start': '10526000', 'end': '10527000', 'type': 'CN1'}
found 26 reads
tmp /tmp/jump_align_input.78608

process_cnv:  {'chrom': 'chr22', 'start': '10527000', 'end': '10530000', 'type': 'CN0'}
found 81 reads
tmp /tmp/jump_align_input.78608

process_cnv:  {'chrom': 'chr22', 'start': '105

KeyboardInterrupt: 