In [4]:
import os
import re
import subprocess

In [1]:
def prepare_sam(basename):
    """Convert a SAM file to a sorted, indexed BAM file that can
    be opened with IGV.

    Runs `samtools view`, `samtools sort` and `samtools index` on
    "<basename>.sam", producing "<basename>.bam",
    "<basename>.sorted.bam" and its index.

    Args:
        basename: path of the SAM file without the ".sam" extension

    Returns:
        path of the sorted BAM file ("<basename>.sorted.bam")

    Raises:
        AssertionError: if "<basename>.sam" does not exist
        subprocess.CalledProcessError: if a samtools step exits non-zero
    """
    sam_path = "{0}.sam".format(basename)
    bam_path = "{0}.bam".format(basename)
    sorted_prefix = "{0}.sorted".format(basename)
    sorted_bam_path = "{0}.sorted.bam".format(basename)

    assert os.path.isfile(sam_path), "{0} does not exist".format(sam_path)

    # Argument lists (no shell) keep paths containing spaces or shell
    # metacharacters intact; check_call aborts the pipeline on the first
    # failing step instead of returning a path to a missing file.
    with open(bam_path, "wb") as bam_out:
        subprocess.check_call(["samtools", "view", "-S", "-b", sam_path],
                              stdout=bam_out)
    # NOTE(review): `samtools sort <in.bam> <out.prefix>` is the legacy
    # calling convention; newer samtools releases expect `-o <out.bam>`.
    # Kept as-is -- confirm against the installed samtools version.
    subprocess.check_call(["samtools", "sort", bam_path, sorted_prefix])
    subprocess.check_call(["samtools", "index", sorted_bam_path])
    return sorted_bam_path

In [2]:
def get_file_and_channel(filename):
    """Extract file id and channel from a filename or fasta-header.

    The name must contain a "ch<digits>_file<digits>_" fragment.

    Args:
        filename: filename or fasta/fastq header to parse

    Returns:
        tuple (file_id, channel_id), both as int

    Raises:
        ValueError: if the name contains no "ch<N>_file<M>_" fragment
    """
    result = re.search(r'ch(\d+)_file(\d+)_', filename)
    if result is None:
        # Without this guard a non-matching name fails with an opaque
        # "'NoneType' object has no attribute 'group'".
        raise ValueError(
            "cannot extract channel/file id from {0!r}".format(filename))
    channel_id, file_id = (int(group) for group in result.groups())
    return file_id, channel_id

In [3]:
FMAP = None
def prepare_filemap(file_data):
    """Build the global FMAP lookup table.

    Every entry of `file_data` is stored under the key
    "ch<channel>_file<file_id>", so that a file can later be
    fetched by channel and file id in constant time."""
    global FMAP
    lookup = {}
    for entry in file_data:
        key = "ch{0}_file{1}".format(entry["channel"], entry["file_id"])
        lookup[key] = entry
    # publish only once the whole table has been built
    FMAP = lookup
        
def get_file(channel, file_id):
    """Look up a file in the FMAP dictionary by channel and file id.

    'prepare_filemap' has to be run beforehand to fill FMAP."""
    assert FMAP is not None, "filemap not initialized. "
    lookup_key = "ch{0}_file{1}".format(channel, file_id)
    return FMAP[lookup_key]

In [None]:
def mk_fastq(fastq_file, file_data, add_header="read"):
    """Write file-objects as a fastq-file.

    The header line of every record is replaced by a newly generated
    one, "@ch<channel>_file<file_id>_<add_header>", so that the
    originating file can be identified later.

    Args:
        fastq_file: path of the fastq file to write (overwritten)
        file_data: iterable of dicts with the keys "channel",
            "file_id" and "fastq" (the fastq record as a string)
        add_header: suffix appended to every generated header
    """
    with open(fastq_file, 'w') as f:
        for file_obj in file_data:
            f.write("@ch{0}_file{1}_{2}\n".format(
                    file_obj["channel"],
                    file_obj["file_id"],
                    add_header))
            # everything after the first line, i.e. the record without
            # its original header
            record_body = file_obj["fastq"].partition("\n")[2]
            # A record lacking a trailing newline would otherwise get the
            # next header glued onto its quality line.
            if record_body and not record_body.endswith("\n"):
                record_body += "\n"
            f.write(record_body)

In [None]:
def graphmap(ref_file, fastq_file, sam_file, ncores=1,
             graphmap_bin="/home/ibis/gregor.sturm/nanopore/tools/graphmap/graphmap"):
    """
    align reads against reference using graphmap.

    Args:
        ref_file: fasta file with reference genome
        fastq_file: fasta or fastq-file with the reads
        sam_file: output file of the alignment (SAM)
        ncores: number of threads passed to graphmap (-t)
        graphmap_bin: path of the graphmap executable; the default
            is the original hard-coded install location

    Raises:
        subprocess.CalledProcessError: if graphmap exits non-zero
    """
    # Argument list instead of a shell string: file names containing
    # spaces or shell metacharacters reach graphmap unchanged.
    command = [
        str(graphmap_bin),
        "-r", str(ref_file),
        "-d", str(fastq_file),
        "-o", str(sam_file),
        "-t{0}".format(ncores),
    ]
    subprocess.check_call(command)

In [None]:
def bwamem(ref_file, fastq_file, sam_file, ncores=1):
    """
    align reads against reference using bwa mem.

    The reference is indexed with `bwa index` on every call before
    the alignment is run.

    Args:
        ref_file: fasta file with reference genome
        fastq_file: fasta or fastq-file with the reads
        sam_file: output file of the alignment (SAM)
        ncores: number of threads passed to bwa mem (-t)

    Raises:
        subprocess.CalledProcessError: if `bwa index` or `bwa mem`
            exits non-zero
    """
    # Argument lists instead of shell strings: file names containing
    # spaces or shell metacharacters are passed through unchanged, and a
    # failed indexing step stops before the alignment is attempted.
    subprocess.check_call(["bwa", "index", str(ref_file)])
    # bwa mem writes the alignment to stdout; redirect it into sam_file
    with open(sam_file, "wb") as sam_out:
        subprocess.check_call(
            ["bwa", "mem", "-t{0}".format(ncores),
             str(ref_file), str(fastq_file)],
            stdout=sam_out)