### Crispr Amplicon

Read alignment
Consensus 
Visualisation
Stats

In [1]:
import sys
import pysam
import edlib

In [45]:
bam = "/mnt/storage1/projects/research/22070I_1057_Cas9-ONT/Sample_22070LRa005/22070LRa005.bam"
bed = "/mnt/storage1/projects/research/22070I_1057_Cas9-ONT/opsin_region.bed"
ref = "/mnt/storage2/megSAP/data/genomes/GRCh38.fa"
fasta_anchors = "anchors.fasta"

Read target bedfile

In [3]:
n_window = 100
roi = []
with open(bed)as f:
    for line in f:
        l = line.strip().split()
        l[1] = max(0, int(l[1]) - n_window)
        l[2] = int(l[2]) + n_window
        roi.append(l)
f.close()

roi

[['chrX', 154143540, 154295780]]

Read reference genome

In [55]:
f = pysam.FastaFile(ref)
seq_ref = f.fetch(reference = roi[0][0], start = roi[0][1], end = roi[0][2])
print("Ref Length:", len(seq_ref))

Ref Length: 152240


Read anchors

In [56]:
f = pysam.FastxFile(fasta_anchors)
for anchor in f:
    print(anchor.name, ":", anchor.sequence)

f.close()

anchor_BL-715 : GGCAAGGGGGAGGAGGGAAGCCAACAGCAGGATGT
anchor_CHOP_267 : TGGGGGCTAGTGCTGGCACCACCG
anchor_IDT_855 : TGTGCACATGCAAGTCACAAACATTCCAGAT
anchor_CSPO_586 : GGGGCCTGTGGTTGGTGAAGCCCAAGGCA


Read Bamfile

In [26]:
b = pysam.AlignmentFile(bam, "rb")
print("Found", b.count(contig = roi[0][0], start = roi[0][1], end = roi[0][2]), "read alignments")

# Extract from documentation:
# Note that region strings are 1-based, while start and end denote an interval in python coordinates. The region is specified by reference, start and end.
# Todo: Maybe reduce coordinates by 1?
reads = {}
for read in b.fetch(contig = roi[0][0], start = roi[0][1], end = roi[0][2]):
    #print(read.query_name)
    if read.flag == 0 or read.flag == 4:
        reads[str(read.query_name)] = {
            'pos'   : read.get_reference_positions(full_length=True),
            'seq_query' : read.get_forward_sequence(),
            'pairs' : read.get_aligned_pairs(),
        }
b.close()

print("Stored", len(reads), "unique sequences")

Found 288 read alignments
Stored 75 unique sequences


Test edlib: Align single read to reference

In [29]:

testread =  next(iter(reads.values()))['seq_query']
print("QRY:", testread)
print("REF:", seq_ref)

QRY: GGTATTGCTTCGTTCCAGTTACATGTGCTCGGGGGGCCTGTAGTTGGTGAAGCCCAAGGCAGGCTGTGAACTCAGGGCAAGGGAGAGCGTGAGCAGGCGCTGCCGGTGGGCTGATGTGGGCACTGCATGTGCACCACAAGCGGCCAAAGGACCTACAGCTCATGGGGGCAAGGGGAGGAGGAAGCCAACAGCGGATATCGCGATCGATCTGCCCCCTACACTGGAGGAGAGCCCCCGGCACAAATCTCGCCCGTTGCCGGGCCCACCGGACATGGCTGGCCTATAAGGAGGATCGGTTCAGGCCTCGGCCTAAATAGTCTCCCTGGGCTTTCAAGAGAACCACATGAGAAAGGAGGATTCGGGCTCTGAGCAGTTTCACACCCACCCCCAGTCTGCAAATCTGACCGTGGGTCCACCTGCCCCCAAAGGCGGGCGCAGGACAGTAGAGAGGGAACAGAGAACACATAAACACAGAGAGGGCCTGACCAGCTCCACAGTCACCGCCACCTTCCTGGCGGGATACTGGTCGACGTCTGAAGGTTGGTTCCCAGCAAATCCCTCTGAGCCGCCCTTGCGAGATTCGCCTCAGGAAACAGGGGCAAGAAGAGAGGTGGGAGGAGGAGGAGTCTAAGTCAGGCCCAATTAAGAGATCAGGTAGTGTAGGGTTTAGGAGCTTAAAGGTGAAGAGGCCGGGCTGATCCCACAGGCCAGTATAAAGCGCCGTGACCCTCAGGTGATGCGCCAGGGCGGCTGCCGTCGGGGACAGGGCTTTCCATAGCCATGGCCCAGCAGTGGAGCCTCCAAAGGCTCGCGAGCCGCCATCCATGGGACAGCTATGAGGACAGCACCCAGTCAGCATCTTCACCTACACCAACAGCAGCTCCACCAGAGGTGAGCAGCAGGCCCGTGGAGGCTGGGTGGCTGCACTGAGGCTGATACCCACCTGCCCCGCCCAAGGAATCTCTCTTCTGCGTCCCCACCCAGCAGAAGGCTCT

In [42]:
aln = edlib.align(testread, seq_ref, task="path")
print(aln)
print(edlib.getNiceAlignment(aln, testread, seq_ref))

{'editDistance': 148886, 'alphabetLength': 4, 'locations': [(0, 152239)], 'cigar': '57099D1=38055D1=19025D1=19030D1=1D1=5D2=7D1=3D3=1D1=8D1=6D1=5D1=3D1=6D2=3D1=7D1=2D1=5D1=4D1=2D2=9D1=1D1=19D1=12D1=1D1=6D1=6D1=2D1=7D1=1D2=5D1=2D1=7D2=2D1=11D2=3D1=13D2=2D1=2D1=3D1=1D2=5D3=6D1=3D3=4D1=1D1=2D1=2D1=1D1=3D1=1D1=4D1=5D1=3D1=2D1=1D1=6D2=5D1=3D1=1D1=7D2=1D1=9D1=2D5=2D2=7D1=4D1=2D1=3D1=10D1=9D1=4D1=1D4=4360D1=24D3=7D1=14D2=4D1=3D4=2D2=1D2=2D1=2D1=6D1=11D1=2D1=1D1=2D1=2D1=2D1=5D1=4D2=13D1=11D2=6D1=2D1=10D1=5D1=6D1=1D1=8D1=21D1=1D2=5D2=7D1=3D3=4D1=12D1=2D1=3D2=2D1=8D2=1D1=6D1=7D1=1D1=16D1=3D2=4D1=1D1=13D1=7D1=1D2=5D1=5D1=9D1=1D1=45D1=2D1=4D2=7D1=5D1=5D1=7D1=2D2=2D1=10D2=3D1=9D1=1D2=2D3=1D1=4D1=3D1=7D1=2D1=10D1=5D1=2D1=3D1=1D1=3D1=8D1=3D1=2D1=2D2=2D1=2D1=2D1=10D1=9D2=1D2=1D2=2D1=4D1=10D4=1D2=8D1=1D1=4D2=1D2=4D2=1D1=8D1=1D1=2D1=4D1=1D2=4D1=1D1=2D1=8D2=1D2=2D1=9D2=6D1=7D1=9D2=1D1=3D1=2D2=1D1=5D1=2D1=8D1=6D1=5D1=5D1=8D1=5D1=1D2=1D2=1D1=11D2=1D3=1D1=5D1=1D1=1D3=2D1=2D1=1D1=2D1=9D1=2D1=1D1=2D2=18D2=16D