In [None]:
# default_exp core

# pytrim2

> A Python program for trimming and demultiplexing nanopore reads.

In [None]:
#hide
from nbdev.showdoc import *

In [None]:
#export
# Import dependencies
from Bio import SeqIO
import numpy as np
from Bio import Align
from Bio import pairwise2

In [None]:
#export

def findAdapter(seq_record, barcode_primer, cutOff):
    """Report how well ``barcode_primer`` matches ``seq_record``.

    The primer is locally aligned against ``seq_record.seq`` with
    ``pairwise2.align.localms`` (match 1, mismatch 0, gap open -2,
    gap extend -2). The best alignment score is expressed as a percentage
    of the primer length and printed together with the read length.
    ``"TRUE"`` is printed first when that percentage reaches ``cutOff``.

    Parameters
    ----------
    seq_record : object with a ``seq`` attribute
        The read to search; ``seq_record.seq`` is passed to the aligner and
        its length is printed.
    barcode_primer : sequence
        The barcode/primer to look for.
    cutOff : number
        Minimum accuracy in percent for the read to be flagged with "TRUE".

    Returns
    -------
    None
        The result is only printed. The end position of the best hit is
        not reported by this function.
    """
    # Alignment using pairwise2
    alignments = pairwise2.align.localms(seq_record.seq, barcode_primer, 1, 0, -2, -2)

    if alignments:
        # Best score as a percentage of the primer length (a perfect,
        # gap-free match of the whole primer scores len(barcode_primer)).
        best_score = max(hit.score for hit in alignments)
        accuracy = best_score / len(barcode_primer) * 100
    else:
        # The aligner returned no alignments at all; taking a maximum over
        # an empty collection would raise, so report "no match" instead.
        accuracy = 0.0

    if accuracy >= cutOff:
        print("TRUE")
    print("queried length:", len(seq_record.seq), "; accuracy [%]:", accuracy)

In [None]:
#export

def findAlingments(record_dict, barcode_primer, inward_end, max_records=100):
    """Locally align ``barcode_primer`` to the start of the leading records.

    For each of the first ``max_records`` keys of ``record_dict`` the first
    ``inward_end`` positions of the record's sequence are aligned against
    ``barcode_primer`` with a local ``Align.PairwiseAligner``
    (match 1.0, mismatch 0, gap -2).

    Parameters
    ----------
    record_dict : mapping
        Maps record keys to objects exposing a ``seq`` attribute.
    barcode_primer : sequence
        The barcode/primer to align against each read start.
    inward_end : int
        Only ``seq[0:inward_end]`` of each record is searched.
    max_records : int or None, optional
        Number of leading records to process (default 100, the previously
        hard-coded limit). ``None`` processes every record.

    Returns
    -------
    The ``aligned`` attribute of the first alignment of the LAST record
    processed, or the string ``"none"`` when no record was processed.
    Results for earlier records are overwritten and not returned.

    NOTE(review): ``alignments[0]`` assumes the aligner yields at least one
    alignment for every processed record - confirm for reads without a hit.
    """
    aligner = Align.PairwiseAligner()
    aligner.match_score = 1.0
    aligner.mismatch_score = 0
    aligner.gap_score = -2
    aligner.mode = "local"

    # Sentinel returned unchanged when there is nothing to align.
    al = "none"
    for key in list(record_dict.keys())[:max_records]:
        read_start = record_dict[key].seq[0:inward_end]
        alignments = aligner.align(read_start, barcode_primer)
        # Keep only the aligned coordinate blocks of the first alignment.
        al = alignments[0].aligned

    return al
    
    

In [None]:
#slow

# Open the test FASTQ file as a dict-like index of records
# (findAlingments uses .keys() and item lookup on it).
record_dict = SeqIO.index("test_data/test.fastq", "fastq")
# Barcode/primer sequence to search for in each read.
barcode_primer = "AATGCGAAG"
# Search only the first 200 positions of each read (the `inward_end` argument).
findAlingments(record_dict, barcode_primer, 200)

(((93, 101),), ((0, 8),))

In [None]:
# Exploratory cell: print the aligned coordinate blocks of EVERY alignment
# found in the first 200 positions of up to 10000 reads.
# Relies on `record_dict` and `barcode_primer` defined in an earlier cell.
record_keys = list(record_dict.keys())
    
# Local aligner: match 1.0, mismatch 0, gap -2.
aligner = Align.PairwiseAligner()
aligner.match_score = 1.0
aligner.mismatch_score = 0
aligner.gap_score = -2
aligner.mode = "local"
    
# NOTE(review): `al` is set here but never reassigned in this cell.
al = "none"
for i in record_keys[0:10000]:
    # Only the first 200 positions of the read are searched.
    seq = record_dict[i].seq[0:200]        
    alignments = aligner.align(seq, barcode_primer)
    # NOTE(review): `score` is not used anywhere in this cell.
    score = alignments.score
    # A read can yield several alignments; each one is printed.
    for j in alignments:
        print(j.aligned)
            

(((79, 87),), ((0, 8),))
(((89, 97),), ((0, 8),))
(((23, 31),), ((0, 8),))
(((43, 52),), ((0, 9),))
(((107, 115),), ((0, 8),))
(((113, 121),), ((1, 9),))
(((118, 124),), ((1, 7),))
(((172, 181),), ((0, 9),))
(((0, 1),), ((2, 3),))
(((27, 35),), ((0, 8),))
(((46, 53),), ((0, 7),))
(((46, 51), (51, 54)), ((0, 5), (6, 9)))
(((100, 108),), ((0, 8),))
(((107, 114),), ((1, 8),))
(((118, 125),), ((0, 7),))
(((118, 123), (123, 126)), ((0, 5), (6, 9)))
(((118, 121), (121, 126)), ((0, 3), (4, 9)))
(((118, 126),), ((1, 9),))
(((141, 145), (146, 150)), ((1, 5), (5, 9)))
(((141, 150),), ((0, 9),))
(((147, 155),), ((1, 9),))
(((152, 160),), ((0, 8),))
(((166, 174),), ((1, 9),))
(((172, 181),), ((0, 9),))
(((181, 186), (187, 191)), ((0, 5), (5, 9)))
(((182, 191),), ((0, 9),))
(((189, 198),), ((0, 9),))
(((193, 200),), ((1, 8),))
(((16, 24),), ((1, 9),))
(((21, 29),), ((1, 9),))
(((29, 37),), ((0, 8),))
(((191, 200),), ((0, 9),))
(((6, 15),), ((0, 9),))
(((68, 77),), ((0, 9),))
(((104, 113),), ((0, 9)

In [None]:
# Show the current value of the notebook-level `al`.
# NOTE(review): the preceding cell assigns `al = "none"` and never changes it,
# so the recorded output below presumably comes from an earlier kernel state -
# confirm with Restart Kernel -> Run All.
print(al)

(((46, 55),), ((0, 9),))
