In [None]:
# default_exp core

# pytrim2

> A Python program for trimming and demultiplexing nanopore reads.

In [None]:
#hide
from nbdev.showdoc import *

In [None]:
#export
# Import dependencies
from Bio import SeqIO
from Bio import Align
import numpy as np

In [None]:
#export

def findAlingments(record_dict, barcode_primer, inward_end, max_alignments):
    """Locally align `barcode_primer` against the start of every read.

    For each record in `record_dict`, the first `inward_end` bases of the
    read are aligned against `barcode_primer` with a local pairwise aligner
    (match = 1, mismatch = 0, gap = -2).

    Parameters
    ----------
    record_dict : mapping
        Maps record ids to objects exposing a sliceable `.seq`
        (for example the result of `SeqIO.index`).
    barcode_primer : str
        Primer/barcode sequence to search for.
    inward_end : int
        Length of the read prefix that is searched (`seq[0:inward_end]`).
    max_alignments : int
        Largest number of optimal alignments that is recorded for one read.

    Returns
    -------
    numpy.ndarray
        Float array of shape `(len(record_dict), max_alignments + 2)` with
        one row per record, in `record_dict.keys()` order:

        * columns `0 .. max_alignments - 1`: for each optimal alignment,
          `alignment.aligned[0][0][1]`, i.e. the end coordinate in the read
          of the first aligned block (unused slots stay 0);
        * column `-2`: the optimal score divided by `len(barcode_primer)`;
        * column `-1`: the number of optimal alignments recorded.

        A row is left entirely at 0 when the read has more than
        `max_alignments` optimal alignments or when the searched prefix is
        empty.
    """
    record_keys = list(record_dict.keys())

    aligner = Align.PairwiseAligner()
    aligner.match_score = 1.0
    aligner.mismatch_score = 0
    aligner.gap_score = -2
    aligner.mode = "local"

    al_array = np.zeros((len(record_keys), max_alignments + 2))

    for row, key in enumerate(record_keys):
        # Honour the caller-supplied window instead of a hard-coded 200.
        seq = record_dict[key].seq[0:inward_end]
        if len(seq) == 0:
            # Nothing to align; keep the all-zero row.
            continue

        alignments = aligner.align(seq, barcode_primer)
        try:
            n_found = len(alignments)
        except OverflowError:
            # Biopython raises this when the number of optimal alignments
            # does not fit in a native integer; that is certainly more than
            # max_alignments, so treat it the same way.
            continue
        if n_found > max_alignments:
            continue

        # End coordinate (in the read) of the first aligned block of each
        # optimal alignment.
        ends = [alignment.aligned[0][0][1] for alignment in alignments]
        al_array[row, 0:len(ends)] = ends
        al_array[row, -1] = len(ends)
        al_array[row, -2] = alignments.score / len(barcode_primer)

    return al_array
    
    

In [None]:
#hide

# Smoke test: index the test FASTA file and run the primer search on it.
# `record_dict` and `barcode_primer` are also read by the following cell.
record_dict = SeqIO.index("test_data/test.fasta", "fasta")
barcode_primer = "AGTGCCCCGCGCCACGCGCG"
# Positional arguments: records, primer, inward_end=200, max_alignments=10.
ends = findAlingments(record_dict, barcode_primer, 200, 10)
# One row per read, max_alignments + 2 columns (see the output below).
print(ends)


[[ 20.    0.    0.    0.    0.    0.    0.    0.    0.    0.    1.    1. ]
 [ 62.   73.    0.    0.    0.    0.    0.    0.    0.    0.    0.5   2. ]
 [192.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.5   1. ]]


In [None]:
#slow
# Cell-level walk-through of the alignment search that findAlingments
# performs, using `record_dict` and `barcode_primer` from the previous cell.
# Unlike the function, column -2 of `al_array` receives the raw alignment
# score rather than the score divided by the primer length.
record_keys = list(record_dict.keys())
    
# Local aligner: match = 1, mismatch = 0, every gap position = -2.
aligner = Align.PairwiseAligner()
aligner.match_score = 1.0
aligner.mismatch_score = 0
aligner.gap_score = -2
aligner.mode = "local"
   
max_alignments = 10
n_sequences = len(record_keys)

# One row per read; max_alignments position slots plus two summary columns.
array_cols = max_alignments + 2
al_array = np.zeros( (n_sequences, array_cols) )

for i in list(range(0, n_sequences, 1)):
    al = []
    # Only the first 200 bases of each read are searched.
    seq = record_dict[record_keys[i]].seq[0:200]        
    alignments = aligner.align(seq, barcode_primer)
    len_alignments = len(alignments)
    # Reads with more than max_alignments optimal alignments keep a zero row.
    if(len_alignments <= max_alignments):
        # NOTE(review): `score` is not read again anywhere in this cell.
        score = alignments.score
        al = [j.aligned for j in alignments]
        len_al = len(al)
        for k in range(0, len_al):
            # Keep element [0][0][1] of each `aligned` value; per Biopython's
            # Alignment.aligned this should be the end of the first aligned
            # block in `seq` -- TODO confirm against the installed version.
            al[k] = (al[k][0][0][1])
        al_array[i, 0:len(al)] = al
        # Last column: number of alignments; second to last: raw score.
        al_array[i, -1] = len_al
        al_array[i, -2] = alignments.score

