In [41]:
from collections import Counter

from Bio import SeqIO
from Bio.Seq import Seq
from tqdm import tqdm

In [46]:
class BarcodeCounter:
    """
    Counts exact barcode matches in trimmed reads stored as FASTA files.

    Every method re-reads its input files on each call; nothing is cached
    on the instance.
    """

    def __init__(self, trimmed_forward, trimmed_reverse, bc14, bc30, linker='TGGT'):
        """
        Stores the input locations; no file is opened here.

        :param trimmed_forward: FASTA file with the trimmed forward reads
        :param trimmed_reverse: FASTA file with the trimmed reverse reads
        :param bc14: FASTA file with the barcodes compared against the first
            14 characters of each forward read
        :param bc30: FASTA file with the barcodes compared against the last
            30 characters of each forward read
        :param linker: sequence placed between a bc14 and a bc30 barcode when
            the combined barcodes are built
        """
        self.trimmed_forward = trimmed_forward
        self.trimmed_reverse = trimmed_reverse
        self.bc14 = bc14
        self.bc30 = bc30
        self.linker = linker

    @staticmethod
    def _count_exact(candidates, observed):
        """
        Counts how often each candidate occurs in the observed sequences

        The observed sequences are tallied once, so the cost is a single pass
        over ``observed`` plus one dictionary lookup per candidate, instead of
        one full scan of ``observed`` per candidate.

        :param candidates: iterable of sequences to report on
        :param observed: iterable of sequences to count
        :return: A dictionary keyed by candidate (in candidate order) with the
            number of exact occurrences as the value; absent candidates map to 0.
        """
        tally = Counter(observed)
        # Indexing a Counter with a missing key yields 0 and does not insert it.
        return {candidate: tally[candidate] for candidate in candidates}

    def fasta_reader(self, fasta):
        """
        Reads a fasta file and returns a list of sequences

        :param fasta: the fasta file containing the sequences
        :return: A list of sequences as plain strings, in file order.
        """
        return [str(record.seq) for record in SeqIO.parse(fasta, "fasta")]

    def reverse_fix(self):
        """
        Reads the reverse fasta file and returns the complement of every read

        NOTE(review): only the base-wise complement is taken; the reads are
        not reversed. The previous docstring promised a reverse complement,
        so confirm whether ``reverse_complement()`` was intended.

        :return: A list with the complemented sequence of each trimmed reverse read.
        """
        return [
            str(Seq(read).complement())
            for read in self.fasta_reader(self.trimmed_reverse)
        ]

    def merger(self):
        """
        Concatenates the forward reads and the complemented reverse reads

        :return: A flat list of sequence strings: all forward reads first,
            followed by the output of ``reverse_fix``.
        """
        forward_reads = self.fasta_reader(self.trimmed_forward)
        reverse_reads = self.reverse_fix()
        return [*forward_reads, *reverse_reads]

    def barcode_creater(self):
        """
        Builds every bc14 + linker + bc30 combination

        With 14 bp and 30 bp inputs and the default 4 bp linker the results
        are 48 bp long.

        :return: A list of combined barcodes, ordered by bc14 entry and then
            by bc30 entry.
        """
        bp_14 = self.fasta_reader(self.bc14)
        bp_30 = self.fasta_reader(self.bc30)

        return [head + self.linker + tail for head in bp_14 for tail in bp_30]

    def count_bc14(self):
        """
        Counts the forward reads whose first 14 characters equal each bc14 barcode

        :return: A dictionary with the barcode as the key and the number of reads as the value.
        """
        bp_14 = self.fasta_reader(self.bc14)
        prefixes = (read[:14] for read in self.fasta_reader(self.trimmed_forward))
        return self._count_exact(bp_14, prefixes)

    def count_bc30(self):
        """
        Counts the forward reads whose last 30 characters equal each bc30 barcode

        :return: A dictionary with the barcode as the key and the number of reads as the value.
        """
        bp_30 = self.fasta_reader(self.bc30)
        suffixes = (read[-30:] for read in self.fasta_reader(self.trimmed_forward))
        return self._count_exact(bp_30, suffixes)

    def count_barcodes(self):
        """
        Counts the forward reads that are exactly equal to each combined barcode

        The whole read must match a bc14 + linker + bc30 combination; partial
        or embedded matches are not counted.

        :return: A dictionary with the barcode as the key and the number of reads as the value.
        """
        barcodes = self.barcode_creater()
        reads = self.fasta_reader(self.trimmed_forward)
        return self._count_exact(barcodes, reads)
    

In [47]:
# Locations of the four FASTA inputs; the keyword names match the
# BarcodeCounter constructor parameters so the mapping can be unpacked as-is.
args = dict(
    trimmed_forward='/mnt/c/Users/pc/Downloads/barcod/qc_processed_barcode_1/trimmed_forward.fasta',
    trimmed_reverse='/mnt/c/Users/pc/Downloads/barcod/qc_processed_barcode_1/trimmed_reverse.fasta',
    bc14='/mnt/c/Users/pc/Downloads/barcod/qc_processed_barcode_1/b14.fasta',
    bc30='/mnt/c/Users/pc/Downloads/barcod/qc_processed_barcode_1/b30.fasta',
)

# The linker is left at the constructor's default.
result = BarcodeCounter(**args)

In [48]:
# Per-barcode tally of forward reads whose first 14 characters match a bc14 entry.
result.count_bc14()

{'GTTTAGATATACAC': 124656,
 'TAACTTCGCCTGCT': 350912,
 'ATCCCCAAAGAGGA': 135381,
 'TCATATCAGCCGTC': 111048,
 'AGGTGTCCGGTATG': 85648,
 'ATATGTTCTGGCAT': 399719,
 'ACACGCAGGAAACT': 73266,
 'GAGTTGTAAGAGAC': 89211,
 'AGCAGAAAAGTTCG': 1448,
 'ATTATTCTGCGCCT': 196193}

In [49]:
# Per-barcode tally of forward reads whose last 30 characters match a bc30 entry.
result.count_bc30()