In [1]:
from Bio import SeqIO
from Bio.Seq import Seq

from tqdm import tqdm

import json

In [2]:
trimmed_forward = '/mnt/c/Users/pc/Downloads/barcod/qc_processed_barcode_1/trimmed_forward.fasta'
trimmed_reverse = '/mnt/c/Users/pc/Downloads/barcod/qc_processed_barcode_1/trimmed_reverse.fasta'
bc14 = '/mnt/c/Users/pc/Downloads/barcod/qc_processed_barcode_1/b14.fasta'
bc30 = '/mnt/c/Users/pc/Downloads/barcod/qc_processed_barcode_1/b30.fasta'

In [3]:
def fasta_reader(fasta):
    """
    Reads a fasta file and returns a list of sequences
    
    :param fasta: the fasta file containing the sequences
    :return: A list of sequences.
    """
    n = []
    for record in SeqIO.parse(fasta , "fasta"):
        n.append(str(record.seq))
    return n

In [4]:
reversed_fasta = fasta_reader(trimmed_reverse)
forward_fasta = fasta_reader(trimmed_forward)
bp_14 = fasta_reader(bc14)
bp_30 = fasta_reader(bc30)

In [5]:
def reverse_fix():
    """
    Reads a reverse fasta file and returns a list of complemented seqeunce

    :return: A list of the reverse complement of the trimmed reads.
    """

    corrected = []
    for reads in reversed_fasta:
        corrected.append(str(Seq(reads).complement()))
    return corrected

In [6]:
def merger():
    """
    The function takes the trimmed forward and reverse reads and merges them into one list

    :return: A list of tuples.
    """
        
    merged = [*forward_fasta, *reversed_fasta]
    return merged

In [7]:
def barcode_creater():
    """
    The function takes in two barcodes and concatenates them with the linker sequence
    Creates 48 bp barcodes

    :return: A list of barcodes with the linker sequence added to the end of each
    """

    created = [b + 'TGGT' + x for b in bp_14 for x in bp_30]
    return created

barcodes = barcode_creater()

In [8]:
def count_bc14():
    """
    The function counts the number of times each 14-bp sequence occurs in the forward reads.

    :return: A dictionary with the barcode as the key and the number of reads as the value.
    """
    reads = [a[:14] for a in forward_fasta]

    counts = {}
    for bc in tqdm(bp_14):
        counts[bc] = reads.count(bc)

    with open('b14_counts.json', 'w') as fp:
        json.dump(counts, fp)

count_bc14()

100%|██████████| 10/10 [00:00<00:00, 36.88it/s]


In [9]:
def count_bc30():
    """
    The function counts the number of times each 30-bp sequence occurs in the forward reads.

    :return: A dictionary with the barcode as the key and the number of reads as the value.
    """
    reads = [a[-30:] for a in forward_fasta]

    counts = {}
    for bc in tqdm(bp_30):
        counts[bc] = reads.count(bc)

    with open('bc30_counts.json', 'w') as fp:
        json.dump(counts, fp)

count_bc30()

100%|██████████| 100000/100000 [48:30<00:00, 34.36it/s] 


In [10]:
def count_barcodes():
    """
    The function counts the number of times all sequence occurs in the forward reads.

    :return: A dictionary with the barcode as the key and the number of reads as the value.
    """
    reads = forward_fasta

    counts = {}
    for barcode in tqdm(barcodes):
        counts[barcode] = reads.count(barcode)
    
    with open('barcode_counts.json', 'w') as fp: 
        json.dump(counts, fp)

count_barcodes()

  0%|          | 755/1000000 [00:27<10:12:06, 27.21it/s]


KeyboardInterrupt: 