# Filtering primers against SARS-CoV-2
A truncated list of primers from the validated [240,000 orthogonal primer library](https://www.pnas.org/doi/10.1073/pnas.0812506106) was checked against the SARS-CoV-2 genome using BLAST. The generated alignment XML file was then parsed, and the primers without any alignment were further filtered to have a melting temperature between 60 and 65 degrees Celsius to generate a primer list (`seq_file.txt`).

In [48]:
from Bio import SeqIO
from Bio.Blast import NCBIXML
from Bio.SeqUtils import MeltingTemp as mt

fasta_file = 'bc25mer.240k.700trunct.fasta'
xml_file = '7P4JPJ2M016-Alignment.xml'

# Index the candidate primers by FASTA ID so records can be looked up by name.
q_dict = SeqIO.index(fasta_file, 'fasta')

# Collect the ID (first whitespace-separated token of the query line) of every
# BLAST query that produced at least one alignment. The XML handle is closed
# as soon as parsing finishes.
with open(xml_file) as xml_handle:
    hits = [
        record.query.split()[0]
        for record in NCBIXML.parse(xml_handle)
        if record.alignments
    ]

# Primers with no alignment at all are the ones we keep.
misses = set(q_dict.keys()) - set(hits)

# Set iteration order is arbitrary and changes between interpreter runs, and
# the order of seq_file.txt decides which primers are picked downstream.
# Sorting the names makes the output reproducible.
orphan_records = [q_dict[name] for name in sorted(misses)]

# Keep only primers whose nearest-neighbour melting temperature lies strictly
# between 60 and 65. Mg=1.5 is forwarded to Tm_NN as the magnesium
# concentration (presumably mM, per the Biopython docs -- confirm).
with open('seq_file.txt', 'w') as seq_file:
    for record in orphan_records:
        tm = mt.Tm_NN(record.seq, Mg=1.5)
        if 60 < tm < 65:
            seq_file.write(str(record.seq) + '\n')

# Concatenate primers to create unique barcodes
Primers are selected from the filtered primer list to create unique barcodes with the following structure:

`Master forward + NNNNN + Unique barcode + NNNNN + Master reverse`

In [50]:
import string
import xlsxwriter

# Load the filtered primer sequences, one per line. Blank lines are skipped so
# an empty string can never be selected as a primer or barcode.
with open('seq_file.txt') as f:
    stripped_lines = (line.strip() for line in f)
    seq_list = [seq for seq in stripped_lines if seq]

# Number of barcoded templates to generate.
n_barcodes = 96

if len(seq_list) < 2:
    raise ValueError(
        'seq_file.txt must contain at least two primers '
        '(master forward and master reverse); found %d' % len(seq_list)
    )

# Select master primers: the first two sequences are taken out of the pool so
# they cannot be reused as internal barcodes.
for_primer = seq_list.pop(0)
rev_primer = seq_list.pop(0)

# Generate internal barcodes. If fewer than n_barcodes sequences remain, the
# barcode lists are simply shorter than the label list.
barcodes_label = ['>barcode%i' % i for i in range(n_barcodes)]
barcodes_alignment = seq_list[:n_barcodes]
barcodes_seq = [
    for_primer + 'NNNNN' + barcode + 'NNNNN' + rev_primer
    for barcode in barcodes_alignment
]

# Write FASTA files. Each record is a label line, a sequence line and a
# trailing blank line. Plain loops are used because the writes are side
# effects; a comprehension would only build a throwaway list.

# Full templates: master forward + spacer + barcode + spacer + master reverse.
with open('templates_full.fasta', 'w') as f:
    for label, sequence in zip(barcodes_label, barcodes_seq):
        f.write(label + '\n' + sequence + '\n' + '\n')

# Barcode-only sequences.
with open('templates.fasta', 'w') as f:
    for label, sequence in zip(barcodes_label, barcodes_alignment):
        f.write(label + '\n' + sequence + '\n' + '\n')
    
# Generate xlsx file for IDT ordering
# Well addresses run row by row: A1..A12, B1..B12, ..., H1..H12.
row_label = list(string.ascii_uppercase[:8])
well_address = [letter + str(number) for letter in row_label for number in range(1, 13)]

row_headers = ['Well Address', 'Name', 'Sequences']

# One (well, name, sequence) tuple per oligo; zip stops at the shortest input.
# NOTE(review): the names keep the leading '>' from the FASTA labels --
# confirm this is wanted in the order sheet.
idt_list = list(zip(well_address, barcodes_label, barcodes_seq))

# The context manager closes (and thereby saves) the workbook even if one of
# the writes raises.
with xlsxwriter.Workbook('IDT_order.xlsx') as workbook:
    worksheet = workbook.add_worksheet()

    # Header row
    worksheet.write_row(0, 0, row_headers)

    # Data rows start directly below the header.
    for row_index, entry in enumerate(idt_list, start=1):
        worksheet.write_row(row_index, 0, entry)