In [24]:
import re
### Cleaning the FASTA file so that it is easier to work with
def clean_fasta(input_file, output_file):
    """Rewrite a FASTA file so that every record occupies exactly two lines.

    Header lines (those starting with ``>``) are copied through, and the
    sequence lines that follow a header are joined into one line with the
    surrounding whitespace of each input line removed.

    Every header is followed by exactly one sequence line, even when the
    record has no sequence (the line is then empty). This keeps header and
    sequence lines strictly alternating, so the output can be split with
    ``lines[::2]`` / ``lines[1::2]``. An empty input produces an empty output.

    Args:
        input_file: path of the FASTA file to read.
        output_file: path of the cleaned file to write (overwritten).
    """
    with open(input_file, 'r') as f_in, open(output_file, 'w') as f_out:
        fragments = []       # non-empty sequence pieces of the current record
        in_record = False    # True once at least one header has been seen
        for line in f_in:
            if line.startswith('>'):
                # Close the previous record before starting a new one. Sequence
                # text found before the first header is still written out.
                if in_record or fragments:
                    f_out.write(''.join(fragments) + '\n')
                # Normalise the line ending so a header on an unterminated
                # last line cannot run into the sequence line that follows.
                f_out.write(line.rstrip('\n') + '\n')
                fragments = []
                in_record = True
            else:
                piece = line.strip()
                if piece:
                    fragments.append(piece)
        # Write out the last record (nothing is written for an empty input).
        if in_record or fragments:
            f_out.write(''.join(fragments) + '\n')

clean_fasta('A_thaliana_cDNA_seqs.txt', "Clean_cDNA_seqs.txt")
# Read the cleaned file back; the context manager closes the handle even if
# reading fails.
with open("Clean_cDNA_seqs.txt", "r") as clean_seqs_open:
    clean_seqs = clean_seqs_open.readlines()
print(len(clean_seqs))  # number of lines read (header lines plus sequence lines)
# The cleaned file alternates header line / sequence line, so even indices are
# the headers and odd indices are the sequences.
seq_names = clean_seqs[::2]  # list with the names of every sequence
seq_list = clean_seqs[1::2]  # list with every sequence

### Building the forward and reverse primers for every sequence ###
PRIMER_LENGTH = 20  # number of nucleotides taken from each end of a sequence
# Maps each upper-case base to its lower-case complement in a single pass.
# Any other character (including lower-case bases) is left unchanged.
complement_table = str.maketrans("ACTG", "tgac")

forward_primers_list = []  # one forward primer per sequence, each ending in "\n"
reverse_primers_list = []  # one reverse primer per sequence, each ending in "\n"
for record in seq_list:
    bases = record.replace("\n", "")  # drop the line break so it is never sliced into a primer
    # Forward primer: complement of the first 20 nucleotides.
    head = bases[:PRIMER_LENGTH]
    forward_primers_list.append(head.translate(complement_table) + "\n")
    # Reverse primer: the last 20 nucleotides, reversed and then complemented.
    tail = bases[-PRIMER_LENGTH:]
    reverse_primers_list.append(tail[::-1].translate(complement_table) + "\n")
print("This is the list for the forward primers" + str(forward_primers_list))
print("This is the list for the reverse primers" + str(reverse_primers_list) + "\n")

### Checking the melting temperature ###
# Formula to be used: Melting point = 2*(A_counts+T_counts) + 4*(G_counts+C_counts)

def forward_melting_point(primers=None):
    """Return the melting temperature of each forward primer.

    The temperature is computed as ``2 * (A + T) + 4 * (G + C)``, where each
    letter is the number of occurrences of that base in the primer. Bases are
    counted case-insensitively; any other character (such as the trailing
    newline) contributes nothing.

    Args:
        primers: optional iterable of primer strings. When omitted, the
            module-level ``forward_primers_list`` is used, as looked up at
            call time.

    Returns:
        A list of integers, one per primer, in the same order as the input.
    """
    if primers is None:
        primers = forward_primers_list
    forward_melting_points = []
    for primer in primers:
        bases = primer.lower()
        weak_pairs = bases.count('a') + bases.count('t')
        strong_pairs = bases.count('g') + bases.count('c')
        forward_melting_points.append(2 * weak_pairs + 4 * strong_pairs)
    return forward_melting_points

# Melting temperature of every forward primer, in the same order as forward_primers_list.
forward_melting_points = forward_melting_point()

def reverse_melting_point(primers=None):
    """Return the melting temperature of each reverse primer.

    The temperature is computed as ``2 * (A + T) + 4 * (G + C)``, where each
    letter is the number of occurrences of that base in the primer. Bases are
    counted case-insensitively; any other character (such as the trailing
    newline) contributes nothing.

    Args:
        primers: optional iterable of primer strings. When omitted, the
            module-level ``reverse_primers_list`` is used, as looked up at
            call time.

    Returns:
        A list of integers, one per primer, in the same order as the input.
    """
    if primers is None:
        primers = reverse_primers_list
    reverse_melting_points = []
    for primer in primers:
        bases = primer.lower()
        weak_pairs = bases.count('a') + bases.count('t')
        strong_pairs = bases.count('g') + bases.count('c')
        reverse_melting_points.append(2 * weak_pairs + 4 * strong_pairs)
    return reverse_melting_points

# Melting temperature of every reverse primer, in the same order as reverse_primers_list.
reverse_melting_points = reverse_melting_point() 

### Checking if melting temperatures are between 55 and 62 degrees and if the difference between primers melting temperatures is less or equal to 4 degrees
MIN_MELTING_TEMP = 55  # lowest accepted melting temperature (inclusive)
MAX_MELTING_TEMP = 62  # highest accepted melting temperature (inclusive)

# Positions are 0-based indices into the corresponding primer list. enumerate()
# restarts at 0 for each list, so reverse positions are not shifted by the
# number of forward primers.
forward_positions_above_temp = [
    position for position, temp in enumerate(forward_melting_points) if temp > MAX_MELTING_TEMP
]
forward_positions_below_temp = [
    position for position, temp in enumerate(forward_melting_points) if temp < MIN_MELTING_TEMP
]
reverse_positions_above_temp = [
    position for position, temp in enumerate(reverse_melting_points) if temp > MAX_MELTING_TEMP
]
reverse_positions_below_temp = [
    position for position, temp in enumerate(reverse_melting_points) if temp < MIN_MELTING_TEMP
]

# Each category is checked independently so that a problem in one list does not
# hide the problems found in the others.
if forward_positions_above_temp:
    print("This positions from the forward primers need to be revised because of high temperature: " + str(forward_positions_above_temp))
if forward_positions_below_temp:
    print("This positions from the forward primers need to be revised because of low temperature: " + str(forward_positions_below_temp) + "\n")  # these temperatures have to be raised manually
if reverse_positions_above_temp:
    print("This positions from the reverse primers need to be revised because of high temperature: " + str(reverse_positions_above_temp))
if reverse_positions_below_temp:
    print("This positions from the reverse primers need to be revised because of low temperature: " + str(reverse_positions_below_temp))
# Raise the melting temperature of the forward primers that were too cold:
# the first "a" is kept, and up to two "a" from the second one onwards become "g".
for low_position in forward_positions_below_temp:
    primer = forward_primers_list[low_position]
    # find() returns -1 when there is no such "a"; the slices below then split
    # just before the last character and the primer is stored back unchanged.
    second_a = primer.find("a", primer.find("a") + 1)
    kept_part = primer[:second_a]
    edited_part = primer[second_a:].replace("a", "g", 2)
    forward_primers_list[low_position] = kept_part + edited_part

### Recomputing the melting points so the primer changes made above are reflected
forward_melting_points, reverse_melting_points = forward_melting_point(), reverse_melting_point()
# sep="" keeps the label glued to the list, exactly like string concatenation.
print("Forward melting points are:", forward_melting_points, sep="")
print("Reverse melting points are:", reverse_melting_points, "\n", sep="")

### Checking temperature difference between primers
MAX_PRIMER_TEMP_DIFFERENCE = 4  # largest accepted gap between the two primers of a pair

# zip() walks the forward/reverse pairs that actually exist instead of assuming
# a fixed number of sequences.
flag = False  # becomes True as soon as one pair exceeds the accepted gap
for forward_temp, reverse_temp in zip(forward_melting_points, reverse_melting_points):
    if abs(forward_temp - reverse_temp) > MAX_PRIMER_TEMP_DIFFERENCE:
        print("Melting point needs to be checked")
        flag = True
if not flag:
    print("Difference between melting temperatures is optimal" + "\n")

### Export the primers with their corresponding headers into one file
# Flat list of header, forward primer, reverse primer for every sequence.
# zip() covers however many sequences were read instead of assuming a fixed count.
primers_list = []
for name, forward_primer, reverse_primer in zip(seq_names, forward_primers_list, reverse_primers_list):
    primers_list.extend((name, forward_primer, reverse_primer))

def write_fasta(filename, data):
    """Write entries to ``filename``, one header line and one sequence line each.

    Each entry is split at its first newline: the text before it is written as
    a line of its own, and the text after it has its newlines removed and is
    written as a second line. If nothing follows the first newline (or the
    entry contains no newline at all), only the first line is written.

    Args:
        filename: path of the file to write (overwritten).
        data: iterable of strings, e.g. ``"header\\nsequence"`` or ``"line\\n"``.
    """
    with open(filename, 'w') as f:
        for entry in data:
            # partition() never fails, unlike unpacking split('\n', 1) on an
            # entry that has no newline.
            header, _, sequence = entry.partition('\n')
            f.write(header + '\n')
            sequence = sequence.replace('\n', '')
            if sequence:
                # Terminate the sequence so the next header starts on a new line.
                f.write(sequence + '\n')
# Save the headers and primers, then read the file back to show what was written.
write_fasta("primers_file.txt", primers_list)
with open("primers_file.txt") as final_output:
    primers_text = final_output.read()
print("The primers for each sequence are:", primers_text, sep="\n")

24
This is the list for the forward primers['tacccaagaataggcctacc\n', 'taccaccttgtttgacacca\n', 'taccaccgatacaaagttct\n', 'tactcctatttttactccaa\n', 'tacgaatgaaggaagtttag\n', 'tacctgttacattctccaag\n', 'tacgggaataggaaactttc\n', 'taccgccttaggccgctaaa\n', 'tacgtttgcagagtctttgt\n', 'tacttttagggacgtagtag\n', 'tactgatgttttgcgtatct\n', 'taccttagaccaaagaggcc\n']
This is the list for the reverse primers['ctatgaagacgaaggaaccc\n', 'ttacctccaagctgaagcaa\n', 'tcacttcctgcatctccaag\n', 'ttacctccaagcacaagaag\n', 'tcatttccacgacgaaacgg\n', 'tcacttccatgcacaagatg\n', 'ctaacatttccaagcagaga\n', 'ctaagaacgaggcgtccaag\n', 'ctagtttgggtttggcttcc\n', 'ctaaaaccgccaagctgatg\n', 'tcaatcgaacggctgagatt\n', 'ttaacgccaagcggatgcga\n']

This positions from the forward primers need to be revised because of low temperature: [3, 4, 10]

Forward melting points are:[60, 58, 56, 56, 58, 56, 56, 62, 56, 56, 58, 60]
Reverse melting points are:[60, 58, 60, 58, 60, 58, 56, 62, 60, 60, 58, 62]

Difference between melting temperature