In [78]:
from Bio.Seq import Seq
from Bio.SeqUtils import MeltingTemp as mt
from Bio.Alphabet import IUPAC
from pathlib import Path
from typing import Union
import pandas as pd

In [97]:
def generate_primers(dna, start, end, output_file: Union[str, Path],
                     codon='NNS', length_primer=15, tm=None, return_df=False):
    '''
    Generate primers for saturation mutagenesis.

    One forward/reverse primer pair is designed for every third position
    counted from where ``start`` is found in ``dna`` up to (not including)
    where ``end`` is found.

    Parameters
    -----------
    dna : string
        DNA sequence containing the protein of study. It is converted to
        upper case before use.
        The DNA sequence should also contain at least 15 base pairs before the
        starting ATG and 15 base pairs after the stop codon.

    start : string
        Beginning of the DNA sequence that will be mutagenized.
        For example, if you will start mutating the first methionine, copy a few more dna bases
        so the algorithm can identify it 'ATGACCAGC'.

    end : string
        The algorithm will stop creating primers once it reaches that base.
        For example, if you will stop mutating at the stop codon, copy a few more dna bases ie. 'TAAATGATT'.
        It is searched for at or after the position of ``start``.

    output_file : str or Path
        File where the list of primers will be exported to. Only exports to excel.
        Example: 'path/primers.xlsx'. A falsy value (e.g. None) skips the export.

    codon : str, default 'NNS'
        Degenerate codon that will be used to create the primers. Check idt's website for a list
        of all mixed bases and letter code (https://www.idtdna.com/pages/products/custom-dna-rna/mixed-bases).
        This parameter should contain 3 letters, although can contain more.

    length_primer: int, default 15
        Number of bases that the primers will have to each side of the mutated codon.
        Total primer length will be 2*length_primer+3.

    tm : int, default None
        Melting temperature in Celsius of the primers. Will override length_primer.
        If none, primers will have a total length of 2*length_primer+3

    return_df : boolean, default False
        If true, will return a dataframe with the primers.

    Returns
    --------
    df : pandas dataframe, optional
        Dataframe containing the primers (columns 'FP_label', 'FP_seq',
        'RP_label', 'RP_seq'). Only returned when ``return_df`` is true;
        otherwise the function returns None.

    Raises
    -------
    ValueError
        If ``start`` is not found in ``dna``, or ``end`` is not found at or
        after ``start``.

    '''
    # Transform to upper case
    dna = dna.upper()

    # Find the first and last codons. str.find returns -1 when the motif is
    # missing; using that value as a position would silently shift the frame.
    start_codon = dna.find(start.upper())
    if start_codon == -1:
        raise ValueError("The 'start' sequence was not found in 'dna'.")

    # Search downstream of start so an earlier, unrelated occurrence of the
    # end motif cannot produce an empty primer list.
    end_codon = dna.find(end.upper(), start_codon)
    if end_codon == -1:
        raise ValueError(
            "The 'end' sequence was not found in 'dna' downstream of 'start'.")

    # loop through DNA and make a list with fp and second list with rp
    forward_primers, reverse_primers = _create_primers_list(
        dna, start_codon, end_codon, codon, length_primer, tm)

    # Number the labels from the primers actually produced, so the columns
    # always have matching lengths even when (end - start) is not a multiple
    # of three.
    label_fp = ['fp '+str(i) for i in range(len(forward_primers))]
    label_rp = ['rp '+str(i) for i in range(len(reverse_primers))]

    # Create dataframe
    dictionary = {'FP_label': label_fp, 'FP_seq': forward_primers,
                  'RP_label': label_rp, 'RP_seq': reverse_primers}
    df = pd.DataFrame(dictionary)

    # Export dataframe
    if output_file:
        df.to_excel(Path(output_file), sheet_name='Primers', index=False)

    # Return dataframe
    if return_df:
        return df
    return None

In [96]:
def _create_primers_list(dna, start_codon, end_codon, codon, length_primer, tm):
    '''Aux function to create list with fp and list with rp'''
    forward_primers = []
    reverse_primers = []
    for codonposition in range(start_codon, end_codon, 3):
        # Create fp, rp for that position
        fp, rp = _primerdesign(dna, codon, codonposition, length_primer, tm)
        # Append to list
        forward_primers.append(fp)
        reverse_primers.append(rp)
    return forward_primers, reverse_primers


def _reverse_complement(dna):
    '''aux function that uses biopython to calculate the reverse complement of a DNA string.
    Includes mixed-base code. More info in https://biopython.org/docs/latest/api/Bio.Seq.html

    Parameters
    -----------
    dna : str
        DNA sequence; may contain IUPAC mixed-base letters (e.g. 'NNS').

    Returns
    --------
    reverse_dna : str
        Reverse complement of ``dna`` as a plain string.
    '''
    # No alphabet argument is passed to Seq: Bio.Alphabet was removed in
    # Biopython 1.78 and Seq no longer accepts one. Without it, older releases
    # still pick the (ambiguous) DNA complement table for sequences without 'U'.
    # Needs to be converted to str
    reverse_dna = str(Seq(dna).reverse_complement())

    return reverse_dna


def _primerdesign(dna, codon, codonposition, length_primer, tm):
    '''aux function to design the degenerate primers given a sequence and a codon position. 
    The length of the primer is fixed.

    Parameters
    -----------
    dna : string
        DNA sequence containing the protein of study. 
        The DNA sequence should also contain at least 15 base pairs before the 
        starting ATG and 15 base pairs after the stop codon.

    codon : str
        Degenerate codon that will be used to create the primers. Check idt's website for a list
        of all mixed bases and letter code (https://www.idtdna.com/pages/products/custom-dna-rna/mixed-bases).

    codonposition : int
        Position of the codon  to mutate with respect to the gene.

    length_primer : int
        Number of bases that the primers will have to each side of the mutated codon.

    tm : int
        melting temperature that the primers need to have

    Returns
    ---------
    forward_primer, reverse_primer
    '''

    if tm:
        # loop until tm is achieved
        x = meltingT_fp = 6
        while meltingT_fp < tm:
            forward_primer = dna[(codonposition-x):codonposition] + \
                codon + dna[(codonposition+3):(codonposition+x)]
            meltingT_fp = mt.Tm_NN(forward_primer)
            x += 1
    else:
        forward_primer = dna[(codonposition-length_primer):codonposition] + \
            codon + dna[(codonposition+3):(codonposition+length_primer+3)]

    reverse_primer = _reverse_complement(forward_primer)
    return forward_primer, reverse_primer

In [93]:
'''dna = 'TGTACAGTAATACAAGGGGTGTTATGGAAAAAATTATGCCGGAAGAAGAATACAGCGAATTTAAAGAACTGATTCTGCAGAAGGAACTGCACGTGGTGTATGCACTGAGCCACGTGTGTGGCCAGGATCGTACCCTGCTGGCCAGTATCTTACTGCGCATCTTTCTGCACGAGAAGCTGGAGAGCCTGTTACTGTGCACACTGAACGATCGCGAGATCAGCATGGAAGATGAAGCCACCACCCTGTTCCGCGCAACAACCCTGGCCAGCACCCTGATGGAGCAGTATATGAAAGCCACCGCCACCCAGTTCGTGCATCATGCCCTGAAAGATAGCATTTTAAAAATTATGGAAAGCAAACAGAGCTGCGAACTGAGCCCGAGCAAGCTGGAGAAAAACGAGGACGTGAACACCAACCTGACCCACCTGCTGAACATTCTGAGCGAACTGGTGGAAAAAATCTTTATGGCAAGCGAAATCCTGCCTCCGACCCTGCGTTACATCTACGGCTGCCTGCAGAAGAGCGTGCAGCATAAATGGCCGACCAATACCACCATGCGCACACGTGTGGTGAGCGGTTTTGTGTTCCTGCGTCTGATCTGCCCGGCAATCCTGAACCCGCGCATGTTCAACATCATTAGCGACAGCCCGAGTCCTATCGCAGCACGTACCCTGATCCTGGTGGCAAAAAGCGTGCAAAATCTGGCCAACCTGGTGGAATTTGGCGCCAAAGAGCCGTACATGGAAGGCGTGAATCCGTTTATCAAAAGTAACAAACATCGCATGATCATGTTCCTGGACGAACTGGGCAACGTTCCGGAACTGCCGGATACAACCGAACATAGTCGCACAGACCTGAGTCGTGACCTGGCCGCCCTGCATGAAATCTGCGTGGCCCATAGCGATGAGCTGCGCACACTGAGCAACGAGCGTGGCGCCCAGCAGCACGTGCTGAAGAAACTGCTGGCCATTACCGAACTGCTGCAACAAAAGCAGAACCAGTACACCAAAACCAACGACGTGCGTtatccgtatgatgtgccggattatgcgTAAccatcacttggctagaggcatc'
start = ('ATGGAAAAAATTATGCCGGAAGAA')
end = ('tatccgtatgatgtgccggattatgcgTA')

df_prim = generate_primers(dna, start, end, output_file=None,
                           codon='NNS', length_primer=15, tm=60, return_df=True)'''

In [None]:
# Disabled draft: the function below is wrapped in a string literal, so it is
# never defined or executed.
# NOTE(review): `variants` is written to and returned but never created inside
# this snippet; it would have to be initialised (or passed in) before the
# function could be re-enabled.
'''def _enumerate_dnavariants(wtSeqList, codon_list, dna_sequence):

    # First instance that we see the wt?
    firstwtseq = False

    # Loop over codons
    for position in range(0,len(wtSeqList)):
        for codons in (codon_list):
            variant = ''.join(wtSeqList[0:position]) + \
                codons + ''.join(wtSeqList[position+1:])
            if (variant == dna_sequence):  # Store redundant wild-types
                if firstwtseq:
                    variant = 'wtSeq' + str(position)
                firstwtseq = True
            variants[variant] = 0
    return variants'''

# options to return all mutants, or filter out synonymous wt alleles