In [2]:
import numpy as np
import pandas as pd
import  re
from Bio.SeqUtils import MeltingTemp as mt
from Bio.Seq import Seq
from Bio.SeqUtils.MeltingTemp import make_table, DNA_NN2
import pyperclip
#https://biopython.org/docs/1.75/api/Bio.SeqUtils.MeltingTemp.html
Breslauer= mt.DNA_NN1
Santalucia= mt.DNA_NN4 #same as benchling
Tm_Opt =Santalucia

In [10]:
def reverse_complement(Seq):
    Rv=''
    for bp in Seq.upper():
        if bp == 'A':
            Rv+='T'
        if bp == 'T' or bp == 'U':
            Rv+='A'
        if bp == 'C':
            Rv+='G'
        if bp == 'G':
            Rv+='C'
    assert len(Rv)==len(Seq), "There was an error reversing, Check Input Sequence"
    return Rv[::-1]

def Anneal2Patch(Patch, Reverse):
    Patch=Patch.upper()
    if Reverse == True:
        Patch = reverse_complement(Patch)
        
    GC_clamps=re.finditer("[G|C][G|C]",Patch[20:])

    for clamp in GC_clamps:
        found=0
        #3' of primer
        ThreeP = clamp.span()[1]+20 #position on patch
        for bp in [0,+1,-1,+2,-2]: #allows the primer length to vary a little
            FiveP = ThreeP  - 20 + bp

            Anneal=Patch[FiveP:ThreeP] #primer binding site

            #find if Annealing region has AT region of 3bp or more
            #the following will detect: streches of A or T of length 3+
            #AT region of length 5+
            ATs=re.findall("AAA+|TTT+|[A/T][A/T][A/T][A/T][A/T]+",Anneal) #[A/T][A/T][A/T]+
    
            
            Tm=mt.Tm_NN( Anneal , nn_table=Tm_Opt, check=False)
            
            if 48 <Tm and Tm< 54 and len(ATs)==0:
                #print(Anneal,ThreeP)
                #here we return the primers left most position on the patch 
                return Anneal, ThreeP if Reverse == False else len(Patch) - ThreeP 
    return None, None



def design_sequencing_primers(Seq):
    print('Has len', len(Seq))

    Primers_List={}
    patch=Seq[0:200]
    primer_num=1
    Primer_Fwd, Patch_Pos= Anneal2Patch(patch, Reverse=False)
    Primers_List['Primer'+str(primer_num)] = Primer_Fwd
    primer_num+=1

    Current_pos= Patch_Pos #right most limit of any most right primer

    while len(Seq) - Current_pos > 1300: #the bckwd primer on XII-5 down can cover 300bp of insert

      #  print('Current_pos start',Current_pos)

        Primer_Bwd = None
        down=0 #used to enlarge search area
        #this while loop expands search area to the left by 50bp if a backwd primer cant be found
        while Primer_Bwd == None:
            #patch_up: patch upper limit
            patch_up=min(Current_pos + 1900 , len(Seq) ) #avoids going beyond seq length
            patch_down= patch_up-150-down
            patch_Bwd=Seq[ patch_down:patch_up] #landing area for back primer
            Primer_Bwd, Primer_Pos = Anneal2Patch(patch_Bwd, Reverse=True) #finds bckd primer
            down+=50
        Primers_List['Primer'+str(primer_num)] = Primer_Bwd
        primer_num+=1

        Current_pos = patch_down + Primer_Pos 
       # print('Current_pos after back',Current_pos, patch_up,patch_down )
        #tries to find primer in the 50 bp immediately before the 200bp buffer region
         #if primer is not found tries to go down an extra 50bp:
        Primer_Fwd = None
        down=50
        while Primer_Fwd == None:

            patch_up = Current_pos-200
            patch_down = patch_up - down
            patch_Fwd=Seq[ patch_down: patch_up] #landing area for forward primer
            Primer_Fwd, Primer_pos = Anneal2Patch(patch_Fwd, Reverse=False)
            down+=50
        Primers_List['Primer'+str(primer_num)] =Primer_Fwd
        primer_num+=1


        Current_pos = patch_down + Primer_pos  
      #  print('Current_pos after front',Current_pos)
        print('... Done' )
        
    return Primers_List
    


### Imput your DNA sequence to be sequenced

In [4]:
#when sequencing a user integration plasmid, copy from homology up, to the end of your insert
#this script will always find the same 1st fwd primer (tgagtcaagttaggtcatcc) 
#you can also always use the last reverse primer ()
Sequences={}
Sequences['insert_pCCM001']='caatctggcggcttgagttctcaacatgttttattttttacttatattgctggtagggtaaaaaaatataactcctaggaataggttgtctatatgtttttgtcttgcttctataattgtaacaaacaaggaaagggaaaatactgggtgtaaaagccattgagtcaagttaggtcatcccttttatacaaaatttttcaattttttttccaagattcttgtacgattaattattttttttttgcgtcctacagcgtgatgaaaatttcgcctgctgcaagatgagcgggaacgggcgaaatgtgcacgcgcacaacttacgaaacgcggatgagtcactgacagccaccgcagaggttctgactcctactgagctctattggaggtggcagaaccggtaccggaggaggccgctataaccggtttgaatttattgtcacagtgtcacatcagcattaagtcctcagcgagctcgcatggaatgcgtgcgatgagcgacctcatgctatacctgagaaagcaacctgacctacaggaaagagttactcaagaataagaattttcgttttaaaacctaagagtcactttaaaatttgtatacacttattttttttataacttatttaataataaaaatcataaatcataagaaattcgcttatttagaagtgtcaacaacgtatctaccaacggaatgCGTGCGAtGATCCAGGCAACTTTAGTGCtgacacataatactccataggtattttattatacaaataatgtgtttgaacttattaaaacattcttttaaggtataaacaacaggcatatatatatgtgtgcgacgacacatgatcatatggcatgcatgtgctctgtatgtatataaaactcttgttttcttcttttctctaaatattctttccttatacattaggacctttgcagcataaattactatacttctatagacacacaaacacaaatacacacactaaattaataatctgtcataaaacaatgtctaaaggtgaagaattattcactggtgttgtcccaattttggttgaattagatggtgatgttaatggtcacaaattttctgtctccggtgaaggtgaaggtgatgctacttacggtaaattgaccttaaaatttatttgtactactggtaaattgccagttccatggccaaccttagtcactactttcggttatggtgttcaatgttttgcgagatacccagatcatatgaaacaacatgactttttcaagtctgccatgccagaaggttatgttcaagaaagaactatttttttcaaagatgacggtaactacaagaccagagctgaagtcaagtttgaaggtgataccttagttaatagaatcgaattaaaaggtattgattttaaagaagatggtaacattttaggtcacaaattggaatacaactataactctcacaatgtttacatcatggctgacaaacaaaagaatggtatcaaagttaacttcaaaattagacacaacattgaagatggttctgttcaattagctgaccattatcaacaaaatactccaattggtgatggtccagtcttgttaccagacaaccattacttatccactcaatctgccttatccaaagatccaaacgaaaagagagaccacatggtcttgttagaatttgttactgctgctggtattacccatggtatggatgaattgtacaaataaatcgcgtgcattcatccgctctaaccgaaaaggaaggagttagacaacctgaagtctaggtccctatttatttttttatagttatgttagtattaagaacgttatttatatttcaaatttttcttttttttctgtacagacgcgtgtacgcatgtaacattatactgaaaaccttgcttgagaaggttttgggacgctcgaAGATCGCGTCAGCTGAAatcgcgtgcattc'
Sequences['insert_pCCM002']='caatctggcggcttgagttctcaacatgttttattttttacttatattgctggtagggtaaaaaaatataactcctaggaataggttgtctatatgtttttgtcttgcttctataattgtaacaaacaaggaaagggaaaatactgggtgtaaaagccattgagtcaagttaggtcatcccttttatacaaaatttttcaattttttttccaagattcttgtacgattaattattttttttttgcgtcctacagcgtgatgaaaatttcgcctgctgcaagatgagcgggaacgggcgaaatgtgcacgcgcacaacttacgaaacgcggatgagtcactgacagccaccgcagaggttctgactcctactgagctctattggaggtggcagaaccggtaccggaggaggccgctataaccggtttgaatttattgtcacagtgtcacatcagcattaagtcctcagcgagctcgcatggaatgcgtgcgatgagcgacctcatgctatacctgagaaagcaacctgacctacaggaaagagttactcaagaataagaattttcgttttaaaacctaagagtcactttaaaatttgtatacacttattttttttataacttatttaataataaaaatcataaatcataagaaattcgcttatttagaagtgtcaacaacgtatctaccaacggaatgCGTGCGAtAGCGGATAACAATTTCACACAggaaacagctatgaccatgattacgccaagcgcgcaattaaccctcactaaagggaacaaaagctggagctcttcttaggcacaacaatatttataaaagaagaatgtaagtatgacgacgaaaacgaaaagactaatagtttcatataatactaaacgatttcaattttcgttttatatccttcctattcataaattttatttcgctatgccaataatgagtggatgtatagttgatatctgtttcgataattttaaagtagatagtaaaatcttactgacatgaataatccgtccaaaactagattactgtatacaagcatatttaaaaaaataatgaaactatgatcaaaaaactgtgctactacagcggtgttgttatccgatacaaccggatatttttcttttaatgagtctaaaccgtgatagcttcaggttaatacaatcaaaaaaagctcaaatattcttttaatgccgcgttcacagattccaattgaatacaactaggtagttcattatatgaagcctttgctactatttttcactatagtctgccttcaccttaatgcagacatccacatattttaatcactttaaaataaaaaggaagatatattagaagctatgatccaatctgtaagccagattaaaattcacgaactcttctttcatttgaattgaatgctttgagttggggtagattatcgcaaattactcatcacatttattgactacgaacttgctgatgtcctttttttatttatatttttcttcagtgaagcgattttttttttacacagaccaagacggaaaaaagtagctaaggaagaaaacaaaatcatgaaaaaaatgtgaagtgatcatgcacatcgcatcaacttaaacattggcttagagatatatagagttagagtttacggcaacctttaagcaccaataccttttggcatagtctaaagacctggttcttaattttaaacaaatttaactaaagatttccctatcaaagaagtaacgagttgacagattttctcaaaataaatcgatactgcatttctaggcatatccagcgatctgtcataaaacaatggaattgagacacttgagatacttcgttgccgttgttgaagaacaatcttttacaaaggctgccgacaagttgtgtattgctcaaccaccattatccagacaaatccaaaacttggaagaagaattgggtatccaattattggaaagaggttccagaccagttaagactactccagaaggtcatttcttttaccaatacgccatcaagttgctaagcaacgttgatcaaatggtcagtatgaccaagagaattgcctctgttgaaaagaccattagaatcggttttgttggttccttgttgttcggtttgttgccaagaattatccacttgtacagacaagctcatccaaacttgagaatcgaattatacgaaatgggtactaaggctcaaaccgaagctttgaaagaaggtagaattgacgctggttttggtagattgaagatttctgatccagccatcaagagaaccttgttgagaaacgaaagattgatggttgctgttcatgcttcccatccattgaatcaaatgaaggataagggtgttcacttgaacgatttgatcgacgaaaagatcttgttgtacccatcttctccaaagccaaacttctctactcatgttatgaacatcttctctgaccatggtttggaacctaccaagattaacgaagttagagaagtccaattggccttgggtttggttgctgctggtgaaggtatttcattggttccagcttctacccaatccattcaattattcaacttgtcctacgtcccattattagatccagatgctattaccccaatctacattgctgttagaaacatggaagaatccacctacatctactcattatacgaaaccatcagacaaatctacgcctacgaaggttttactgaaccaccaaattggtaAAGCGGCCGCATTCatcgcgtgcattcatccgctctaaccgaaaaggaaggagttagacaacctgaagtctaggtccctatttatttttttatagttatgttagtattaagaacgttatttatatttcaaatttttcttttttttctgtacagacgcgtgtacgcatgtaacattatactgaaaaccttgcttgagaaggttttgggacgctcgaagatcgcgtacccaattcgccctatagtgagtcg'
Sequences['insert_pCCM003']='caatctggcggcttgagttctcaacatgttttattttttacttatattgctggtagggtaaaaaaatataactcctaggaataggttgtctatatgtttttgtcttgcttctataattgtaacaaacaaggaaagggaaaatactgggtgtaaaagccattgagtcaagttaggtcatcccttttatacaaaatttttcaattttttttccaagattcttgtacgattaattattttttttttgcgtcctacagcgtgatgaaaatttcgcctgctgcaagatgagcgggaacgggcgaaatgtgcacgcgcacaacttacgaaacgcggatgagtcactgacagccaccgcagaggttctgactcctactgagctctattggaggtggcagaaccggtaccggaggaggccgctataaccggtttgaatttattgtcacagtgtcacatcagcattaagtcctcagcgagctcgcatggaatgcgtgcgatgagcgacctcatgctatacctgagaaagcaacctgacctacaggaaagagttactcaagaataagaattttcgttttaaaacctaagagtcactttaaaatttgtatacacttattttttttataacttatttaataataaaaatcataaatcataagaaattcgcttatttagaagtgtcaacaacgtatctaccaacggaatgCGTGCGAtATCTAGATCAGAGGGTGGTaaatgaagtgtaatagtattcatttttcttataaatcatcccttccgtgatttatacaaaagaagaggagaatatgctgaatacttggtatattactctacattatactcttatcttgacgggtattctgagcatcttactcagtttcaagatcttttaatgtccaaaaacatttgagccgatctaaatacttctgtgttttcattaatttataaattgtactcttttaagacatggaaagtaccaacatcggttgaaacagtttttcatttacttatggtttattggtttttccagtgaatgattatttgtcgttaccctttcgtaaaagttcaaacacgtttttaagtattgtttagttgctctttcgacatatatgattatccctgcgcggctaaagttaaggatgcaaaaaacataagacaactgaagttaatttacgtcaattaagttttccagggtaatgatgttttgggcttccactaattcaataagtatgtcatgaaatacgttgtgaagagcatccagaaataatgaaaagaaacaacgaaactgggtcggcctgttgtttcttttctttaccacgtgatctgcggcatttacaggaagtcgcgcgttttgcgcagttgttgcaacgcagctacggctaacaaagcctagtggaactcgactgatgtgttagggcctaaaactggtggtgacagctgaagtgaactattcaatccaatcatgtcatggctgtcacaaagaccttgcggaccgcacgtacgaacacatacgtatgctaatatgtgttttgatagtacccagtgatcgcagacctgcaatttttttgtaggtttggaagaatatataaaggttgcactcattcaagatagtttttttcttgtgtgtctattcattttattattgtttgtttaaatgttaaaaaaaccaagaacttagtttcaaaTTAAATTCATCACACAAACAAACAAAACAAAAGCCCTCTtACAATGACTCAATTCACTGACATTGATaagctagccgtctccaccataagaattttggctgtggacaccgtatccaaggccaactcaggtcacccaggtgctccattgggtatggcaccagctgcacacgttctatggagtcaaatgcgcatgaacccaaccaacccagactggatcaacagagatagatttgtcttgtctaacggtcacgcggtcgctttgttgtattctatgctacatttgactggttacgatctgtctattgaagacttgaaacagttcagacagttgggttccagaacaccaggtcatcctgaatttgagttgccaggtgttgaagttactaccggtccattaggtcaaggtatctccaacgctgttggtatggccatggctcaagctaacctggctgccacttacaacaagccgggctttaccttgtctgacaactacacctatgttttcttgggtgacggttgtttgcaagaaggtatttcttcagaagcttcctccttggctggtcatttgaaattgggtaacttgattgccatctacgatgacaacaagatcactatcgatggtgctaccagtatctcattcgatgaagatgttgctaagagatacgaagcctacggttgggaagttttgtacgtagaaaatggtaacgaagatctagccggtattgccaaggctattgctcaagctaagttatccaaggacaaaccaactttgatcaaaatgaccacaaccattggttacggttccttgcatgccggctctcactctgtgcacggtgccccattgaaagcagatgatgttaaacaactaaagagcaaattcggtttcaacccagacaagtcctttgttgttccacaagaagtttacgaccactaccaaaagacaattttaaagccaggtgtcgaagccaacaacaagtggaacaagttgttcagcgaataccaaaagaaattcccagaattaggtgctgaattggctagaagattgagcggccaactacccgcaaattgggaatctaagttgccaacttacaccgccaaggactctgccgtggccactagaaaattatcagaaactgttcttgaggatgtttacaatcaattgccagagttgattggtggttctgccgatttaacaccttctaacttgaccagatggaaggaagcccttgacttccaacctccttcttccggttcaggtaactactctggtagatacattaggtacggtattagagaacacgctatgggtgccataatgaacggtatttcagctttcggtgccaactacaaaccatacggtggtactttcttgaacttcgtttcttatgctgctggtgccgttagattgtccgctttgtctggccacccagttatttgggttgctacacatgactctatcggtgtcggtgaagatggtccaacacatcaacctattgaaactttagcacacttcagatccctaccaaacattcaagtttggagaccagctgatggtaacgaagtttctgccgcctacaagaactctttagaatccaagcatactccaagtatcattgctttgtccagacaaaacttgccacaattggaaggtagctctattgaaagcgcttctaagggtggttacgtactacaagatgttgctaacccagatattattttagtggctactggttccgaagtgtctttgagtgttgaagctgctaagactttggccgcaaagaacatcaaggctcgtgttgtttctctaccagatttcttcacttttgacaaacaacccctagaatacagactatcagtcttaccagacaacgttccaatcatgtctgttgaagttttggctaccacatgttggggcaaatacgctcatcaatccttcggtattgacagatttggtgcctccggtaaggcaccagaagtcttcaagttcttcggtttcaccccagaaggtgttgctgaaagagctcaaaagaccattgcattctataagggtgacaagctaatttcTCCTTTGAAAAAAGCTTTCTGAACACAGGCtGTGAATTTACTTTAAATCTTGCATTTAAATAAATTTTCTTTTTATAGCTTTATGACTTAGTTTCAATTTATATACTATTTTAATGACATTTTCGATTCATTGATTGAAAGCTTTGTGTTTTTTCTTGATGCGCTATTGCATTGTTCTTGTCTTTTTCGCCACATGTAATATCTGTAGTAGATACCTGATACATTGTGGATGCTGAGTGAAATTTTAGTTAATAATGGAGGCGCTCTTAATAATTTTGGGGATATTGGCTTTTTTTTTTAAAGTTTACAAATGAATTTTTTCCGCCAGGATAACGATTCTGAAGTTACTCTTAGCGTTCCTATCGGTACAGCCATCAAATCATGCCTATAAATCATGCCTATATTTGCGTGCAGTCAGTATCATCTACATGAAAAAAACTCCCGCAATTTCTTATAGAATACGTTGAAAATTAAATGTACGCGCCAAGATAAGATAACATATATCTAGATGCAGTAATATACACAGATTCCCGCGGACGTGGGAAGGAAAAAATTAGATAACAAAATCTGAGTGATATGGAAATTCCGCTGTATAGCTCATATCTTTCCCTACCTGCAGAaTCGCGTGcattc'
Sequences['insert_pCCM004']='caatctggcggcttgagttctcaacatgttttattttttacttatattgctggtagggtaaaaaaatataactcctaggaataggttgtctatatgtttttgtcttgcttctataattgtaacaaacaaggaaagggaaaatactgggtgtaaaagccattgagtcaagttaggtcatcccttttatacaaaatttttcaattttttttccaagattcttgtacgattaattattttttttttgcgtcctacagcgtgatgaaaatttcgcctgctgcaagatgagcgggaacgggcgaaatgtgcacgcgcacaacttacgaaacgcggatgagtcactgacagccaccgcagaggttctgactcctactgagctctattggaggtggcagaaccggtaccggaggaggccgctataaccggtttgaatttattgtcacagtgtcacatcagcattaagtcctcagcgagctcgcatggaatgcgtgcgatgagcgacctcatgctatacctgagaaagcaacctgacctacaggaaagagttactcaagaataagaattttcgttttaaaacctaagagtcactttaaaatttgtatacacttattttttttataacttatttaataataaaaatcataaatcataagaaattcgcttatttagaagtgtcaacaacgtatctaccaacggaatgCGTGCGAtACTGGTAGAGAGCGACTTTgtatgccccaattgcgaaacccgcgatatccttctcgattctttagtacccgaccaggacaaggaaaaggaggtcgaaacgtttttgaagaaacaagaggaactacacggaagctctaaagatggcaaccagccagaaactaagaaaatgaagttgatggatccaactggcaccgctggcttgaacaacaataccagccttccaacttctgtaaataacggcggtacgccagtgccaccagtaccgttacctttcggtatacctcctttccccatgtttccaatgcccttcatgcctccaacggctactatcacaaatcctcatcaagctgacgcaagccctaagaaatgaataacaatactgacagtactaaataattgcctacttggcttcacatacgttgcatacgtcgatatagataataatgataatgacagcaggattatcgtaatacgtaatagttgaaaatctcaaaaatgtgtgggtcattacgtaaataatgataggaatgggattcttctatttttcctttttccattctagcagccgtcgggaaaacgtggcatcctctctttcgggctcaattggagtcacgctgccgtgagcatcctctctttccatatctaacaactgagcacgtaaccaatggaaaagcatgagcttagcgttgctccaaaaaagtattggatggttaataccatttgtctgttctcttctgactttgactcctcaaaaaaaaaaaatctacaatcaacagatcgcttcaattacgccctcacaaaaacttttttccttcttcttcgcccacgttaaattttatccctcatgttgtctaacggatttctgcacttgatttattataaaaagacaaagacataatacttctctatcaatttcagttattgttcttccttgcgttattcttctgttcttctttttcttttgtcatatataacCATAACCAAGTAATACATATTCAAAAGCCCTCTtACAATGCCATCCAAGTTGGccattacctctatgtctttgggtagatgttatgccggtcattctttcactactaagttggatatggctagaaagtacggttaccaaggtttggaattattccatgaagatttggctgatgtcgcctatagattgtctggtgaaactccatctccatgtggtccatcaccagctgctcaattgtctgctgctagacaaattttgagaatgtgccaagtcagaaacatcgaaatcgtttgcttgcaaccattctctcaatacgatggtttgttggatagagaagaacacgaaagaagattggaacaattggaattctggatcgaattggcccatgaattggataccgatattattcaaattccagccaacttcttgccagccgaagaagttactgaagatatctctttgattgtctccgacttgcaagaagtagctgatatgggtttacaagctaacccaccaattagattcgtttacgaagctttgtgttggtccactagagttgatacttgggaaagatcttgggaagttgttcaaagagttaacagaccaaacttcggtgtttgtttggacacttttaacattgccggtagagtttatgctgatccaactgttgcttctggtagaactccaaatgctgaagaagctatcagaaagtccattgccagattggttgaaagagttgacgtttccaaggttttctacgttcaagttgttgatgccgaaaagttgaagaaaccattggttccaggtcacagattctatgatccagaacaaccagctagaatgtcttggtctagaaactgcagattattctacggtgaaaaggatagaggtgcttacttgccagtaaaagaaattgcttgggcttttttcaacggtttgggttttgaaggttgggtttccttagaattattcaacagaagaatgtccgataccggttttggtgttccagaagaattagctagaagaggtgctgtttcttgggctaaattggttagagatatgaagatcaccgttgactctccaactcaacaacaagctacacaacaacctatcagaatgttgtCTTTGTCAGCTGCTTTGTGAACACAGGCtATAGATTAATTTAAACAGTATATGTACAGTTTtatatatatatatatatatatatacatatataaagaaacctgtgcgttttttgtattttcaaatacatttagttttggcgccacttcTATAAAAGGTCTCATGATATTGTTACatcgcgtgcattc'
Sequences['insert_pCCM005']='caatctggcggcttgagttctcaacatgttttattttttacttatattgctggtagggtaaaaaaatataactcctaggaataggttgtctatatgtttttgtcttgcttctataattgtaacaaacaaggaaagggaaaatactgggtgtaaaagccattgagtcaagttaggtcatcccttttatacaaaatttttcaattttttttccaagattcttgtacgattaattattttttttttgcgtcctacagcgtgatgaaaatttcgcctgctgcaagatgagcgggaacgggcgaaatgtgcacgcgcacaacttacgaaacgcggatgagtcactgacagccaccgcagaggttctgactcctactgagctctattggaggtggcagaaccggtaccggaggaggccgctataaccggtttgaatttattgtcacagtgtcacatcagcattaagtcctcagcgagctcgcatggaatgcgtgcgatgagcgacctcatgctatacctgagaaagcaacctgacctacaggaaagagttactcaagaataagaattttcgttttaaaacctaagagtcactttaaaatttgtatacacttattttttttataacttatttaataataaaaatcataaatcataagaaattcgcttatttagaagtgtcaacaacgtatctaccaacggaatgCGTGCGAtAAGGATGAGCCAAGAATAAGGgaacaagattttggtaatttccaaaaaatcaatagcatgcaggacgttatgaagaagagatctacgtatggtcatttcttcttcagattccctcatggagaaagtgcggcagatgtatatgacagagtcgccagtttccaagagactttattcaggcacttccatgataggcaagagagaagacccagagatgttgttgtcctagttacacatggtatttattccagagtattcctgatgaaatggtttagatggacatacgaagagtttgaatcgtttaccaatgttcctaacgggagcgtaatggtgatggaactggacgaatccatcaatagatacgtcctgaggaccgtgctacccaaatggactgattgtgagggagacctaactacatagtgtttaaagattacggatatttaacttacttagaataatgccatttttttgagttataataatcctacgttagtgtgagcgggatttaaactgtgaggaccttaatacattcagacacttctgcggtatcaccctacttattcccttcgagattatatctaggaacccatcaggttggtggaagattacccgttctaagacttttcagcttcctctattgatgttacacctggacaccccttttctggcatccagtttttaatcttcagtggcatgtgagattctccgaaattaattaaagcaatcacacaattctctcggataccacctcggttgaaactgacaggtggtttgttacgcatgctaatgcaaaggagcctatatacctttggctcggctgctgtaacagggaatataaagggcagcataatttaggagtttagtgaacttgcaacatttactattttcccttcttacgtaaatatttttctttttaattctaaatcaatctttttcaattttttgtttgtattcttttcttgcttaaatctataACTACAAAAAACACATACATAAACTAAAAAGCCCTCTtACCATGGGTACCACTCTtgacgacacggcttaccggtaccgcaccagtgtcccgggggacgccgaggccatcgaggcactggatgggtccttcaccaccgacaccgttttccgcgtcaccgccaccggggacggcttcaccctgcgggaggtgccggtggacccgcccctgaccaaggtgttccccgacgacgaatcggacgacgaatcggacgacggggaggacggcgacccggactcccggacgttcgtcgcgtacggggacgacggcgacctggcgggcttcgtggtcgtctcgtactccggctggaaccgccggctgaccgtcgaggacatcgaggtcgccccggagcaccgggggcacggggtcgggcgcgcgttgatggggctcgcgacggagttcgcccgcgagcggggcgccgggcacctctggctggaggtcaccaacgtcaacgcaccggcgatccacgcgtaccggcggatggggttcaccctctgcggcctggacaccgccctgtacgacggcaccgcctcggacggcgagcaggcgctctacatgagcaTGCCCTGCCCCTAAACACAGGCtGCTTCTCGAGAAAAACAAAAGAGTTAataataaagtatatatgcttttttactattaataaagtttcttattttccctacatgtatacatacatataaatatactcttttGGAAAAAATTTCTTTTTTTTTTTTTTAATTTTatcgcgtgcattc'
Sequences['insert_pCCM006']='caatctggcggcttgagttctcaacatgttttattttttacttatattgctggtagggtaaaaaaatataactcctaggaataggttgtctatatgtttttgtcttgcttctataattgtaacaaacaaggaaagggaaaatactgggtgtaaaagccattgagtcaagttaggtcatcccttttatacaaaatttttcaattttttttccaagattcttgtacgattaattattttttttttgcgtcctacagcgtgatgaaaatttcgcctgctgcaagatgagcgggaacgggcgaaatgtgcacgcgcacaacttacgaaacgcggatgagtcactgacagccaccgcagaggttctgactcctactgagctctattggaggtggcagaaccggtaccggaggaggccgctataaccggtttgaatttattgtcacagtgtcacatcagcattaagtcctcagcgagctcgcatggaatgcgtgcgatgagcgacctcatgctatacctgagaaagcaacctgacctacaggaaagagttactcaagaataagaattttcgttttaaaacctaagagtcactttaaaatttgtatacacttattttttttataacttatttaataataaaaatcataaatcataagaaattcgcttatttagaagtgtcaacaacgtatctaccaacggaatgCGTGCGAtCAGCATTTTCAAAGGTGTgttcttcgtcagacatgttttagtgtgtgaatgaaataggtgtatgttttctttttgctagacaataattaggaacaaggtaagggaactaaagtgtagaataagattaaaaaagaagaacaagttgaaaaggcaagttgaaatttcaagaaaaaagtcaattgaagtacagtaaattgacctgaatatatctgagttccgacaacaatgagtttaccaaagagaacaatggaataggaaactttgaacgaagaaaggaaagcaggaaaggaaaaaatttttaggctcgagaacaatagggcgaaaaaacaggcaacgaacgaacaatggaaaaacgaaaaaaaaaaaaaaaaacacagaaaagaatgcagaaagatgtcaactgaaaaaaaaaaaggtgaacacaggaaaaaaaataaaaaaaaaaaaaaaaaaaggaggacgaaacaaaaaagtgaaaaaaaatgaaaatttttttggaaaaccaagaaatgaattatatttccgtgtgagacgacatcgtcgaatatgattcagggtaacagtattgatgtaatcaatttcctacctgaatctaaaattcccgggagcaagatcaagatgttttcaccgatctttccggtctctttggccggggtttacggacgatggcagaagaccaaagcgccagttcatttggcgagcgttggttggtggatcaagcccacgcgtaggcaatcctcgagcagatccgccaggcgtgtatatatagcgtggatggccaggcaactttagtgctgacacatacaggcatatatatatgtgtgcgacgacacatgatcatatggcatgcatgtgctctgtatgtatataaaactcttgttttcttcttttctctaaatattctttccttatacattaggacctttgcagcataaattactatacttctatagacacacaAACACAAATACACACACTAAATTAATAAGCCCTCTtACAATGTCCCAAGCTTTCAccgaatctgttaagacttctttgggtccaaatgctactccaagagctaaaaagttgattgcctctttggttcaacacgttcatgatttcgctagagaaaaccatttgactaccgaagattggttgtggggtgttgatttcattaacagaattggtcaaatgtccgactccagaagaaacgaaggtattttggtttgcgatatcatcggtttggaaaccttggttgatgctttgactaacgaatccgaacaatctaaccatacctcctctgctattttgggtcctttttacttgccagattctccagtttatccaaacggtggttctatcgttcaaaaggctattccaactgatgttaagtgcttcgttagaggtaaggttactgatactgaaggtaaaccattgggtggtgctcaattggaagtttggcaatgtaattctgctggtttctactctcaacaagctgatcatgatggtccagaattcaatttgagaggtactttcattaccgacgacgaaggtaattactccttcgaatgtttaagaccaacctcctatccaattccatacgatggtcctgctggtgatttgttgaaaatcatggatagacatccaaacagaccatcccatattcattggagagtttctcatccaggttaccatactttgatcacccaaatctatgatgctgaatgtccatacaccaacaacgattctgtttacgctgttaaggatgacatcatcgttcacttcgaaaaggttgataacaaggataaggatttggtcggtaaggtcgaatacaagttggattacgatatttccttggccaccgaatcctctattcaagaagctagagctgctgctaaagctagaCAAGATGCTGAAATCAAGTTGTAAACACAGGCtGCGCAATGATTGAATAGTCAAAgattttttttttttaattttttttttttaattttttttttttttcatagaactttttatttaaataaatcacgtctatatatgtatcagtataacgtaaaaaaaaaaacaccgtcagttaaacaaaacataaataaaaaaaaAAAGAAGTGTCAAATCAAGTGTCAAATCatcgcgtgcattc'
Sequences['insert_pCCM007']='caatctggcggcttgagttctcaacatgttttattttttacttatattgctggtagggtaaaaaaatataactcctaggaataggttgtctatatgtttttgtcttgcttctataattgtaacaaacaaggaaagggaaaatactgggtgtaaaagccattgagtcaagttaggtcatcccttttatacaaaatttttcaattttttttccaagattcttgtacgattaattattttttttttgcgtcctacagcgtgatgaaaatttcgcctgctgcaagatgagcgggaacgggcgaaatgtgcacgcgcacaacttacgaaacgcggatgagtcactgacagccaccgcagaggttctgactcctactgagctctattggaggtggcagaaccggtaccggaggaggccgctataaccggtttgaatttattgtcacagtgtcacatcagcattaagtcctcagcgagctcgcatggaatgcgtgcgatgagcgacctcatgctatacctgagaaagcaacctgacctacaggaaagagttactcaagaataagaattttcgttttaaaacctaagagtcactttaaaatttgtatacacttattttttttataacttatttaataataaaaatcataaatcataagaaattcgcttatttagaagtgtcaacaacgtatctaccaacggaatgCGTGCGAtGCTATAAAAAACACGCTTTTTCAgttcgagtttatcattatcaatactgccatttcaaagaatacgtaaataattaatagtagtgattttcctaactttatttagtcaaaaaattagccttttaattctgctgtaacccgtacatgcccaaaatagggggcgggttacacagaatatataacatcgtaggtgtctgggtgaacagtttattcctggcatccactaaatataatggagcccgctttttaagctggcatccagaaaaaaaaagaatcccagcaccaaaatattgttttcttcaccaaccatcagttcataggtccattctcttagcgcaactacagagaacaggggcacaaacaggcaaaaaacgggcacaacctcaatggagtgatgcaacctgcctggagtaaatgatgacacaaggcaattgacccacgcatgtatctatctcattttcttacaccttctattaccttctgctctctctgatttggaaaaagctgaaaaaaaaggttgaaaccagttccctgaaattattcccctacttgactaataagtatataaagacggtaggtattgattgtaattctgtaaatctatttcttaaacttcttaaattctacttttatagttagtcttttttttagttttaaaacaccaagaacttagtttcgaataaacacacataaacaaacaaaagtgcaggtagtgcaggtaaaacaatgaagttgatcatcggtatgactggtgctacaggtgctccattgggtgttgctttgttgcaagctttgagagatatgccagaagttgaaacccatttggttatgtctaaatgggctaagaccaccattgaattggaaactccatggactgctagagaagttgctgctttggctgatttttctcattctccagctgatcaagctgctactatttcttctggttctttcagaactgatggtatgatcgttattccatgctctatgaaaaccttggctggtattagagctggttatgctgaaggtttggttggtagagctgctgatgttgttttgaaagaaggtagaaagttggtcttggtcccaagagaaatgccattgtctactatccatttggaaaacatgttggccttgtctagaatgggtgtagctatggttccaccaatgccagcttattacaatcatccagaaaccgttgatgacatcaccaaccatatagttaccagagttttggaccaattcggtttggattatcacaaagctagaagatggaacggtttgagaactgctgaacaattcgctcaagaaattgaatgaatCGCACGCATTCCGTTGACACAGGCtTCACATATGAAAGTATATACCCGcttttgtacactatgtagctataattcaatcgtattattgtagctccgcacgaccatgccttagaaataTCCGCAGCGCGCatcgcgtgcattc'
Sequences['insert_pCCM008']='caatctggcggcttgagttctcaacatgttttattttttacttatattgctggtagggtaaaaaaatataactcctaggaataggttgtctatatgtttttgtcttgcttctataattgtaacaaacaaggaaagggaaaatactgggtgtaaaagccattgagtcaagttaggtcatcccttttatacaaaatttttcaattttttttccaagattcttgtacgattaattattttttttttgcgtcctacagcgtgatgaaaatttcgcctgctgcaagatgagcgggaacgggcgaaatgtgcacgcgcacaacttacgaaacgcggatgagtcactgacagccaccgcagaggttctgactcctactgagctctattggaggtggcagaaccggtaccggaggaggccgctataaccggtttgaatttattgtcacagtgtcacatcagcattaagtcctcagcgagctcgcatggaatgcgtgcgatgagcgacctcatgctatacctgagaaagcaacctgacctacaggaaagagttactcaagaataagaattttcgttttaaaacctaagagtcactttaaaatttgtatacacttattttttttataacttatttaataataaaaatcataaatcataagaaattcgcttatttagaagtgtcaacaacgtatctaccaacggaatgCGTGCGAtGCACACACCATAGCTTCAAaatgtttctactccttttttactcttccagattttctcggactccgcgcatcgccgtaccacttcaaaacacccaagcacagcatactaaatttcccctctttcttcctctagggtgtcgttaattacccgtactaaaggtttggaaaagaaaaaagagaccgcctcgtttctttttcttcgtcgaaaaaggcaataaaaatttttatcacgtttctttttcttgaaaatttttttttttgatttttttctctttcgatgacctcccattgatatttaagttaataaacggtcttcaatttctcaagtttcagtttcatttttcttgttctattacaactttttttacttcttgctcattagaaagaaagcatagcaatctaatctaagttttaattacaaatctgtcatatctgtcataaaacaatgaccgccccaatccaagatttgagagatgctattgctttgttacaacaacacgacaatcaatacttggaaaccgatcatccagttgatccaaatgctgaattggctggtgtttacagacatattggtgctggtggtactgtaaaaagaccaactagaattggtccagccatgatgttcaacaacattaagggttatccacactccagaatcttggttggtatgcatgcttctagacaaagagcagctttgttgttgggttgtgaagcttctcaattggctttggaagttggtaaagctgttaagaaaccagttgctccagttgttgttccagcttcttctgctccatgtcaagaacaaattttcttggctgatgatccagacttcgatttgagaactttgttgccagctcataccaacactccaattgatgctggtccatttttttgtttgggtttggctttagcttctgatcctgttgatgcttctttgaccgatgttaccattcatagattgtgcgttcaaggtagagatgaattgtctatgtttttggctgccggtagacatatcgaagtttttagacaaaaagctgaagctgctggtaagccattgccaattactattaacatgggtttagatccagccatctacattggtgcttgttttgaagctccaactactccatttggttacaacgaattgggtgttgctggtgctttgagacaaagaccagttgaattggttcaaggtgtttctgttccagaaaaggctattgctagagccgaaatagttatcgaaggtgaattattgccaggtgtcagagttagagaagatcaacatacaaattccggtcatgctatgccagaatttccaggttattgtggtggtgctaatccatctttgccagttattaaggttaaggccgttaccatgagaaacaacgctattttacaaactttggtcggtccaggtgaagaacatacaactttggctggtttgccaaccgaagcttctatttggaatgctgttgaagctgcaattccaggtttcttgcaaaatgtttatgctcatacagctggtggtggtaagttcttgggtatattgcaagtcaagaaaagacaaccagctgacgaaggtagacaaggtcaagctgctttattagctttggctacttactccgaattgaagaatatcatcttggtcgatgaagatgttgatatcttcgattccgatgatattttgtgggctatgactactagaatgcaaggtgatgtttccattactaccattccaggtattagaggtcaccaattagatccatctcaaaccccagaatactccccatcaattagaggtaatggtatctcctgtaagaccattttcgattgcactgttccatgggctttgaagtctcattttgaaagagcaccatttgctgacgttgatcctagaccttttgctccagaatatttcgctagattggaaaagAATCAAGGTTCCGCTAAGTAAACACAGGCtTCGAATTTACGTAGCCCAATCTAccacttttttttttcattttttaaagtgttatacttagttatgctctaggataatgaactacttttttttttttttttttactgttatcataaatatatataccttattgttgtttgcaaccgtcggttaattccttatcaaggttccCCAAGTTCGGATCATTACCATCatcgcgtgcattc'
Sequences['insert_pCCM009']='caatctggcggcttgagttctcaacatgttttattttttacttatattgctggtagggtaaaaaaatataactcctaggaataggttgtctatatgtttttgtcttgcttctataattgtaacaaacaaggaaagggaaaatactgggtgtaaaagccattgagtcaagttaggtcatcccttttatacaaaatttttcaattttttttccaagattcttgtacgattaattattttttttttgcgtcctacagcgtgatgaaaatttcgcctgctgcaagatgagcgggaacgggcgaaatgtgcacgcgcacaacttacgaaacgcggatgagtcactgacagccaccgcagaggttctgactcctactgagctctattggaggtggcagaaccggtaccggaggaggccgctataaccggtttgaatttattgtcacagtgtcacatcagcattaagtcctcagcgagctcgcatggaatgcgtgcgatgagcgacctcatgctatacctgagaaagcaacctgacctacaggaaagagttactcaagaataagaattttcgttttaaaacctaagagtcactttaaaatttgtatacacttattttttttataacttatttaataataaaaatcataaatcataagaaattcgcttatttagaagtgtcaacaacgtatctaccaacggaatgCGTGCGAtAAAGAAACTTAATACGTTATGCCgtaatgaagggctaccaaaaacgataatctcaactgtaaacaggtacaatgcggacccttttgccacaaaacatacatcattcattgccggaaaaagaaagaagtgaagacagcagtgcagccagccatgttgcgccaatctaattatagatgctggtgccctgaggatgtatctggagccagccatggcatcatgcgctaccgccggatgtaaaatccgacacgcaaaagaaaaccttcgaggttgcgcacttcgcccacccatgaaccacacggttagtccaaaaggggcagttcagattccagatgcgggaattagcttgctgccaccctcacctcactaacgctgcggtgtgcggatacttcatgctatttatagacgcgcgtgtcggaatcagcacgcgcaagaaccaaatgggaaaatcggaatgggtccagaactgctttgagtgctggctattggcgtctgatttccgttttgggaatcctttgccgcgcgcccctctcaaaactccgcacaagtcccagaaagcgggaaagaaataaaacgccaccaaaaaaaaaaaaataaaagccaatcctcgaagcgtgggtggtaggccctggattatcccgtacaagtatttctcaggagtaaaaaaaccgtttgttttggaattccccatttcgcggccacctacgccgctatctttgcaacaactatctgcgataactcagcaaattttgcatattcgtgttgcagtattgcgataatgggagtcttacttccaacataacggcagaaagaaatgtgagaaaattttgcatcctttgcctccgttcaagtatataaagtcggcatgcttgataatctttctttccatcctacattgttctaattattcttattctcctttattctttcctaacataccaagaaattaatcttctgTCATTCGCTTAAACACTATATCAATAAGCCCTCTtACAATGATCTGTCCAAGATGCgccgacgaaaaaattgaagttatggctacttctccagttaagggtgtttggactgtttatcaatgtcaacactgcttgtacacttggagagatactgaaccattgagaagaacctctagagaacattaccctgaagctttcagaatgacccaaaaggatattgatgaagctccacaagttcctcatgttccacCATTATTGCCAGAAGATAAGAGATAAACACAGGCtACAGAAGACGGGAGACACtagcacacaactttaccaggcaaggtatttgacgctagcatgtgtccaattcagtgtcatttatgattttttgtagtaggatataaatatatacagcgctccaaatagtgcggttgccccaaaaacaccacggaacctcatctgttctcgtactttgttgtgacaaagtagctcactgccttattatcacattttcattatgcaacgctTCGGAAAATACGATGTTGAAAATatcgcgtgcattc'



In [11]:
Seq=np.empty((0,2), dtype=object)
for i in range(1,10):
    print('insert_pCCM00'+str(i))
    Primers = design_sequencing_primers(Sequences['insert_pCCM00'+str(i)])
    for p,S in Primers.items():
        Seq=np.vstack([Seq,['pCCM00'+str(i),S]])

insert_pCCM001
Has len 1953
... Done
insert_pCCM002
Has len 2976
... Done
insert_pCCM003
Has len 4373
... Done
... Done
insert_pCCM004
Has len 2988
... Done
insert_pCCM005
Has len 2454
... Done
insert_pCCM006
Has len 2830
... Done
insert_pCCM007
Has len 2168
... Done
insert_pCCM008
Has len 2873
... Done
insert_pCCM009
Has len 2206
... Done


In [12]:
#delete repeating primers but keep associated plasmid name
i2del=[]
name=''
for i,S in enumerate(Seq[:,1]):
    
    if (S==Seq[:,1]).sum()>1:
        i2del.append(i)
        name+=Seq[i,0]+ ' '
#change name of first occurance delete all other occurances
Seq[i2del[0],0]=name
Seq = np.delete(Seq, (i2del[1:]), axis=0)
print(str(len(Seq))+ ' Primers were designed')

21 Primers were designed


In [13]:
#copy to clipboard the primer list
#Copy=pd.DataFrame(Seq,  columns=['Template', 'Seq'])
Copy=''
for i in range(len(Seq)):
    Copy+='CCM.S'+str(i+1).zfill(3) + '\t' + Seq[i,1] + '\t' +Seq[i,0] +'\n'
pyperclip.copy(Copy)