In [1]:
def unique_oligos_beta(full, select, bp = 30):
    '''Finds unique oligos of a particular length
    within a particular region of a repetitive elemement'''
    full_list = []
    for i in range(len(full) - bp):
        full_list.append(full[i:i+bp])
    full_set = [y for y in full_list if full_list.count(y) == 1]
    uniques = []
    for i in full_set:
        if i in select:
            uniques.append(i)
    if len(uniques) == 0:
        print('I\'m sorry, there are no unique oligos of length ' 
             + str(bp) + ' in the selection provided')
    else:
        print('The following sequence(s) of length ' + str(bp) + 
             ' are unique to your sequence')
        print(uniques)

In [2]:
unique_oligos_beta('''GCCACCATGGAATGGAATCATCGCACTGAAATCTTCCCAGGAACATCTGCAAGAAT
              AAATCCTAAACCAGGAGATCCCTGTTCAGATCAGCTTCAGGAACAACATGTTGATTC
              ACAGAACAAAAATGACAAGGCCAGCAATGAAGTAAAAAGAAAATCCAAACCCAGGCA
              GAGGATTTCAACAACCTTTCCCAGCACACTCAAAGAACAAATGAGATCTGAGGAAAG
              TAAGAGAACTGTGGAAGAGCTCAGAACAGGCCAGACAACAAATACAGAGGACACAGT
              CAAATCATTTATTGCATCAGAAATCTCAAGTATTGAAAGACAATGTGGGCAATATTT
              CAGTGATAAGTCAAATGTCAATGAGCACCAGAAGACACACACAGGGGAGAAGCCCTA
              TGTTTGCAGGGAGTGTGGGCGGGGCTTTACACAGAACTCACACCTCATCCAGCACCA
              GAGGACACACACAGGGGAGAAGCCCTATGTTTGCAGGGAGTGTGGGCGGGGCTTTAC
              ACAGAAGTCAGACCTCATCAAGCACCAGAGGACACACACAGGGGAGAAGCCCTATGT
              TTGCAGGGAGTGTGGGCGGGGCTTTACACAGAAGTCAGACCTCATCAAGCACCAGAG
              GACACACACAGGGGAGAAGCCCTATGTTTGCAGGGAGTGTGGGCGGGGCTTTACACA
              GAAGTCAGTCCTCATCAAGCACCAGAGGACACACACAGGGGAGAAGCCCTATGTTTG
              CAGGGAGTGTGGGCGGGGCTTTACACAGAAGTCAGTCCTCATCAAGCACCAGAGGACA
              CACACAGGGGAGAAGCCCTATGTTTGCAGGGAGTGTGGGCGGGGCTTTACAGCGAAGT
              CAGTCCTCATCCAGCACCAGAGGACACACACAGGGGAGAAGCCCTATGTTTGCAGGGA
              GTGTGGGCGGGGCTTTACAGCGAAGTCAAACCTCATCCAGCACCAGAGGACACACACA
              GGGGAGAAGCCCTATGTTTGCAGGGAGTGTGGGCGGGGCTTTACAGCGAAGTCAGTCC
              TCATCCAGCACCAGAGGACACACACAGGGGAGAAGCCCTATGTTTGCAGGGAGTGTGG
              GCGGGGCTTTACAGCGAAGTCAGTCCTCATCCAGCACCAGAGGACACACACAGGGGAGA
              AGCCCTATGTTTGCAGGGAGTGTGGGCGGGGCTTTACACAGAAGTCAAACCTCATCAAG
              CACCAGAGGACACACACAGGGGAGAAGCCCTATGTTTGCAGGGAGTGTGGGTGGGGCT
              TTACACAGAAGTCAG''', '''GTTTGCAGGGAGTGTGGGCGGGGCTTTACACAGAA
              GTCAGACCTCATCAAGCACCAGAGGACACAC''')

The following sequence(s) of length 30 are unique to your sequence
['GTCAGACCTCATCAAGCACCAGAGGACACA', 'TCAGACCTCATCAAGCACCAGAGGACACAC']


In [33]:
def unique_oligos(full, select, lowbp = 15, highbp = 30, lowtm = 55, hightm = 65):
    '''Finds unique oligos of a particular length and melting temp
    within a particular region of a repetitive elemement'''
    # Generate complete list of oligos of the correct length
    full_list = []
    for n in range(lowbp, (highbp+1)):
        for i in range(len(full) + 1 - n):
            full_list.append(full[i:i+n])
    print("Size and first ten entries of full list")
    print(len(full_list))
    print(full_list[0:10])
    
    # Generate list of oligos that only occur once in full_list
    full_set = [y for y in full_list if full_list.count(y) == 1]
    if len(full_set) < 1:
        raise ValueError('Unfortunately, there are no unique oligos between' + str(lowbp)
             + ' and ' + str(highbp) + ' bp in the full sequence')
    print("Size and first ten entries of full set")
    print(len(full_set))
    print(full_set[0:10])
    
    # Account for given Tm values
    from Bio.SeqUtils import MeltingTemp as mt
    from Bio.Seq import Seq
    
    # The adjustments here match Bioline's Taq master mix as far 
    # as the documentation says. The salt corr was changed to bring the values 
    # into line with Thermo and NEB's estimations
    temp_adjust = [y for y in full_set if lowtm < mt.Tm_NN(y, Na=50, K = 50, Tris=10, Mg=3, 
                                                           dNTPs=1, saltcorr = 4.5) < hightm]
    if len(temp_adjust) < 1:
        raise ValueError('Unfortunately there are no unique oligos with Tm values between '
              + str(lowtm) + ' and ' + str(hightm) + ' degrees Celsius in the sequence')
    print("Number of oligos with correct parameters in full sequence")
    print(len(temp_adjust))
    
    # Find oligos with the correct Tm that appear in the target
    uniques = [i for i in temp_adjust if i in select]
                             
    # Print results
    if len(uniques) < 1:
        print('''Unfortunately, there are no oligos unique to your selection that fit the provided parameters''')
    else:
        import pandas as pd
        global pd_uniques
        pd_uniques = pd.DataFrame({'Unique Oligos': uniques})
        print('''The following sequences are unique to your sequence and fit the
          parameters provided. The search yielded ''' + str(len(uniques)) + ''' oligos.''')
        print(pd_uniques)

In [34]:
# This is a control to see if the program will detect the region already modified
# with stop codons

unique_oligos('GCCACCATGGAATGGAATCATCGCACTGAAATCTTCCCAGGAACATCTGCAAGAATAAATCCTAAACCAGGAGATCCCTGTTCAGATCAGCTTCAGGAACAACATGTTGATTCACAGAACAAAAATGACAAGGCCAGCAATGAAGTAAAAAGAAAATCCAAACCCAGGCAGAGGATTTCAACAACCTTTCCCAGCACACTCAAAGAACAAATGAGATCTGAGGAAAGTAAGAGAACTGTGGAAGAGCTCAGAACAGGCCAGACAACAAATACAGAGGACACAGTCAAATCATTTATTGCATCAGAAATCTCAAGTATTGAAAGACAATGTGGGCAATATTTCAGTGATAAGTCAAATGTCAATGAGCACCAGAAGACACACACAGGGGAGAAGCCCTATGTTTGCAGGGAGTGTGGGCGGGGCTTTACACAGAACTCACACCTCATCCAGCACCAGAGGACACACACAGGGGAGAAGCCCTATGTTTGAAGGGAGTGAGGGCGGGGCTTTACACAGAAGTCAGACCTCATCAAGCACCAGAGGACACACACAGGGGAGAAGCCCTATGTTTGCAGGGAGTGTGGGCGGGGCTTTACACAGAAGTCAGACCTCATCAAGCACCAGAGGACACACACAGGGGAGAAGCCCTATGTTTGCAGGGAGTGTGGGCGGGGCTTTACACAGAAGTCAGTCCTCATCAAGCACCAGAGGACACACACAGGGGAGAAGCCCTATGTTTGCAGGGAGTGTGGGCGGGGCTTTACACAGAAGTCAGTCCTCATCAAGCACCAGAGGACACACACAGGGGAGAAGCCCTATGTTTGCAGGGAGTGTGGGCGGGGCTTTACAGCGAAGTCAGTCCTCATCCAGCACCAGAGGACACACACAGGGGAGAAGCCCTATGTTTGCAGGGAGTGTGGGCGGGGCTTTACAGCGAAGTCAAACCTCATCCAGCACCAGAGGACACACACAGGGGAGAAGCCCTATGTTTGCAGGGAGTGTGGGCGGGGCTTTACAGCGAAGTCAGTCCTCATCCAGCACCAGAGGACACACACAGGGGAGAAGCCCTATGTTTGCAGGGAGTGTGGGCGGGGCTTTACAGCGAAGTCAGTCCTCATCCAGCACCAGAGGACACACACAGGGGAGAAGCCCTATGTTTGCAGGGAGTGTGGGCGGGGCTTTACACAGAAGTCAAACCTCATCAAGCACCAGAGGACACACACAGGGGAGAAGCCCTATGTTTGCAGGGAGTGTGGGTGGGGCTTTACACAGAAGTCAGACCTCATCCAGCACCAGAGGACACATACAAGAGAGAAGTAATATATTTTCGAAAAGAATGAGAAAGCCAACAGCAATAAAACCACATCTCAACAATTACAGGAAGACAAATGTAGTCACTAAACATCTGTTCTGCTAAAACTTCTAAGGAGTCTACTGATTTTTAAAACTAGAATATAAAATGACTAGAAAAGGGAATTAAATCCCCTTCTTTTTCTTTTCTTTTTCGGTTTTTAAAGACAGATTTCTCTGTGCAGTCTGGTTGTCCTAGAACTGTTTCTGTAGACCAGGTTGGCCTCAAAATCAGAGTTGCTAGCTTCTGCCTCCCCAATACTAGGAGTAAAGCCCCATTGCAAATTCTC',
             'GACACACACAGGGGAGAAGCCCTATGTTTGAAGGGAGTGAGGGCGGGGCTTTACACAGAAGTCAGACCTCATCAAGCACCAGAGGACACACACAGGGGAGAAGCCCTATGTTTGC',
              lowbp = 25, highbp = 45, lowtm = 75, hightm = 95)

Size and first ten entries of full list
33726
['GCCACCATGGAATGGAATCATCGCA', 'CCACCATGGAATGGAATCATCGCAC', 'CACCATGGAATGGAATCATCGCACT', 'ACCATGGAATGGAATCATCGCACTG', 'CCATGGAATGGAATCATCGCACTGA', 'CATGGAATGGAATCATCGCACTGAA', 'ATGGAATGGAATCATCGCACTGAAA', 'TGGAATGGAATCATCGCACTGAAAT', 'GGAATGGAATCATCGCACTGAAATC', 'GAATGGAATCATCGCACTGAAATCT']
Size and first ten entries of full set
19284
['GCCACCATGGAATGGAATCATCGCA', 'CCACCATGGAATGGAATCATCGCAC', 'CACCATGGAATGGAATCATCGCACT', 'ACCATGGAATGGAATCATCGCACTG', 'CCATGGAATGGAATCATCGCACTGA', 'CATGGAATGGAATCATCGCACTGAA', 'ATGGAATGGAATCATCGCACTGAAA', 'TGGAATGGAATCATCGCACTGAAAT', 'GGAATGGAATCATCGCACTGAAATC', 'GAATGGAATCATCGCACTGAAATCT']
Number of oligos with correct parameters in full sequence
12853
The following sequences are unique to your sequence and fit the
          parameters provided. The search yielded 788 oligos.
                                     Unique Oligos
0                        TATGTTTGAAGGGAGTGAGGGCGGG
1                        ATGTTTGAAG

In [35]:
unique_oligos('GCCACCATGGAATGGAATCATCGCACTGAAATCTTCCCAGGAACATCTGCAAGAATAAATCCTAAACCAGGAGATCCCTGTTCAGATCAGCTTCAGGAACAACATGTTGATTCACAGAACAAAAATGACAAGGCCAGCAATGAAGTAAAAAGAAAATCCAAACCCAGGCAGAGGATTTCAACAACCTTTCCCAGCACACTCAAAGAACAAATGAGATCTGAGGAAAGTAAGAGAACTGTGGAAGAGCTCAGAACAGGCCAGACAACAAATACAGAGGACACAGTCAAATCATTTATTGCATCAGAAATCTCAAGTATTGAAAGACAATGTGGGCAATATTTCAGTGATAAGTCAAATGTCAATGAGCACCAGAAGACACACACAGGGGAGAAGCCCTATGTTTGCAGGGAGTGTGGGCGGGGCTTTACACAGAACTCACACCTCATCCAGCACCAGAGGACACACACAGGGGAGAAGCCCTATGTTTGAAGGGAGTGAGGGCGGGGCTTTACACAGAAGTCAGACCTCATCAAGCACCAGAGGACACACACAGGGGAGAAGCCCTATGTTTGCAGGGAGTGTGGGCGGGGCTTTACACAGAAGTCAGACCTCATCAAGCACCAGAGGACACACACAGGGGAGAAGCCCTATGTTTGCAGGGAGTGTGGGCGGGGCTTTACACAGAAGTCAGTCCTCATCAAGCACCAGAGGACACACACAGGGGAGAAGCCCTATGTTTGCAGGGAGTGTGGGCGGGGCTTTACACAGAAGTCAGTCCTCATCAAGCACCAGAGGACACACACAGGGGAGAAGCCCTATGTTTGCAGGGAGTGTGGGCGGGGCTTTACAGCGAAGTCAGTCCTCATCCAGCACCAGAGGACACACACAGGGGAGAAGCCCTATGTTTGCAGGGAGTGTGGGCGGGGCTTTACAGCGAAGTCAAACCTCATCCAGCACCAGAGGACACACACAGGGGAGAAGCCCTATGTTTGCAGGGAGTGTGGGCGGGGCTTTACAGCGAAGTCAGTCCTCATCCAGCACCAGAGGACACACACAGGGGAGAAGCCCTATGTTTGCAGGGAGTGTGGGCGGGGCTTTACAGCGAAGTCAGTCCTCATCCAGCACCAGAGGACACACACAGGGGAGAAGCCCTATGTTTGCAGGGAGTGTGGGCGGGGCTTTACACAGAAGTCAAACCTCATCAAGCACCAGAGGACACACACAGGGGAGAAGCCCTATGTTTGCAGGGAGTGTGGGTGGGGCTTTACACAGAAGTCAGACCTCATCCAGCACCAGAGGACACATACAAGAGAGAAGTAATATATTTTCGAAAAGAATGAGAAAGCCAACAGCAATAAAACCACATCTCAACAATTACAGGAAGACAAATGTAGTCACTAAACATCTGTTCTGCTAAAACTTCTAAGGAGTCTACTGATTTTTAAAACTAGAATATAAAATGACTAGAAAAGGGAATTAAATCCCCTTCTTTTTCTTTTCTTTTTCGGTTTTTAAAGACAGATTTCTCTGTGCAGTCTGGTTGTCCTAGAACTGTTTCTGTAGACCAGGTTGGCCTCAAAATCAGAGTTGCTAGCTTCTGCCTCCCCAATACTAGGAGTAAAGCCCCATTGCAAATTCTC',
             'GACACACACAGGGGAGAAGCCCTATGTTTGCAGGGAGTGTGGGCGGGGCTTTACACAGAAGTCAGACCTCATCAAGCACCAGAGGACACACACAGGGGAGAAGCCCTATGTTTGCA',
              lowbp = 25, highbp = 45, lowtm = 75, hightm = 95)

Size and first ten entries of full list
33726
['GCCACCATGGAATGGAATCATCGCA', 'CCACCATGGAATGGAATCATCGCAC', 'CACCATGGAATGGAATCATCGCACT', 'ACCATGGAATGGAATCATCGCACTG', 'CCATGGAATGGAATCATCGCACTGA', 'CATGGAATGGAATCATCGCACTGAA', 'ATGGAATGGAATCATCGCACTGAAA', 'TGGAATGGAATCATCGCACTGAAAT', 'GGAATGGAATCATCGCACTGAAATC', 'GAATGGAATCATCGCACTGAAATCT']
Size and first ten entries of full set
19284
['GCCACCATGGAATGGAATCATCGCA', 'CCACCATGGAATGGAATCATCGCAC', 'CACCATGGAATGGAATCATCGCACT', 'ACCATGGAATGGAATCATCGCACTG', 'CCATGGAATGGAATCATCGCACTGA', 'CATGGAATGGAATCATCGCACTGAA', 'ATGGAATGGAATCATCGCACTGAAA', 'TGGAATGGAATCATCGCACTGAAAT', 'GGAATGGAATCATCGCACTGAAATC', 'GAATGGAATCATCGCACTGAAATCT']
Number of oligos with correct parameters in full sequence
12853
The following sequences are unique to your sequence and fit the
          parameters provided. The search yielded 190 oligos.
                                     Unique Oligos
0                      TGGGCGGGGCTTTACACAGAAGTCAGA
1                     GTGGGCGGGGCTT

In [14]:
def rep_pcr(full, select, lowproduct = None, highproduct = None, 
            lowtm = 55, hightm = 65, idealtm = 60, lowbp = 15, highbp = 30):
    '''A PCR primer finding program optimized for repetitive regions'''
    # Set some defaults for lowproduct and highproduct if the user
    # doesn't provide any
    if lowproduct = None:
        lowproduct = len(select)
    if highproduct = None:
        highproduct = len(select) + 50
    else:
        break
        
    # Set the two regions of the full sequence that will be searched for oligos
    # This code also trims the full sequence to account for the low and high
    # product variable, which speeds up the program during exclusion steps
    left = full[:(str.find(full, select) - highproduct  - len(select))]
    right = full[(str.find(full, select) + len(select)):(-1 * (highproduct - len(select)))]
          
    # Make lists of all the oligos
    leftls = [left[i:i+n] for n in range(lowbp,(highbp + 1)) for i in 
              range(len(left) + 1 - n)]
    rightls = [right[i:i+n] for n in range(lowbp,(highbp + 1)) for i in 
               range(len(right) + 1 - n)]
    print('Total number of oligos analyzed: ' + str(len(leftls) + len(rightls))
        
    # Exclude anything in the selection provided
    leftls = [i for i in leftls if i not in select]
    rightls = [i for i in rightls if i not in select]
    print('Oligos with replicates in amplicon excluded')
    print(str(len(rightls) + len(leftls)) + 'remain')
          
    # Exclude all duplicates
    leftls = [i for i in leftls if leftls.count(i) == 1]
    rightls = [i for i in rightls if rightls.count(i) == 1]
    print('Oligos with replicates outside amplicon excluded')
    print(str(len(rightls) + len(leftls)) + 'remain')
    
    # Exclude duplicates between the two sets
    leftls = [i for i in leftls if rightls.count(i) == 0]
    rightls = [i for i in rightls if leftls.count(i) == 0]
    print('Oligos on both 5 and 3 prime ends excluded')
    print(str(len(rightls) + len(leftls)) + 'remain')
          
    # Exclude for melting temperature
    from Bio.SeqUtils import MeltingTemp as mt
    from Bio.Seq import Seq
    leftls = [i for i in leftls if lowtm <= mt.Tm_NN(i, Na=50, K = 50, 
                    Tris=10, Mg=3, dNTPs=1, saltcorr = 4.5) <= hightm]
    rightls = [i for i in rightls if lowtm <= mt.Tm_NN(i, Na=50, K = 50, 
                    Tris=10, Mg=3, dNTPs=1, saltcorr = 4.5) <= hightm]
    print('Oligos of incorrect melting temperature excluded')
    print(str(len(rightls) + len(leftls)) + 'remain')
          
    # Make pairs of oligos with acceptible melting temperatures
    pairls = []
    for i in leftls:
        for j in rightls:
            if abs(mt.Tm_NN(i, Na=50, K = 50, Tris=10, Mg=3, dNTPs=1, saltcorr = 4.5) 
                   - mt.Tm_NN(j, Na=50, K = 50, Tris=10, Mg=3, dNTPs=1, saltcorr = 4.5)) <= 5:
                pairls.append([i,j])
    if len(pairls) < 1:
          raise ValueError('Unfortunately, no primer pairs match the parameters')
    
    # Sort for pairs that produce acceptible sizes. The trimmed
    # sequence allows for products slightly larger than the desired length
    # to allow for better coverage. This step cuts out primers near the
    # edges of the trimmed sequence
    pairls = [i for i in pairls if lowbp <= (str.find(full,i[1]) + len(i[1]) - 
                                             str.find(a,i[0])) <= highbp]
    if len(pairls) < 1:
          raise ValueError('Unfortunately, no primer pairs match the parameters')
          
    
    left_primers = [Seq(i[0]) for i in pairls]
    right_primers = [Seq(i[1]).reverse_complement() for i in pairls]
    left_tm = [mt.Tm_NN(i, Na=50, K = 50, Tris=10, Mg=3, dNTPs=1, saltcorr = 4.5)
              for i in left_primers]
    right_tm = [mt.Tm_NN(i, Na=50, K = 50, Tris=10, Mg=3, dNTPs=1, saltcorr = 4.5)
              for i in right_primers]
    import pandas as pd
    primers = pd.DataFrame({'5\' Oligo': left_primers, '5\' Tm':right_primers,
                           '3\'Oligo': right_primers, '3\' Tm': right_tm})
    primers['Deg from Ideal'] = pd.Series(abs((primers['5\' Tm'] + primers['3\' Tm'])/2 - 60))
    primers.sort_values('Deg from Ideal')
    print('The search returned ' + str(len(primers['Deg from Ideal'])) + ' primer pairs')
    return primers

14
1
13
12


In [47]:
a = 'abcdefg'
b = [['abc','efg'], ['bcd','efg']]
c = [i for i in b if (str.find(a,i[1]) + len(i[1]) - str.find(a,i[0])) <= 7]
print(c)
print('Done')

[['abc', 'efg'], ['bcd', 'efg']]
Done


In [46]:
print(str.find(a,b[1][0]))
print(str.find(a,b[1][1]))

1
4
