In [1]:
def unique_oligos_beta(full, select, bp = 30):
    '''Finds unique oligos of a particular length
    within a particular region of a repetitive elemement'''
    full_list = []
    for i in range(len(full) - bp):
        full_list.append(full[i:i+bp])
    full_set = [y for y in full_list if full_list.count(y) == 1]
    uniques = []
    for i in full_set:
        if i in select:
            uniques.append(i)
    if len(uniques) == 0:
        print('I\'m sorry, there are no unique oligos of length ' 
             + str(bp) + ' in the selection provided')
    else:
        print('The following sequence(s) of length ' + str(bp) + 
             ' are unique to your sequence')
        print(uniques)

In [2]:
unique_oligos_beta('''GCCACCATGGAATGGAATCATCGCACTGAAATCTTCCCAGGAACATCTGCAAGAAT
              AAATCCTAAACCAGGAGATCCCTGTTCAGATCAGCTTCAGGAACAACATGTTGATTC
              ACAGAACAAAAATGACAAGGCCAGCAATGAAGTAAAAAGAAAATCCAAACCCAGGCA
              GAGGATTTCAACAACCTTTCCCAGCACACTCAAAGAACAAATGAGATCTGAGGAAAG
              TAAGAGAACTGTGGAAGAGCTCAGAACAGGCCAGACAACAAATACAGAGGACACAGT
              CAAATCATTTATTGCATCAGAAATCTCAAGTATTGAAAGACAATGTGGGCAATATTT
              CAGTGATAAGTCAAATGTCAATGAGCACCAGAAGACACACACAGGGGAGAAGCCCTA
              TGTTTGCAGGGAGTGTGGGCGGGGCTTTACACAGAACTCACACCTCATCCAGCACCA
              GAGGACACACACAGGGGAGAAGCCCTATGTTTGCAGGGAGTGTGGGCGGGGCTTTAC
              ACAGAAGTCAGACCTCATCAAGCACCAGAGGACACACACAGGGGAGAAGCCCTATGT
              TTGCAGGGAGTGTGGGCGGGGCTTTACACAGAAGTCAGACCTCATCAAGCACCAGAG
              GACACACACAGGGGAGAAGCCCTATGTTTGCAGGGAGTGTGGGCGGGGCTTTACACA
              GAAGTCAGTCCTCATCAAGCACCAGAGGACACACACAGGGGAGAAGCCCTATGTTTG
              CAGGGAGTGTGGGCGGGGCTTTACACAGAAGTCAGTCCTCATCAAGCACCAGAGGACA
              CACACAGGGGAGAAGCCCTATGTTTGCAGGGAGTGTGGGCGGGGCTTTACAGCGAAGT
              CAGTCCTCATCCAGCACCAGAGGACACACACAGGGGAGAAGCCCTATGTTTGCAGGGA
              GTGTGGGCGGGGCTTTACAGCGAAGTCAAACCTCATCCAGCACCAGAGGACACACACA
              GGGGAGAAGCCCTATGTTTGCAGGGAGTGTGGGCGGGGCTTTACAGCGAAGTCAGTCC
              TCATCCAGCACCAGAGGACACACACAGGGGAGAAGCCCTATGTTTGCAGGGAGTGTGG
              GCGGGGCTTTACAGCGAAGTCAGTCCTCATCCAGCACCAGAGGACACACACAGGGGAGA
              AGCCCTATGTTTGCAGGGAGTGTGGGCGGGGCTTTACACAGAAGTCAAACCTCATCAAG
              CACCAGAGGACACACACAGGGGAGAAGCCCTATGTTTGCAGGGAGTGTGGGTGGGGCT
              TTACACAGAAGTCAG''', '''GTTTGCAGGGAGTGTGGGCGGGGCTTTACACAGAA
              GTCAGACCTCATCAAGCACCAGAGGACACAC''')

The following sequence(s) of length 30 are unique to your sequence
['GTCAGACCTCATCAAGCACCAGAGGACACA', 'TCAGACCTCATCAAGCACCAGAGGACACAC']


In [1]:
def unique_oligos(full, select, lowbp = 15, highbp = 30, lowtm = 55, hightm = 65):
    '''Finds unique oligos of a particular length and melting temp
    within a particular region of a repetitive elemement'''
    #Generate complete list of oligos of the correct length
    full_list = []
    for n in range(lowbp, highbp):
        for i in range(len(full) - n):
            full_list.append(full[i:i+n])
    print("Size and first ten entries of full list")
    print(len(full_list))
    print(full_list[0:10])
    
    #Generate list of oligos that only occur once in full_list
    full_set = [y for y in full_list if full_list.count(y) == 1]
    if len(full_set) < 1:
        raise ValueError('Unfortunately, there are no unique oligos between' + str(lowbp)
             + ' and ' + str(highbp) + ' bp in the full sequence')
    print("Size and first ten entries of full set")
    print(len(full_set))
    print(full_set[0:10])
    
    #Account for given Tm values
    from Bio.SeqUtils import MeltingTemp as mt
    from Bio.Seq import Seq
    temp_adjust = [y for y in full_set if lowtm < mt.Tm_NN(y, Na=50, K = 50, Tris=10, Mg=3, 
                                                           dNTPs=1, saltcorr = 4.5) < hightm]
    if len(temp_adjust) < 1:
        raise ValueError('Unfortunately there are no unique oligos with Tm values between '
              + str(lowtm) + ' and ' + str(hightm) + ' degrees Celsius in the sequence')
    print("Number of oligos with correct parameters in full sequence")
    print(len(temp_adjust))
    
    #Find oligos with the correct Tm that appear in the target
    uniques = [i for i in temp_adjust if i in select]
                             
    #Print results
    if len(uniques) < 1:
        print('''Unfortunately, there are no oligos unique to your selection that fit the provided parameters''')
    else:
        import pandas as pd
        global pd_uniques
        pd_uniques = pd.DataFrame({'Unique Oligos': uniques})
        print('''The following sequences are unique to your sequence and fit the
          parameters provided. The search yielded ''' + str(len(uniques)) + ''' oligos''')
        print(pd_uniques)

In [4]:
# This is a control to see if the program will detect the region already modified
# with stop codons

unique_oligos('GCCACCATGGAATGGAATCATCGCACTGAAATCTTCCCAGGAACATCTGCAAGAATAAATCCTAAACCAGGAGATCCCTGTTCAGATCAGCTTCAGGAACAACATGTTGATTCACAGAACAAAAATGACAAGGCCAGCAATGAAGTAAAAAGAAAATCCAAACCCAGGCAGAGGATTTCAACAACCTTTCCCAGCACACTCAAAGAACAAATGAGATCTGAGGAAAGTAAGAGAACTGTGGAAGAGCTCAGAACAGGCCAGACAACAAATACAGAGGACACAGTCAAATCATTTATTGCATCAGAAATCTCAAGTATTGAAAGACAATGTGGGCAATATTTCAGTGATAAGTCAAATGTCAATGAGCACCAGAAGACACACACAGGGGAGAAGCCCTATGTTTGCAGGGAGTGTGGGCGGGGCTTTACACAGAACTCACACCTCATCCAGCACCAGAGGACACACACAGGGGAGAAGCCCTATGTTTGAAGGGAGTGAGGGCGGGGCTTTACACAGAAGTCAGACCTCATCAAGCACCAGAGGACACACACAGGGGAGAAGCCCTATGTTTGCAGGGAGTGTGGGCGGGGCTTTACACAGAAGTCAGACCTCATCAAGCACCAGAGGACACACACAGGGGAGAAGCCCTATGTTTGCAGGGAGTGTGGGCGGGGCTTTACACAGAAGTCAGTCCTCATCAAGCACCAGAGGACACACACAGGGGAGAAGCCCTATGTTTGCAGGGAGTGTGGGCGGGGCTTTACACAGAAGTCAGTCCTCATCAAGCACCAGAGGACACACACAGGGGAGAAGCCCTATGTTTGCAGGGAGTGTGGGCGGGGCTTTACAGCGAAGTCAGTCCTCATCCAGCACCAGAGGACACACACAGGGGAGAAGCCCTATGTTTGCAGGGAGTGTGGGCGGGGCTTTACAGCGAAGTCAAACCTCATCCAGCACCAGAGGACACACACAGGGGAGAAGCCCTATGTTTGCAGGGAGTGTGGGCGGGGCTTTACAGCGAAGTCAGTCCTCATCCAGCACCAGAGGACACACACAGGGGAGAAGCCCTATGTTTGCAGGGAGTGTGGGCGGGGCTTTACAGCGAAGTCAGTCCTCATCCAGCACCAGAGGACACACACAGGGGAGAAGCCCTATGTTTGCAGGGAGTGTGGGCGGGGCTTTACACAGAAGTCAAACCTCATCAAGCACCAGAGGACACACACAGGGGAGAAGCCCTATGTTTGCAGGGAGTGTGGGTGGGGCTTTACACAGAAGTCAGACCTCATCCAGCACCAGAGGACACATACAAGAGAGAAGTAATATATTTTCGAAAAGAATGAGAAAGCCAACAGCAATAAAACCACATCTCAACAATTACAGGAAGACAAATGTAGTCACTAAACATCTGTTCTGCTAAAACTTCTAAGGAGTCTACTGATTTTTAAAACTAGAATATAAAATGACTAGAAAAGGGAATTAAATCCCCTTCTTTTTCTTTTCTTTTTCGGTTTTTAAAGACAGATTTCTCTGTGCAGTCTGGTTGTCCTAGAACTGTTTCTGTAGACCAGGTTGGCCTCAAAATCAGAGTTGCTAGCTTCTGCCTCCCCAATACTAGGAGTAAAGCCCCATTGCAAATTCTC',
             'GACACACACAGGGGAGAAGCCCTATGTTTGAAGGGAGTGAGGGCGGGGCTTTACACAGAAGTCAGACCTCATCAAGCACCAGAGGACACACACAGGGGAGAAGCCCTATGTTTGC',
              lowbp = 25, highbp = 45, lowtm = 75, hightm = 95)

Size and first ten entries of full list
32110
['GCCACCATGGAATGGAATCATCGCA', 'CCACCATGGAATGGAATCATCGCAC', 'CACCATGGAATGGAATCATCGCACT', 'ACCATGGAATGGAATCATCGCACTG', 'CCATGGAATGGAATCATCGCACTGA', 'CATGGAATGGAATCATCGCACTGAA', 'ATGGAATGGAATCATCGCACTGAAA', 'TGGAATGGAATCATCGCACTGAAAT', 'GGAATGGAATCATCGCACTGAAATC', 'GAATGGAATCATCGCACTGAAATCT']
Size and first ten entries of full set
18295
['GCCACCATGGAATGGAATCATCGCA', 'CCACCATGGAATGGAATCATCGCAC', 'CACCATGGAATGGAATCATCGCACT', 'ACCATGGAATGGAATCATCGCACTG', 'CCATGGAATGGAATCATCGCACTGA', 'CATGGAATGGAATCATCGCACTGAA', 'ATGGAATGGAATCATCGCACTGAAA', 'TGGAATGGAATCATCGCACTGAAAT', 'GGAATGGAATCATCGCACTGAAATC', 'GAATGGAATCATCGCACTGAAATCT']
Number of oligos with correct parameters in full sequence
11908
The following sequences are unique to your sequence and fit the
          parameters provided. The search yielded 748 oligos
                                    Unique Oligos
0                       TATGTTTGAAGGGAGTGAGGGCGGG
1                       ATGTTTGAAGGGAG

In [5]:
unique_oligos('GCCACCATGGAATGGAATCATCGCACTGAAATCTTCCCAGGAACATCTGCAAGAATAAATCCTAAACCAGGAGATCCCTGTTCAGATCAGCTTCAGGAACAACATGTTGATTCACAGAACAAAAATGACAAGGCCAGCAATGAAGTAAAAAGAAAATCCAAACCCAGGCAGAGGATTTCAACAACCTTTCCCAGCACACTCAAAGAACAAATGAGATCTGAGGAAAGTAAGAGAACTGTGGAAGAGCTCAGAACAGGCCAGACAACAAATACAGAGGACACAGTCAAATCATTTATTGCATCAGAAATCTCAAGTATTGAAAGACAATGTGGGCAATATTTCAGTGATAAGTCAAATGTCAATGAGCACCAGAAGACACACACAGGGGAGAAGCCCTATGTTTGCAGGGAGTGTGGGCGGGGCTTTACACAGAACTCACACCTCATCCAGCACCAGAGGACACACACAGGGGAGAAGCCCTATGTTTGAAGGGAGTGAGGGCGGGGCTTTACACAGAAGTCAGACCTCATCAAGCACCAGAGGACACACACAGGGGAGAAGCCCTATGTTTGCAGGGAGTGTGGGCGGGGCTTTACACAGAAGTCAGACCTCATCAAGCACCAGAGGACACACACAGGGGAGAAGCCCTATGTTTGCAGGGAGTGTGGGCGGGGCTTTACACAGAAGTCAGTCCTCATCAAGCACCAGAGGACACACACAGGGGAGAAGCCCTATGTTTGCAGGGAGTGTGGGCGGGGCTTTACACAGAAGTCAGTCCTCATCAAGCACCAGAGGACACACACAGGGGAGAAGCCCTATGTTTGCAGGGAGTGTGGGCGGGGCTTTACAGCGAAGTCAGTCCTCATCCAGCACCAGAGGACACACACAGGGGAGAAGCCCTATGTTTGCAGGGAGTGTGGGCGGGGCTTTACAGCGAAGTCAAACCTCATCCAGCACCAGAGGACACACACAGGGGAGAAGCCCTATGTTTGCAGGGAGTGTGGGCGGGGCTTTACAGCGAAGTCAGTCCTCATCCAGCACCAGAGGACACACACAGGGGAGAAGCCCTATGTTTGCAGGGAGTGTGGGCGGGGCTTTACAGCGAAGTCAGTCCTCATCCAGCACCAGAGGACACACACAGGGGAGAAGCCCTATGTTTGCAGGGAGTGTGGGCGGGGCTTTACACAGAAGTCAAACCTCATCAAGCACCAGAGGACACACACAGGGGAGAAGCCCTATGTTTGCAGGGAGTGTGGGTGGGGCTTTACACAGAAGTCAGACCTCATCCAGCACCAGAGGACACATACAAGAGAGAAGTAATATATTTTCGAAAAGAATGAGAAAGCCAACAGCAATAAAACCACATCTCAACAATTACAGGAAGACAAATGTAGTCACTAAACATCTGTTCTGCTAAAACTTCTAAGGAGTCTACTGATTTTTAAAACTAGAATATAAAATGACTAGAAAAGGGAATTAAATCCCCTTCTTTTTCTTTTCTTTTTCGGTTTTTAAAGACAGATTTCTCTGTGCAGTCTGGTTGTCCTAGAACTGTTTCTGTAGACCAGGTTGGCCTCAAAATCAGAGTTGCTAGCTTCTGCCTCCCCAATACTAGGAGTAAAGCCCCATTGCAAATTCTC',
             'GACACACACAGGGGAGAAGCCCTATGTTTGCAGGGAGTGTGGGCGGGGCTTTACACAGAAGTCAGACCTCATCAAGCACCAGAGGACACACACAGGGGAGAAGCCCTATGTTTGCA',
              lowbp = 25, highbp = 45, lowtm = 75, hightm = 95)

Size and first ten entries of full list
32110
['GCCACCATGGAATGGAATCATCGCA', 'CCACCATGGAATGGAATCATCGCAC', 'CACCATGGAATGGAATCATCGCACT', 'ACCATGGAATGGAATCATCGCACTG', 'CCATGGAATGGAATCATCGCACTGA', 'CATGGAATGGAATCATCGCACTGAA', 'ATGGAATGGAATCATCGCACTGAAA', 'TGGAATGGAATCATCGCACTGAAAT', 'GGAATGGAATCATCGCACTGAAATC', 'GAATGGAATCATCGCACTGAAATCT']
Size and first ten entries of full set
18295
['GCCACCATGGAATGGAATCATCGCA', 'CCACCATGGAATGGAATCATCGCAC', 'CACCATGGAATGGAATCATCGCACT', 'ACCATGGAATGGAATCATCGCACTG', 'CCATGGAATGGAATCATCGCACTGA', 'CATGGAATGGAATCATCGCACTGAA', 'ATGGAATGGAATCATCGCACTGAAA', 'TGGAATGGAATCATCGCACTGAAAT', 'GGAATGGAATCATCGCACTGAAATC', 'GAATGGAATCATCGCACTGAAATCT']
Number of oligos with correct parameters in full sequence
11908
The following sequences are unique to your sequence and fit the
          parameters provided. The search yielded 171 oligos
                                    Unique Oligos
0                     TGGGCGGGGCTTTACACAGAAGTCAGA
1                    GTGGGCGGGGCTTTACA