In [65]:
# generate all possible 8-mer DNA sequences without homopolymers

from itertools import product
from primer3 import calcHomodimer, calcHeterodimer, calcTm
import numpy as np 

In [66]:
# the four DNA bases from which barcode sequences are assembled
alphabet = 'ATCG'

In [67]:
def has_homopolymers( sequence ):
    """Report whether `sequence` contains two identical items side by side.

    `sequence` may be any indexable, sized sequence (a string or a tuple of
    characters, for example). Returns True if at least one position holds
    the same item as the position before it, otherwise False. Empty and
    single-item sequences therefore return False.
    """
    # compare each position with its predecessor; any() stops at the first match
    return any(
        sequence[ pos - 1 ] == sequence[ pos ]
        for pos in range( 1, len( sequence ) )
    )

In [68]:
# enumerate every length-8 combination of alphabet letters and keep, as a string,
# each one that has_homopolymers does not flag
barcodes = [
    ''.join( letters )
    for letters in product( alphabet, repeat=8 )
    if not has_homopolymers( letters )
]

In [69]:
# primer sequences for our test run are 
# T7 (forward): 5'-TAATACGACTCACTATAGGG-3'
# T7term (reverse): 5'-GCTAGTTATTGCTCAGCGG-3'
# we will add barcodes to the 5' end of both primers 
# assume the Tm of the T7 primers is good as is

In [70]:
# calculate the propensity of the primers to dimerize 

In [71]:
# prepend each barcode to the T7 (forward) and T7term (reverse) primer sequences,
# so the barcode sits at the 5' end of the resulting primer
fwd_primers = [ f'{barcode}TAATACGACTCACTATAGGG' for barcode in barcodes ]
rev_primers = [ f'{barcode}GCTAGTTATTGCTCAGCGG' for barcode in barcodes ]

In [72]:
# sanity check: one forward primer per barcode; with 4 bases and no two equal
# neighbours there are 4 * 3**7 = 8748 possible 8-mers
len( fwd_primers )

8748

In [73]:
# check for homodimers: the disabled filter below keeps only the primers for which calcHomodimer reports a structure
# fwd_primers = list( filter( lambda x: calcHomodimer( x ).structure_found, fwd_primers ) ) 
# rev_primers = list( filter( lambda x: calcHomodimer( x ).structure_found, rev_primers ) ) 
# print( len( fwd_primers) )
# print( len( rev_primers) )
# I guess there are none 

In [74]:
# Preview 20 randomly chosen forward primers with the value calcTm reports for each.
# replace=False keeps the sample free of duplicates (np.random.choice draws
# with replacement by default). No seed is set in this cell, so the selection
# changes from run to run.
for p1 in np.random.choice( fwd_primers, 20, replace=False ):
    print( p1, calcTm( p1 ) )

CAGTGCTCTAATACGACTCACTATAGGG 56.1394819706731
TGACTCACTAATACGACTCACTATAGGG 55.12304248723251
GTACACTATAATACGACTCACTATAGGG 52.26747484354979
CTGTCTGCTAATACGACTCACTATAGGG 56.1394819706731
CTATACACTAATACGACTCACTATAGGG 52.04750495254456
CTGAGCTGTAATACGACTCACTATAGGG 56.1394819706731
CTATCTCATAATACGACTCACTATAGGG 52.082236209162545
CACTCATATAATACGACTCACTATAGGG 52.511468298241994
GAGCTGCATAATACGACTCACTATAGGG 56.65092191994347
ACTCGATGTAATACGACTCACTATAGGG 55.45717734260154
CATGTACTTAATACGACTCACTATAGGG 52.888752428114344
AGACGTCATAATACGACTCACTATAGGG 55.45717734260154
GCGTGAGCTAATACGACTCACTATAGGG 58.37847663311709
TGTGACATTAATACGACTCACTATAGGG 54.26617062685153
ACGCGCTCTAATACGACTCACTATAGGG 59.42157635248509
ACGATATGTAATACGACTCACTATAGGG 53.76986052227312
CACTCTGATAATACGACTCACTATAGGG 54.19908919416531
ACGCGATCTAATACGACTCACTATAGGG 57.51109067387051
GCGCGCTCTAATACGACTCACTATAGGG 60.368735485033255
TCTAGTGCTAATACGACTCACTATAGGG 54.965283534302614


In [75]:
# Preview 20 randomly chosen reverse primers with the value calcTm reports for each.
# replace=False keeps the sample free of duplicates (np.random.choice draws
# with replacement by default). No seed is set in this cell, so the selection
# changes from run to run.
for p2 in np.random.choice( rev_primers, 20, replace=False ):
    print( p2, calcTm( p2 ) )

GCTAGAGTGCTAGTTATTGCTCAGCGG 59.34929611305705
TGAGACAGGCTAGTTATTGCTCAGCGG 60.32844922553966
GTGTGCTCGCTAGTTATTGCTCAGCGG 61.94775908524821
GATACATCGCTAGTTATTGCTCAGCGG 57.91901952521704
CTACACGAGCTAGTTATTGCTCAGCGG 59.35466609610569
AGCTCGCGGCTAGTTATTGCTCAGCGG 64.61655885757227
CACAGACAGCTAGTTATTGCTCAGCGG 59.710113972490376
GCTGCTCTGCTAGTTATTGCTCAGCGG 61.75827567390911
TGTGAGATGCTAGTTATTGCTCAGCGG 59.02895674750073
TCATGCTCGCTAGTTATTGCTCAGCGG 61.0980801077701
TGCTGCGAGCTAGTTATTGCTCAGCGG 63.040402296072784
ATGTATAGGCTAGTTATTGCTCAGCGG 56.652792706647006
CGTGAGCAGCTAGTTATTGCTCAGCGG 61.74818792158243
CTGTGATGGCTAGTTATTGCTCAGCGG 59.759433403576566
GCGATGTAGCTAGTTATTGCTCAGCGG 59.86634175470101
TGTCACGAGCTAGTTATTGCTCAGCGG 60.78944845485432
GAGCTATAGCTAGTTATTGCTCAGCGG 57.225119974727136
ATGTCTGAGCTAGTTATTGCTCAGCGG 58.81749818133778
CTAGCGTCGCTAGTTATTGCTCAGCGG 61.386657036378836
AGAGTGTCGCTAGTTATTGCTCAGCGG 60.584747075965765
