# Useful functions and tools
### a codon table dictionary with codons as keys and amino acids as values
### a function that makes a complementary strand of DNA
### a function that flips a strand of DNA
### a function that translates DNA into a sequence of amino acids
### a function that finds instances of one dict of DNA in another
### a function that checks for aa sequences in a protein

In [233]:
### a codon table dictionary with codons as keys and amino acids as values
codontable = {
    'ATA':'I', 'ATC':'I', 'ATT':'I', 'ATG':'M',
    'ACA':'T', 'ACC':'T', 'ACG':'T', 'ACT':'T',
    'AAC':'N', 'AAT':'N', 'AAA':'K', 'AAG':'K',
    'AGC':'S', 'AGT':'S', 'AGA':'R', 'AGG':'R',
    'CTA':'L', 'CTC':'L', 'CTG':'L', 'CTT':'L',
    'CCA':'P', 'CCC':'P', 'CCG':'P', 'CCT':'P',
    'CAC':'H', 'CAT':'H', 'CAA':'Q', 'CAG':'Q',
    'CGA':'R', 'CGC':'R', 'CGG':'R', 'CGT':'R',
    'GTA':'V', 'GTC':'V', 'GTG':'V', 'GTT':'V',
    'GCA':'A', 'GCC':'A', 'GCG':'A', 'GCT':'A',
    'GAC':'D', 'GAT':'D', 'GAA':'E', 'GAG':'E',
    'GGA':'G', 'GGC':'G', 'GGG':'G', 'GGT':'G',
    'TCA':'S', 'TCC':'S', 'TCG':'S', 'TCT':'S',
    'TTC':'F', 'TTT':'F', 'TTA':'L', 'TTG':'L',
    'TAC':'Y', 'TAT':'Y', 'TAA':'*', 'TAG':'*',
    'TGC':'C', 'TGT':'C', 'TGA':'*', 'TGG':'W',
    }

### a function that makes a complementary strand of DNA
def complementary(DNA):
    
    DNA_comp = ""
    
    # makes the complementary sequence
    for i in range(len(DNA)):
        if DNA[i].lower() == "a":
            DNA_comp += "t"
        if DNA[i].lower() == "t":
            DNA_comp += "a"
        if DNA[i].lower() == "c":
            DNA_comp += "g"
        if DNA[i].lower() == "g":
            DNA_comp += "c"
    
    # returns the complementary sequence
    return DNA_comp.upper()

### a function that flips a strand of DNA
def flip(DNA):

    DNA_flipped = ""
    
    # makes the flipped sequence
    for i in range(len(DNA)):
        DNA_flipped += DNA[-(i + 1)]
    
    # returns the flipped sequence in a DNA dictionary construct
    return DNA_flipped

### a function that translates DNA into a sequence of amino acids
# must begin at the beginning of the coding sequence
def translate(DNA):
    
    protein = ""
    
    while len(DNA) >= 3:
        codon = DNA[0:3]
        protein += codontable[codon.upper()]
        DNA = DNA[3:]
    
    return(protein)

### a function that finds instances of one dict of DNA in another
# first entry is dict of DNA fragments (to be searched)
# second entry is dict of DNA to be searched
## returned is a list of lists
# first output is a list of frags found in DNA
# second output is a list of DNA with frags found in them
# third output is a list of indices that tell us where frags were found in DNA
def fraginDNA(frag, DNA):
    frag_list = []
    DNA_list = []
    index = []
    for DNAs in DNA:
        for frags in frag:
            if frag[frags].lower() in DNA[DNAs].lower():
                frag_list.append(frag[frags])
                DNA_list.append(DNA[DNAs])
                index.append(DNA[DNAs].lower().find(frag[frags].lower()))
    return([frag_list, DNA_list, index])

### a function that checks for aa sequences in a protein
# first entry is dictionary of amino acid sequences
# first is a dictionary of protein (aa sequences)
## returned is a list of lists
# first output is a list of aa seqs found in protein
# second output is a list of proteins with aa seqs found in them
# third is a list of indices that tells us where frags were found in DNA
def aainprot(aa, prot):
    aa_list = []
    prot_list = []
    index = []
    for prots in prot:
        for aas in aa:
            if aa[aas].upper() in prot[prots].upper():
                aa_list.append(aa[aas])
                prot_list.append(prot[prots])
                index.append(prot[prots].upper().find(aa[aas].upper()))
    return([aa_list, prot_list, index])

# Our illegal sites
### Restriction sites from MoClo restriction enzymes - BBF RFC 94
### Restriction sites from BBF RFC 10
### Restriction sites from BBF RFC 25
### Dictionary of all of our restriction sites
### Enterokinase protease site

In [234]:
# all of the sequences in this document should be considered as 5' to 3' in direction

### Restriction sites from MoClo restriction enzymes - BBF RFC 94
bbsi_recognition_sequence   = 'gaagac'
bsai_recognition_sequence   = 'ggtctc'

### Restriction sites from BBF RFC 10
ecori_recognition_sequence  = 'gaattc'
psti_recognition_sequence   = 'ctgcag'
noti_recognition_sequence   = 'gcggccgc'
xbai_recognition_sequence   = 'tctaga'
spei_recognition_sequence   = 'actagt'

### Restriction sites from BBF RFC 25
ngomiv_recognition_sequence = 'gccgcc'
agei_recognition_sequence   = 'accggt'

### Dictionary of all of our restriction sites
restriction_dict = {'bbsi': bbsi_recognition_sequence, 'bsai': bsai_recognition_sequence, 
                          'ecori': ecori_recognition_sequence, 'psti': psti_recognition_sequence, 
                          'noti': noti_recognition_sequence,
                          'xbai': xbai_recognition_sequence, 'spei': spei_recognition_sequence, 
                          'ngomiv': ngomiv_recognition_sequence, 'agei': agei_recognition_sequence}

### Enterokinase protease site
enterokinase_cleavage_sequence_aa = "DDDDK"

proteolytic_dict = {'enterokinase_cleavage_sequence_aa': enterokinase_cleavage_sequence_aa}

# Our Parts List
### Sec + n22 tags
### Linkers
### Biopolymer
### Payload
### Tag

In [235]:
### Sec + n22 tags
input0 = "ATG AAA TTA TTG AAA GTT GCT GCC ATT GCT GCG ATT GTT TTC AGT GGC TCT GCC CTG GCT GGG GTT GTC CCG CAA TAC GGT GGT GGA GGC AAT CAC GGG GGT GGC GGA AAT AAT TCA GGA CCT AAC"
sec_n22 = input0.replace(" ", "")
frontag_dict = {'sec_n22': sec_n22}

### Linkers
# long rigid linker 
input1 = "GCAGAGGCAGCGGCAAAGGAAGCGGCTGCAAAAGAGGCCGCAGCGAAAGAAGCAGCCGCGAAGGCTCTTGAAGCGGAAGCGGCAGCCAAAGAAGCAGCGGCTAAGGAGGCAGCCGCAAAAGAAGCAGCAGCCAAGGCG"
rigid_linker_long = input1.replace(' ', '')

# short rigid linker - DEFAULT
rigid_linker_short = 'ggaagc'.upper()

# long flexible linker
input2 = 'GGTGGGGGGGGCTCTGGCGGTGGGGGTAGTGGCGGAGGTGGTAGT'
flexible_linker_long = input2.replace(' ', '')

# short flexible linker
input3 = 'GGGGGTGGTGGCGGGGGAGGCGGA'
flexible_linker_short = input3.replace(' ', '')

linker_dict = {'rigid_linker_long': rigid_linker_long,
              'rigid_linker_short': rigid_linker_short, 'flexible_linker_long': flexible_linker_long,
              'flexible_linker_short': flexible_linker_short}

### Biopolymer
csgA = "ATGAAACTTTTAAAAGTAGCAGCAATTGCAGCAATCGTATTCTCCGGTAGCGCTCTGGCAGGTGTTGTTCCTCAGTACGGGGGAGGCGGTAACCACGGTGGTGGCGGTAATAATAGCGGCCCAAATTCTGAGCTGAACATTTACCAGTACGGTGGCGGTAACTCTGCACTTGCTCTGCAAACTGATGCCCGTAACTCTGACTTGACTATTACCCAGCATGGTGGCGGTAATGGTGCAGATGTTGGTCAGGGCTCAGATGACAGCTCAATCGATCTGACCCAACGTGGCTTCGGTAACAGCGCTACTCTTGATCAGTGGAACGGCAAAAATTCTGAAATGACGGTTAAACAGTTCGGTGGTGGCAACGGTGCAGCAGTTGACCAGACTGCATCTAACTCCTCCGTCAACGTGACTCAGGTTGGCTTTGGTAACAACGCGACCGCTCATCAGTACTAA"
biopolymer_dict = {'csgA': csgA}


### Payload
# fungal AMPs
payload_dict = {}
input4 = "GAC AGC CAC GAA GAA CGC CGT CAG GGC CGT CAC GGT CAT CAT GAG TAT GGA CGC AAG TTCCAC GAA AAA CAC CAT TCC CAC CGT GGT TAC"
id1 = input4.replace(" ", "")
payload_dict['id1'] = id1
input5 = "GAC GGG GTG AAG TTG TGT GAT GTG CCG TCA GGA ACT TGG TCG GGT CAT TGT GGC AGC TCATCA AAG TGT TCA CAG CAA TGT AAA GAC CGT GAA CAT TTC GCT TAT GGG GGT GCC TGT CACTAT CAA TTC CCT AGC GTA AAA TGC TTT TGC AAG CGC CAG TGC"
id2 = input5.replace(" ", "")
payload_dict['id2'] = id2
input6 = "AAC CTG TGC GAG CGT GCG TCT CTG ACG TGG ACG GGA AAT TGC GGA AAC ACA GGA CAC TGCGAC ACC CAG TGT CGC AAC TGG GAG TCG GCC AAA CAC GGT GCG TGT CAT AAG CGT GGT AACTGG AAA TGC TTT TGT TAC TTC GAC TGC"
id3 = input6.replace(" ", "")
payload_dict['id3'] = id3
input7 = "GGT TTG TTT GAT ATC ATT AAG AAG ATT GCT GAA TCC ATT"
id4 = input7.replace(" ", "")
payload_dict['id4'] = id4
input8 = "CGC GAA TGC AAA GCA CAA GGT CGC CAC GGT ACG TGT TTC CGC GAC GCC AAC TGT GTT CAAGTT TGT GAA AAA CAA GCC GGA TGG TCC CAC GGC GAT TGT CGC GCT CAG TTT AAG TGC AAGTGC ATT TTT GAA TGC"
id5 = input8.replace(" ", "")
payload_dict['id5'] = id5
input9 = "TTC CTG CCT ATT GTG GGC AAA CTT CTT TCG GGA TTG CTT"
id6 = input9.replace(" ", "")
payload_dict['id6'] = id6
input10 = "GCT ACA TAC AAT GGT AAG TGC TAC AAA AAG GAC AAC ATC TGC AAA TAC AAG GCG CAA TCAGGC AAG ACA GCT ATT TGC AAG TGT TAC GTG AAG AAA TGC CCG CGT GAC GGG GCT AAG TGTGAG TTT GAC TCG TAT AAG GGC AAG TGT TAC TGC"
id7 = input10.replace(" ", "")
payload_dict['id7'] = id7
input11 = "CAT CGC CAT CAG GGG CCG ATC TTT GAC ACA CGC CCG AGT CCT TTT AAC CCA AAC CAG CCC CGC CCA GGG CCC ATT TAT"
id8 = input11.replace(" ", "")
payload_dict['id8'] = id8
input12 = "GCA TTT ACA TGC CAT TGT CGC CGT AGT TGT TAT TCT ACG GAA TAT TCG TAC GGT ACT TGCACG GTC ATG GGT ATC AAT CAT CGC TTT TGC TGC CTG"
id9 = input12.replace(" ", "")
payload_dict['id9'] = id9
input13 = "CAC CCA TTG AAA CAG TAC TGG TGG CGC CCC TCG ATT"
id10 = input13.replace(" ", "")
payload_dict['id10'] = id10
input14 = "ATC TGT ATT TTC TGT TGC GGG TGC TGC CAT CGC AGC AAG TGT GGA ATG TGT TGT AAG ACA"
id11 = input14.replace(" ", "")
payload_dict['id11'] = id11
input15 = "GTA GGA GAG TGC GTT CGT GGA CGT TGT CCG AGT GGC ATG TGT TGT TCA CAG TTC GGG TACTGC GGC AAG GGT CCA AAG TAT TGT GGA CGT"
id12 = input15.replace(" ", "")
payload_dict['id12'] = id12
input16 = "CGT GGC GGA CGC CTT TGC TAC TGT CGC CGT CGT TTT TGC GTA TGT GTA GGC CGT"
id13 = input16.replace(" ", "")
payload_dict['id13'] = id13
input17 = "TGC ATC GGG AAT GGA GGC CGT TGT AAC GAA AAC GTG GGA CCT CCT TAT TGC TGC TCT GGA TTT TGT TTG CGT CAA CCT AAC CAG GGC TAC GGT GTT TGT CGC AAC CGT"
id14 = input17.replace(" ", "")
payload_dict['id14'] = id14
input18 = "CAG TGT ATC GGA AAT GGA GGC CGT TGC AAC GAA AAC GTA GGA CCG CCA TAT TGT TGT AGTGGA TTT TGC CTG CGT CAA CCG GGC CAA GGC TAT GGT TAC TGT AAG AAT CGC"
id15 = input18.replace(" ", "")
payload_dict['id15'] = id15
input19 = "CAA CAA TGT GGA CGC CAA GCA TCA GGG CGC CTG TGT GGC AAT CGC CTG TGT TGC AGT CAG TGG GGG TAT TGC GGA TCC ACC GCT TCA TAC TGC GGC GCC GGC TGT CAA TCT CAA TGT CGC TCT"
id16 = input19.replace(" ", "")
payload_dict['id16'] = id16
input20 = "CAA AAA CTG TGT GAA CGT CCC TCG GGT ACT TGG TCG GGG GTT TGC GGG AAC AAT AAT GCTTGT AAA AAC CAG TGT ATC AAT CTT GAA AAG GCC CGC CAC GGT AGT TGT AAC TAT GTG TTCCCT GCA CAT AAA TGT ATC TGT TAT TTC CCC TGT"
id17 = input20.replace(" ", "")
payload_dict['id17'] = id17
input21 = "CAA AAA TTA TGC CAA CGC CCC AGT GGG ACC TGG TCT GGG GTC TGC GGC AAC AAC AAC GCTTGC AAG AAT CAA TGT ATT CGC TTG GAG AAA GCA CGC CAC GGC TCA TGT AAC TAT GTA TTTCCC GCA CAT AAG TGC ATT TGT TAT TTC CCA TGT"
id18 = input21.replace(" ", "")
payload_dict['id18'] = id18
input22 = "GCG GAA CGC GTC GGT GCC GGG GCA CCT GTT TAT TTG"
id19 = input22.replace(" ", "")
payload_dict['id19'] = id19
input23 = "CCA GAT CCG GCA AAA ACG GCG CCT AAA AAG AAG AGT AAG AAA GCT GTT ACC"
id20 = input23.replace(" ", "")
payload_dict['id20'] = id20
input24 = "GGC CTT TTC GAT ATT ATC AAG AAG GTG GCC TCG GTT GTC GGT GGT TTA"
id21 = input24.replace(" ", "")
payload_dict['id21'] = id21
input25 = "GGC TTA TTT GAC ATC ATT AAG AAG GTT GCC TCG GTG ATC GGA GGT CTG"
id22 = input25.replace(" ", "")
payload_dict['id22'] = id22
input26 = "TAC CGT GGC GGT TAT ACT GGA CCT ATC CCC CGT CCG CCT CCC ATC GGA CGC CCG CCC TTCCGT CCG GTA TGC AAC GCT TGT TAC CGT TTG AGT GTA AGC GAT GCG CGT AAC TGT TGC ATCAAA TTC GGC TCT TGT TGT CAT TTG GTG AAA"
id23 = input26.replace(" ", "")
payload_dict['id23'] = id23
input27 = "CAG GTT TAT AAG GGC GGA TAC ACG CGC CCG ATC CCA CGC CCT CCT CCC TTC GTG CGC CCG TTG CCA GGT GGC CCC ATC GGG CCT TAT AAC GGC TGC CCT GTT AGT TGC CGT GGG ATT AGT TTC TCG CAA GCC CGT TCA TGC TGT TCT CGC TTG GGT CGC TGC TGC CAC GTA GGC AAG GGA TAC TCG"
id24 = input27.replace(" ", "")
payload_dict['id24'] = id24
input28 = "GCG TGC AAT TTC CAG TCT TGC TGG GCG ACC TGC CAG GCG CAG CAT AGT ATT TAT TTT CGCCGC GCG TTC TGC GAC CGT TCC CAG TGC AAG TGT GTA TTC GTT CGT GGG"
id25 = input28.replace(" ", "")
payload_dict['id25'] = id25
input29 = "CAT AGT TCA GGA TAC ACA CGC CCA TTA CGC AAA CCT TCA CGC CCC ATC TTC ATT CGT CCT ATT GGC TGT GAT GTA TGT TAT GGC ATT CCC TCC TCC ACG GCT CGT CTT TGT TGC TTC CGC TAT GGA GAC TGT TGT CAC TTG"
id26 = input29.replace(" ", "")
payload_dict['id26'] = id26
input30 = "TAT AGC TCG GGC TAC ACC CGC CCA CTG CCC AAG CCA TCT CGT CCC ATC TTC ATT CGC CCTATT GGT TGT GAT GTT TGT TAC GGT ATC CCA TCA TCA ACT GCC CGC TTG TGC TGC TTC CGCTAC GGT GAT TGT TGT CAT CGC"
id27 = input30.replace(" ", "")
payload_dict['id27'] = id27
input31 = "CAG GGC TGT AAG GGT CCT TAT ACC CGT CCA ATT CTG CGC CCT TAC GTC CGC CCA GTA GTC AGC TAC AAC GCT TGT ACC TTA TCC TGC CGT GGA ATT ACC ACA ACG CAA GCC CGC TCG TGT TGC ACA CGC TTA GGA CGC TGC TGT CAC GTA GCA AAG GGC TAT TCA"
id28 = input31.replace(" ", "")
payload_dict['id28'] = id28
input32 = "TCA GCA TTC ACG GTT TGG TCG GGG CCG GGC TGT AAC AAT CGT GCG GAG CGT TAT AGC AAA TGT GGG TGT TCA GCC ATC CAC CAG AAA GGG GGT TAC GAC TTC TCT TAC ACA GGG CAG ACT GCG GCG CTT TAT AAT CAA GCT GGT TGC AGC GGT GTG GCT CAC ACA CGT TTT GGT AGC TCA GCT CGT GCA TGC AAT CCG TTC GGC TGG AAA TCT ATC TTT ATC CAG TGT"
id29 = input32.replace(" ", "")
payload_dict['id29'] = id29
input33 = "TTG TGC AAC GAA CGC CCT TCA CAG ACC TGG TCT GGG AAT TGC GGA AAT ACA GCC CAC TGCGAT AAA CAG TGC CAG GAT TGG GAG AAG GCG TCT CAC GGC GCT TGT CAT AAA CGT GAA AACCAC TGG AAA TGC TTT TGT TAC TTT AAT TGC"
id30 = input33.replace(" ", "")
payload_dict['id30'] = id30
input34 = "GTC ACT TGT TTC TGT CGA CGC CGC GGT TGC GCC AGT CGT GAG CGT CAC ATT GGT TAC TGC CGC TTC GGG AAC ACA ATT TAT CGC TTG TGC TGC CGT CGT"
id31 = input34.replace(" ", "")
payload_dict['id31'] = id31
input35 = "GGG GCC GAT TTT CAG GAG TGC ATG AAA GAG CAT AGC CAA AAA CAA CAT CAG CAC CAG GGC"
id32 = input35.replace(" ", "")
payload_dict['id32'] = id32

# chromoproteins
payload_dict['meffblue'] = "atgtccgttatcgcaacccagatgacgtacaaagtttatatgtcgggcaccgtgaatggtcattattttgaagtcgaaggtgatggcaaaggtcgtccgtatgaaggcgaacagaccgtcaaactgaccgtgacgaaaggcggtccgctgccgtttgcatgggatattctgagtccgcagtgccaatacggttccattccgttcaccaaatatccggaagatatcccggactacgtcaaacagagctttccggaaggtttcacgtgggaacgcattatgaactttgaagatggcgctgtgtgcaccgtttcaaacgacagctctatccaaggcaactgcttcacgtatcatgtgaaattttcgggtctgaacttcccgccgaatggcccggttatgcagaagaaaacccaaggttgggaaccgcacagtgaacgtctgtttgcgcgcggcggtatgctgatcggcaacaatttcatggccctgaaactggaaggcggtggccattatctgtgtgaatttaaaaccacgtacaaagcgaaaaaaccagtgaaaatgccgggttatcattacgttgatcgtaaactggacgtcacgaaccacaataaagactatacctcagttgaacagtgtgaaatcagcatcgcacgcaagccggtggtcgcctaataa"
payload_dict['tspurple'] = "atggcgagcttggttaagaaagatatgtgtgttaagatgacgatggaaggtactgtgaacggttatcactttaagtgcgttggcgagggtgaaggcaagccgttcgagggcacgcagaacatgcgcattcgtgtcaccgagggcggtccgctgccttttgcattcgacatcctggccccgtgctgtatgtacggctctaagaccttcattaaacacgtgagcggtatcccggattactttaaagagtcctttccagagggcttcacttgggaacgtacccagatttttgaggacggtggtgttctgaccgcgcaccaagacaccagcctggaaggtaattgcctgatctataaagtgaaggttctgggtaccaatttcccggcgaatggtccggtgatgcaaaagaaaaccgcgggttgggagccgtgcgtcgagatgctgtatccgcgtgacggcgtcttgtgtggtcagagcttgatggcgctgaagtgcaccgatggcaatcatctgaccagccacctgcgcacgacgtatcgtagccgtaaaccgagcaacgccgttaacatgccggagttccattttggtgaccatcgcatcgaaatcctgaaagctgagcagggcaaattctacgaacaatacgaatcggctgtcgcacgttacagcgatgtgccggaaaaagcgacgtaataa"
payload_dict['fwyellow'] = "atgacggcactgactgaaggcgcaaaactgttcgagaaagaaatcccatatatcactgagctggaaggtgacgttgaaggtatgaagtttatcatcaagggtgaaggtacaggtgacgcgagcgtcggtaaagtggatgctcagttcatttgtaccacgggcgacgttccggttccgtggagcacgctggtcaccacgctgacgtatggtgctcagtgctttgccaagtatccgcgccacattgcggatttcttcaaaagctgcatgccggaaggttacgtccaagagcgcaccatcacctttgagggtgatggcgtgttcaagacccgtgcggaagtcacctttgaaaatggcagcgtgtacaaccgtgtaaaactgaacggccagggtttcaagaaggacggccacgtgctgggcaaaaatctggagtttaactttacccctcattgtttgtacatttggggtgaccaagcgaatcatggcctgaagagcgcgttcaaaatcatgcatgagatcaccggctccaaagaggatttcattgttgccgatcacacccaaatgaataccccgattggtggtggtccggtgcacgtgccggagtaccaccacattacgtatcatgttaccctgtctaaagacgtcaccgatcaccgtgaccatttgaacattgttgaggtgatcaaggcagttgacctggagacgtaccgttaataa"
payload_dict['scorange'] = "atgagcaaaatcagcgacaacgtccgcatcaaattgtacatggaaggcacggtaaataaccaccactttatgtgtgaggctgaaggcgagggtaaaccgtacgagggtacccaggaaatgaagattgaggtgattgaaggtggcccgctgccgttcgcattccatattctgagcaccagctgtatgtacggctctaaaacgttcatcaaatatgtcagcggtatccctgactatttcaagcagtccttcccggaaggtttcacctgggagcgtacgactacctacgaggatggcggttttctgacggcgcatcaagacaccagcttggacggcgattgcctggtttacaaggttaagatcctgggtaataactttccggcggatggtccggttatgcagaataaagcagagcgctgggaaccggccaccgagattctgtatgaggtggatggtgtgctgcgtggccaaaccctgatggcgttgaagtgcgcggacggtaaccatctgacctgccacctgcgtaccacgtatcgtagcaagaaaccggcgtcggccctgaagatgccaggttttcactttggtgatcaccgcatcgagattatggaagaagttgagaaaggcaagtgttacaagcaatatgaagccgcggtcgcacgttactgcgacgcggctccgagcaaactgggtcaccattaataa"

# adhesives
input36 = "GCC GAT TAC TAT GGT CCC AAA TAC GGA CCT CCG CGC CGC TAC GGG GGT GGA AAT TAC AAT CGT TAT GGG CGT CGT TAC GGA GGA TAC AAG GGA TGG AAT AAC GGT TGG AAA CGC GGG CGC TGG GGA CGT AAG TAT TAC"
payload_dict['mfp3'] = input36.replace(" ", "")
input37 = "TCT TCC GAG GAG TAT AAA GGT GGC TAC TAT CCC GGA AAC ACT TAC CAC TAC CAT TCG GGA GGT AGT TAT CAT GGC TCG GGC TAC CAC GGG GGA TAC AAG GGA AAA TAT TAT GGG AAA GCA AAG AAG TAT TAT TAC AAA TAC AAG AAC TCG GGA AAG TAC AAG TAC CTG AAG AAG GCC CGT AAA TAT CAT CGT AAA GGG TAT AAG AAA TAT TAT GGG GGA GGA TCT TCA"
payload_dict['mfp5'] = input37.replace(" ", "")

### Tag
tag_dict = {}
input38 = "CAT CAT CAT CAT CAC CAC"
tag_dict['his'] = input38.replace(" ", "")
input39 = "GAG CCC TTA CAG TTA AAG ATG"
tag_dict['gpb'] = input39.replace(" ", "")
input40 = "GAC TAC AAA GAT GAC GAT GAT AAA"
tag_dict['flag'] = input40.replace(" ", "")
input41 = "CAC AGT AGC TAC TGG TAT GCA TTT AAT AAC AAG ACG"
tag_dict['cnbp'] = input41.replace(" ", "")
input42 = "GCC TAC TCC TCT GGT GCG CCT CCG ATG CCT CCC TTT"
tag_dict['a3'] = input42.replace(" ", "")
input43 = "AAT CCC TAC CAC CCT ACG ATT CCA CAG TCA GTA CAC"
tag_dict['clp12'] = input43.replace(" ", "")
input44 = "CCC CCT CCC TGG TTG CCA TAC ATG CCC CCA TGG AGT"
tag_dict['qbp1'] = input44.replace(" ", "")
input45 = "GCA CAC ATT GTT ATG GTT GAT GCC TAC AAG CCA ACG AAG"
tag_dict['spytag'] = input45.replace(" ", "")
input46 = "TGT GGA CCT GCT GGG GAT TCG TCC GGA GTG GAC AGT CGT TCC GTG GGA CCT TGC"
tag_dict['ct43'] = input46.replace(" ", "")
input47 = "AAG TGC ACC TCA GAT CAG GAC GAA CAG TTC ATT CCG AAG GGG TGC TCA AAA GGG TCA GGT GGT TCT GGC"
tag_dict['mbd'] = input47.replace(" ", "")
input48 = "GAT ACG GCC TCG GAC GCA GCG GCT GCT GCC GCT CTG ACA GCT GCC AAC GCG AAG GCC GCG GCA GAA CTG ACG GCC GCG AAT GCG GCT GCC GCA GCT GCT GCC ACG GCA CGT"
tag_dict['afp8'] = input48.replace(" ", "")
input49 = "GGA GGG ACG ATT TGG ACA GGA AAG GGA CTT GGC CTT GGA CTG GGG TTG GGC CTT GGT GCC TGG GGG CCA ATC ATT CTG GGG GTA GTG GGC GCT GGG GCA GTT TAT GCT TAT ATG AAG TCT CGT GAT ATT GAG TCT GCC CAG TCC GAC GAG GAG GTT GAA TTG CGC GAC GCC TTG GCC"
tag_dict['mms6'] = input49.replace(" ", "")
input50 = "GGT GCG ATG GTA GAT ACT TTG TCC GGC CTG TCT AGT GAG CAA GGT CAG TCA GGG GAT ATG ACC ATC GAG GAG GAT AGT GCG ACC CAC ATT AAG TTT TCA AAG CGT GAC GAA GAT GGC AAG GAG CTT GCG GGA GCC ACA ATG GAA TTG CGC GAT AGT TCA GGA AAG ACA ATT TCA ACA TGG ATC TCT GAT GGG CAG GTC AAA GAC TTC TAC TTG TAT CCA GGA AAG TAT ACG TTT GTC GAA ACC GCA GCC CCA GAT GGC TAC GAG GTT GCG ACG GCT ATT ACA TTT ACA GTC AAC GAA CAA GGG CAG GTA ACT GTT AAT GGG AAG GCT ACG AAG GGC GAC GCT CAT ATC"
tag_dict['spycatcher'] = input50.replace(" ", "")
input51 = "GCA GCC AAC GAC GAA AAT TAC GCC CTT GCT GCA"
tag_dict['ssssra'] = input51.replace(" ", "")
input52 = "AAT AAT CAT TAC TTG CCA CGT"
tag_dict['hbp7'] = input52.replace(" ", "")

# Searching for Illegal Sites
### Sec + n22
### Linkers
### Biopolymer
### Payloads
### Tags

In [236]:
### Sec + n22
frontagdigestlist = fraginDNA(restriction_dict, frontag_dict)
if len(frontagdigestlist[0]) < 1:
    print("There are no restriction sites in front tags")
else:
    for i in range(len(frontagdigestlist[0])):
        print(frontagdigestlist[0][i] + " is in " + frontagdigestlist[1][i] + " at index " + str(frontagdigestlist[2][i]))

comp_frontag_dict = {}
for frontag in frontag_dict:
    comp_frontag_dict[frontag] = flip(complementary(frontag_dict[frontag]))
compfrontagdigestlist = fraginDNA(restriction_dict, comp_frontag_dict)
if len(compfrontagdigestlist[0]) < 1:
    print("There are no restriction sites in front tag complements")
else:
    for i in range(len(compfrontagdigestlist[0])):
        print(compfrontagdigestlist[0][i] + " is in " + compfrontagdigestlist[1][i] + " at index " + 
              str(compfrontagdigestlist[2][i]))

### Linkers
linkerdigestlist = fraginDNA(restriction_dict, linker_dict)
if len(linkerdigestlist[0]) < 1:
    print("There are no restriction sites in linkers")
else:
    for i in range(len(linkerdigestlist[0])):
        print(linkerdigestlist[0][i] + " is in " + linkerdigestlist[1][i] + " at index " + str(linkerdigestlist[2][i]))

comp_linker_dict = {}
for linker in linker_dict:
    comp_linker_dict[linker] = flip(complementary(linker_dict[linker]))
complinkerdigestlist = fraginDNA(restriction_dict, comp_linker_dict)
if len(complinkerdigestlist[0]) < 1:
    print("There are no restriction sites in linker complements")
else:
    for i in range(len(complinkerdigestlist[0])):
        print(complinkerdigestlist[0][i] + " is in " + complinkerdigestlist[1][i] + " at index " + 
              str(complinkerdigestlist[2][i]))
        
### biopolymer
biopolymerdigestlist = fraginDNA(restriction_dict, biopolymer_dict)
if len(biopolymerdigestlist[0]) < 1:
    print("There are no restriction sites in biopolymers")
else:
    for i in range(len(biopolymerdigestlist[0])):
        print(biopolymerdigestlist[0][i] + " is in " + biopolymerdigestlist[1][i] + " at index " + str(biopolymerdigestlist[2][i]))

comp_biopolymer_dict = {}
for biopolymer in biopolymer_dict:
    comp_biopolymer_dict[biopolymer] = flip(complementary(biopolymer_dict[biopolymer]))
compbiopolymerdigestlist = fraginDNA(restriction_dict, comp_biopolymer_dict)
if len(compbiopolymerdigestlist[0]) < 1:
    print("There are no restriction sites in biopolymer complements")
else:
    for i in range(len(compbiopolymerdigestlist[0])):
        print(compbiopolymerdigestlist[0][i] + " is in " + compbiopolymerdigestlist[1][i] + " at index " + 
              str(compbiopolymerdigestlist[2][i]))

### payloads
payloaddigestlist = fraginDNA(restriction_dict, payload_dict)
if len(payloaddigestlist[0]) < 1:
    print("There are no restriction sites in payloads")
else:
    for i in range(len(payloaddigestlist[0])):
        print(payloaddigestlist[0][i] + " is in " + payloaddigestlist[1][i] + " at index " + str(payloaddigestlist[2][i]))

comp_payload_dict = {}
for payload in payload_dict:
    comp_payload_dict[payload] = flip(complementary(payload_dict[payload]))
comppayloaddigestlist = fraginDNA(restriction_dict, comp_payload_dict)
if len(comppayloaddigestlist[0]) < 1:
    print("There are no restriction sites in payload complements")
else:
    for i in range(len(comppayloaddigestlist[0])):
        print(comppayloaddigestlist[0][i] + " is in " + comppayloaddigestlist[1][i] + " at index " + 
              str(comppayloaddigestlist[2][i]))

### tags
tagdigestlist = fraginDNA(restriction_dict, tag_dict)
if len(tagdigestlist[0]) < 1:
    print("There are no restriction sites in tags")
else:
    for i in range(len(tagdigestlist[0])):
        print(tagdigestlist[0][i] + " is in " + tagdigestlist[1][i] + " at index " + str(tagdigestlist[2][i]))

comp_tag_dict = {}
for tag in tag_dict:
    comp_tag_dict[tag] = flip(complementary(tag_dict[tag]))
comptagdigestlist = fraginDNA(restriction_dict, comp_tag_dict)
if len(comptagdigestlist[0]) < 1:
    print("There are no restriction sites in tag complements")
else:
    for i in range(len(comptagdigestlist[0])):
        print(comptagdigestlist[0][i] + " is in " + comptagdigestlist[1][i] + " at index " + 
              str(comptagdigestlist[2][i]))

There are no restriction sites in front tags
There are no restriction sites in front tag complements
There are no restriction sites in linkers
There are no restriction sites in linker complements
There are no restriction sites in biopolymers
There are no restriction sites in biopolymer complements
There are no restriction sites in payloads
There are no restriction sites in payload complements
There are no restriction sites in tags
There are no restriction sites in tag complements


# Check for proteolytic sites
### Front tags
### Linkers
### Biopolymer
### Payloads
### Tags
So, the tag['flag'] tag has an enterokinase cleavage site. I'm gonna let this be for now.

In [237]:
### front tags
aa_frontag_dict = {}
for frontag in frontag_dict:
    aa_frontag_dict[frontag] = translate(frontag_dict[frontag])
frontagprotdigestlist = aainprot(proteolytic_dict, aa_frontag_dict)
if len(frontagprotdigestlist[0]) < 1:
    print("There are no proteolytic sites in frontags")
else:
    for i in range(len(frontagprotdigestlist[0])):
        print(frontagprotdigestlist[0][i] + " is in " + frontagprotdigestlist[1][i] + " at index " + 
             str(frontagprotdigestlist[2][i]))

### linkers
aa_linker_dict = {}
for linker in linker_dict:
    aa_linker_dict[linker] = translate(linker_dict[linker])
linkerprotdigestlist = aainprot(proteolytic_dict, aa_linker_dict)
if len(linkerprotdigestlist[0]) < 1:
    print("There are no proteolytic sites in linkers")
else:
    for i in range(len(linkerprotdigestlist[0])):
        print(linkerprotdigestlist[0][i] + " is in " + linkerprotdigestlist[1][i] + " at index " + 
             str(linkerprotdigestlist[2][i]))

### biopolymer
aa_biopolymer_dict = {}
for biopolymer in biopolymer_dict:
    aa_biopolymer_dict[biopolymer] = translate(biopolymer_dict[biopolymer])
biopolymerprotdigestlist = aainprot(proteolytic_dict, aa_biopolymer_dict)
if len(biopolymerprotdigestlist[0]) < 1:
    print("There are no proteolytic sites in biopolymers")
else:
    for i in range(len(biopolymerprotdigestlist[0])):
        print(biopolymerprotdigestlist[0][i] + " is in " + biopolymerprotdigestlist[1][i] + " at index " + 
             str(biopolymerprotdigestlist[2][i]))
        
### payloads
aa_payload_dict = {}
for payload in payload_dict:
    aa_payload_dict[payload] = translate(payload_dict[payload])
payloadprotdigestlist = aainprot(proteolytic_dict, aa_payload_dict)
if len(payloadprotdigestlist[0]) < 1:
    print("There are no proteolytic sites in payloads")
else:
    for i in range(len(payloadprotdigestlist[0])):
        print(payloadprotdigestlist[0][i] + " is in " + payloadprotdigestlist[1][i] + " at index " + 
             str(payloadprotdigestlist[2][i]))
        
### tag
aa_tag_dict = {}
for tag in tag_dict:
    aa_tag_dict[tag] = translate(tag_dict[tag])
tagprotdigestlist = aainprot(proteolytic_dict, aa_tag_dict)
if len(tagprotdigestlist[0]) < 1:
    print("There are no proteolytic sites in tags")
else:
    for i in range(len(tagprotdigestlist[0])):
        print(tagprotdigestlist[0][i] + " is in " + tagprotdigestlist[1][i] + " at index " + 
             str(tagprotdigestlist[2][i]))

There are no proteolytic sites in frontags
There are no proteolytic sites in linkers
There are no proteolytic sites in biopolymers
There are no proteolytic sites in payloads
DDDDK is in DYKDDDDK at index 3


### Notes on the library

Go the following link for a more detailed explanation of our library assembly method - this is for libraries 1-10:
https://docs.google.com/document/d/1D9qJ49pugpkksllmeUM2MvXzJJ-jDwx4-HqbrwazKPw/edit

# Subparts for our final ultramers
### Cut sites
### Start and stop codons
### BbsI and BbsI'
### NN sequences (two base buffer)
### Main part sequences

In [238]:
### Cut sites
# cut sites are in order of position
cut_dict = {'A': 'ggggag', 'B': 'tactct', 'C': 'tcaatg', 'D': 'aggtcg', 'E': 'gggctt', 'F': 'cgctcc', 'G': 'agtgcc', 'H': 'actagc'}

### Start and stop codons
start = "atg"
stop = "taa"

### BbsI and BbsI'
BbsI = "GAAGAC"
BbsIprime = "GTCTTC"

### NN sequences (two base buffer)
NN = "aa"

### Main part sequences
frontag_dict
linker_dict
biopolymer_dict
payload_dict
tag_dict

print("", end="")

# Curli

In [239]:
lib1_pos1 = {}
for biopolymer in biopolymer_dict:
    lib1_pos1[biopolymer] = (BbsI + cut_dict['C'] + biopolymer_dict[biopolymer] + cut_dict['D'] + BbsIprime).upper()

dict1 = [lib1_pos1]

# Curli - tag

In [240]:
lib2_pos1 = {}
for biopolymer in biopolymer_dict:
    lib2_pos1[biopolymer] = (BbsI + cut_dict['C'] + biopolymer_dict[biopolymer][:-3] + cut_dict['D'] + BbsIprime).upper()
lib2_pos2 = {}
for tag in tag_dict:
    lib2_pos2[tag] = (BbsI + NN + cut_dict['D'] + tag_dict[tag] + (2*stop) + cut_dict['E'] + NN + BbsIprime).upper()

dict2 = [lib2_pos1, lib2_pos2]

# Curli - linker - payload

In [241]:
lib3_pos1 = {}
lib3_pos2 = {}
lib3_pos3 = {}
dict3 = [lib3_pos1, lib3_pos2, lib3_pos3]

for biopolymer in biopolymer_dict:
    lib3_pos1[biopolymer] = (BbsI + cut_dict['C'] + biopolymer_dict[biopolymer][:-3] + cut_dict['D'] + BbsIprime).upper()

for linker in linker_dict:
    lib3_pos2[linker] = (BbsI + NN + cut_dict['D'] + linker_dict[linker] + cut_dict['E'] + NN + BbsIprime).upper()

for payload in payload_dict:
    lib3_pos3[payload] = (BbsI + cut_dict['E'] + payload_dict[payload] + (2*stop) + cut_dict['F'] + BbsIprime).upper()


# Curli - linker - payload - tag

In [242]:
lib4_pos1 = {}
lib4_pos2 = {}
lib4_pos3 = {}
lib4_pos4 = {}
dict4 = [lib4_pos1, lib4_pos2, lib4_pos3, lib4_pos4]

for biopolymer in biopolymer_dict:
    lib4_pos1[biopolymer] = (BbsI + cut_dict['C'] + biopolymer_dict[biopolymer][:-3] + cut_dict['D'] + BbsIprime).upper()

for linker in linker_dict:
    lib4_pos2[linker] = (BbsI + NN + cut_dict['D'] + linker_dict[linker] + cut_dict['E'] + NN + BbsIprime).upper()

for payload in payload_dict:
    lib4_pos3[payload] = (BbsI + cut_dict['E'] + payload_dict[payload] + cut_dict['F'] + BbsIprime).upper()

for tag in tag_dict:
    lib4_pos4[tag] = (BbsI + NN + cut_dict['F'] + tag_dict[tag] + (2*stop) + cut_dict['G'] + NN + BbsIprime).upper()

# Sec-n22 - linker - biopolymer

In [243]:
lib5_pos1 = {}
lib5_pos2 = {}
lib5_pos3 = {}
dict5 = [lib5_pos1, lib5_pos2, lib5_pos3]

for frontag in frontag_dict:
    lib5_pos1[frontag] = (BbsI + cut_dict['A'] + frontag_dict[frontag] + cut_dict['B'] + BbsIprime).upper()

for linker in linker_dict:
    lib5_pos2[linker] = (BbsI + NN + cut_dict['B'] + linker_dict[linker] + cut_dict['C'] + NN + BbsIprime).upper()
    
for biopolymer in biopolymer_dict:
    lib5_pos3[biopolymer] = (BbsI + cut_dict['C'] + biopolymer_dict[biopolymer] + cut_dict['D'] + BbsIprime).upper()

# Sec-n22-linker-biopolymer-tag

In [244]:
lib6_pos1 = {}
lib6_pos2 = {}
lib6_pos3 = {}
lib6_pos4 = {}
dict6 = [lib6_pos1, lib6_pos2, lib6_pos3, lib6_pos4]

for frontag in frontag_dict:
    lib6_pos1[frontag] = (BbsI + cut_dict['A'] + frontag_dict[frontag] + cut_dict['B'] + BbsIprime).upper()

for linker in linker_dict:
    lib6_pos2[linker] = (BbsI + NN + cut_dict['B'] + linker_dict[linker] + cut_dict['C'] + NN + BbsIprime).upper()
    
for biopolymer in biopolymer_dict:
    lib6_pos3[biopolymer] = (BbsI + cut_dict['C'] + biopolymer_dict[biopolymer][:-3] + cut_dict['D'] + BbsIprime).upper()

for tag in tag_dict:
    lib6_pos4[tag] = (BbsI + NN + cut_dict['D'] + tag_dict[tag] + (2*stop) + cut_dict['E'] + NN + BbsIprime).upper()

# Sec-n22-linker-biopolymer-linker-payload

In [245]:
lib7_pos1 = {}
lib7_pos2 = {}
lib7_pos3 = {}
lib7_pos4 = {}
lib7_pos5 = {}
dict7 = [lib7_pos1, lib7_pos2, lib7_pos3, lib7_pos4, lib7_pos5]

for frontag in frontag_dict:
    lib7_pos1[frontag] = (BbsI + cut_dict['A'] + frontag_dict[frontag] + cut_dict['B'] + BbsIprime).upper()

for linker in linker_dict:
    lib7_pos2[linker] = (BbsI + NN + cut_dict['B'] + linker_dict[linker] + cut_dict['C'] + NN + BbsIprime).upper()
    
for biopolymer in biopolymer_dict:
    lib7_pos3[biopolymer] = (BbsI + cut_dict['C'] + biopolymer_dict[biopolymer][:-3] + cut_dict['D'] + BbsIprime).upper()

for linker in linker_dict:
    lib7_pos4[linker] = (BbsI + NN + cut_dict['D'] + linker_dict[linker] + cut_dict['E'] + NN + BbsIprime).upper()

for payload in payload_dict:
    lib7_pos5[payload] = (BbsI + cut_dict['E'] + payload_dict[payload] + (2*stop) + cut_dict['F'] + BbsIprime).upper()

# Sec-n22-linker-biopolymer-linker-payload-tag

In [246]:
lib8_pos1 = {}
lib8_pos2 = {}
lib8_pos3 = {}
lib8_pos4 = {}
lib8_pos5 = {}
lib8_pos6 = {}
dict8 = [lib8_pos1, lib8_pos2, lib8_pos3, lib8_pos4, lib8_pos5, lib8_pos6]

for frontag in frontag_dict:
    lib8_pos1[frontag] = (BbsI + cut_dict['A'] + frontag_dict[frontag] + cut_dict['B'] + BbsIprime).upper()

for linker in linker_dict:
    lib8_pos2[linker] = (BbsI + NN + cut_dict['B'] + linker_dict[linker] + cut_dict['C'] + NN + BbsIprime).upper()
    
for biopolymer in biopolymer_dict:
    lib8_pos3[biopolymer] = (BbsI + cut_dict['C'] + biopolymer_dict[biopolymer][:-3] + cut_dict['D'] + BbsIprime).upper()

for linker in linker_dict:
    lib8_pos4[linker] = (BbsI + NN + cut_dict['D'] + linker_dict[linker] + cut_dict['E'] + NN + BbsIprime).upper()

for payload in payload_dict:
    lib8_pos5[payload] = (BbsI + cut_dict['E'] + payload_dict[payload] + cut_dict['F'] + BbsIprime).upper()

for tag in tag_dict:
    lib8_pos6[tag] = (BbsI + NN + cut_dict['F'] + tag_dict[tag] + (2*stop) + cut_dict['G'] + NN + BbsIprime).upper()

##### Libraries 9 and 10 are the same as 7 and 8, except for the payload, but I have all the payloads in a single library

# Moving these sequences into csv form

In [247]:
import csv

with open('output.csv', 'w') as output:
    writer = csv.writer(output)
    writer.writerow(['library 1', ''])
    writer.writerow([])
    for position in dict1:
        for key, value in position.items():
            writer.writerow([key, value])
        writer.writerow([])
        
    writer.writerow(['library 2', ''])
    writer.writerow([])
    for position in dict2:
        for key, value in position.items():
            writer.writerow([key, value])
        writer.writerow([])
        
    writer.writerow(['library 3', ''])
    writer.writerow([])
    for position in dict3:
        for key, value in position.items():
            writer.writerow([key, value])
        writer.writerow([])
    
    writer.writerow(['library 4', ''])
    writer.writerow([])
    for position in dict4:
        for key, value in position.items():
            writer.writerow([key, value])
        writer.writerow([])
    
    writer.writerow(['library 5', ''])
    writer.writerow([])
    for position in dict5:
        for key, value in position.items():
            writer.writerow([key, value])
        writer.writerow([])
        
    writer.writerow(['library 6', ''])
    writer.writerow([])
    for position in dict6:
        for key, value in position.items():
            writer.writerow([key, value])
        writer.writerow([])
    
    writer.writerow(['library 7', ''])
    writer.writerow([])
    for position in dict7:
        for key, value in position.items():
            writer.writerow([key, value])
        writer.writerow([])
    
    writer.writerow(['library 8', ''])
    writer.writerow([])
    for position in dict8:
        for key, value in position.items():
            writer.writerow([key, value])
        writer.writerow([])