In [8]:
# reorganizing 
# start with the stuff you need to do once

import os
from glob import glob
from re import sub
from subprocess import check_output

import pandas
import screed
from Bio import SeqIO
from Bio.PDB import PDBParser, CaPPBuilder
from Bio.Seq import Seq

# input files for the scaffold and the directory that receives per-design FASTA files
bb_nucleotide = '1oaa.fasta'
bb_pdb = '1oaa.pdb'
outpath = 'out/'

# one codon per amino acid (single-letter code -> DNA triplet); make() uses it
# to encode every designed residue that differs from the scaffold
ecoli_codon = {
    'G': 'GGC',
    'A': 'GCG',
    'V': 'GTG',
    'F': 'TTT',
    'E': 'GAA',
    'D': 'GAT',
    'N': 'AAC',
    'H': 'CAT',
    'P': 'CCG',
    'Q': 'CAG',
    'W': 'TGG',
    'Y': 'TAT',
    'I': 'ATT',
    'M': 'ATG',
    'C': 'TGC',
    'K': 'AAA',
    'L': 'CTG',
    'R': 'CGT',
    'T': 'ACC',
    'S': 'AGC',
}

# shared structure parser and C-alpha based polypeptide builder
p = PDBParser()
ppb = CaPPBuilder()

# scaffold nucleotide record and the sequence of the first polypeptide in the scaffold PDB
dna = SeqIO.read( bb_nucleotide, 'fasta' )
wt_structure = p.get_structure( 'wt_structure', bb_pdb )
wt_peptides = ppb.build_peptides( wt_structure )
wt_seq = wt_peptides[ 0 ].get_sequence()

def make( design_structure, bb_nucleotide ):
    """Turn one design PDB into a mutated scaffold sequence plus mutagenic oligos.

    Steps: extract the first polypeptide sequence of the design, write it as
    FASTA under ``outpath``, align it against the scaffold nucleotide FASTA
    with ``tblastn``, swap the codon of every residue that differs from the
    wild type for the corresponding ``ecoli_codon`` entry, and append one row
    per oligo to ``transcriptic_csv.csv``.

    Parameters
    ----------
    design_structure : str
        Path of the design PDB file.
    bb_nucleotide : str
        Path of the scaffold nucleotide FASTA file.

    Returns
    -------
    tuple
        ``( diff, nucleotide_sequence )`` where ``diff`` is the '+'-joined
        mutation label and ``nucleotide_sequence`` the joined codons.

    Raises
    ------
    ValueError
        If ``tblastn`` reports no alignment.
    subprocess.CalledProcessError
        If ``tblastn`` exits with a non-zero status.
    """

    # design metadata
    design = p.get_structure( 'design', design_structure )
    design_seq = [ pp.get_sequence() for pp in ppb.build_peptides( design ) ][ 0 ]
    # the output directory is not guaranteed to exist yet
    if outpath and not os.path.isdir( outpath ):
        os.makedirs( outpath )
    design_fa = outpath + design_structure.replace( 'pdb', 'fa' )
    with open( design_fa, 'w' ) as handle:
        handle.write( '>{}\n{}'.format( design_structure, design_seq ) )

    # align design and scaffold
    # argument list instead of a shell string: file names need no quoting, and
    # universal_newlines=True yields text (not bytes) on Python 3 as well
    cmd = [ 'tblastn', '-query', design_fa, '-subject', bb_nucleotide,
            '-outfmt', '6 sseq qseq sstart send' ]
    blast_out = check_output( cmd, universal_newlines=True )
    best_hit = blast_out.split( '\n' )[ 0 ]
    fields = best_hit.split( '\t' )
    if len( fields ) != 4:
        raise ValueError( 'tblastn found no alignment between {} and {}'.format( design_fa, bb_nucleotide ) )
    sseq, qseq, sstart, send = fields

    # print a diff from this to use as the mutant handle
    # NOTE: position is the 0-based column index within the alignment
    diff = '+'.join([ '{}{}{}'.format( native, position, designed ) for position, ( native, designed ) in enumerate( zip( sseq, qseq ) ) if native != designed ])

    # scaffold metadata: aligned stretch of the first scaffold record, cut into codons
    wt = [ record.sequence[ int( sstart ) - 1 : int( send ) ] for record in screed.open( bb_nucleotide ) ][0]
    codons = [ wt[i:i+3] for i in range( 0, len( wt ), 3 ) ]

    # mutate the scaffold sequence
    # NOTE(review): wt_seq positions are used directly as codon indices, which
    # assumes the alignment starts at the first PDB residue -- confirm
    for position, ( native, designed ) in enumerate( zip( wt_seq, design_seq ) ):
        if native != designed:
            # sanity check: only replace a codon that really encodes the native
            # residue; str() keeps this a plain string comparison regardless of
            # how the installed Biopython compares Seq objects
            if str( Seq( codons[ position ] ).translate() ) == native:
                codons[ position ] = ecoli_codon[ designed ]

    # Collapse every run of 30+ lowercase bases to its first and last 15 bases
    # joined by a comma; the inner comma-separated pieces are then the replaced
    # (uppercase) codons with 15-base flanks, and [1:-1] drops both end pieces.
    # Assumes the scaffold FASTA is lowercase -- TODO confirm.
    oligos = sub( r'([atcg]{15})[atcg]{0,}([atcg]{15})', r'\1,\2', ''.join( codons ) ).split( ',' )[1:-1]
    with open( 'transcriptic_csv.csv', 'a' ) as transcriptic_csv:
        for oligo in oligos:
            # the oligo sequence doubles as its label
            transcriptic_csv.write( '{0},{1},{1},25nm,standard\n'.format( diff, oligo ) )

    return ( diff, ''.join( codons ) )
            
# then loop through the mutants
# start the order sheet afresh with its header; make() appends one row per oligo
with open( 'transcriptic_csv.csv', 'w' ) as transcriptic_csv:
    transcriptic_csv.write( 'mutant_label,oligo_label,sequence,scale,purification\n' )

# glob returns matches in arbitrary order; sort so that the CSV rows and the
# pickled table come out in the same order on every run
mutants = [ make( mutant, bb_nucleotide ) for mutant in sorted( glob( 'des*pdb' ) ) ]

# one row per design: mutation label and full mutated nucleotide sequence
df = pandas.DataFrame( mutants, columns=['diff','nucleotide_sequence'] )
df.to_pickle( 'designs_nucleotide.p' )

# wait for sequencing results 

In [9]:
# list the contents of the working directory
!ls

1oaa.fasta                                   des15.pdb
1oaa.pdb                                     des16.pdb
1oaa.pep                                     des17.pdb
1oaa_data.csv                                des18.pdb
1oaa_dyads_transfor_numbered.txt             des19.pdb
1oaa_dyads_transformation_20151202.txt       des2.pdb
Eurofins_Sequencing_Platesv5_1-3.xls         des3.pdb
NAX.params                                   des4.pdb
SBO.conf.pdb                                 des5.pdb
SBO.params                                   des6.pdb
UM_120_H169W204_1_1oaa_SBO_HW_HW_1_0007.pdb  des7.pdb
UM_120_H169W204_1_1oaa_SBO_HW_HW_1_0008.pdb  des8.pdb
UM_120_H169W204_1_1oaa_SBO_HW_HW_1_0009.pdb  des9.pdb
UM_297_W208Y173_1_1oaa_SBO_HW_YTS_1_0003.pdb designs_nucleotide.p
Untitled.ipynb                               diff_and_decide.ipynb
clones.csv                                   fwd_reads
des1.pdb                                     kunkelWellCSV_r18cdhv8exmh3.csv
