In [56]:
# 3jyo designs ordering notebook

from Bio.PDB import PDBParser, PPBuilder
from glob import glob 
# Module-level PDB parser and polypeptide builder, shared by pdb_to_fasta below.
p, ppb = PDBParser(), PPBuilder()

def pdb_to_fasta( pdb ):
    """Return the sequence of the first polypeptide built from a PDB file.

    `pdb` is handed to the module-level parser `p` both as the structure id
    and as the file to parse. Only the first polypeptide produced by the
    module-level builder `ppb` is used; any further polypeptides are ignored.

    Returns None when the builder yields no polypeptide at all.
    """
    structure = p.get_structure( pdb, pdb )
    peptides = ppb.build_peptides( structure )
    first = next( iter( peptides ), None )
    if first is None:
        return None
    return first.get_sequence()

# Write one FASTA record per design PDB matched by the glob:
# the header line is the file name, the body is the sequence from pdb_to_fasta.
with open( 'queries.fa', 'w' ) as fasta_out:
    for pdb_path in glob( 'des*pdb' ):
        sequence = pdb_to_fasta( pdb_path )
        fasta_out.writelines( [
            '>{}\n'.format( pdb_path ),
            '{}\n'.format( sequence ),
        ] )

In [57]:
# Run tblastn with queries.fa as the query and 3jyo.fasta as the subject, saving four
# tab-separated columns per hit (sstart, send, sseq, qseq) to blast_out.tsv.
!tblastn -subject 3jyo.fasta -query queries.fa -outfmt "6 sstart send sseq qseq" > blast_out.tsv

In [58]:
import screed
from re import sub 

# One uppercase DNA codon per one-letter amino-acid code (20 entries).
# Presumably the preferred E. coli codon for each residue, going by the name -- confirm.
ecoli = dict(
    G='GGC', A='GCG', V='GTG', F='TTT', E='GAA',
    D='GAT', N='AAC', H='CAT', P='CCG', Q='CAG',
    W='TGG', Y='TAT', I='ATT', M='ATG', C='TGC',
    K='AAA', L='CTG', R='CGT', T='ACC', S='AGC',
)

def reverse_complement( seq ):
    """Return the reverse complement of a DNA sequence, preserving letter case.

    Only the characters a/c/t/g and A/C/T/G are supported; any other character
    (for example 'N' or '-') raises KeyError.
    """
    # Build the pairing table once per call rather than once per base.
    pairing = dict( zip( 'actgACTG', 'tgacTGAC' ) )
    complement = [ pairing[ base ] for base in seq ]
    return ''.join( complement )[::-1]

# Load the BLAST output into memory, one list entry per line (newlines kept).
with open( 'blast_out.tsv' ) as blast_file:
    outs = list( blast_file )
    
# Build one record per BLAST hit in `outs`. Each record (appended to `mutants`) holds:
#   'sequence' - the aligned stretch of the wild-type DNA with every differing
#                codon replaced by the uppercase codon from `ecoli`
#   'label'    - the substitutions joined by '+', e.g. 'T2Y+N77G'
#   'oligos'   - reverse-complemented oligos, one per cluster of mutated codons,
#                each flanked by up to 15 wild-type bases on either side

# The wild-type record is the same for every hit, so read it once up front.
wt = [ r for r in screed.open( '3jyo.fasta' ) ][0]

mutants = []
for blast_out in outs:
    # a blank line (e.g. a trailing one) carries no hit and would break the unpack below
    if not blast_out.strip():
        continue

    # parse blast output; drop the line ending so it never becomes part of qseq
    sstart, send, sseq, qseq = blast_out.rstrip( '\r\n' ).split( '\t' )

    # Alignment columns are used directly as codon indices below. A gap in either
    # sequence shifts every later column, so the wrong codons would be replaced.
    if '-' in sseq or '-' in qseq:
        raise ValueError(
            'Gapped alignment for subject range {}-{}: cannot map alignment '
            'columns onto codons'.format( sstart, send ) )

    # diff: ( wild-type residue, 0-based alignment column, design residue )
    diff = [ ( native, pos, new )
             for pos, ( native, new ) in enumerate( zip( sseq, qseq ) )
             if native != new ]

    # make mutations
    # Wild-type bases are forced to lowercase because mutated codons are marked by
    # being uppercase; the regex below and the oligo filter both rely on that.
    # NOTE(review): the slice assumes sstart <= send (forward-strand hit) -- confirm.
    t = wt.sequence[ int( sstart ) - 1 : int( send ) ].lower()
    codons = [ t[ i : i + 3 ] for i in range( 0, len( t ), 3 ) ]
    label = [ ]

    for native, pos, new in diff:
        codons[ pos ] = ecoli[ new ]
        # NOTE(review): the number is the 1-based alignment column; it equals the
        # residue number only when the hit starts at the first codon -- confirm.
        label.append( '{}{}{}'.format( native, pos + 1, new ) )

    params = { }
    params['sequence'] = ''.join( codons )
    params['label'] = '+'.join( label )

    # Collapse every wild-type run of 30+ bases to "first 15,last 15", so that
    # splitting on ',' leaves each mutated cluster with 15-base flanks.
    cut_up = sub( r'([atcg]{15})[atcg]{0,}([atcg]{15})', r'\1,\2', params['sequence'] )

    # Keep exactly the pieces that carry a mutated (uppercase) codon. Selecting by
    # position instead would emit a wild-type-only 15-mer when the first mutation is
    # far from the start, and drop a real oligo when the last one is near the end.
    oligos = [ reverse_complement( piece )
               for piece in cut_up.split( ',' )
               if any( base.isupper() for base in piece ) ]
    params['oligos'] = oligos

    mutants.append( params )

In [61]:
#transcript format CSV 

# Write the order sheet: one row per oligo, with the oligo sequence doubling as its label.
# Oligos of 60 bases or more are cut at the midpoint and written as two rows; if either
# half is still 60 bases or more, nothing is written for that oligo and an Exception is raised.
with open( '3jyo_order.csv', 'w' ) as order_csv:
    order_csv.write( 'mutant_label,oligo_label,sequence,scale,purification\n' )
    row = '{0},{1},{1},25nm,standard\n'
    for mutant in mutants:
        for oligo in mutant['oligos']:
            if len( oligo ) < 60:
                pieces = [ oligo ]
            else:
                midpoint = len( oligo ) // 2
                pieces = [ oligo[:midpoint], oligo[midpoint:] ]
                if any( len( piece ) >= 60 for piece in pieces ):
                    raise Exception( 'Split this oligo but it\'s still too long' )
            for piece in pieces:
                order_csv.write( row.format( mutant['label'], piece ) )

In [62]:
!cat 3jyo_order.csv

mutant_label,oligo_label,sequence,scale,purification
T2Y+N77G+Q241S,ttcatgcattgccggATAacg,ttcatgcattgccggATAacg,25nm,standard
T2Y+N77G+Q241S,atcaataacaacggtGCCaactgcacccagctg,atcaataacaacggtGCCaactgcacccagctg,25nm,standard
T2Y+N77G+Q241S,aaaggcatcaacggcGCTatgaattgccatacg,aaaggcatcaacggcGCTatgaattgccatacg,25nm,standard
T2Y+T52Q+D93N+Q241S,ttcatgcattgccggATAacg,ttcatgcattgccggATAacg,25nm,standard
T2Y+T52Q+D93N+Q241S,ctgtttatacggatgCTGaatgttcagaccatt,ctgtttatacggatgCTGaatgttcagaccatt,25nm,standard
T2Y+T52Q+D93N+Q241S,accaaaaccgctcacGTTggtattatgacctgt,accaaaaccgctcacGTTggtattatgacctgt,25nm,standard
T2Y+T52Q+D93N+Q241S,aaaggcatcaacggcGCTatgaattgccatacg,aaaggcatcaacggcGCTatgaattgccatacg,25nm,standard
T2Y+T52Q+N77G+D93N+Q241S,ttcatgcattgccggATAacg,ttcatgcattgccggATAacg,25nm,standard
T2Y+T52Q+N77G+D93N+Q241S,ctgtttatacggatgCTGaatgttcagaccatt,ctgtttatacggatgCTGaatgttcagaccatt,25nm,standard
T2Y+T52Q+N77G+D93N+Q241S,atcaataacaacggtGCCaactgcacccagctg,atcaataacaacggtGCCaactgcacccagctg,25n