In [74]:
# 3jyo designs ordering notebook

import screed
from re import sub 
from Bio.SeqUtils import IUPACData
from glob import glob 

ecoli = { 'G':'GGC', 'A':'GCG', 'V':'GTG', 'F':'TTT', 'E':'GAA', 'D':'GAT', 'N':'AAC', 
          'H':'CAT', 'P':'CCG', 'Q':'CAG', 'W':'TGG', 'Y':'TAT', 'I':'ATT', 'M':'ATG', 
          'C':'TGC', 'K':'AAA', 'L':'CTG', 'R':'CGT', 'T':'ACC', 'S':'AGC' }

def reverse_complement( seq ):
    complement = [ dict( zip( 'actgACTG', 'tgacTGAC' ) )[ i ] for i in seq ]
    return ''.join( complement )[::-1]

def pdb_to_fasta( pdb ):
    with open( pdb ) as fn:
        cas = [ IUPACData.protein_letters_3to1[ line.split()[3].title() ] for line in fn.readlines() if 'CA' in line and 'ATOM' in line ]
        return '>{}\n{}\n'.format( pdb, ''.join( cas ) ) 

In [75]:
with open( 'queries.fa', 'w' ) as fn:    
    for i in glob( 'des*pdb' ):
        fn.write( pdb_to_fasta( i ) )

In [76]:
!tblastn -subject 3jyo.fasta -query queries.fa -soft_masking true -outfmt "6 sstart send sseq qseq" > blast_out.tsv

In [77]:
with open( 'blast_out.tsv' ) as fn:
    outs = fn.readlines()
    
mutants = [] 
for blast_out in set( outs ):
    #parse blast output 
    sstart, send, sseq, qseq = blast_out.split( '\t' )
    
    # diff 
    zipped = enumerate( zip( sseq, qseq ) )
    diff = [ ( native, pos, new ) for pos, ( native, new ) in zipped if native != new ]

    # make mutations 
    wt = [r for r in screed.open( '3jyo.fasta' )][0]
    t = wt.sequence[ int(sstart) - 1 : int(send) ]
    codons = [ t[i:i+3] for i in range( 0, len(t), 3 ) ] 
    label = [ ]
    params = { } 
    
    for d in diff:
        codons[ d[1] ] = ecoli[ d[2] ]
        label.append( '{}{}{}'.format( d[0], d[1] + 1, d[2] ) )
        
    #params['diff'] = diff 
    params['sequence'] = ''.join( codons )
    params['label'] = '+'.join( label ) 
    
    cut_up = sub( r'([atcg]{15})[atcg]{0,}([atcg]{15})', r'\1,\2', params['sequence'] )
    oligos =  [ reverse_complement( oligo ) for oligo in cut_up.split( ',' )[1:-1] ]
    params['oligos'] = oligos
    
    mutants.append( params ) 

In [78]:
with open( '3jyo_order.csv', 'w' ) as ts_csv:
    ts_csv.write( 'mutant_label,oligo_label,sequence,scale,purification\n' )
    for mutant in mutants:
        for oligo in mutant['oligos']:
            if len( oligo ) < 60:
                ts_csv.write( '{0},{1},{1},25nm,standard\n'.format( mutant['label'], oligo ) )
            else:
                raise Exception()                
                #halfway = len( oligo ) // 2 
                #oligo1, oligo2 = oligo[:halfway], oligo[halfway:]
                #if len( oligo1 ) < 60 and len( oligo2 ) < 60: 
                #    ts_csv.write( '{0},{1},{1},25nm,standard\n'.format( mutant['label'], oligo1 ) )
                #    ts_csv.write( '{0},{1},{1},25nm,standard\n'.format( mutant['label'], oligo2 ) )
                #else:
                #    raise Exception( 'Split this oligo but it\'s still too long' )

In [79]:
!cat 3jyo_order.csv

mutant_label,oligo_label,sequence,scale,purification
T18Y+T68Q+K72M+D109N+Q257S,ttcatgcattgccggATAacggctcagatccag,ttcatgcattgccggATAacggctcagatccag,25nm,standard
T18Y+T68Q+K72M+D109N+Q257S,cggcagaactgcctgCATatacggatgCTGaatgttcagaccatt,cggcagaactgcctgCATatacggatgCTGaatgttcagaccatt,25nm,standard
T18Y+T68Q+K72M+D109N+Q257S,accaaaaccgctcacGTTggtattatgacctgt,accaaaaccgctcacGTTggtattatgacctgt,25nm,standard
T18Y+T68Q+K72M+D109N+Q257S,aaaggcatcaacggcGCTatgaattgccatacg,aaaggcatcaacggcGCTatgaattgccatacg,25nm,standard
T18Y+D109H+Q257S,ttcatgcattgccggATAacggctcagatccag,ttcatgcattgccggATAacggctcagatccag,25nm,standard
T18Y+D109H+Q257S,accaaaaccgctcacATGggtattatgacctgt,accaaaaccgctcacATGggtattatgacctgt,25nm,standard
T18Y+D109H+Q257S,aaaggcatcaacggcGCTatgaattgccatacg,aaaggcatcaacggcGCTatgaattgccatacg,25nm,standard
T18Y+N93A+D109H+Q257S,ttcatgcattgccggATAacggctcagatccag,ttcatgcattgccggATAacggctcagatccag,25nm,standard
T18Y+N93A+D109H+Q257S,atcaataacaacggtCGCaactgcacccagctg,atcaataacaacggtCGCaac

In [89]:
print set( [ i['label'] for i in mutants ] ) 
print [i['label'] for i in mutants ]

set(['T18Y+D109H+A140Q+Q257S', 'T18Y+N93G+Q257S', 'T18Y+T68Q+K72M+N93A+D109N+Q257S', 'T18Y+K72S+D109W+G136E+Q257S', 'T18Y+D109H+V137N+A140Q+Q257S', 'T18Y+T68Q+D109N+Q257S', 'T18Y+T94R+Q257S', 'L8Y+T18Y+D109W+Q257S', 'T18Y+T68Q+N93G+D109N+Q257S', 'T18Y+D109W+G136D+Q257S', 'L8F+T18Y+D109W+Q257S', 'T18Y+T68Q+N93A+D109N+Q257S', 'T18Y+N93A+D109H+Q257S', 'T18Y+Q257S', 'T18Y+D109W+Q257S', 'T18Y+N93A+D109N+Q257S', 'T18Y+D109H+Q257S', 'T18Y+T68Q+K72M+D109N+Q257S', 'T18Y+N66A+T94R+Q257S'])
['T18Y+T68Q+K72M+D109N+Q257S', 'T18Y+D109H+Q257S', 'T18Y+N93A+D109H+Q257S', 'T18Y+Q257S', 'T18Y+T94R+Q257S', 'T18Y+D109H+V137N+A140Q+Q257S', 'T18Y+N66A+T94R+Q257S', 'T18Y+T68Q+D109N+Q257S', 'T18Y+T68Q+N93A+D109N+Q257S', 'T18Y+D109H+A140Q+Q257S', 'T18Y+N93G+Q257S', 'T18Y+T68Q+K72M+N93A+D109N+Q257S', 'T18Y+N93A+D109N+Q257S', 'T18Y+D109W+Q257S', 'T18Y+T68Q+N93G+D109N+Q257S', 'T18Y+K72S+D109W+G136E+Q257S', 'T18Y+D109W+G136D+Q257S', 'L8Y+T18Y+D109W+Q257S', 'L8F+T18Y+D109W+Q257S']
