In [1]:
import sys 

import numpy as np 
import seaborn as sns
import pandas

import pyrosetta
from rosetta.protocols import simple_moves, residue_selectors
from rosetta.core import select, chemical 

from Bio.Data import IUPACData

In [2]:
params = '-beta -extra_res_fa example_input/pnpg.params'
pyrosetta.init(params)

Found rosetta database at: /Users/alex/anaconda3/envs/rose/lib/python3.5/site-packages/pyrosetta-4.0-py3.5.egg/database; using it....
PyRosetta-4 2016 [Rosetta 2016 unknown:94998f0e5b038b6d8083995a664cb96641cb844b 2017-05-11 15:10:16 -0500] retrieved from: git@github.com:RosettaCommons/main.git
(C) Copyright Rosetta Commons Member Institutions.
Created in JHU by Sergey Lyskov and PyRosetta Team.



In [3]:
! pwd 

/Users/alex/Documents/bglb_family/shallow_mutational_scan


In [4]:
# model all possible mutations for a given position 

In [5]:
pose = pyrosetta.pose_from_file('example_input/native.pdb')
score = pyrosetta.create_score_function('beta') 
wild_type_energy = score(pose)
print('Wild type energy:', wild_type_energy) 

Wild type energy: -1447.3390722756833


In [6]:
def get_pose_energies(pose, score_function):
    energies = pose.energies()
    score_types = score_function.get_nonzero_weighted_scoretypes()
    for pos in range(1, pose.total_residue()+1):
        for st in score_types:
            value = energies.residue_total_energies(pos)[st]
            yield pos, str(st).split('.')[1], value

In [7]:
muts = pandas.read_csv('../data_sets/speculative_targets.csv')['mutant_name']

In [8]:
%%time

def deep_mutational_scan(pose, score_function):
    '''
    input: pose, score function 
    output: pandas DataFrame with per-residue energy features
    '''
    
    length = pose.total_residue()
    for n in range(163, 167): #length+1):
        native = pose.sequence()[n-1]
        for olc, tlc in IUPACData.protein_letters_1to3.items():
            tlc = tlc.upper()
            copy_pose = pose.clone()
            protocol = [
                simple_moves.MutateResidue(n, tlc)
            ]
            for item in protocol: 
                item.apply(copy_pose)
            name = '{}{}{}'.format(native, n, olc)
            for pos, st, value in get_pose_energies(copy_pose, score):
                yield name, pos, st, value 

CPU times: user 4 µs, sys: 1 µs, total: 5 µs
Wall time: 6.91 µs


In [9]:
columns = ['mutant', 'position', 'score_term', 'value']
df = pandas.DataFrame(deep_mutational_scan(pose, score), columns=columns)

In [10]:
df.head()

Unnamed: 0,mutant,position,score_term,value
0,N163W,1,fa_atr,-4.888219
1,N163W,1,fa_rep,0.859524
2,N163W,1,fa_sol,2.6747
3,N163W,1,fa_intra_atr_xover4,-0.667158
4,N163W,1,fa_intra_rep_xover4,1.019409


In [28]:
g = df.groupby('mutant')

for idx, gdf in g:
    print(idx)
    print(gdf.shape)
    break

I165A
(11674, 4)


In [30]:
g.agg(lambda x: x.value)

TypeError: <lambda>() missing 1 required positional argument: 'y'

In [20]:
df.pivot(columns='score_term', values='value')

score_term,dslf_fa13,fa_atr,fa_dun_dev,fa_dun_rot,fa_dun_semi,fa_elec,fa_intra_atr_xover4,fa_intra_elec,fa_intra_rep_xover4,fa_intra_sol_xover4,...,hxl_tors,lk_ball,lk_ball_bridge,lk_ball_bridge_uncpl,lk_ball_iso,omega,p_aa_pp,pro_close,rama_prepro,ref
0,,-4.888219,,,,,,,,,...,,,,,,,,,,
1,,,,,,,,,,,...,,,,,,,,,,
2,,,,,,,,,,,...,,,,,,,,,,
3,,,,,,,-0.667158,,,,...,,,,,,,,,,
4,,,,,,,,,1.019409,,...,,,,,,,,,,
5,,,,,,,,,,0.254186,...,,,,,,,,,,
6,,,,,,,,,,,...,,1.470664,,,,,,,,
7,,,,,,,,,,,...,,,,,3.269581,,,,,
8,,,,,,,,,,,...,,,0.000000,,,,,,,
9,,,,,,,,,,,...,,,,0.000000,,,,,,


In [12]:
# df = pandas.DataFrame( data, columns=fmt.keys() )
# df.index = df.index + 1 
# df.shape

In [13]:
# seaborn.heatmap(df, cmap='viridis')

In [14]:
# df.mean( axis=0 ).sort_values()

In [15]:
# df.mean( axis=1 )

In [16]:
#df.to_csv( 'repack_mutated_residue_only.csv' ) 