In [1]:
import os
import pandas as pd
import sys
import os
from subprocess import call

#%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sns
import matplotlib.gridspec as gridspec

from IPython.display import display, HTML
import numpy as np

import random
from scipy.stats import ttest_1samp
from sklearn.externals import joblib
from matplotlib.lines import Line2D

random.seed(42)
np.random.seed(42)

from scipy.stats import spearmanr, pearsonr

In [2]:
# Load the full ssm2 table from its pickle.
# NOTE: unpickling can execute arbitrary code; only load this file from a trusted source.
ssm2 = pd.read_pickle("../../../data/pieces_new/rocklin_ssm2__full.pkl")

# First manual run: hYAP65

In [3]:
# Rows whose phenotype name mentions hYAP65. copy() makes `hyap` an independent
# frame, so adding columns to it later does not write into a slice of `ssm2`
# (which is what raises pandas' SettingWithCopyWarning).
hyap = ssm2[ssm2.phenotype_name.str.contains("hYAP65")].copy()

In [4]:
# (rows, columns) of the hYAP65 subset.
hyap.shape

(829, 26)

In [5]:
def id_mutations(wt, mut_seq):
    """Count the positions at which ``mut_seq`` differs from ``wt``.

    Every residue of ``mut_seq`` is compared with the residue of ``wt`` at the
    same index, so ``wt`` must be at least as long as ``mut_seq``.

    Returns the number of differing positions as an ``int``.
    """
    return sum(residue != wt[pos] for pos, residue in enumerate(mut_seq))

def return_mutations(wt, mut_seq):
    """Return the first point mutation of ``mut_seq`` relative to ``wt``.

    The label has the form ``<wt residue><1-based position><mutant residue>``,
    e.g. ``"D5A"``. Only the first differing position is reported, so the
    function is intended for single-point mutants.

    Parameters
    ----------
    wt : str
        Reference sequence; must be at least as long as ``mut_seq``.
    mut_seq : str
        Sequence to compare against ``wt``.

    Returns
    -------
    str
        Mutation label for the first differing position.

    Raises
    ------
    ValueError
        If no position of ``mut_seq`` differs from ``wt``.
    """
    for i, letter in enumerate(mut_seq):
        if letter != wt[i]:
            return f"{wt[i]}{i+1}{letter}"
    # A bare `raise` here has no active exception to re-raise and would surface
    # as an opaque RuntimeError; raise a descriptive error instead.
    raise ValueError("mut_seq is identical to wt: there is no mutation to report")

In [6]:
# Distribution of sequence lengths among the hYAP65 rows.
hyap.sequence.map(len).value_counts()

46    829
Name: sequence, dtype: int64

In [7]:
# Most common sequence length: value_counts() sorts by frequency, so index[0] is the mode.
hyap.sequence.map(len).value_counts().index[0]

46

In [8]:
# Consensus (wild-type) sequence: the most frequent residue at every position.
# The number of positions is read from the data instead of being hard-coded.
"".join([hyap.sequence.map(lambda x: x[i]).value_counts().index[0] for i in range(len(hyap.sequence.iloc[0]))])

'FEIPDDVPLPAGWEMAKTSSGQRYFKNHIDQTTTWQDPRKAMLSQM'

In [9]:
# Number of positions at which each hYAP65 sequence differs from the consensus
# sequence, tabulated over all rows.
hyap.sequence.map(lambda mut_seq: id_mutations('FEIPDDVPLPAGWEMAKTSSGQRYFKNHIDQTTTWQDPRKAMLSQM', mut_seq)).value_counts()

1    828
0      1
Name: sequence, dtype: int64

In [83]:
# Repeat the consensus / mutation-count check for every protein in the dataset:
# print the phenotype name, then the tabulated number of differences from the
# per-protein consensus sequence.
for prot in ssm2["phenotype_name"].unique():
    print(prot)
    prot_subset = ssm2.loc[ssm2["phenotype_name"] == prot]
    # Consensus residue at each position, using the first sequence's length.
    wt = "".join(
        [
            prot_subset["sequence"].map(lambda seq: seq[i]).value_counts().index[0]
            for i in range(len(prot_subset["sequence"].iloc[0]))
        ]
    )
    print(prot_subset["sequence"].map(lambda mut_seq: id_mutations(wt, mut_seq)).value_counts())

EEHEE_rd3_0037.pdb_ssm2_stability
1    774
0      1
Name: sequence, dtype: int64
EEHEE_rd3_1498.pdb_ssm2_stability
1    774
0      1
Name: sequence, dtype: int64
EEHEE_rd3_1702.pdb_ssm2_stability
1    774
0      1
Name: sequence, dtype: int64
EEHEE_rd3_1716.pdb_ssm2_stability
1    774
0      1
Name: sequence, dtype: int64
EHEE_0882.pdb_ssm2_stability
1    720
0      1
Name: sequence, dtype: int64
EHEE_rd2_0005.pdb_ssm2_stability
1    720
0      1
Name: sequence, dtype: int64
EHEE_rd3_0015.pdb_ssm2_stability
1    720
0      1
Name: sequence, dtype: int64
HEEH_rd2_0779.pdb_ssm2_stability
1    774
0      1
Name: sequence, dtype: int64
HEEH_rd3_0223.pdb_ssm2_stability
1    774
0      1
Name: sequence, dtype: int64
HEEH_rd3_0726.pdb_ssm2_stability
1    774
0      1
Name: sequence, dtype: int64
HEEH_rd3_0872.pdb_ssm2_stability
1    774
0      1
Name: sequence, dtype: int64
HHH_0142.pdb_ssm2_stability
1    774
0      1
Name: sequence, dtype: int64
HHH_rd2_0134.pdb_ssm2_stability
1    774
0   

In [10]:
# Keep only the mutant rows (everything except the wild-type sequence). copy()
# makes hyap_muts an independent frame, so adding columns to it later does not
# trigger pandas' SettingWithCopyWarning.
hyap_muts = hyap[hyap.sequence != "FEIPDDVPLPAGWEMAKTSSGQRYFKNHIDQTTTWQDPRKAMLSQM"].copy()

In [11]:
# Quick check of return_mutations on a single substitution (D -> A at position 5).
return_mutations("FEIPDDVPLPAGWEMAKTSSGQRYFKNHIDQTTTWQDPRKAMLSQM", "FEIPADVPLPAGWEMAKTSSGQRYFKNHIDQTTTWQDPRKAMLSQM")

'D5A'

The following cell prints the mutations in the format required by the online version of the STRUM tool.

In [12]:
# Print every hYAP65 mutant as "<wt residue><position><mutant residue>;", one per line.
for x in [
    return_mutations('FEIPDDVPLPAGWEMAKTSSGQRYFKNHIDQTTTWQDPRKAMLSQM', mut_seq)
    for mut_seq in hyap_muts.sequence
]:
    print(f"{x};")

A11D;
A11E;
A11F;
A11G;
A11H;
A11I;
A11K;
A11L;
A11M;
A11N;
A11Q;
A11S;
A11T;
A11V;
A11W;
A11Y;
A16D;
A16E;
A16F;
A16G;
A16H;
A16I;
A16L;
A16M;
A16N;
A16P;
A16Q;
A16S;
A16V;
A16W;
A16Y;
A41D;
A41E;
A41F;
A41G;
A41I;
A41L;
A41M;
A41Q;
A41R;
A41S;
A41T;
A41V;
A41W;
D30E;
D30F;
D30H;
D30I;
D30K;
D30L;
D30N;
D30P;
D30Q;
D30T;
D30V;
D30W;
D30Y;
D37A;
D37E;
D37F;
D37G;
D37H;
D37K;
D37L;
D37N;
D37P;
D37Q;
D37R;
D37S;
D37T;
D37W;
D37Y;
D5A;
D5E;
D5F;
D5G;
D5H;
D5I;
D5K;
D5L;
D5N;
D5P;
D5Q;
D5R;
D5S;
D5T;
D5V;
D5W;
D6A;
D6F;
D6G;
D6H;
D6L;
D6M;
D6N;
D6P;
D6Q;
D6S;
D6T;
D6V;
D6W;
D6Y;
E14A;
E14F;
E14G;
E14H;
E14I;
E14K;
E14L;
E14M;
E14P;
E14Q;
E14S;
E14V;
E14W;
E2D;
E2F;
E2G;
E2I;
E2L;
E2M;
E2N;
E2P;
E2R;
E2T;
E2V;
E2Y;
F1A;
F1D;
F1H;
F1K;
F1L;
F1N;
F1Q;
F1R;
F1S;
F1T;
F1V;
F1W;
F1Y;
F25A;
F25D;
F25E;
F25G;
F25H;
F25K;
F25M;
F25N;
F25Q;
F25R;
F25S;
F25T;
F25V;
F25W;
F25Y;
G12A;
G12E;
G12F;
G12I;
G12K;
G12L;
G12M;
G12N;
G12P;
G12T;
G12V;
G12W;
G12Y;
G21A;
G21D;
G21F;
G21H;
G21K;
G21L;
G21M;
G21N;

In [13]:
# Label each mutant row of `hyap` with its point-mutation string (e.g. 'D5A').
# Only rows listed in hyap_muts.index are assigned; the wild-type row is not among them.
# NOTE(review): the recorded output shows a SettingWithCopyWarning, which suggests
# `hyap` is still a filtered slice of `ssm2` at this point.
hyap.loc[hyap_muts.index, 'mutation'] = hyap_muts.sequence.map(lambda mut_seq: return_mutations('FEIPDDVPLPAGWEMAKTSSGQRYFKNHIDQTTTWQDPRKAMLSQM', mut_seq))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self.obj[key] = _infer_fill_value(value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self.obj[item] = s


In [14]:
from io import StringIO

These are the ddG predictions returned by the STRUM online tool for the hYAP65 mutations printed above.

In [68]:
strum_res = pd.read_table(StringIO("""
name	chain	pos	wild type	mutant	ddG
SM104353	A	11	A	D	0.6
SM104353	A	11	A	E	0.54
SM104353	A	11	A	F	-0.07
SM104353	A	11	A	G	0.17
SM104353	A	11	A	H	0.78
SM104353	A	11	A	I	0.09
SM104353	A	11	A	K	0.58
SM104353	A	11	A	L	0.02
SM104353	A	11	A	M	0.09
SM104353	A	11	A	N	0.27
SM104353	A	11	A	Q	0.33
SM104353	A	11	A	S	0.62
SM104353	A	11	A	T	0.39
SM104353	A	11	A	V	0.4
SM104353	A	11	A	W	0.18
SM104353	A	11	A	Y	0.07
SM104353	A	16	A	D	0.34
SM104353	A	16	A	E	0.22
SM104353	A	16	A	F	0.07
SM104353	A	16	A	G	0.01
SM104353	A	16	A	H	0.78
SM104353	A	16	A	I	0.27
SM104353	A	16	A	L	-0.03
SM104353	A	16	A	M	0.04
SM104353	A	16	A	N	0.59
SM104353	A	16	A	P	0.22
SM104353	A	16	A	Q	0.25
SM104353	A	16	A	S	0.59
SM104353	A	16	A	V	0.46
SM104353	A	16	A	W	0.38
SM104353	A	16	A	Y	-0.07
SM104353	A	41	A	D	0.83
SM104353	A	41	A	E	0.8
SM104353	A	41	A	F	0.58
SM104353	A	41	A	G	0.05
SM104353	A	41	A	I	0.67
SM104353	A	41	A	L	0.68
SM104353	A	41	A	M	0.78
SM104353	A	41	A	Q	0.67
SM104353	A	41	A	R	0.91
SM104353	A	41	A	S	0.89
SM104353	A	41	A	T	0.75
SM104353	A	41	A	V	0.85
SM104353	A	41	A	W	0.88
SM104353	A	30	D	E	-0.33
SM104353	A	30	D	F	-1.06
SM104353	A	30	D	H	-0.55
SM104353	A	30	D	I	-1.07
SM104353	A	30	D	K	-0.6
SM104353	A	30	D	L	-0.91
SM104353	A	30	D	N	-0.21
SM104353	A	30	D	P	-2.03
SM104353	A	30	D	Q	-0.21
SM104353	A	30	D	T	-0.41
SM104353	A	30	D	V	-0.55
SM104353	A	30	D	W	-0.71
SM104353	A	30	D	Y	-0.4
SM104353	A	37	D	A	-0.83
SM104353	A	37	D	E	0.08
SM104353	A	37	D	F	-0.95
SM104353	A	37	D	G	-0.9
SM104353	A	37	D	H	-0.63
SM104353	A	37	D	K	-0.76
SM104353	A	37	D	L	-0.68
SM104353	A	37	D	N	0.01
SM104353	A	37	D	P	-0.77
SM104353	A	37	D	Q	0.32
SM104353	A	37	D	R	-0.31
SM104353	A	37	D	S	-0.47
SM104353	A	37	D	T	0.07
SM104353	A	37	D	W	-0.69
SM104353	A	37	D	Y	-0.99
SM104353	A	5	D	A	-0.27
SM104353	A	5	D	E	0.36
SM104353	A	5	D	F	0.04
SM104353	A	5	D	G	-0.19
SM104353	A	5	D	H	0.4
SM104353	A	5	D	I	-0.05
SM104353	A	5	D	K	0.04
SM104353	A	5	D	L	-0.28
SM104353	A	5	D	N	-0.02
SM104353	A	5	D	P	-1.2
SM104353	A	5	D	Q	0.14
SM104353	A	5	D	R	0.3
SM104353	A	5	D	S	0.22
SM104353	A	5	D	T	-0.07
SM104353	A	5	D	V	-0.1
SM104353	A	5	D	W	0.26
SM104353	A	6	D	A	0.18
SM104353	A	6	D	F	0.48
SM104353	A	6	D	G	-0.03
SM104353	A	6	D	H	0.46
SM104353	A	6	D	L	0.13
SM104353	A	6	D	M	0.14
SM104353	A	6	D	N	0.21
SM104353	A	6	D	P	-0.54
SM104353	A	6	D	Q	0.18
SM104353	A	6	D	S	0.32
SM104353	A	6	D	T	0.5
SM104353	A	6	D	V	0.21
SM104353	A	6	D	W	0.3
SM104353	A	6	D	Y	0.66
SM104353	A	14	E	A	0.09
SM104353	A	14	E	F	0.23
SM104353	A	14	E	G	-0.1
SM104353	A	14	E	H	0.52
SM104353	A	14	E	I	0.07
SM104353	A	14	E	K	0.37
SM104353	A	14	E	L	0.1
SM104353	A	14	E	M	0.12
SM104353	A	14	E	P	-0.46
SM104353	A	14	E	Q	0.28
SM104353	A	14	E	S	0.33
SM104353	A	14	E	V	0.1
SM104353	A	14	E	W	0.51
SM104353	A	2	E	D	0.31
SM104353	A	2	E	F	0.42
SM104353	A	2	E	G	-0.21
SM104353	A	2	E	I	0.51
SM104353	A	2	E	L	0.6
SM104353	A	2	E	M	0.43
SM104353	A	2	E	N	0.33
SM104353	A	2	E	P	-0.43
SM104353	A	2	E	R	0.7
SM104353	A	2	E	T	0.3
SM104353	A	2	E	V	0.19
SM104353	A	2	E	Y	0.47
SM104353	A	1	F	A	-0.23
SM104353	A	1	F	D	0.07
SM104353	A	1	F	H	-0.14
SM104353	A	1	F	K	-0.12
SM104353	A	1	F	L	-1.0
SM104353	A	1	F	N	-0.13
SM104353	A	1	F	Q	0.0
SM104353	A	1	F	R	0.64
SM104353	A	1	F	S	0.22
SM104353	A	1	F	T	-0.05
SM104353	A	1	F	V	-0.25
SM104353	A	1	F	W	-0.27
SM104353	A	1	F	Y	0.04
SM104353	A	25	F	A	-3.05
SM104353	A	25	F	D	-1.52
SM104353	A	25	F	E	-1.25
SM104353	A	25	F	G	-3.59
SM104353	A	25	F	H	-1.73
SM104353	A	25	F	K	-1.24
SM104353	A	25	F	M	-2.48
SM104353	A	25	F	N	-1.46
SM104353	A	25	F	Q	-1.12
SM104353	A	25	F	R	-0.77
SM104353	A	25	F	S	-2.62
SM104353	A	25	F	T	-2.32
SM104353	A	25	F	V	-3.32
SM104353	A	25	F	W	-1.53
SM104353	A	25	F	Y	-1.53
SM104353	A	12	G	A	-0.84
SM104353	A	12	G	E	-0.52
SM104353	A	12	G	F	-1.42
SM104353	A	12	G	I	-1.72
SM104353	A	12	G	K	-0.83
SM104353	A	12	G	L	-2.26
SM104353	A	12	G	M	-1.31
SM104353	A	12	G	N	-0.69
SM104353	A	12	G	P	-1.81
SM104353	A	12	G	T	-1.33
SM104353	A	12	G	V	-2.04
SM104353	A	12	G	W	-0.98
SM104353	A	12	G	Y	-0.92
SM104353	A	21	G	A	-0.5
SM104353	A	21	G	D	-0.21
SM104353	A	21	G	F	-0.69
SM104353	A	21	G	H	0.26
SM104353	A	21	G	K	-0.05
SM104353	A	21	G	L	-1.06
SM104353	A	21	G	M	-0.61
SM104353	A	21	G	N	-0.26
SM104353	A	21	G	Q	-0.05
SM104353	A	21	G	R	-0.24
SM104353	A	21	G	S	-0.47
SM104353	A	21	G	T	-0.59
SM104353	A	21	G	V	-1.32
SM104353	A	21	G	W	-0.22
SM104353	A	21	G	Y	-0.2
SM104353	A	28	H	A	0.35
SM104353	A	28	H	D	1.31
SM104353	A	28	H	E	1.54
SM104353	A	28	H	F	1.04
SM104353	A	28	H	K	1.97
SM104353	A	28	H	M	0.55
SM104353	A	28	H	N	1.33
SM104353	A	28	H	P	0.39
SM104353	A	28	H	Q	1.01
SM104353	A	28	H	R	1.1
SM104353	A	28	H	S	0.68
SM104353	A	28	H	W	1.42
SM104353	A	28	H	Y	1.15
SM104353	A	29	I	A	-0.59
SM104353	A	29	I	D	0.16
SM104353	A	29	I	E	0.02
SM104353	A	29	I	F	-0.73
SM104353	A	29	I	G	-1.05
SM104353	A	29	I	K	-0.13
SM104353	A	29	I	L	-0.64
SM104353	A	29	I	M	-0.67
SM104353	A	29	I	N	0.01
SM104353	A	29	I	Q	-0.0
SM104353	A	29	I	T	0.01
SM104353	A	29	I	V	-0.46
SM104353	A	3	I	A	-0.46
SM104353	A	3	I	D	0.31
SM104353	A	3	I	E	0.24
SM104353	A	3	I	F	-0.33
SM104353	A	3	I	G	-0.73
SM104353	A	3	I	K	-0.26
SM104353	A	3	I	L	-0.41
SM104353	A	3	I	M	-0.26
SM104353	A	3	I	N	-0.16
SM104353	A	3	I	P	-0.16
SM104353	A	3	I	R	-0.02
SM104353	A	3	I	S	-0.27
SM104353	A	3	I	T	0.09
SM104353	A	3	I	V	-0.48
SM104353	A	3	I	W	-0.44
SM104353	A	3	I	Y	-0.37
SM104353	A	17	K	A	0.17
SM104353	A	17	K	D	0.41
SM104353	A	17	K	E	0.78
SM104353	A	17	K	F	0.53
SM104353	A	17	K	H	0.66
SM104353	A	17	K	I	0.66
SM104353	A	17	K	L	0.57
SM104353	A	17	K	M	0.51
SM104353	A	17	K	N	0.52
SM104353	A	17	K	P	-0.43
SM104353	A	17	K	Q	0.63
SM104353	A	17	K	R	0.68
SM104353	A	17	K	T	0.34
SM104353	A	17	K	V	0.39
SM104353	A	17	K	W	0.65
SM104353	A	26	K	D	0.59
SM104353	A	26	K	E	0.79
SM104353	A	26	K	F	0.66
SM104353	A	26	K	G	-0.08
SM104353	A	26	K	H	0.73
SM104353	A	26	K	I	0.62
SM104353	A	26	K	L	0.62
SM104353	A	26	K	P	-0.44
SM104353	A	26	K	Q	0.55
SM104353	A	26	K	S	0.28
SM104353	A	26	K	T	0.66
SM104353	A	26	K	V	0.49
SM104353	A	26	K	W	0.41
SM104353	A	26	K	Y	0.43
SM104353	A	40	K	A	-0.17
SM104353	A	40	K	D	0.39
SM104353	A	40	K	E	0.31
SM104353	A	40	K	F	0.3
SM104353	A	40	K	G	-0.07
SM104353	A	40	K	I	0.18
SM104353	A	40	K	L	0.04
SM104353	A	40	K	M	0.13
SM104353	A	40	K	N	0.23
SM104353	A	40	K	P	-0.48
SM104353	A	40	K	Q	0.28
SM104353	A	40	K	R	0.45
SM104353	A	40	K	S	0.38
SM104353	A	40	K	T	0.12
SM104353	A	40	K	V	0.27
SM104353	A	40	K	W	0.42
SM104353	A	40	K	Y	0.16
SM104353	A	43	L	A	-0.27
SM104353	A	43	L	D	0.32
SM104353	A	43	L	E	0.16
SM104353	A	43	L	F	-0.57
SM104353	A	43	L	G	-0.63
SM104353	A	43	L	H	0.28
SM104353	A	43	L	I	-0.57
SM104353	A	43	L	K	0.01
SM104353	A	43	L	M	-0.2
SM104353	A	43	L	Q	0.03
SM104353	A	43	L	R	-0.19
SM104353	A	43	L	S	0.01
SM104353	A	43	L	T	0.01
SM104353	A	43	L	V	-0.42
SM104353	A	43	L	W	-0.16
SM104353	A	9	L	A	-1.29
SM104353	A	9	L	D	-0.92
SM104353	A	9	L	E	-0.91
SM104353	A	9	L	F	-1.96
SM104353	A	9	L	K	-1.12
SM104353	A	9	L	M	-1.66
SM104353	A	9	L	N	-0.89
SM104353	A	9	L	P	-1.63
SM104353	A	9	L	Q	-1.06
SM104353	A	9	L	R	-1.51
SM104353	A	9	L	S	-1.1
SM104353	A	9	L	T	-1.18
SM104353	A	9	L	V	-1.67
SM104353	A	9	L	W	-1.38
SM104353	A	9	L	Y	-1.29
SM104353	A	15	M	A	0.02
SM104353	A	15	M	D	0.91
SM104353	A	15	M	E	0.57
SM104353	A	15	M	F	0.34
SM104353	A	15	M	H	0.49
SM104353	A	15	M	I	0.7
SM104353	A	15	M	K	1.06
SM104353	A	15	M	N	0.09
SM104353	A	15	M	P	0.41
SM104353	A	15	M	Q	0.44
SM104353	A	15	M	R	0.97
SM104353	A	15	M	S	0.33
SM104353	A	15	M	T	0.38
SM104353	A	15	M	Y	0.28
SM104353	A	42	M	A	-0.42
SM104353	A	42	M	D	0.68
SM104353	A	42	M	F	0.15
SM104353	A	42	M	H	0.35
SM104353	A	42	M	I	0.11
SM104353	A	42	M	L	0.04
SM104353	A	42	M	N	0.08
SM104353	A	42	M	P	-0.32
SM104353	A	42	M	Q	0.03
SM104353	A	42	M	R	0.13
SM104353	A	42	M	S	-0.21
SM104353	A	42	M	T	0.12
SM104353	A	42	M	Y	0.14
SM104353	A	46	M	A	-0.06
SM104353	A	46	M	D	0.08
SM104353	A	46	M	E	-0.1
SM104353	A	46	M	F	-0.01
SM104353	A	46	M	I	0.14
SM104353	A	46	M	K	0.32
SM104353	A	46	M	L	0.13
SM104353	A	46	M	N	0.19
SM104353	A	46	M	P	-0.18
SM104353	A	46	M	Q	0.05
SM104353	A	46	M	R	0.99
SM104353	A	46	M	S	0.22
SM104353	A	46	M	V	-0.05
SM104353	A	46	M	W	-0.05
SM104353	A	46	M	Y	-0.08
SM104353	A	27	N	D	-0.38
SM104353	A	27	N	E	-0.68
SM104353	A	27	N	G	-1.97
SM104353	A	27	N	H	-0.78
SM104353	A	27	N	I	-1.88
SM104353	A	27	N	L	-1.49
SM104353	A	27	N	P	-2.07
SM104353	A	27	N	Q	-0.61
SM104353	A	27	N	R	-1.03
SM104353	A	27	N	S	-1.34
SM104353	A	27	N	T	-0.78
SM104353	A	27	N	V	-1.73
SM104353	A	27	N	W	-0.99
SM104353	A	27	N	Y	-0.96
SM104353	A	10	P	D	-0.91
SM104353	A	10	P	F	-2.11
SM104353	A	10	P	I	-2.01
SM104353	A	10	P	L	-2.02
SM104353	A	10	P	Q	-1.8
SM104353	A	10	P	R	-1.84
SM104353	A	10	P	S	-1.28
SM104353	A	10	P	T	-1.47
SM104353	A	10	P	V	-1.68
SM104353	A	10	P	W	-2.09
SM104353	A	10	P	Y	-1.91
SM104353	A	38	P	D	-0.71
SM104353	A	38	P	G	-1.27
SM104353	A	38	P	H	-1.03
SM104353	A	38	P	I	-2.09
SM104353	A	38	P	K	-1.61
SM104353	A	38	P	L	-1.89
SM104353	A	38	P	M	-2.17
SM104353	A	38	P	Q	-1.13
SM104353	A	38	P	R	-1.63
SM104353	A	38	P	S	-1.1
SM104353	A	38	P	Y	-1.23
SM104353	A	4	P	A	0.44
SM104353	A	4	P	E	0.92
SM104353	A	4	P	F	0.99
SM104353	A	4	P	G	0.35
SM104353	A	4	P	H	1.02
SM104353	A	4	P	I	0.61
SM104353	A	4	P	K	0.66
SM104353	A	4	P	L	0.34
SM104353	A	4	P	M	0.61
SM104353	A	4	P	N	0.55
SM104353	A	4	P	Q	0.48
SM104353	A	4	P	R	0.64
SM104353	A	4	P	S	0.93
SM104353	A	4	P	T	0.64
SM104353	A	4	P	V	0.51
SM104353	A	4	P	W	1.29
SM104353	A	4	P	Y	0.78
SM104353	A	8	P	A	0.61
SM104353	A	8	P	E	1.15
SM104353	A	8	P	F	0.53
SM104353	A	8	P	G	0.34
SM104353	A	8	P	I	0.74
SM104353	A	8	P	K	0.7
SM104353	A	8	P	L	0.57
SM104353	A	8	P	M	0.3
SM104353	A	8	P	N	0.58
SM104353	A	8	P	Q	0.74
SM104353	A	8	P	R	0.62
SM104353	A	8	P	S	1.04
SM104353	A	8	P	V	0.81
SM104353	A	8	P	W	0.49
SM104353	A	22	Q	G	-0.4
SM104353	A	22	Q	K	0.32
SM104353	A	22	Q	L	-0.22
SM104353	A	22	Q	N	0.34
SM104353	A	22	Q	R	0.19
SM104353	A	22	Q	S	-0.72
SM104353	A	22	Q	T	-0.07
SM104353	A	22	Q	V	-0.2
SM104353	A	22	Q	W	0.12
SM104353	A	22	Q	Y	0.19
SM104353	A	31	Q	A	0.02
SM104353	A	31	Q	D	0.89
SM104353	A	31	Q	E	0.81
SM104353	A	31	Q	F	0.25
SM104353	A	31	Q	G	-0.15
SM104353	A	31	Q	H	0.82
SM104353	A	31	Q	I	0.1
SM104353	A	31	Q	K	0.8
SM104353	A	31	Q	L	0.17
SM104353	A	31	Q	M	0.11
SM104353	A	31	Q	N	0.33
SM104353	A	31	Q	T	0.16
SM104353	A	31	Q	V	0.13
SM104353	A	31	Q	W	0.2
SM104353	A	31	Q	Y	0.33
SM104353	A	36	Q	D	0.77
SM104353	A	36	Q	E	0.71
SM104353	A	36	Q	F	0.15
SM104353	A	36	Q	I	0.03
SM104353	A	36	Q	L	0.05
SM104353	A	36	Q	R	0.36
SM104353	A	36	Q	S	-0.11
SM104353	A	36	Q	T	0.21
SM104353	A	36	Q	V	0.17
SM104353	A	36	Q	Y	0.4
SM104353	A	45	Q	A	0.27
SM104353	A	45	Q	D	0.93
SM104353	A	45	Q	E	0.81
SM104353	A	45	Q	F	0.29
SM104353	A	45	Q	G	0.41
SM104353	A	45	Q	H	0.48
SM104353	A	45	Q	I	0.29
SM104353	A	45	Q	L	0.25
SM104353	A	45	Q	N	0.42
SM104353	A	45	Q	P	-0.12
SM104353	A	45	Q	R	0.57
SM104353	A	45	Q	S	0.65
SM104353	A	45	Q	T	0.6
SM104353	A	45	Q	V	0.47
SM104353	A	45	Q	W	0.66
SM104353	A	45	Q	Y	0.36
SM104353	A	23	R	A	-1.04
SM104353	A	23	R	D	0.06
SM104353	A	23	R	E	0.32
SM104353	A	23	R	G	-0.59
SM104353	A	23	R	H	-0.18
SM104353	A	23	R	K	-0.33
SM104353	A	23	R	M	-0.28
SM104353	A	23	R	P	-1.15
SM104353	A	23	R	S	-0.41
SM104353	A	23	R	T	-0.49
SM104353	A	23	R	V	-0.47
SM104353	A	23	R	W	-0.15
SM104353	A	23	R	Y	-0.05
SM104353	A	39	R	A	-0.14
SM104353	A	39	R	E	0.31
SM104353	A	39	R	G	0.1
SM104353	A	39	R	K	0.15
SM104353	A	39	R	N	-0.07
SM104353	A	39	R	P	-0.26
SM104353	A	39	R	Q	0.34
SM104353	A	39	R	T	0.08
SM104353	A	39	R	V	-0.06
SM104353	A	39	R	W	-0.01
SM104353	A	39	R	Y	0.35
SM104353	A	19	S	E	0.92
SM104353	A	19	S	F	0.16
SM104353	A	19	S	G	0.36
SM104353	A	19	S	H	1.09
SM104353	A	19	S	I	0.61
SM104353	A	19	S	K	0.82
SM104353	A	19	S	L	0.68
SM104353	A	19	S	M	0.2
SM104353	A	19	S	N	0.69
SM104353	A	19	S	P	-0.17
SM104353	A	19	S	R	0.8
SM104353	A	19	S	T	0.72
SM104353	A	19	S	W	0.61
SM104353	A	19	S	Y	0.42
SM104353	A	20	S	A	0.08
SM104353	A	20	S	D	0.81
SM104353	A	20	S	F	0.34
SM104353	A	20	S	G	0.24
SM104353	A	20	S	H	0.83
SM104353	A	20	S	I	0.51
SM104353	A	20	S	M	0.43
SM104353	A	20	S	N	0.62
SM104353	A	20	S	P	-0.61
SM104353	A	20	S	Q	0.74
SM104353	A	20	S	R	0.93
SM104353	A	20	S	T	0.43
SM104353	A	20	S	V	0.58
SM104353	A	20	S	Y	0.59
SM104353	A	44	S	A	0.05
SM104353	A	44	S	D	0.62
SM104353	A	44	S	F	0.23
SM104353	A	44	S	G	-0.06
SM104353	A	44	S	H	0.7
SM104353	A	44	S	K	0.67
SM104353	A	44	S	L	0.25
SM104353	A	44	S	P	-0.45
SM104353	A	44	S	Q	0.45
SM104353	A	44	S	R	0.94
SM104353	A	44	S	T	0.46
SM104353	A	44	S	W	0.66
SM104353	A	18	T	A	-0.55
SM104353	A	18	T	D	-0.09
SM104353	A	18	T	E	-0.15
SM104353	A	18	T	F	-1.09
SM104353	A	18	T	G	-0.21
SM104353	A	18	T	H	-0.43
SM104353	A	18	T	I	-0.69
SM104353	A	18	T	K	-0.49
SM104353	A	18	T	L	-0.83
SM104353	A	18	T	M	-1.25
SM104353	A	18	T	N	-0.25
SM104353	A	18	T	P	-1.36
SM104353	A	18	T	Q	-0.49
SM104353	A	18	T	R	-0.44
SM104353	A	18	T	S	-0.13
SM104353	A	18	T	V	-0.72
SM104353	A	18	T	W	-0.61
SM104353	A	32	T	A	0.31
SM104353	A	32	T	D	1.09
SM104353	A	32	T	E	0.82
SM104353	A	32	T	G	0.22
SM104353	A	32	T	H	0.76
SM104353	A	32	T	I	0.52
SM104353	A	32	T	K	0.19
SM104353	A	32	T	M	0.22
SM104353	A	32	T	N	0.51
SM104353	A	32	T	Q	0.48
SM104353	A	32	T	R	0.65
SM104353	A	32	T	S	0.56
SM104353	A	32	T	V	0.52
SM104353	A	32	T	W	0.58
SM104353	A	32	T	Y	0.8
SM104353	A	33	T	A	-0.18
SM104353	A	33	T	F	-0.74
SM104353	A	33	T	G	0.07
SM104353	A	33	T	I	-0.41
SM104353	A	33	T	K	-0.43
SM104353	A	33	T	L	-0.49
SM104353	A	33	T	M	-0.95
SM104353	A	33	T	N	-0.1
SM104353	A	33	T	Q	-0.21
SM104353	A	33	T	S	-0.06
SM104353	A	33	T	W	-0.0
SM104353	A	33	T	Y	-0.15
SM104353	A	34	T	A	-0.84
SM104353	A	34	T	D	0.47
SM104353	A	34	T	E	0.22
SM104353	A	34	T	F	-0.64
SM104353	A	34	T	G	-0.49
SM104353	A	34	T	H	-0.11
SM104353	A	34	T	I	-0.52
SM104353	A	34	T	K	-0.03
SM104353	A	34	T	L	-0.45
SM104353	A	34	T	N	0.17
SM104353	A	34	T	Q	-0.04
SM104353	A	34	T	R	-0.16
SM104353	A	34	T	S	-0.01
SM104353	A	34	T	V	-0.14
SM104353	A	34	T	W	-0.01
SM104353	A	34	T	Y	-0.3
SM104353	A	7	V	A	0.08
SM104353	A	7	V	D	0.64
SM104353	A	7	V	E	0.52
SM104353	A	7	V	F	0.11
SM104353	A	7	V	G	-0.56
SM104353	A	7	V	H	0.74
SM104353	A	7	V	I	0.05
SM104353	A	7	V	K	0.08
SM104353	A	7	V	L	-0.27
SM104353	A	7	V	M	-0.03
SM104353	A	7	V	N	0.07
SM104353	A	7	V	P	-0.2
SM104353	A	7	V	Q	0.16
SM104353	A	7	V	R	0.06
SM104353	A	7	V	S	0.5
SM104353	A	7	V	T	0.39
SM104353	A	7	V	W	0.25
SM104353	A	7	V	Y	-0.03
SM104353	A	13	W	D	-0.79
SM104353	A	13	W	E	-0.74
SM104353	A	13	W	F	-1.97
SM104353	A	13	W	G	-2.78
SM104353	A	13	W	I	-2.42
SM104353	A	13	W	L	-2.57
SM104353	A	13	W	M	-2.46
SM104353	A	13	W	N	-0.84
SM104353	A	13	W	P	-1.74
SM104353	A	13	W	Q	-0.91
SM104353	A	13	W	R	-1.27
SM104353	A	13	W	S	-1.76
SM104353	A	13	W	V	-1.92
SM104353	A	13	W	Y	-1.66
SM104353	A	35	W	A	-1.89
SM104353	A	35	W	D	-0.68
SM104353	A	35	W	E	-0.66
SM104353	A	35	W	F	-0.99
SM104353	A	35	W	G	-2.12
SM104353	A	35	W	H	-0.75
SM104353	A	35	W	I	-1.65
SM104353	A	35	W	K	-0.8
SM104353	A	35	W	M	-1.7
SM104353	A	35	W	N	-0.96
SM104353	A	35	W	P	-1.24
SM104353	A	35	W	R	-0.25
SM104353	A	35	W	S	-1.72
SM104353	A	35	W	T	-1.16
SM104353	A	35	W	V	-1.54
SM104353	A	35	W	Y	-0.8
SM104353	A	24	Y	A	-2.3
SM104353	A	24	Y	D	-1.65
SM104353	A	24	Y	E	-1.04
SM104353	A	24	Y	F	-1.17
SM104353	A	24	Y	H	-0.84
SM104353	A	24	Y	I	-2.34
SM104353	A	24	Y	K	-0.87
SM104353	A	24	Y	L	-2.53
SM104353	A	24	Y	M	-1.88
SM104353	A	24	Y	Q	-1.01
SM104353	A	24	Y	R	-1.08
SM104353	A	24	Y	S	-1.54
SM104353	A	24	Y	T	-1.26
SM104353	A	24	Y	V	-2.53
SM104353	A	24	Y	W	-0.95
SM104353	A	11	A	P	0.11
SM104353	A	11	A	R	0.43
SM104353	A	16	A	K	0.09
SM104353	A	16	A	R	0.1
SM104353	A	41	A	N	0.82
SM104353	A	41	A	P	0.18
SM104353	A	30	D	A	-0.88
SM104353	A	30	D	S	-0.43
SM104353	A	37	D	I	-0.78
SM104353	A	5	D	Y	0.34
SM104353	A	6	D	K	0.32
SM104353	A	14	E	N	0.26
SM104353	A	14	E	R	0.17
SM104353	A	14	E	Y	0.46
SM104353	A	2	E	H	0.65
SM104353	A	2	E	K	0.63
SM104353	A	2	E	W	0.2
SM104353	A	1	F	G	-0.88
SM104353	A	1	F	I	-0.67
SM104353	A	1	F	M	-0.54
SM104353	A	1	F	P	-0.64
SM104353	A	12	G	H	-0.66
SM104353	A	21	G	P	-0.85
SM104353	A	28	H	T	1.0
SM104353	A	29	I	P	-0.92
SM104353	A	29	I	S	-0.14
SM104353	A	29	I	W	-0.64
SM104353	A	29	I	Y	-0.75
SM104353	A	3	I	Q	-0.0
SM104353	A	17	K	G	0.25
SM104353	A	17	K	S	0.19
SM104353	A	17	K	Y	0.87
SM104353	A	26	K	A	-0.23
SM104353	A	26	K	M	0.49
SM104353	A	40	K	H	0.38
SM104353	A	43	L	N	0.14
SM104353	A	43	L	Y	-0.39
SM104353	A	9	L	I	-1.54
SM104353	A	15	M	G	-1.0
SM104353	A	15	M	V	0.41
SM104353	A	15	M	W	0.56
SM104353	A	42	M	E	0.34
SM104353	A	42	M	V	0.12
SM104353	A	46	M	G	-0.73
SM104353	A	46	M	T	0.06
SM104353	A	27	N	K	-0.56
SM104353	A	27	N	M	-1.68
SM104353	A	10	P	A	-1.54
SM104353	A	10	P	E	-1.23
SM104353	A	10	P	G	-1.01
SM104353	A	10	P	M	-2.24
SM104353	A	38	P	F	-1.92
SM104353	A	38	P	N	-0.31
SM104353	A	38	P	T	-1.42
SM104353	A	38	P	V	-1.91
SM104353	A	8	P	D	1.09
SM104353	A	8	P	H	0.96
SM104353	A	22	Q	D	0.49
SM104353	A	22	Q	I	-0.25
SM104353	A	22	Q	M	-0.1
SM104353	A	22	Q	P	-0.88
SM104353	A	36	Q	G	-0.59
SM104353	A	36	Q	H	0.51
SM104353	A	36	Q	K	0.4
SM104353	A	36	Q	M	-0.12
SM104353	A	36	Q	W	0.64
SM104353	A	23	R	F	-0.28
SM104353	A	23	R	I	-0.06
SM104353	A	23	R	L	-0.3
SM104353	A	23	R	N	-0.54
SM104353	A	39	R	D	0.33
SM104353	A	39	R	F	-0.07
SM104353	A	39	R	L	0.13
SM104353	A	39	R	S	0.04
SM104353	A	19	S	A	0.38
SM104353	A	19	S	Q	0.67
SM104353	A	20	S	E	0.94
SM104353	A	20	S	K	0.48
SM104353	A	20	S	W	0.46
SM104353	A	44	S	V	0.48
SM104353	A	33	T	D	0.28
SM104353	A	33	T	H	0.02
SM104353	A	33	T	P	-1.09
SM104353	A	33	T	V	-0.19
SM104353	A	34	T	M	-0.78
SM104353	A	34	T	P	-1.17
SM104353	A	13	W	H	-1.23
SM104353	A	13	W	T	-1.42
SM104353	A	35	W	L	-1.86
SM104353	A	35	W	Q	-1.0
SM104353	A	24	Y	N	-1.0
SM104353	A	16	A	T	0.5
SM104353	A	41	A	H	1.34
SM104353	A	41	A	K	0.78
SM104353	A	41	A	Y	0.85
SM104353	A	30	D	G	-0.85
SM104353	A	30	D	M	-0.81
SM104353	A	30	D	R	-0.56
SM104353	A	37	D	M	-0.79
SM104353	A	37	D	V	-0.18
SM104353	A	5	D	M	-0.16
SM104353	A	6	D	E	0.37
SM104353	A	6	D	I	0.08
SM104353	A	6	D	R	0.61
SM104353	A	14	E	D	0.2
SM104353	A	14	E	T	0.31
SM104353	A	2	E	A	-0.19
SM104353	A	2	E	Q	0.68
SM104353	A	2	E	S	0.23
SM104353	A	1	F	E	-0.09
SM104353	A	25	F	I	-2.97
SM104353	A	25	F	L	-2.98
SM104353	A	25	F	P	-2.73
SM104353	A	12	G	D	-0.71
SM104353	A	12	G	Q	-0.52
SM104353	A	12	G	R	-0.82
SM104353	A	12	G	S	-0.89
SM104353	A	21	G	E	-0.09
SM104353	A	21	G	I	-1.11
SM104353	A	28	H	G	0.85
SM104353	A	28	H	I	0.88
SM104353	A	28	H	L	1.0
SM104353	A	28	H	V	0.82
SM104353	A	29	I	H	-0.04
SM104353	A	29	I	R	0.12
SM104353	A	3	I	H	0.19
SM104353	A	26	K	N	0.37
SM104353	A	26	K	R	0.2
SM104353	A	43	L	P	-0.57
SM104353	A	9	L	G	-1.9
SM104353	A	9	L	H	-1.14
SM104353	A	15	M	L	0.31
SM104353	A	42	M	G	-0.93
SM104353	A	42	M	K	0.02
SM104353	A	42	M	W	0.22
SM104353	A	46	M	H	0.18
SM104353	A	27	N	A	-1.96
SM104353	A	27	N	F	-1.8
SM104353	A	10	P	H	-1.57
SM104353	A	10	P	K	-1.35
SM104353	A	10	P	N	-1.4
SM104353	A	38	P	A	-1.55
SM104353	A	38	P	E	-1.26
SM104353	A	38	P	W	-1.85
SM104353	A	4	P	D	1.27
SM104353	A	8	P	T	0.92
SM104353	A	8	P	Y	0.47
SM104353	A	22	Q	A	-0.99
SM104353	A	22	Q	E	0.59
SM104353	A	22	Q	F	-0.23
SM104353	A	22	Q	H	0.24
SM104353	A	31	Q	P	-0.5
SM104353	A	31	Q	R	0.01
SM104353	A	31	Q	S	0.13
SM104353	A	36	Q	A	-0.16
SM104353	A	36	Q	N	0.16
SM104353	A	36	Q	P	-0.78
SM104353	A	45	Q	K	0.64
SM104353	A	45	Q	M	0.35
SM104353	A	23	R	Q	0.17
SM104353	A	39	R	H	0.45
SM104353	A	39	R	I	0.13
SM104353	A	39	R	M	-0.22
SM104353	A	19	S	D	0.74
SM104353	A	19	S	V	0.74
SM104353	A	20	S	L	0.49
SM104353	A	44	S	E	1.04
SM104353	A	44	S	I	0.1
SM104353	A	44	S	M	-0.1
SM104353	A	44	S	N	0.46
SM104353	A	44	S	Y	0.52
SM104353	A	18	T	Y	-0.66
SM104353	A	32	T	F	0.75
SM104353	A	32	T	L	0.58
SM104353	A	32	T	P	-0.26
SM104353	A	33	T	E	0.23
SM104353	A	33	T	R	-0.12
SM104353	A	13	W	A	-2.24
SM104353	A	13	W	K	-1.53
SM104353	A	24	Y	G	-3.01
SM104353	A	24	Y	P	-2.18
"""))

In [16]:
# Attach the STRUM ddG predictions to the hYAP65 mutants. The STRUM table is
# matched to hyap_muts purely by row position, so first verify that both list
# the same mutations in the same order; a silent misalignment would corrupt
# every downstream correlation.
strum_labels = (
    strum_res['wild type'] + strum_res['pos'].astype(str) + strum_res['mutant']
).tolist()
expected_labels = hyap_muts.sequence.map(
    lambda mut_seq: return_mutations('FEIPDDVPLPAGWEMAKTSSGQRYFKNHIDQTTTWQDPRKAMLSQM', mut_seq)
).tolist()
assert strum_labels == expected_labels, "STRUM results are not row-aligned with hyap_muts"

# assign() returns a new frame rather than writing into a filtered slice, which
# avoids pandas' SettingWithCopyWarning. The ddG values are the last column.
hyap_muts = hyap_muts.assign(strum_pred=strum_res.iloc[:, -1].values)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self.obj[key] = _infer_fill_value(value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self.obj[item] = s


In [70]:
# Held-out hYAP65 mutants: rows of hyap_muts flagged with is_test == True.
test = hyap_muts.loc[hyap_muts["is_test"] == True]

In [71]:
# Load the saved prediction array for hYAP65 (the file name marks it as test-set predictions).
unirep_predictions = np.load(
    "../../../data/predictions/rocklin_ssm2__hYAP65_ssm2_stability__all_1900__test__predictions.npy"
)

In [72]:
from scipy.stats import spearmanr

In [73]:
# Spearman rank correlation between the loaded predictions and the measured phenotype
# of the hYAP65 test rows.
# NOTE(review): assumes unirep_predictions is stored in the same row order as `test` - confirm.
spearmanr(unirep_predictions, test['phenotype'])

SpearmanrResult(correlation=0.77816664574823047, pvalue=1.8124749860174226e-19)

In [74]:
# Same rank correlation, this time for the STRUM ddG values attached to the test rows.
spearmanr(test['strum_pred'], test['phenotype'])

SpearmanrResult(correlation=0.54410113851426845, pvalue=2.9779526806418641e-08)

In [75]:
def compute_std(predictions, real_values, n_resamples=30):
    """Estimate the spread of the Spearman correlation under subsampling.

    Draws ``n_resamples`` random subsets containing half of the observations
    (without replacement inside a subset), computes the Spearman correlation
    between ``predictions`` and ``real_values`` on each subset, and returns the
    standard deviation of those correlations.

    Parameters
    ----------
    predictions : numpy.ndarray
        Predicted values; indexed with integer position arrays.
    real_values : numpy.ndarray
        Measured values, same length and order as ``predictions``.
    n_resamples : int, optional
        Number of random half-size subsets to draw (default 30).

    Returns
    -------
    float
        Sample standard deviation (``pandas.Series.std``) of the correlations.

    Notes
    -----
    Subsets are drawn from NumPy's global random state, so results are
    reproducible only after ``np.random.seed``.
    """
    n_obs = real_values.shape[0]
    # Integer division replaces np.int(n_obs / 2); the np.int alias was removed
    # from NumPy and raises AttributeError on current releases.
    half = n_obs // 2
    positions = np.arange(n_obs)

    correlations = []
    for _ in range(n_resamples):
        sample_ids = np.random.choice(positions, size=half, replace=False)
        correlations.append(
            spearmanr(predictions[sample_ids], real_values[sample_ids])[0]
        )
    return pd.Series(correlations).std()

In [76]:
from scipy.stats import ttest_ind_from_stats

# Two-sample t-test from summary statistics: full-test-set Spearman correlation
# of each predictor as the mean, compute_std's subsampling spread as the
# standard deviation, and 30 observations per group.
hyap_measured = test['phenotype'].values
hyap_strum_scores = test['strum_pred'].values
ttest_ind_from_stats(
    mean1=spearmanr(unirep_predictions, hyap_measured)[0],
    std1=compute_std(unirep_predictions, hyap_measured),
    nobs1=30,
    mean2=spearmanr(hyap_strum_scores, hyap_measured)[0],
    std2=compute_std(hyap_strum_scores, hyap_measured),
    nobs2=30,
)

Ttest_indResult(statistic=12.566628206941687, pvalue=3.3773164324017766e-18)

# Running villin and Pin1

In [22]:
def get_wt_and_print_test_set_mutations(phen_name):
    """Print one protein's consensus sequence and its test-set mutations.

    Selects the rows of the global ``ssm2`` whose ``phenotype_name`` contains
    ``phen_name``, builds the consensus sequence from the most frequent residue
    at each position, prints it, and then prints every test-set mutant
    (``is_test == True``, consensus sequence excluded) as a
    ``<wt residue><position><mutant residue>;`` line.

    Parameters
    ----------
    phen_name : str
        Pattern passed to ``str.contains`` on ``phenotype_name``.

    Returns
    -------
    None
    """
    protein_rows = ssm2[ssm2.phenotype_name.str.contains(phen_name)]
    sequences = protein_rows["sequence"]

    # Most common sequence length, then the most common residue at each position.
    seq_length = sequences.map(len).value_counts().index[0]
    consensus = "".join(
        [sequences.map(lambda seq: seq[pos]).value_counts().index[0] for pos in range(seq_length)]
    )
    print(consensus)

    mutants = protein_rows[sequences != consensus]
    test_mutants = mutants[mutants.is_test == True]

    labels = test_mutants.sequence.map(lambda mut_seq: return_mutations(consensus, mut_seq))
    for label in labels:
        print(f"{label};")

In [23]:
# Quick check of return_mutations on a single substitution (Y -> P at position 24).
return_mutations("FEIPDDVPLPAGWEMAKTSSGQRYFKNHIDQTTTWQDPRKAMLSQM", "FEIPDDVPLPAGWEMAKTSSGQRPFKNHIDQTTTWQDPRKAMLSQM")

'Y24P'

In [24]:
# Number of rows per phenotype name in the dataset.
ssm2.phenotype_name.value_counts()

hYAP65_ssm2_stability                829
HHH_rd3_0138.pdb_ssm2_stability      775
HEEH_rd3_0872.pdb_ssm2_stability     775
HHH_0142.pdb_ssm2_stability          775
HHH_rd2_0134.pdb_ssm2_stability      775
EEHEE_rd3_1716.pdb_ssm2_stability    775
EEHEE_rd3_1702.pdb_ssm2_stability    775
EEHEE_rd3_0037.pdb_ssm2_stability    775
HEEH_rd3_0726.pdb_ssm2_stability     775
HEEH_rd3_0223.pdb_ssm2_stability     775
HEEH_rd2_0779.pdb_ssm2_stability     775
EEHEE_rd3_1498.pdb_ssm2_stability    775
EHEE_rd3_0015.pdb_ssm2_stability     721
EHEE_rd2_0005.pdb_ssm2_stability     721
EHEE_0882.pdb_ssm2_stability         721
Pin1_ssm2_stability                  703
villin_ssm2_stability                631
Name: phenotype_name, dtype: int64

### villin

In [25]:
# Print the villin consensus sequence followed by its test-set mutations, one "<mutation>;" per line.
get_wt_and_print_test_set_mutations('villin')

GSSGSLSDEDFKAVFGMTRSAFANLPLWKQQNLKKEKGLFGSS
A21P;
A21T;
A21W;
A23G;
A23H;
A23P;
A13F;
A13G;
A13S;
D8K;
D8Y;
D10F;
D10Q;
D10T;
E36P;
E9F;
F15H;
F22D;
F22H;
F22N;
F40I;
F40M;
F11L;
F11R;
G16D;
G16K;
G16Q;
G16W;
G38I;
G38P;
G38W;
K34M;
K34Y;
K35G;
K35M;
K35N;
K37M;
L6S;
L25A;
L25M;
L27A;
L27N;
L33H;
L33N;
L33P;
L33V;
L39P;
N24S;
P26G;
Q30W;
Q31A;
Q31S;
R19D;
R19Q;
R19T;
S20W;
S7T;
S7W;
S7Y;
V14M;
W28K;
W28S;


In [26]:
villin_res = pd.read_table(StringIO("""
Position	Wild-type	mutant type	ddG
21	A	P	-0.64
21	A	T	0.25
21	A	W	0.41
23	A	G	-0.02
23	A	H	0.78
23	A	P	-0.17
13	A	F	1.11
13	A	G	0.46
13	A	S	0.42
8	D	K	-0.17
8	D	Y	0.13
10	D	F	-0.71
10	D	Q	-0.53
10	D	T	-0.83
36	E	P	-0.54
9	E	F	0.6
15	F	H	-0.66
22	F	D	-2.49
22	F	H	-2.51
22	F	N	-1.69
40	F	I	-0.85
40	F	M	-0.35
11	F	L	-2.53
11	F	R	-1.48
16	G	D	0.48
16	G	K	0.22
16	G	Q	0.03
16	G	W	-0.26
38	G	I	0.03
38	G	P	-0.96
38	G	W	0.18
34	K	M	-0.23
34	K	Y	-0.71
35	K	G	0.19
35	K	M	0.71
35	K	N	0.58
37	K	M	0.94
6	L	S	-0.82
25	L	A	-1.53
25	L	M	-0.25
27	L	A	0.78
27	L	N	0.85
33	L	H	-0.47
33	L	N	-0.09
33	L	P	-1.23
33	L	V	-0.89
39	L	P	-1.53
24	N	S	0.22
26	P	G	-0.15
30	Q	W	-0.0
31	Q	A	-1.31
31	Q	S	-0.51
19	R	D	0.21
19	R	Q	0.12
19	R	T	-0.69
20	S	W	0.56
7	S	T	-0.06
7	S	W	0.13
7	S	Y	0.58
14	V	M	0.16
28	W	K	-1.02
28	W	S	-1.02
"""))

In [27]:
# Load the saved prediction array for villin (the file name marks it as test-set predictions).
unirep_villin_predictions = np.load(
    "../../../data/predictions/rocklin_ssm2__villin_ssm2_stability__all_1900__test__predictions.npy"
)

In [28]:
# Row-count sanity checks: the loaded predictions, the STRUM table and the villin
# test rows of ssm2 must all have the same number of entries.
# The is_test comparison is parenthesised because `&` binds tighter than `==`;
# without the parentheses the mask was evaluated as `(contains & is_test) == True`.
assert unirep_villin_predictions.shape[0] == villin_res.shape[0]
assert unirep_villin_predictions.shape[0] == ssm2[ssm2.phenotype_name.str.contains("villin") & (ssm2.is_test == True)].shape[0]

In [29]:
# Spearman rank correlation between the loaded predictions and the measured phenotype
# of the villin test rows. The is_test comparison is parenthesised because `&`
# binds tighter than `==`.
spearmanr(unirep_villin_predictions, ssm2[ssm2.phenotype_name.str.contains("villin") & (ssm2.is_test == True)]['phenotype'])

SpearmanrResult(correlation=0.86006097003238446, pvalue=3.4520319226038328e-19)

In [30]:
# Spearman rank correlation between the STRUM ddG values (last column of villin_res)
# and the measured phenotype of the villin test rows. The is_test comparison is
# parenthesised because `&` binds tighter than `==`.
spearmanr(villin_res.iloc[:, -1].values, ssm2[ssm2.phenotype_name.str.contains("villin") & (ssm2.is_test == True)]['phenotype'])

SpearmanrResult(correlation=0.33641069692568021, pvalue=0.0075086758720693744)

In [77]:
from scipy.stats import ttest_ind_from_stats

# Two-sample t-test from summary statistics for villin: full-test-set Spearman
# correlation of each predictor as the mean, compute_std's subsampling spread as
# the standard deviation, and 30 observations per group.
# The is_test comparison is parenthesised because `&` binds tighter than `==`;
# the mask is built once instead of being repeated in every argument.
villin_test_mask = ssm2.phenotype_name.str.contains("villin") & (ssm2.is_test == True)
villin_measured = ssm2[villin_test_mask]['phenotype'].values
villin_strum_scores = villin_res.iloc[:, -1].values
ttest_ind_from_stats(
    mean1=spearmanr(unirep_villin_predictions, villin_measured)[0],
    std1=compute_std(unirep_villin_predictions, villin_measured),
    nobs1=30,
    mean2=spearmanr(villin_strum_scores, villin_measured)[0],
    std2=compute_std(villin_strum_scores, villin_measured),
    nobs2=30,
)

Ttest_indResult(statistic=27.363408372114442, pvalue=7.5488751216756445e-35)

### Pin1

In [31]:
# Print the Pin1 consensus sequence followed by its test-set mutations, one "<mutation>;" per line.
get_wt_and_print_test_set_mutations('Pin1')

GMADEEKLPPGWEKRMSRSSGRVYYFNHITNASQWERPSGGSS
A3I;
A3K;
A3L;
A32L;
E13H;
E5D;
E6L;
E6W;
F26L;
F26R;
G11H;
G11N;
G11T;
G11V;
G21L;
G21N;
G21T;
G40I;
H28F;
H28I;
I29N;
K14L;
K14S;
L8A;
L8H;
M16Q;
M16Y;
M2D;
M2P;
M2R;
N27D;
N27G;
N27M;
N31G;
N31Q;
P38L;
P38S;
P9G;
P9R;
P9S;
P10R;
P10W;
R15H;
R15L;
R15W;
R18V;
R18Y;
R22S;
R22Y;
R37I;
S19W;
S20Q;
S33H;
S33I;
S33P;
S39F;
S39R;
V23Q;
W12R;
W35I;
W35Q;
Y24S;
Y25S;
Y25T;


In [32]:
pin1_res = pd.read_table(StringIO("""
Position	Wild-type	mutant type	ddG
3	A	I	0.43
3	A	K	0.5
3	A	L	0.42
32	A	L	0.73
13	E	H	-0.03
5	E	D	0.28
6	E	L	0.07
6	E	W	0.29
26	F	L	-2.0
26	F	R	-0.16
11	G	H	-0.73
11	G	N	-1.05
11	G	T	-1.72
11	G	V	-2.0
21	G	L	-1.08
21	G	N	-0.53
21	G	T	-0.88
40	G	I	0.22
28	H	F	0.12
28	H	I	0.12
29	I	N	0.1
14	K	L	0.29
14	K	S	0.66
8	L	A	-1.19
8	L	H	-0.88
16	M	Q	0.35
16	M	Y	-0.11
2	M	D	0.92
2	M	P	0.19
2	M	R	0.82
27	N	D	-1.15
27	N	G	-3.41
27	N	M	-2.36
31	N	G	-0.76
31	N	Q	0.26
38	P	L	-2.79
38	P	S	-1.66
9	P	G	-0.83
9	P	R	-1.82
9	P	S	-1.11
10	P	R	0.67
10	P	W	0.8
15	R	H	0.39
15	R	L	-0.36
15	R	W	0.33
18	R	V	0.14
18	R	Y	0.2
22	R	S	-0.56
22	R	Y	0.01
37	R	I	-0.68
19	S	W	0.21
20	S	Q	0.52
33	S	H	-0.55
33	S	I	-1.1
33	S	P	-0.5
39	S	F	-1.02
39	S	R	-0.73
23	V	Q	-0.73
12	W	R	-2.08
35	W	I	-1.61
35	W	Q	-1.29
24	Y	S	-2.32
25	Y	S	-2.41
25	Y	T	-1.99
"""))

In [33]:
# Load the saved prediction array for Pin1 (the file name marks it as test-set predictions).
unirep_pin1_predictions = np.load(
    "../../../data/predictions/rocklin_ssm2__Pin1_ssm2_stability__all_1900__test__predictions.npy"
)

In [34]:
# Row-count sanity checks: the loaded predictions, the STRUM table and the Pin1
# test rows of ssm2 must all have the same number of entries.
# The is_test comparison is parenthesised because `&` binds tighter than `==`;
# without the parentheses the mask was evaluated as `(contains & is_test) == True`.
assert unirep_pin1_predictions.shape[0] == pin1_res.shape[0]
assert unirep_pin1_predictions.shape[0] == ssm2[ssm2.phenotype_name.str.contains("Pin1") & (ssm2.is_test == True)].shape[0]

In [35]:
# Spearman rank correlation between the loaded predictions and the measured phenotype
# of the Pin1 test rows. The is_test comparison is parenthesised because `&`
# binds tighter than `==`.
spearmanr(unirep_pin1_predictions, ssm2[ssm2.phenotype_name.str.contains("Pin1") & (ssm2.is_test == True)]['phenotype'])

SpearmanrResult(correlation=0.88640252625610894, pvalue=2.0683514770384785e-22)

In [36]:
# Spearman rank correlation between the STRUM ddG values (last column of pin1_res)
# and the measured phenotype of the Pin1 test rows. The is_test comparison is
# parenthesised because `&` binds tighter than `==`.
spearmanr(pin1_res.iloc[:, -1].values, ssm2[ssm2.phenotype_name.str.contains("Pin1") & (ssm2.is_test == True)]['phenotype'])

SpearmanrResult(correlation=0.55022968834866826, pvalue=2.4773304343718899e-06)

In [78]:
from scipy.stats import ttest_ind_from_stats

# Two-sample t-test from summary statistics for Pin1: full-test-set Spearman
# correlation of each predictor as the mean, compute_std's subsampling spread as
# the standard deviation, and 30 observations per group.
# The is_test comparison is parenthesised because `&` binds tighter than `==`;
# the mask is built once instead of being repeated in every argument.
pin1_test_mask = ssm2.phenotype_name.str.contains("Pin1") & (ssm2.is_test == True)
pin1_measured = ssm2[pin1_test_mask]['phenotype'].values
pin1_strum_scores = pin1_res.iloc[:, -1].values
ttest_ind_from_stats(
    mean1=spearmanr(unirep_pin1_predictions, pin1_measured)[0],
    std1=compute_std(unirep_pin1_predictions, pin1_measured),
    nobs1=30,
    mean2=spearmanr(pin1_strum_scores, pin1_measured)[0],
    std2=compute_std(pin1_strum_scores, pin1_measured),
    nobs2=30,
)

Ttest_indResult(statistic=17.02766655116848, pvalue=3.1063423771591725e-24)