Skip to content

Commit

Permalink
add split_gpm function
Browse files Browse the repository at this point in the history
  • Loading branch information
Zsailer committed Apr 24, 2018
1 parent 984de89 commit 030cac4
Show file tree
Hide file tree
Showing 2 changed files with 40 additions and 1 deletion.
2 changes: 1 addition & 1 deletion epistasis/pyplot/nonlinear.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ def plot_scale(
yadd = model.Additive.predict()

xx = np.linspace(min(yadd), max(yadd),20)
yy = model.function(xx, **params)
yy = model.minimizer.predict(xx)

elif yobs is None or yadd is None:
raise Exception("If not model is given, pobs and padd must be set.")
Expand Down
39 changes: 39 additions & 0 deletions epistasis/stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@
from scipy.stats import norm
import scipy

from gpmap import GenotypePhenotypeMap

# -----------------------------------------------------------------------
# Correlation metrics
# -----------------------------------------------------------------------
Expand Down Expand Up @@ -53,6 +55,43 @@ def split_data(data, fraction=1.0):
return train_set, test_set


def split_gpm(gpm, fraction=1.0):
    """Split a GenotypePhenotypeMap into a training map and a test map.

    The rows of ``gpm.data`` are partitioned by ``split_data`` and each
    partition is turned back into a GenotypePhenotypeMap that shares the
    wildtype and mutations of the input map.

    Parameters
    ----------
    gpm : GenotypePhenotypeMap
        full map to split. Its ``data``, ``wildtype`` and ``mutations``
        attributes are read.
    fraction : float
        fraction in training set (passed straight through to
        ``split_data``).

    Returns
    -------
    train_gpm : GenotypePhenotypeMap
        map built from the training partition.
    test_gpm : GenotypePhenotypeMap
        map built from the test partition.
    """
    train_df, test_df = split_data(gpm.data, fraction=fraction)

    def _rebuild(frame):
        # Both subsets reuse the parent's wildtype and mutations so they
        # are described in the same genotype space as the original map.
        return GenotypePhenotypeMap.read_dataframe(
            frame,
            wildtype=gpm.wildtype,
            mutations=gpm.mutations
        )

    # Training map is constructed first, then the test map.
    return _rebuild(train_df), _rebuild(test_df)



def gmean(x):
"""Calculate a geometric mean with zero and negative values.
Expand Down

0 comments on commit 030cac4

Please sign in to comment.