-
Notifications
You must be signed in to change notification settings - Fork 11
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #34 from Zsailer/cluster
Refactored Classifier module and allow for custom sklearn classifiers
- Loading branch information
Showing
12 changed files
with
440 additions
and
73 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
from .logistic import EpistasisLogisticRegression | ||
from .gmm import EpistasisGaussianMixture |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,83 @@ | ||
import numpy as np | ||
import pandas as pd | ||
|
||
# Scikit-learn classifiers | ||
from sklearn.preprocessing import binarize | ||
|
||
from epistasis.mapping import EpistasisMap | ||
from epistasis.models.base import BaseModel, use_sklearn | ||
from epistasis.models.utils import (XMatrixException, arghandler) | ||
|
||
from epistasis.models.linear import EpistasisLinearRegression | ||
|
||
from gpmap import GenotypePhenotypeMap | ||
|
||
|
||
class EpistasisClassifierMixin:
    """A mixin for epistasis classifiers.

    Fits a first-order (additive) epistasis model to establish the
    phenotypic scale, projects genotypes into that additive space, and
    trains the inheriting scikit-learn classifier on the projected
    coordinates against phenotypes binarized around ``self.threshold``.

    Expects the inheriting class to provide ``self.gpm``,
    ``self.model_type`` and ``self.threshold``, and a scikit-learn
    classifier as a cooperating base class (via ``super()``).
    """

    def _fit_additive(self, X=None, y=None):
        """Fit a first-order linear epistasis model on the attached
        genotype-phenotype map and store it as ``self.Additive``.
        """
        self.Additive = EpistasisLinearRegression(
            order=1, model_type=self.model_type)

        self.Additive.add_gpm(self.gpm)

        # Prepare the additive model's epistasis map.
        self.Additive.epistasis = EpistasisMap(
            sites=self.Additive.Xcolumns,
            order=self.Additive.order,
            model_type=self.Additive.model_type
        )

        # Fit the additive model and infer additive phenotypes.
        self.Additive.fit(X=X, y=y)
        return self

    def _fit_classifier(self, X=None, y=None):
        """Train the underlying classifier on additive-space coordinates.

        Builds the classifier's design matrix by scaling the additive
        model matrix by the fitted additive coefficients, then fits the
        scikit-learn parent on binarized labels (stored on
        ``self.classes``).
        """
        add_coefs = self.Additive.epistasis.values
        add_X = self.Additive._X(data=X)

        # Project X into additive-coefficient (padd) space.
        X = add_X * add_coefs

        # Label phenotypes as 0/1 around the threshold.
        # BUGFIX: `threshold` is keyword-only in scikit-learn >= 0.24;
        # passing it positionally raises a TypeError.
        y = binarize(y.reshape(1, -1), threshold=self.threshold)[0]
        self.classes = y

        # Fit the scikit-learn classifier parent.
        super().fit(X=X, y=y)
        return self

    def fit_transform(self, X=None, y=None, **kwargs):
        """Fit the classifier, then return a GenotypePhenotypeMap
        containing only the genotypes classified as class 1 (viable).
        """
        self.fit(X=X, y=y, **kwargs)
        ypred = self.predict(X=X)

        # Transform map: keep only rows predicted as class 1.
        gpm = GenotypePhenotypeMap.read_dataframe(
            dataframe=self.gpm.data[ypred == 1],
            wildtype=self.gpm.wildtype,
            mutations=self.gpm.mutations
        )
        return gpm

    def predict(self, X=None):
        """Predict class labels from genotypes projected into additive
        space."""
        Xadd = self.Additive._X(data=X)
        X = Xadd * self.Additive.epistasis.values
        return super().predict(X=X)

    def predict_transform(self, X=None, y=None):
        """Clip phenotypes predicted as class 0 to the threshold.

        NOTE: mutates the caller's ``y`` array in place and returns it.
        """
        x = self.predict(X=X)
        y[x <= 0.5] = self.threshold
        return y

    def predict_log_proba(self, X=None):
        """Log class probabilities in additive space."""
        Xadd = self.Additive._X(data=X)
        X = Xadd * self.Additive.epistasis.values
        return super().predict_log_proba(X)

    def predict_proba(self, X=None):
        """Class probabilities in additive space."""
        Xadd = self.Additive._X(data=X)
        X = Xadd * self.Additive.epistasis.values
        return super().predict_proba(X=X)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,61 @@ | ||
import numpy as np | ||
import pandas as pd | ||
|
||
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis | ||
from sklearn.preprocessing import binarize | ||
|
||
from epistasis.mapping import EpistasisMap | ||
from epistasis.models.base import BaseModel, use_sklearn | ||
from epistasis.models.utils import arghandler | ||
from epistasis.models.linear import EpistasisLinearRegression | ||
|
||
# Use if inheriting from a scikit-learn class | ||
# Use if inheriting from a scikit-learn class
@use_sklearn(QuadraticDiscriminantAnalysis)
class EpistasisQuadraticDA(BaseModel):
    """Epistasis classifier backed by scikit-learn's
    QuadraticDiscriminantAnalysis.

    Parameters
    ----------
    order : int
        Accepted for interface symmetry; the model is fixed at first
        order internally.
    threshold : float
        Phenotype value separating the two classes.
    model_type : str (default="global")
        Type of model matrix to use ("global" or "local").
    """
    def __init__(self, order=1, threshold=5, model_type='global', **kwargs):
        self.model_type = model_type
        # NOTE(review): `order` is accepted but ignored; the model is
        # hard-coded to first order — confirm this is intentional.
        self.order = 1
        self.Xbuilt = {}
        self.threshold = threshold

        # BUGFIX: zero-arg super() instead of super(self.__class__, self),
        # which recurses infinitely if this class is ever subclassed.
        super().__init__(**kwargs)

        # Store model specs.
        self.model_specs = dict(
            priors=None,
            threshold=threshold,
            model_type=self.model_type,
            **kwargs)

        # Set up additive linear model for pre-classifying.
        self.Additive = EpistasisLinearRegression(
            order=1, model_type=self.model_type)

    @property
    def num_of_params(self):
        # Not implemented for this classifier.
        pass

    def hypothesis(self, X=None, thetas=None):
        # Not implemented for this classifier.
        pass

    def hypothesis_transform(self, X=None, y=None, thetas=None):
        # Not implemented for this classifier.
        pass

    def lnlike_of_data(
            self,
            X=None,
            y=None,
            yerr=None,
            thetas=None):
        # Not implemented for this classifier.
        pass

    def lnlike_transform(
            self,
            X=None,
            y=None,
            yerr=None,
            lnprior=None,
            thetas=None):
        # Not implemented for this classifier.
        pass
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,74 @@ | ||
import numpy as np | ||
import pandas as pd | ||
|
||
from sklearn.gaussian_process import GaussianProcessClassifier | ||
from sklearn.preprocessing import binarize | ||
|
||
from epistasis.mapping import EpistasisMap | ||
from epistasis.models.base import BaseModel, use_sklearn | ||
from epistasis.models.utils import arghandler | ||
from epistasis.models.linear import EpistasisLinearRegression | ||
|
||
from .base import EpistasisClassifierMixin | ||
|
||
# Use if inheriting from a scikit-learn class
@use_sklearn(GaussianProcessClassifier)
class EpistasisGaussianProcess(EpistasisClassifierMixin, BaseModel):
    """Epistasis classifier backed by scikit-learn's
    GaussianProcessClassifier, pre-projected into additive space by
    EpistasisClassifierMixin.

    Parameters
    ----------
    order : int
        Accepted for interface symmetry; the model is fixed at first
        order internally.
    threshold : float
        Phenotype value separating the two classes.
    model_type : str (default="global")
        Type of model matrix to use ("global" or "local").
    """
    def __init__(self, order=1, threshold=5, model_type='global', **kwargs):
        self.model_type = model_type
        # NOTE(review): `order` is accepted but ignored; the model is
        # hard-coded to first order — confirm this is intentional.
        self.order = 1
        self.Xbuilt = {}
        self.threshold = threshold

        # BUGFIX: zero-arg super() instead of super(self.__class__, self),
        # which recurses infinitely if this class is ever subclassed.
        super().__init__(**kwargs)

        # Store model specs.
        self.model_specs = dict(
            priors=None,
            threshold=threshold,
            model_type=self.model_type,
            **kwargs)

        # Set up additive linear model for pre-classifying.
        self.Additive = EpistasisLinearRegression(
            order=1, model_type=self.model_type)

    @property
    def num_of_params(self):
        # Number of epistatic coefficients in the fitted map.
        n = 0
        n += self.epistasis.n
        return n

    @arghandler
    def fit(self, X=None, y=None, **kwargs):
        """Fit the additive model to establish the phenotypic scale,
        then fit the Gaussian-process classifier in additive space.
        """
        self._fit_additive(X=X, y=y)
        self._fit_classifier(X=X, y=y)
        return self

    def hypothesis(self, X=None, thetas=None):
        # Not implemented for this classifier.
        pass

    def hypothesis_transform(self, X=None, y=None, thetas=None):
        # Not implemented for this classifier.
        pass

    def lnlike_of_data(
            self,
            X=None,
            y=None,
            yerr=None,
            thetas=None):
        # Not implemented for this classifier.
        pass

    def lnlike_transform(
            self,
            X=None,
            y=None,
            yerr=None,
            lnprior=None,
            thetas=None):
        # Not implemented for this classifier.
        pass
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,72 @@ | ||
import numpy as np | ||
import pandas as pd | ||
|
||
# Scikit-learn classifiers | ||
from sklearn.mixture import GaussianMixture | ||
from sklearn.preprocessing import binarize | ||
|
||
from epistasis.mapping import EpistasisMap | ||
from epistasis.models.base import BaseModel, use_sklearn | ||
from epistasis.models.utils import (XMatrixException, arghandler) | ||
|
||
from epistasis.models.linear import EpistasisLinearRegression | ||
|
||
from gpmap import GenotypePhenotypeMap | ||
|
||
from .base import EpistasisClassifierMixin | ||
|
||
@use_sklearn(GaussianMixture)
class EpistasisGaussianMixture(EpistasisClassifierMixin, BaseModel):
    """Gaussian-mixture classifier for estimating epistatic interactions
    that lead to nonviable phenotypes. Useful for predicting
    viable/nonviable phenotypes.

    Parameters
    ----------
    n_components : int
        Number of mixture components.
    model_type : str (default="global")
        type of model matrix to use. "global" defines epistasis with respect to
        a background-averaged "genotype-phenotype". "local" defines epistasis
        with respect to the wildtype genotype.
    """
    def __init__(
            self,
            n_components=1,
            model_type="global",
            **kwargs):

        # BUGFIX: zero-arg super() instead of super(self.__class__, self),
        # which recurses infinitely if this class is ever subclassed.
        super().__init__(n_components=n_components, **kwargs)
        self.model_type = model_type
        self.order = 1
        self.Xbuilt = {}

        # Store model specs.
        self.model_specs = dict(
            model_type=self.model_type,
            **kwargs)

    @arghandler
    def lnlike_of_data(self, X=None, y=None, yerr=None, thetas=None):
        # Not implemented for this classifier.
        pass

    @arghandler
    def lnlike_transform(
            self,
            X=None,
            y=None,
            yerr=None,
            lnprior=None,
            thetas=None):
        # Not implemented for this classifier.
        pass

    @arghandler
    def hypothesis(self, X=None, thetas=None):
        # Not implemented for this classifier.
        pass

    def hypothesis_transform(self, X=None, y=None, thetas=None):
        # Not implemented for this classifier.
        pass

    @property
    def thetas(self):
        # Not implemented for this classifier.
        pass
Oops, something went wrong.