Skip to content

Commit

Permalink
Merge pull request #34 from Zsailer/cluster
Browse files Browse the repository at this point in the history
Refactored Classifier module and allow for custom sklearn classifiers
  • Loading branch information
Zsailer committed May 17, 2018
2 parents 8c5387a + dffd9bf commit 69de872
Show file tree
Hide file tree
Showing 12 changed files with 440 additions and 73 deletions.
3 changes: 2 additions & 1 deletion epistasis/models/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,5 +9,6 @@
from .nonlinear import (EpistasisNonlinearRegression,
EpistasisPowerTransform,
EpistasisSpline)
from .classifiers import EpistasisLogisticRegression
from .classifiers import (EpistasisLogisticRegression,
EpistasisGaussianMixture)
from .pipeline import EpistasisPipeline
9 changes: 6 additions & 3 deletions epistasis/models/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,16 +21,19 @@ class SubclassException(Exception):
"""Subclass Exception for parent classes."""

def use_sklearn(sklearn_class):
"""Swap out base classes in an Epistasis model class with a sklearn_class +
AbstractModel.
"""Swap out last class in the inherited stack (Assuming its
the BaseModel) with the AbstractModel below. Then, sandwiches
the Sklearn class with all other base classes first, followed
by the Sklearn class and the AbstractModel.
"""
def mixer(cls):
# Meta program the class
bases = cls.__bases__[:-1]
name = cls.__name__
methods = dict(cls.__dict__)

# Put Sklearn first in line of parent classes
parents = (sklearn_class, AbstractModel)
parents = bases + (sklearn_class, AbstractModel)

# Rebuild class with Mixed in scikit learn.
cls = type(name, parents, methods)
Expand Down
2 changes: 2 additions & 0 deletions epistasis/models/classifiers/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
from .logistic import EpistasisLogisticRegression
from .gmm import EpistasisGaussianMixture
83 changes: 83 additions & 0 deletions epistasis/models/classifiers/base.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
import numpy as np
import pandas as pd

# Scikit-learn classifiers
from sklearn.preprocessing import binarize

from epistasis.mapping import EpistasisMap
from epistasis.models.base import BaseModel, use_sklearn
from epistasis.models.utils import (XMatrixException, arghandler)

from epistasis.models.linear import EpistasisLinearRegression

from gpmap import GenotypePhenotypeMap


class EpistasisClassifierMixin:
    """A Mixin class for epistasis classifiers.

    Implements a two-stage fit: (1) fit a first-order ("additive") linear
    epistasis model to the data; (2) project the model matrix into the
    additive-phenotype space and fit the inheriting sklearn classifier on
    that projected matrix against labels binarized at ``self.threshold``.

    Expects the inheriting class to provide ``model_type``, ``threshold``
    and ``gpm`` attributes, and an sklearn classifier later in its MRO
    (see ``use_sklearn`` in ``epistasis.models.base``).
    """
    def _project_X(self, X=None):
        """Return the model matrix scaled column-wise by the additive
        epistasis coefficients (projection into "padd" space)."""
        Xadd = self.Additive._X(data=X)
        return Xadd * self.Additive.epistasis.values

    def _fit_additive(self, X=None, y=None):
        """Construct and fit the first-order additive model."""
        # Construct an additive model.
        self.Additive = EpistasisLinearRegression(
            order=1, model_type=self.model_type)

        self.Additive.add_gpm(self.gpm)

        # Prepare an epistasis map matching the additive model's columns.
        self.Additive.epistasis = EpistasisMap(
            sites=self.Additive.Xcolumns,
            order=self.Additive.order,
            model_type=self.Additive.model_type
        )

        # Fit the additive model and infer additive phenotypes.
        self.Additive.fit(X=X, y=y)
        return self

    def _fit_classifier(self, X=None, y=None):
        """Fit the sklearn classifier on the additive-projected matrix."""
        # Project X into padd space.
        X = self._project_X(X)

        # Binarize phenotypes around the threshold to get class labels.
        # `threshold` must be passed by keyword: it is keyword-only in
        # scikit-learn >= 1.0 (positional raises TypeError).
        y = binarize(y.reshape(1, -1), threshold=self.threshold)[0]
        self.classes = y

        # Fit classifier.
        super().fit(X=X, y=y)
        return self

    def fit_transform(self, X=None, y=None, **kwargs):
        """Fit the classifier, then return a new GenotypePhenotypeMap
        containing only the genotypes predicted to be in class 1."""
        self.fit(X=X, y=y, **kwargs)
        ypred = self.predict(X=X)

        # Keep only the rows classified as 1 (e.g. "viable").
        gpm = GenotypePhenotypeMap.read_dataframe(
            dataframe=self.gpm.data[ypred == 1],
            wildtype=self.gpm.wildtype,
            mutations=self.gpm.mutations
        )
        return gpm

    def predict(self, X=None):
        """Predict class labels from the additive-projected matrix."""
        return super().predict(X=self._project_X(X))

    def predict_transform(self, X=None, y=None):
        """Set y to the threshold value wherever class 0 is predicted.

        NOTE: mutates and returns the passed-in ``y`` array in place.
        """
        x = self.predict(X=X)
        y[x <= 0.5] = self.threshold
        return y

    def predict_log_proba(self, X=None):
        """Log class probabilities on the additive-projected matrix."""
        return super().predict_log_proba(self._project_X(X))

    def predict_proba(self, X=None):
        """Class probabilities on the additive-projected matrix."""
        return super().predict_proba(X=self._project_X(X))
61 changes: 61 additions & 0 deletions epistasis/models/classifiers/discriminant_analysis.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
import numpy as np
import pandas as pd

from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
from sklearn.preprocessing import binarize

from epistasis.mapping import EpistasisMap
from epistasis.models.base import BaseModel, use_sklearn
from epistasis.models.utils import arghandler
from epistasis.models.linear import EpistasisLinearRegression

# Use if inheriting from a scikit-learn class
@use_sklearn(QuadraticDiscriminantAnalysis)
class EpistasisQuadraticDA(BaseModel):
    """Quadratic-discriminant-analysis classifier for epistasis models.

    The ``use_sklearn`` decorator rebuilds this class with
    ``QuadraticDiscriminantAnalysis`` spliced into the MRO. The
    two-argument ``super(self.__class__, self)`` call below resolves
    against that rebuilt class; do NOT replace it with zero-argument
    ``super()``, whose ``__class__`` cell would still reference the
    original, pre-rebuild class and skip the sklearn base.

    Parameters
    ----------
    order : int
        NOTE(review): accepted but ignored — ``self.order`` is hard-coded
        to 1 below; confirm whether this is intentional.
    threshold : float
        Phenotype value used to split the data into two classes.
    model_type : str
        Type of model matrix to use ("global" or "local").
    """
    def __init__(self, order=1, threshold=5, model_type='global', **kwargs):
        self.model_type = model_type
        # NOTE(review): the `order` argument is not used; hard-coded to 1.
        self.order = 1
        self.Xbuilt = {}
        self.threshold=threshold

        # Two-argument super: required because use_sklearn rebuilds the
        # class after this body is compiled (see class docstring).
        super(self.__class__, self).__init__(**kwargs)

        # Store model specs.
        self.model_specs = dict(
            priors=None,
            threshold=threshold,
            model_type=self.model_type,
            **kwargs)

        # Set up additive linear model for pre-classifying
        self.Additive = EpistasisLinearRegression(
            order=1, model_type=self.model_type)

    @property
    def num_of_params(self):
        # Not implemented for this classifier.
        pass

    def hypothesis(self, X=None, thetas=None):
        # Not implemented; stub required by the BaseModel interface.
        pass

    def hypothesis_transform(self, X=None, y=None, thetas=None):
        # Not implemented; stub required by the BaseModel interface.
        pass

    def lnlike_of_data(
            self,
            X=None,
            y=None,
            yerr=None,
            thetas=None):
        # Not implemented; stub required by the BaseModel interface.
        pass

    def lnlike_transform(
            self,
            X=None,
            y=None,
            yerr=None,
            lnprior=None,
            thetas=None):
        # Not implemented; stub required by the BaseModel interface.
        pass
74 changes: 74 additions & 0 deletions epistasis/models/classifiers/gaussian_process.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
import numpy as np
import pandas as pd

from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.preprocessing import binarize

from epistasis.mapping import EpistasisMap
from epistasis.models.base import BaseModel, use_sklearn
from epistasis.models.utils import arghandler
from epistasis.models.linear import EpistasisLinearRegression

from .base import EpistasisClassifierMixin

# Use if inheriting from a scikit-learn class
@use_sklearn(GaussianProcessClassifier)
class EpistasisGaussianProcess(EpistasisClassifierMixin, BaseModel):
    """Gaussian-process classifier for epistasis models.

    Combines ``EpistasisClassifierMixin``'s two-stage fit (additive model,
    then classifier on the projected matrix) with sklearn's
    ``GaussianProcessClassifier``, which ``use_sklearn`` splices into the
    MRO at decoration time. The two-argument ``super(self.__class__, self)``
    call below resolves against that rebuilt class and must not be changed
    to zero-argument ``super()``.

    Parameters
    ----------
    order : int
        NOTE(review): accepted but ignored — ``self.order`` is hard-coded
        to 1 below; confirm whether this is intentional.
    threshold : float
        Phenotype value used to split the data into two classes.
    model_type : str
        Type of model matrix to use ("global" or "local").
    """
    def __init__(self, order=1, threshold=5, model_type='global', **kwargs):
        self.model_type = model_type
        # NOTE(review): the `order` argument is not used; hard-coded to 1.
        self.order = 1
        self.Xbuilt = {}
        self.threshold=threshold

        # Two-argument super: required because use_sklearn rebuilds the
        # class after this body is compiled (see class docstring).
        super(self.__class__, self).__init__(**kwargs)

        # Store model specs.
        self.model_specs = dict(
            priors=None,
            threshold=threshold,
            model_type=self.model_type,
            **kwargs)

        # Set up additive linear model for pre-classifying
        self.Additive = EpistasisLinearRegression(
            order=1, model_type=self.model_type)

    @property
    def num_of_params(self):
        # NOTE(review): reads `self.epistasis`, which this class never
        # assigns (only `self.Additive.epistasis` is set in the mixin) —
        # confirm this attribute is provided by a base class, or fix.
        n = 0
        n += self.epistasis.n
        return n

    @arghandler
    def fit(self, X=None, y=None, **kwargs):
        # Use Additive model to establish the phenotypic scale.
        # Prepare Additive model
        self._fit_additive(X=X, y=y)
        self._fit_classifier(X=X, y=y)
        return self

    def hypothesis(self, X=None, thetas=None):
        # Not implemented; stub required by the BaseModel interface.
        pass

    def hypothesis_transform(self, X=None, y=None, thetas=None):
        # Not implemented; stub required by the BaseModel interface.
        pass

    def lnlike_of_data(
            self,
            X=None,
            y=None,
            yerr=None,
            thetas=None):
        # Not implemented; stub required by the BaseModel interface.
        pass

    def lnlike_transform(
            self,
            X=None,
            y=None,
            yerr=None,
            lnprior=None,
            thetas=None):
        # Not implemented; stub required by the BaseModel interface.
        pass
72 changes: 72 additions & 0 deletions epistasis/models/classifiers/gmm.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
import numpy as np
import pandas as pd

# Scikit-learn classifiers
from sklearn.mixture import GaussianMixture
from sklearn.preprocessing import binarize

from epistasis.mapping import EpistasisMap
from epistasis.models.base import BaseModel, use_sklearn
from epistasis.models.utils import (XMatrixException, arghandler)

from epistasis.models.linear import EpistasisLinearRegression

from gpmap import GenotypePhenotypeMap

from .base import EpistasisClassifierMixin

@use_sklearn(GaussianMixture)
class EpistasisGaussianMixture(EpistasisClassifierMixin, BaseModel):
    """Gaussian-mixture classifier for separating phenotype classes
    (e.g. viable vs. nonviable phenotypes).

    ``use_sklearn`` rebuilds this class with ``sklearn.mixture.GaussianMixture``
    spliced into the MRO at decoration time; the two-argument
    ``super(self.__class__, self)`` call below resolves against that rebuilt
    class and must not be changed to zero-argument ``super()``.

    Parameters
    ----------
    n_components : int
        Number of mixture components, passed through to
        ``sklearn.mixture.GaussianMixture``.
    model_type : str (default="global")
        type of model matrix to use. "global" defines epistasis with respect to
        a background-averaged "genotype-phenotype". "local" defines epistasis
        with respect to the wildtype genotype.
    """
    def __init__(
            self,
            n_components=1,
            model_type="global",
            **kwargs):

        # Two-argument super: required because use_sklearn rebuilds the
        # class after this body is compiled (see class docstring).
        super(self.__class__, self).__init__(n_components=n_components, **kwargs)
        self.model_type = model_type
        # Classifier pre-fit uses a first-order (additive) model only.
        self.order = 1
        self.Xbuilt = {}

        # Store model specs.
        # NOTE(review): n_components is not recorded in model_specs, unlike
        # the constructor arguments stored by sibling classifiers — confirm.
        self.model_specs = dict(
            model_type=self.model_type,
            **kwargs)

    @arghandler
    def lnlike_of_data(self, X=None, y=None, yerr=None, thetas=None):
        # Not implemented; stub required by the BaseModel interface.
        pass

    @arghandler
    def lnlike_transform(
            self,
            X=None,
            y=None,
            yerr=None,
            lnprior=None,
            thetas=None):
        # Not implemented; stub required by the BaseModel interface.
        pass

    @arghandler
    def hypothesis(self, X=None, thetas=None):
        # Not implemented; stub required by the BaseModel interface.
        pass

    def hypothesis_transform(self, X=None, y=None, thetas=None):
        # Not implemented; stub required by the BaseModel interface.
        pass

    @property
    def thetas(self):
        # Not implemented for this classifier.
        pass

0 comments on commit 69de872

Please sign in to comment.