Skip to content

Commit

Permalink
Merge 502d351 into 3cf4ec0
Browse files Browse the repository at this point in the history
  • Loading branch information
eyadgaran committed Jun 25, 2020
2 parents 3cf4ec0 + 502d351 commit 19751d7
Show file tree
Hide file tree
Showing 2 changed files with 84 additions and 40 deletions.
3 changes: 2 additions & 1 deletion .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ install:
- pip install flake8

script:
- flake8 . --ignore=E121,E221,E251,E261,E266,E302,E303,E305,F401,E402,E501,E502 --exclude=.git,.tox,build,dist
- flake8 . --ignore=E121,E221,E251,E261,E266,E302,E303,E305,F401,E402,E501,E502,W503 --exclude=.git,.tox,build,dist
- coverage run setup.py test

after_success:
Expand Down Expand Up @@ -67,3 +67,4 @@ deploy:
# E402, # module level import not at top of file
# E501, # line too long
# E502, # the backslash is redundant between brackets
# W503, # line break before binary operator (opposite of W504; PEP 8 prefers breaking before the operator)
121 changes: 82 additions & 39 deletions simpleml/metrics/classification.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
from simpleml.constants import TRAIN_SPLIT, VALIDATION_SPLIT, TEST_SPLIT
from simpleml.utils.errors import MetricError
from sklearn.metrics import confusion_matrix, roc_auc_score, accuracy_score, f1_score
from abc import abstractmethod
import numpy as np
import pandas as pd

Expand All @@ -36,6 +37,7 @@ class ClassificationMetric(Metric):
'''
TODO: Figure out multiclass generalizations
'''

def __init__(self, dataset_split, **kwargs):
'''
:param dataset_split: string denoting which dataset split to use
Expand Down Expand Up @@ -131,6 +133,19 @@ def confusion_matrix(self):

return self._confusion_matrix

@staticmethod
def _create_confusion_matrix(thresholds, probabilities, labels):
    '''
    Build the per-threshold confusion matrix table without reading any
    instance state (kept standalone so it is easy to test)

    :param thresholds: iterable of cutoffs to evaluate, one output row each
    :param probabilities: predicted scores, compared against each cutoff
        with `>=` (assumes an array-like that supports elementwise comparison)
    :param labels: true labels handed to sklearn's confusion_matrix; the
        classes are pinned to [0, 1]
    :return: DataFrame with columns threshold, tn, fp, fn, tp
    '''
    column_names = ['threshold', 'tn', 'fp', 'fn', 'tp']

    def counts_at(cutoff):
        # Scores at or above the cutoff are predicted as the positive class
        predicted = np.where(probabilities >= cutoff, 1, 0)
        # labels=[0, 1] fixes the 2x2 layout so ravel unpacks as tn, fp, fn, tp
        tn, fp, fn, tp = confusion_matrix(labels, predicted, labels=[0, 1]).ravel()
        return (cutoff, tn, fp, fn, tp)

    rows = [counts_at(cutoff) for cutoff in thresholds]
    return pd.DataFrame(rows, columns=column_names)

def create_confusion_matrix(self):
'''
Iterate through each threshold and compute confusion matrix
Expand All @@ -140,13 +155,7 @@ def create_confusion_matrix(self):
probabilities = self.probabilities
labels = self.labels

results = []
for threshold in thresholds:
predictions = np.where(probabilities >= threshold, 1, 0)
tn, fp, fn, tp = confusion_matrix(labels, predictions, labels=[0, 1]).ravel()
results.append((threshold, tn, fp, fn, tp))

self._confusion_matrix = pd.DataFrame(results, columns=['threshold', 'tn', 'fp', 'fn', 'tp'])
self._confusion_matrix = self._create_confusion_matrix(thresholds, probabilities, labels)

@staticmethod
def dedupe_curve(keys, values, maximize=True, round_places=3):
Expand Down Expand Up @@ -266,9 +275,14 @@ def matthews_correlation_coefficient(self):
'''
Convenience property for the Matthews Correlation Coefficient ((TP*TN-FP*FN)/((FP+TP)*(TP+FN)*(TN+FP)*(TN+FN))^0.5)
'''
return (self.confusion_matrix.tp * self.confusion_matrix.tn - self.confusion_matrix.fp * self.confusion_matrix.fn) /\
((self.confusion_matrix.fp + self.confusion_matrix.tp) * (self.confusion_matrix.tp + self.confusion_matrix.fn) *\
(self.confusion_matrix.tn + self.confusion_matrix.fp) * (self.confusion_matrix.tn + self.confusion_matrix.fn))**0.5
numerator = (self.confusion_matrix.tp * self.confusion_matrix.tn - self.confusion_matrix.fp * self.confusion_matrix.fn)
denominator = (
(self.confusion_matrix.fp + self.confusion_matrix.tp)
* (self.confusion_matrix.tp + self.confusion_matrix.fn)
* (self.confusion_matrix.tn + self.confusion_matrix.fp)
* (self.confusion_matrix.tn + self.confusion_matrix.fn)
)**0.5
return numerator / denominator

@property
def informedness(self):
Expand All @@ -292,85 +306,104 @@ def markedness(self):
(fixed operating point)
'''

class AccuracyMetric(BinaryClassificationMetric):

class AggregateBinaryClassificationMetric(BinaryClassificationMetric):
    '''
    Base class for binary classification metrics that reduce to a single
    aggregate value, stored under the 'agg' key of `values`.
    Subclasses supply the actual calculation by overriding `_score`
    '''

    @staticmethod
    @abstractmethod
    def _score(predictions, labels):
        '''
        Each aggregate needs to define a separate private method to actually
        calculate the aggregate
        Separated from the public score method to enable easier testing and
        extension (values can be passed from non internal properties)

        :param predictions: predicted values to evaluate
        :param labels: true values to evaluate against
        :return: the aggregate metric value
        :raises NotImplementedError: always, in this base implementation
        '''
        # @abstractmethod is only enforced when the class hierarchy uses an
        # ABCMeta-derived metaclass (not visible from here - TODO confirm).
        # Fail loudly so a subclass that forgets to override this cannot
        # silently record {'agg': None}
        raise NotImplementedError('Aggregate metrics must implement _score')

    def score(self):
        '''
        Main scoring method. Uses internal values and passes to class level
        aggregation method. Writes the result to `self.values` as
        {'agg': <value>}; returns nothing
        '''
        predictions = self.predictions
        labels = self.labels
        self.values = {'agg': self._score(predictions, labels)}


class AccuracyMetric(AggregateBinaryClassificationMetric):
def __init__(self, **kwargs):
# Drop whatever name was passed and explicitly rename
kwargs['name'] = 'classification_accuracy'
super(AccuracyMetric, self).__init__(**kwargs)

def score(self):
predictions = self.predictions
labels = self.labels
accuracy = accuracy_score(y_true=labels, y_pred=predictions)
@staticmethod
def _score(predictions, labels):
return accuracy_score(y_true=labels, y_pred=predictions)

self.values = {'agg': accuracy}

class TprMetric(BinaryClassificationMetric):
class TprMetric(AggregateBinaryClassificationMetric):
def __init__(self, **kwargs):
# Drop whatever name was passed and explicitly rename
kwargs['name'] = 'tpr'
super(TprMetric, self).__init__(**kwargs)

def score(self):
predictions = self.predictions
labels = self.labels
@staticmethod
def _score(predictions, labels):
tn, fp, fn, tp = confusion_matrix(labels, predictions).ravel()
tpr = float(tp) / (tp + fn)
return float(tp) / (tp + fn)

self.values = {'agg': tpr}

class FprMetric(BinaryClassificationMetric):
class FprMetric(AggregateBinaryClassificationMetric):
def __init__(self, **kwargs):
# Drop whatever name was passed and explicitly rename
kwargs['name'] = 'fpr'
super(FprMetric, self).__init__(**kwargs)

def score(self):
predictions = self.predictions
labels = self.labels
@staticmethod
def _score(predictions, labels):
tn, fp, fn, tp = confusion_matrix(labels, predictions).ravel()
fpr = float(fp) / (fp + tn)
return float(fp) / (fp + tn)

self.values = {'agg': fpr}

class F1ScoreMetric(BinaryClassificationMetric):
class F1ScoreMetric(AggregateBinaryClassificationMetric):
def __init__(self, **kwargs):
# Drop whatever name was passed and explicitly rename
kwargs['name'] = 'f1_score'
super(F1ScoreMetric, self).__init__(**kwargs)

def score(self):
predictions = self.predictions
labels = self.labels
f1_score_ = f1_score(y_true=labels, y_pred=predictions)

self.values = {'agg': f1_score_}
@staticmethod
def _score(predictions, labels):
return f1_score(y_true=labels, y_pred=predictions)


'''
Aggregate metrics computed by evaluating over entire curves
(Requires proba method)
'''


class RocAucMetric(BinaryClassificationMetric):
def __init__(self, **kwargs):
# Drop whatever name was passed and explicitly rename
kwargs['name'] = 'roc_auc'
super(RocAucMetric, self).__init__(**kwargs)

@staticmethod
def _score(probabilities, labels):
return roc_auc_score(y_true=labels, y_score=probabilities)

def score(self):
probabilities = self.probabilities
labels = self.labels
auc = roc_auc_score(y_true=labels, y_score=probabilities)

self.values = {'agg': auc}
self.values = {'agg': self._score(probabilities, labels)}

############################### CURVE METRICS ###############################


'''
Threshold Constrained Metrics
'''


class ThresholdTprMetric(BinaryClassificationMetric):
def __init__(self, **kwargs):
# Drop whatever name was passed and explicitly rename
Expand All @@ -390,6 +423,7 @@ def __init__(self, **kwargs):
def score(self):
self.values = self.dedupe_curve(self.thresholds, self.true_negative_rate, maximize=True)


class ThresholdFnrMetric(BinaryClassificationMetric):
def __init__(self, **kwargs):
# Drop whatever name was passed and explicitly rename
Expand Down Expand Up @@ -524,6 +558,7 @@ def score(self):
FPR Constrained Metrics
'''


class FprThresholdMetric(BinaryClassificationMetric):
def __init__(self, **kwargs):
# Drop whatever name was passed and explicitly rename
Expand Down Expand Up @@ -678,6 +713,7 @@ def score(self):
TPR Constrained Metrics
'''


class TprThresholdMetric(BinaryClassificationMetric):
def __init__(self, **kwargs):
# Drop whatever name was passed and explicitly rename
Expand Down Expand Up @@ -832,6 +868,7 @@ def score(self):
TNR Constrained Metrics
'''


class TnrThresholdMetric(BinaryClassificationMetric):
def __init__(self, **kwargs):
# Drop whatever name was passed and explicitly rename
Expand Down Expand Up @@ -986,6 +1023,7 @@ def score(self):
FNR Constrained Metrics
'''


class FnrThresholdMetric(BinaryClassificationMetric):
def __init__(self, **kwargs):
# Drop whatever name was passed and explicitly rename
Expand Down Expand Up @@ -1136,11 +1174,11 @@ def score(self):
self.values = self.dedupe_curve(self.false_negative_rate, self.markedness, maximize=True)



'''
FDR Constrained Metrics
'''


class FdrThresholdMetric(BinaryClassificationMetric):
def __init__(self, **kwargs):
# Drop whatever name was passed and explicitly rename
Expand Down Expand Up @@ -1295,6 +1333,7 @@ def score(self):
FOR Constrained Metrics
'''


class ForThresholdMetric(BinaryClassificationMetric):
def __init__(self, **kwargs):
# Drop whatever name was passed and explicitly rename
Expand Down Expand Up @@ -1449,6 +1488,7 @@ def score(self):
PPV Constrained Metrics
'''


class PpvThresholdMetric(BinaryClassificationMetric):
def __init__(self, **kwargs):
# Drop whatever name was passed and explicitly rename
Expand Down Expand Up @@ -1603,6 +1643,7 @@ def score(self):
NPV Constrained Metrics
'''


class NpvThresholdMetric(BinaryClassificationMetric):
def __init__(self, **kwargs):
# Drop whatever name was passed and explicitly rename
Expand Down Expand Up @@ -1757,6 +1798,7 @@ def score(self):
Predicted Positive Rate Constrained Metrics
'''


class PredictedPositiveRateThresholdMetric(BinaryClassificationMetric):
def __init__(self, **kwargs):
# Drop whatever name was passed and explicitly rename
Expand Down Expand Up @@ -1911,6 +1953,7 @@ def score(self):
Predicted Negative Rate Constrained Metrics
'''


class PredictedNegativeRateThresholdMetric(BinaryClassificationMetric):
def __init__(self, **kwargs):
# Drop whatever name was passed and explicitly rename
Expand Down

0 comments on commit 19751d7

Please sign in to comment.