Merge branch 'feat/multiclass_metrics' into feat/tests_expansion
fabrizio-credo committed Dec 12, 2022
2 parents 0ff1e9d + 7dd4fcc commit 166d214
Showing 8 changed files with 69 additions and 29 deletions.
6 changes: 3 additions & 3 deletions credoai/artifacts/model/classification_model.py
@@ -23,7 +23,7 @@ class ClassificationModel(Model):

     def __init__(self, name: str, model_like=None, tags=None):
         super().__init__(
-            "classification",
+            "CLASSIFICATION",
             ["predict", "predict_proba"],
             ["predict"],
             name,
@@ -36,12 +36,12 @@ def __post_init__(self):
         if self.model_info["framework"] in SKLEARN_LIKE_FRAMEWORKS:
             func = getattr(self, "predict_proba", None)
             if len(self.model_like.classes_) == 2:
-                self.type = "binary_classification"
+                self.type = "BINARY_CLASSIFICATION"
                 # if binary, replace probability array with one-dimensional vector
                 if func:
                     self.__dict__["predict_proba"] = lambda x: func(x)[:, 1]
             else:
-                self.type = "multiclass_classification"
+                self.type = "MULTICLASS_CLASSIFICATION"


 class DummyClassifier:
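
For context, a minimal usage sketch of the wrapped classifier after this change (assuming this branch of credoai and scikit-learn are installed; the import path and the toy model and data below are illustrative, not taken from the repo):

# Sketch only: shows how the uppercase type is selected at wrap time.
from sklearn.linear_model import LogisticRegression
from credoai.artifacts import ClassificationModel  # assumed export path

X = [[0.0], [1.0], [2.0], [3.0]]
y = [0, 0, 1, 1]                        # two classes -> BINARY_CLASSIFICATION
clf = LogisticRegression().fit(X, y)

wrapped = ClassificationModel("toy_clf", clf)
print(wrapped.type)                     # "BINARY_CLASSIFICATION"
print(wrapped.predict_proba(X).shape)   # (4,) -- positive-class probabilities only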
2 changes: 1 addition & 1 deletion credoai/artifacts/model/comparison_model.py
@@ -23,7 +23,7 @@ class ComparisonModel(Model):

     def __init__(self, name: str, model_like=None):
         super().__init__(
-            "comparison",
+            "COMPARISON",
             ["compare"],
             ["compare"],
             name,
9 changes: 5 additions & 4 deletions credoai/artifacts/model/constants_model.py
@@ -1,7 +1,8 @@
 SKLEARN_LIKE_FRAMEWORKS = ["sklearn", "xgboost"]
 MODEL_TYPES = [
-    "regression",
-    "binary_classification",
-    "multiclass_classification",
-    "comparison",
+    "REGRESSION",
+    "CLASSIFICATION",
+    "BINARY_CLASSIFICATION",
+    "MULTICLASS_CLASSIFICATION",
+    "COMPARISON",
 ]
2 changes: 1 addition & 1 deletion credoai/artifacts/model/regression_model.py
@@ -21,7 +21,7 @@ class RegressionModel(Model):
     """

     def __init__(self, name: str, model_like=None, tags=None):
-        super().__init__("regression", ["predict"], ["predict"], name, model_like, tags)
+        super().__init__("REGRESSION", ["predict"], ["predict"], name, model_like, tags)


 class DummyRegression:
2 changes: 1 addition & 1 deletion credoai/evaluators/fairness.py
@@ -305,7 +305,7 @@ def _process_metrics(self, metrics):
             if isinstance(metric, str):
                 metric_name = metric
                 metric_categories_to_include = MODEL_METRIC_CATEGORIES
-                metric_categories_to_include.append(self.model.type.upper())
+                metric_categories_to_include.append(self.model.type)
                 metric = find_metrics(metric, metric_categories_to_include)
                 if len(metric) == 1:
                     metric = metric[0]
2 changes: 1 addition & 1 deletion credoai/evaluators/performance.py
@@ -225,7 +225,7 @@ def _process_metrics(self, metrics):
             if isinstance(metric, str):
                 metric_name = metric
                 metric_categories_to_include = MODEL_METRIC_CATEGORIES
-                metric_categories_to_include.append(self.model.type.upper())
+                metric_categories_to_include.append(self.model.type)
                 metric = find_metrics(metric, MODEL_METRIC_CATEGORIES)
                 if len(metric) == 1:
                     metric = metric[0]
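
The .upper() calls in fairness.py and performance.py become redundant because the strings in MODEL_TYPES are now already uppercase; a small illustrative sketch (category list abbreviated, values hypothetical):

# Illustration only: the model's type string now matches metric-category casing directly.
MODEL_METRIC_CATEGORIES = ["CLUSTERING", "FAIRNESS"]   # abbreviated for the example
model_type = "MULTICLASS_CLASSIFICATION"               # e.g. set in ClassificationModel.__post_init__

metric_categories_to_include = MODEL_METRIC_CATEGORIES + [model_type]
print(metric_categories_to_include)
# ['CLUSTERING', 'FAIRNESS', 'MULTICLASS_CLASSIFICATION']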
19 changes: 9 additions & 10 deletions credoai/modules/constants_metrics.py
@@ -9,20 +9,18 @@
 from fairlearn import metrics as fl_metrics
 from sklearn import metrics as sk_metrics

+from credoai.artifacts.model.constants_model import MODEL_TYPES
 from credoai.modules.metrics_credoai import (
     equal_opportunity_difference,
     false_discovery_rate,
     false_omission_rate,
     gini_coefficient_discriminatory,
     ks_statistic,
-    multiclass_rates,
+    multiclass_confusion_metrics,
 )
-from credoai.artifacts.model.constants_model import MODEL_TYPES

 THRESHOLD_METRIC_CATEGORIES = ["BINARY_CLASSIFICATION_THRESHOLD"]

-MODEL_TYPE_METRIC_CATEGORIES = [x.upper() for x in MODEL_TYPES]
-
 MODEL_METRIC_CATEGORIES = [
     "CLUSTERING",
     "FAIRNESS",
@@ -36,7 +34,7 @@
 ]

 METRIC_CATEGORIES = (
-    MODEL_TYPE_METRIC_CATEGORIES
+    MODEL_TYPES
     + MODEL_METRIC_CATEGORIES
     + THRESHOLD_METRIC_CATEGORIES
     + NON_MODEL_METRIC_CATEGORIES
@@ -70,11 +68,12 @@
 # Define Multiclass classification name mapping.
 # Multiclass classification metrics must have a similar signature to sklearn metrics
 MULTICLASS_CLASSIFICATION_FUNCTIONS = {
-    "accuracy_score": sk_metrics.accuracy_score,
-    "average_precision_score": sk_metrics.average_precision_score,
+    "accuracy_score": partial(multiclass_confusion_metrics, metric="ACC"),
     "balanced_accuracy_score": sk_metrics.balanced_accuracy_score,
     "f1_score": partial(sk_metrics.f1_score, average="weighted"),
-    "false_discovery_rate": partial(false_discovery_rate, average="weighted"),
+    "false_discovery_rate": partial(multiclass_confusion_metrics, metric="FDR"),
+    "false_negative_rate": partial(multiclass_confusion_metrics, metric="FNR"),
+    "false_positive_rate": partial(multiclass_confusion_metrics, metric="FPR"),
     "gini_coefficient": partial(
         gini_coefficient_discriminatory, multi_class="ovo", average="weighted"
     ),
@@ -85,8 +84,8 @@
         sk_metrics.roc_auc_score, multi_class="ovo", average="weighted"
     ),
     "selection_rate": fl_metrics.selection_rate,
-    "true_negative_rate": partial(multiclass_rates, rate="TNR"),
-    "true_positive_rate": partial(multiclass_rates, rate="TNR"),
+    "true_negative_rate": partial(multiclass_confusion_metrics, metric="TNR"),
+    "true_positive_rate": partial(multiclass_confusion_metrics, metric="TPR"),
     "underprediction": fl_metrics._mean_underprediction,
 }

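
Each new registry entry is the same underlying function with the metric pre-bound via functools.partial; a usage sketch (assuming this branch of credoai is installed; the labels below are made up):

from functools import partial
from credoai.modules.metrics_credoai import multiclass_confusion_metrics

y_true = [0, 1, 2, 2, 1, 0]
y_pred = [0, 2, 2, 2, 1, 1]

# Same shape as the registry entry for "false_positive_rate"
fpr = partial(multiclass_confusion_metrics, metric="FPR")
print(fpr(y_true, y_pred))                                          # single weighted float
print(multiclass_confusion_metrics(y_true, y_pred, average=None))   # per-class arrays in a dict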
56 changes: 48 additions & 8 deletions credoai/modules/metrics_credoai.py
@@ -11,7 +11,35 @@
 from sklearn.utils import check_consistent_length


-def multiclass_rates(y_true, y_pred, rate):
+def multiclass_confusion_metrics(y_true, y_pred, metric=None, average="weighted"):
+    """Calculate metrics derived from the multiclass confusion matrix.
+
+    Parameters
+    ----------
+    y_true : array-like of shape (n_samples,)
+        Ground truth (correct) target values.
+    y_pred : array-like of shape (n_samples,)
+        Estimated targets as returned by a classifier.
+    metric : str, optional
+        If provided, returns a specific metric. All metrics are returned if None is provided.
+        Options are:
+            "TPR": Sensitivity, hit rate, recall, or true positive rate
+            "TNR": Specificity or true negative rate
+            "PPV": Precision or positive predictive value
+            "NPV": Negative predictive value
+            "FPR": Fall out or false positive rate
+            "FNR": False negative rate
+            "FDR": False discovery rate
+            "ACC": Overall accuracy
+    average : str
+        Options are "weighted", "macro" or None (which will return the values for each label)
+
+    Returns
+    -------
+    dict or float
+        dict if metric is not provided
+    """
     cnf_matrix = confusion_matrix(y_true, y_pred)
     FP = cnf_matrix.sum(axis=0) - np.diag(cnf_matrix)
     FN = cnf_matrix.sum(axis=1) - np.diag(cnf_matrix)
@@ -23,13 +51,25 @@ def multiclass_rates(y_true, y_pred, rate):
     TP = TP.astype(float)
     TN = TN.astype(float)

-    TPR = TP / (TP + FN)
-    TNR = TN / (TN + FP)
-
-    if rate == "TPR":
-        return np.round(TPR, 6)
-    if rate == "TNR":
-        return np.round(TNR, 6)
+    metrics = {
+        "TPR": TP / (TP + FN),
+        "TNR": TN / (TN + FP),
+        "PPV": TP / (TP + FP),
+        "NPV": TN / (TN + FN),
+        "FPR": FP / (FP + TN),
+        "FNR": FN / (TP + FN),
+        "FDR": FP / (TP + FP),
+        "ACC": (TP + TN) / (TP + FP + FN + TN),
+    }
+    if average == "weighted":
+        weights = np.unique(y_true, return_counts=True)[1] / len(y_true)
+        metrics = {k: np.average(v, weights=weights) for k, v in metrics.items()}
+    elif average == "macro":
+        metrics = {k: v.mean() for k, v in metrics.items()}
+    if metric:
+        return metrics[metric]
+    else:
+        return metrics


 def general_wilson(p, n, z=1.96):
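
As a sanity check on the arithmetic above, the weighted TPR computed from the confusion matrix should equal scikit-learn's weighted recall (they are the same quantity); a self-contained sketch with made-up labels:

import numpy as np
from sklearn.metrics import confusion_matrix, recall_score

y_true = np.array([0, 0, 1, 1, 2, 2, 2])
y_pred = np.array([0, 1, 1, 1, 2, 0, 2])

# Per-class counts from the confusion matrix, as in multiclass_confusion_metrics
cnf = confusion_matrix(y_true, y_pred)
FP = cnf.sum(axis=0) - np.diag(cnf)
FN = cnf.sum(axis=1) - np.diag(cnf)
TP = np.diag(cnf)
TN = cnf.sum() - (FP + FN + TP)

# Support-weighted average of per-class TPR
weights = np.unique(y_true, return_counts=True)[1] / len(y_true)
weighted_tpr = np.average(TP / (TP + FN), weights=weights)

assert np.isclose(weighted_tpr, recall_score(y_true, y_pred, average="weighted"))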
