This repository has been archived by the owner on Dec 18, 2023. It is now read-only.

Feat/multiclass metrics #280

Closed
wants to merge 9 commits
2 changes: 2 additions & 0 deletions credoai/artifacts/data/base_data.py
@@ -132,6 +132,8 @@ def _process_sensitive(self, sensitive_features, sensitive_intersections):
_description_
"""
df = pd.DataFrame(sensitive_features)
if len(df.columns) == 1 and isinstance(df.columns[0], int):
df.columns = ["NA"]
# add intersections if asked for
features = df.columns
if sensitive_intersections is False or len(features) == 1:
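For reference, the added check covers a single unnamed sensitive feature: pandas labels the lone column with an integer, which the change replaces with "NA". A minimal sketch of the same logic outside the class (the sample Series is made up):

```python
import pandas as pd

# An unnamed Series becomes a DataFrame whose only column label is the integer 0
sensitive_features = pd.Series(["a", "b", "a"])
df = pd.DataFrame(sensitive_features)
if len(df.columns) == 1 and isinstance(df.columns[0], int):
    df.columns = ["NA"]
print(df.columns.tolist())  # ['NA']
```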
10 changes: 5 additions & 5 deletions credoai/artifacts/model/base_model.py
@@ -40,7 +40,11 @@ def __init__(
self.model_info = get_model_info(model_like)
self._validate(necessary_functions)
self._build(possible_functions)
self._update_functionality()
self.__post_init__()

def __post_init__(self):
"""Optional custom functionality to call after Base Model init"""
pass

@property
def tags(self):
@@ -88,7 +92,3 @@ def _add_functionality(self, key: str):
func = getattr(self.model_like, key, None)
if func:
self.__dict__[key] = func

def _update_functionality(self):
"""Optional framework specific functionality update"""
pass
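The rename generalizes the old `_update_functionality` hook: the base class now calls `self.__post_init__()` at the end of `__init__`, and subclasses override it to adjust their own state. A stripped-down sketch of the pattern (illustrative only, not the full credoai `Model` class):

```python
class Base:
    def __init__(self, model_like):
        self.model_like = model_like
        # ...validation and function binding happen here in the real class...
        self.__post_init__()  # subclass hook, runs after the base is fully built

    def __post_init__(self):
        """Optional custom functionality to call after Base init"""
        pass


class Classifier(Base):
    def __post_init__(self):
        # subclasses can inspect model_like and refine their own state here
        self.type = "CLASSIFICATION"


print(Classifier(model_like=None).type)  # CLASSIFICATION
```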
18 changes: 11 additions & 7 deletions credoai/artifacts/model/classification_model.py
@@ -1,7 +1,6 @@
"""Model artifact wrapping any classification model"""
from .base_model import Model

PREDICT_PROBA_FRAMEWORKS = ["sklearn", "xgboost"]
from .constants_model import SKLEARN_LIKE_FRAMEWORKS


class ClassificationModel(Model):
@@ -24,20 +23,25 @@ class ClassificationModel(Model):

def __init__(self, name: str, model_like=None, tags=None):
super().__init__(
"Classification",
"CLASSIFICATION",
["predict", "predict_proba"],
["predict"],
name,
model_like,
tags,
)

def _update_functionality(self):
def __post_init__(self):
"""Conditionally updates functionality based on framework"""
if self.model_info["framework"] in PREDICT_PROBA_FRAMEWORKS:
if self.model_info["framework"] in SKLEARN_LIKE_FRAMEWORKS:
func = getattr(self, "predict_proba", None)
if func and len(self.model_like.classes_) == 2:
self.__dict__["predict_proba"] = lambda x: func(x)[:, 1]
if len(self.model_like.classes_) == 2:
self.type = "BINARY_CLASSIFICATION"
# if binary, replace probability array with one-dimensional vector
if func:
self.__dict__["predict_proba"] = lambda x: func(x)[:, 1]
else:
self.type = "MULTICLASS_CLASSIFICATION"


class DummyClassifier:
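A hypothetical usage sketch of the new behavior, assuming `ClassificationModel` is importable from `credoai.artifacts`: with a fitted sklearn estimator the wrapper infers the model type from `classes_`, and for binary models it would expose `predict_proba` as a one-dimensional positive-class vector.

```python
from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression

from credoai.artifacts import ClassificationModel  # assumed import path

X, y = load_iris(return_X_y=True)
clf = LogisticRegression(max_iter=1000).fit(X, y)

model = ClassificationModel("iris_clf", clf)
print(model.type)                    # expected: MULTICLASS_CLASSIFICATION (3 classes)
print(model.predict_proba(X).shape)  # expected: (150, 3), full probability matrix retained
```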
2 changes: 1 addition & 1 deletion credoai/artifacts/model/comparison_model.py
@@ -23,7 +23,7 @@ class ComparisonModel(Model):

def __init__(self, name: str, model_like=None):
super().__init__(
"ComparisonModel",
"COMPARISON",
["compare"],
["compare"],
name,
8 changes: 8 additions & 0 deletions credoai/artifacts/model/constants_model.py
@@ -0,0 +1,8 @@
SKLEARN_LIKE_FRAMEWORKS = ["sklearn", "xgboost"]
MODEL_TYPES = [
"REGRESSION",
"CLASSIFICATION",
"BINARY_CLASSIFICATION",
"MULTICLASS_CLASSIFICATION",
"COMPARISON",
]
2 changes: 1 addition & 1 deletion credoai/artifacts/model/regression_model.py
@@ -21,7 +21,7 @@ class RegressionModel(Model):
"""

def __init__(self, name: str, model_like=None, tags=None):
super().__init__("Regression", ["predict"], ["predict"], name, model_like, tags)
super().__init__("REGRESSION", ["predict"], ["predict"], name, model_like, tags)


class DummyRegression:
4 changes: 3 additions & 1 deletion credoai/evaluators/fairness.py
@@ -304,7 +304,9 @@ def _process_metrics(self, metrics):
for metric in metrics:
if isinstance(metric, str):
metric_name = metric
metric = find_metrics(metric, MODEL_METRIC_CATEGORIES)
metric_categories_to_include = MODEL_METRIC_CATEGORIES + [self.model.type]
metric = find_metrics(metric, metric_categories_to_include)
if len(metric) == 1:
metric = metric[0]
elif len(metric) == 0:
2 changes: 2 additions & 0 deletions credoai/evaluators/performance.py
@@ -224,6 +224,8 @@ def _process_metrics(self, metrics):
for metric in metrics:
if isinstance(metric, str):
metric_name = metric
metric_categories_to_include = MODEL_METRIC_CATEGORIES + [self.model.type]
metric = find_metrics(metric, metric_categories_to_include)
if len(metric) == 1:
metric = metric[0]
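In both evaluators the lookup now also searches the wrapped model's type, so a name like `f1_score` can resolve to the weighted multiclass variant for a multiclass model. A hedged sketch of the resolution step (behavior inferred from the code above):

```python
from credoai.modules.constants_metrics import MODEL_METRIC_CATEGORIES
from credoai.modules.metrics import find_metrics

# extend a copy of the categories with the model type rather than mutating the constant
categories = MODEL_METRIC_CATEGORIES + ["MULTICLASS_CLASSIFICATION"]
matches = find_metrics("f1_score", categories)
if len(matches) == 1:
    metric = matches[0]
```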
56 changes: 42 additions & 14 deletions credoai/modules/constants_metrics.py
@@ -9,20 +9,19 @@
from fairlearn import metrics as fl_metrics
from sklearn import metrics as sk_metrics

from credoai.artifacts.model.constants_model import MODEL_TYPES
from credoai.modules.metrics_credoai import (
equal_opportunity_difference,
false_discovery_rate,
false_omission_rate,
gini_coefficient_discriminatory,
ks_statistic,
multiclass_confusion_metrics,
)

THRESHOLD_METRIC_CATEGORIES = ["BINARY_CLASSIFICATION_THRESHOLD"]

MODEL_METRIC_CATEGORIES = [
"BINARY_CLASSIFICATION",
"MULTICLASS_CLASSIFICATION",
"REGRESSION",
"CLUSTERING",
"FAIRNESS",
] + THRESHOLD_METRIC_CATEGORIES
@@ -35,30 +34,59 @@
]

METRIC_CATEGORIES = (
MODEL_METRIC_CATEGORIES + THRESHOLD_METRIC_CATEGORIES + NON_MODEL_METRIC_CATEGORIES
MODEL_TYPES
+ MODEL_METRIC_CATEGORIES
+ THRESHOLD_METRIC_CATEGORIES
+ NON_MODEL_METRIC_CATEGORIES
)

SCALAR_METRIC_CATEGORIES = MODEL_METRIC_CATEGORIES + NON_MODEL_METRIC_CATEGORIES

# MODEL METRICS
# Define Binary classification name mapping.
# Binary classification metrics must have a similar signature to sklearn metrics
BINARY_CLASSIFICATION_FUNCTIONS = {
"false_positive_rate": fl_metrics.false_positive_rate,
"false_negative_rate": fl_metrics.false_negative_rate,
"false_discovery_rate": false_discovery_rate,
"false_omission_rate": false_omission_rate,
"true_positive_rate": fl_metrics.true_positive_rate,
"true_negative_rate": fl_metrics.true_negative_rate,
"precision_score": sk_metrics.precision_score,
"accuracy_score": sk_metrics.accuracy_score,
"average_precision_score": sk_metrics.average_precision_score,
"balanced_accuracy_score": sk_metrics.balanced_accuracy_score,
"matthews_correlation_coefficient": sk_metrics.matthews_corrcoef,
"f1_score": sk_metrics.f1_score,
"average_precision_score": sk_metrics.average_precision_score,
"false_discovery_rate": false_discovery_rate,
"false_negative_rate": fl_metrics.false_negative_rate,
"false_omission_rate": false_omission_rate,
"false_positive_rate": fl_metrics.false_positive_rate,
"gini_coefficient": gini_coefficient_discriminatory,
"matthews_correlation_coefficient": sk_metrics.matthews_corrcoef,
"overprediction": fl_metrics._mean_overprediction,
"precision_score": sk_metrics.precision_score,
"roc_auc_score": sk_metrics.roc_auc_score,
"selection_rate": fl_metrics.selection_rate,
"true_negative_rate": fl_metrics.true_negative_rate,
"true_positive_rate": fl_metrics.true_positive_rate,
"underprediction": fl_metrics._mean_underprediction,
}

# Define Multiclass classification name mapping.
# Multiclass classification metrics must have a similar signature to sklearn metrics
MULTICLASS_CLASSIFICATION_FUNCTIONS = {
"accuracy_score": partial(multiclass_confusion_metrics, metric="ACC"),
"balanced_accuracy_score": sk_metrics.balanced_accuracy_score,
"f1_score": partial(sk_metrics.f1_score, average="weighted"),
"false_discovery_rate": partial(multiclass_confusion_metrics, metric="FDR"),
"false_negative_rate": partial(multiclass_confusion_metrics, metric="FNR"),
"false_positive_rate": partial(multiclass_confusion_metrics, metric="FPR"),
"gini_coefficient": partial(
gini_coefficient_discriminatory, multi_class="ovo", average="weighted"
),
"matthews_correlation_coefficient": sk_metrics.matthews_corrcoef,
"overprediction": fl_metrics._mean_overprediction,
"precision_score": partial(sk_metrics.precision_score, average="weighted"),
"roc_auc_score": partial(
sk_metrics.roc_auc_score, multi_class="ovo", average="weighted"
),
"selection_rate": fl_metrics.selection_rate,
"true_negative_rate": partial(multiclass_confusion_metrics, metric="TNR"),
"true_positive_rate": partial(multiclass_confusion_metrics, metric="TPR"),
"underprediction": fl_metrics._mean_underprediction,
"gini_coefficient": gini_coefficient_discriminatory,
}

# Define Fairness Metric Name Mapping
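Because every entry keeps the `f(y_true, y_pred)` signature, the partial-wrapped multiclass functions drop in wherever the binary ones are used. A small illustration with plain sklearn (the labels are made up):

```python
from functools import partial

from sklearn import metrics as sk_metrics

y_true = [0, 1, 2, 2, 1, 0]
y_pred = [0, 2, 2, 2, 1, 0]

# the partial fixes the averaging strategy; the call signature stays the same
weighted_f1 = partial(sk_metrics.f1_score, average="weighted")
print(weighted_f1(y_true, y_pred))
```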
10 changes: 9 additions & 1 deletion credoai/modules/metrics.py
@@ -127,7 +127,14 @@ def find_metrics(metric_name, metric_category=None):
# Convert To List of Metrics
BINARY_CLASSIFICATION_METRICS = metrics_from_dict(
BINARY_CLASSIFICATION_FUNCTIONS,
"BINARY_CLASSIFICATION",
"binary_classification",
PROBABILITY_FUNCTIONS,
METRIC_EQUIVALENTS,
)

MULTICLASS_CLASSIFICATION_METRICS = metrics_from_dict(
MULTICLASS_CLASSIFICATION_FUNCTIONS,
"MULTICLASS_CLASSIFICATION",
PROBABILITY_FUNCTIONS,
METRIC_EQUIVALENTS,
)
@@ -168,6 +175,7 @@ def find_metrics(metric_name, metric_category=None):

ALL_METRICS = (
list(BINARY_CLASSIFICATION_METRICS.values())
+ list(MULTICLASS_CLASSIFICATION_METRICS.values())
+ list(THRESHOLD_VARYING_METRICS.values())
+ list(FAIRNESS_METRICS.values())
+ list(DATASET_METRICS.values())
65 changes: 63 additions & 2 deletions credoai/modules/metrics_credoai.py
@@ -11,6 +11,67 @@
from sklearn.utils import check_consistent_length


def multiclass_confusion_metrics(y_true, y_pred, metric=None, average="weighted"):
"""Calculate

Parameters
----------
y_true : array-like of shape (n_samples,)
Ground truth (correct) target values.

y_pred : array-like of shape (n_samples,)
Estimated targets as returned by a classifier.
metric : str, optional
If provided, returns a specific metric. All metrics are returned if None is provided.
Options are:
"TPR": Sensitivity, hit rate, recall, or true positive rate
"TNR": Specificity or true negative rate
"PPV": Precision or positive predictive value
"NPV": Negative predictive value
"FPR": Fall out or false positive rate
"FNR": False negative rate
"FDR": False discovery rate
"ACC": Overall accuracy
average : str or None, default "weighted"
Options are "weighted", "macro", or None (which returns the per-label values)

Returns
-------
dict or float
dict of all metrics if `metric` is None, otherwise the single requested metric
"""
cnf_matrix = confusion_matrix(y_true, y_pred)
FP = cnf_matrix.sum(axis=0) - np.diag(cnf_matrix)
FN = cnf_matrix.sum(axis=1) - np.diag(cnf_matrix)
TP = np.diag(cnf_matrix)
TN = cnf_matrix.sum() - (FP + FN + TP)

FP = FP.astype(float)
FN = FN.astype(float)
TP = TP.astype(float)
TN = TN.astype(float)

metrics = {
"TPR": TP / (TP + FN),
"TNR": TN / (TN + FP),
"PPV": TP / (TP + FP),
"NPV": TN / (TN + FN),
"FPR": FP / (FP + TN),
"FNR": FN / (TP + FN),
"FDR": FP / (TP + FP),
"ACC": (TP + TN) / (TP + FP + FN + TN),
}
if average == "weighted":
weights = np.unique(y_true, return_counts=True)[1] / len(y_true)
metrics = {k: np.average(v, weights=weights) for k, v in metrics.items()}
elif average == "macro":
metrics = {k: v.mean() for k, v in metrics.items()}
if metric:
return metrics[metric]
else:
return metrics
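A short worked example of the helper above, assuming it is imported from `credoai.modules.metrics_credoai` (the sample labels are made up):

```python
from credoai.modules.metrics_credoai import multiclass_confusion_metrics

y_true = [0, 1, 2, 2, 1, 0]
y_pred = [0, 2, 2, 2, 1, 0]

print(multiclass_confusion_metrics(y_true, y_pred, metric="TPR"))  # weighted true positive rate
print(multiclass_confusion_metrics(y_true, y_pred, average=None))  # per-class arrays for every metric
```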


def general_wilson(p, n, z=1.96):
"""Return lower and upper bound using Wilson Interval.
Parameters
@@ -322,7 +383,7 @@ def credo_det_curve(y_true, y_prob):
)


def gini_coefficient_discriminatory(y_true, y_prob):
def gini_coefficient_discriminatory(y_true, y_prob, **kwargs):
"""Returns the Gini Coefficient of a discriminatory model

NOTE: There are two popular, yet distinct metrics known as the 'gini coefficient'.
Expand All @@ -349,7 +410,7 @@ def gini_coefficient_discriminatory(y_true, y_prob):
float
Discriminatory Gini Coefficient
"""
G = (2 * sk_metrics.roc_auc_score(y_true, y_prob)) - 1
G = (2 * sk_metrics.roc_auc_score(y_true, y_prob, **kwargs)) - 1
return G


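Passing `**kwargs` through to `roc_auc_score` is what lets the Gini coefficient be reused for multiclass probability scores. A sketch of the equivalent direct computation (dataset and solver choices are illustrative):

```python
from sklearn import metrics as sk_metrics
from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression

X, y = load_iris(return_X_y=True)
y_prob = LogisticRegression(max_iter=1000).fit(X, y).predict_proba(X)

# multiclass Gini = 2 * (one-vs-one, weighted AUC) - 1
auc = sk_metrics.roc_auc_score(y, y_prob, multi_class="ovo", average="weighted")
print(2 * auc - 1)
```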
1 change: 1 addition & 0 deletions credoai/utils/model_utils.py
@@ -23,6 +23,7 @@ def get_generic_classifier():


def get_model_info(model):
"""Returns basic information about model info"""
try:
framework = model.__class__.__module__.split(".")[0]
except AttributeError:
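The framework string that drives the sklearn-like checks comes from the wrapped object's module path; a quick sketch of that lookup:

```python
from sklearn.linear_model import LogisticRegression

model = LogisticRegression()
# get_model_info derives the framework from the class's top-level module
framework = model.__class__.__module__.split(".")[0]
print(framework)  # sklearn
```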