In [None]:
import category_encoders as ce
import jinja2
import pandas as pd
import plotly.offline as py
from pydantic import BaseModel
from pydantic.main import ModelMetaclass
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    ConfusionMatrixDisplay,
    RocCurveDisplay,
    accuracy_score,
    precision_recall_curve,
    precision_score,
    recall_score,
    roc_auc_score,
    roc_curve,
)
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline

from cyclops.report.model_card.model_card import (
    Citation,
    Dataset,
    FairnessAssessment,
    Graphic,
    GraphicsCollection,
    KeyVal,
    License,
    Limitation,
    ModelCard,
    Owner,
    PerformanceMetric,
    Reference,
    Risk,
    SensitiveData,
    Tradeoff,
    UseCase,
    User,
)
from cyclops.report.plot.classification import ClassificationPlotter
from cyclops.report.plot.utils import fig_to_html

In [None]:
# Credit card fraud Dataset
df = pd.read_csv("data/fraud.csv")

# Class-balanced resample: N_PER_CLASS fraud rows and N_PER_CLASS non-fraud rows.
# The sampling is seeded so that Restart & Run All reproduces the same rows; the
# split and the classifier below are already seeded.
# NOTE(review): rows are drawn with replacement *before* the train/test split, so
# the same source row can end up in both splits and inflate the test metrics.
# Consider splitting first and resampling only the training part.
N_PER_CLASS = 5000
SAMPLE_SEED = 50
df = pd.concat(
    [
        df.loc[df.is_fraud == 1].sample(
            N_PER_CLASS, replace=True, random_state=SAMPLE_SEED
        ),
        df.loc[df.is_fraud == 0].sample(
            N_PER_CLASS, replace=True, random_state=SAMPLE_SEED
        ),
    ]
)

# split out features and target
x = df.drop("is_fraud", axis=1)
y = df["is_fraud"]

# Train-Test data Split (half of the resampled rows go to each side)
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.5, random_state=50
)


# Build ML model with protected attributes as model features

# Apply one hot encoding to categorical columns (auto-detect object columns)
# and random forest model in the pipeline
estimator = Pipeline(
    steps=[
        ("onehot", ce.OneHotEncoder(use_cat_names=True)),
        (
            "classifier",
            RandomForestClassifier(
                n_estimators=4, max_features="sqrt", random_state=882
            ),
        ),
    ]
)


# Fit, predict and compute performance metrics
estimator.fit(x_train, y_train)

output = x_test.copy()  # x_test df with output columns, to be appended later
y_pred = estimator.predict(x_test)
# Column 1 of predict_proba is the probability of the second class (is_fraud == 1)
y_probas = estimator.predict_proba(x_test)[:, 1]

# Predict on the training set once and reuse it for both training metrics
y_pred_train = estimator.predict(x_train)
precision_train = round(precision_score(y_train, y_pred_train), 3)
recall_train = round(recall_score(y_train, y_pred_train), 3)
precision_test = round(precision_score(y_test, y_pred), 3)
recall_test = round(recall_score(y_test, y_pred), 3)


# Add output columns to this dataframe, to be used as a input for feat tests
output["truth"] = y_test
output["prediction"] = y_pred
output["prediction_probas"] = y_probas


# Dataframe with categorical features encoded
onehot_encoder = estimator.named_steps["onehot"]
x_train_encoded = onehot_encoder.transform(x_train)
x_test_encoded = onehot_encoder.transform(x_test)


# Get feature importance values, one row per encoded feature column
df_importance = pd.DataFrame(
    {
        "features": x_test_encoded.columns,
        "value": estimator.named_steps["classifier"].feature_importances_,
    }
)

## Get confusion matrix and ROC curve on train/test set

In [None]:
# Draw the confusion matrix and ROC curve for each split. Every figure is titled
# with its split so the train and test plots can be told apart when skimming.
for split_name, split_x, split_y in (
    ("Train set", x_train, y_train),
    ("Test set", x_test, y_test),
):
    cm_display = ConfusionMatrixDisplay.from_estimator(estimator, split_x, split_y)
    cm_display.ax_.set_title(f"Confusion matrix - {split_name}")

    roc_display = RocCurveDisplay.from_estimator(estimator, split_x, split_y)
    roc_display.ax_.set_title(f"ROC curve - {split_name}")

## Use ClassificationPlotter to plot evaluation metrics

In [None]:
# Display the predicted probability of the positive class (is_fraud == 1) for
# each test row, as computed from predict_proba in the training cell.
y_probas

In [None]:
# One plotter for the binary task; the template and colorway set here apply to
# every figure it produces below.
plotter = ClassificationPlotter(task_type="binary", class_names=["0", "1"])
plotter.set_template("plotly_white")
plotter.set_colorway(["#006ba6", "#ffbc42", "#d81159", "#0496ff", "#8f2d56"])

# Test-set scalar metrics
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
test_metrics = {"accuracy": accuracy, "precision": precision, "recall": recall}

# Test-set curve data.
# The variable name below keeps its original spelling on purpose: spelling it
# correctly would shadow the imported sklearn `precision_recall_curve` function.
precicion_recall_curve = precision_recall_curve(y_test, y_probas)
roc = roc_curve(y_test, y_probas)
auroc = roc_auc_score(y_test, y_probas)

# Build each figure and pass it through fig_to_html; the *_str results are what
# the model card embeds later.
prc_fig = plotter.precision_recall_curve(precicion_recall_curve)
prc_str = fig_to_html(prc_fig)

roc_fig = plotter.roc_curve(roc, auroc=auroc)
roc_str = fig_to_html(roc_fig)

metrics_fig = plotter.classification_metrics(test_metrics, title_suffix="Test Set")
metrics_str = fig_to_html(metrics_fig)

## Bootstrap model card from VerifyML model card editor and scaffold assets
We can add the quantitative analysis, explainability analysis and fairness analysis sections to a bootstrap model card for convenience. In this example, we use an existing model card which we created from the [VerifyML model card editor](https://report.verifyml.com/create). This is meant only as an example: the dataset and risk evaluation in the model card describe a fictional use case.

In [None]:
# Initialize the model card
def scaffold_model(base_model: BaseModel) -> BaseModel:
    """Fill every field of a pydantic model instance with its default, in place.

    For each declared field the value is chosen as follows:

    * if the field declares a ``default_factory``, the factory's result is used;
    * otherwise, if the field's type is itself a pydantic model class, a fresh
      instance of that class is created and scaffolded recursively;
    * otherwise the field's plain ``default`` is used.

    Parameters
    ----------
    base_model : BaseModel
        The model instance to populate. It is mutated via ``setattr``.

    Returns
    -------
    BaseModel
        The same ``base_model`` instance that was passed in.

    Raises
    ------
    AssertionError
        If ``base_model`` is not a pydantic ``BaseModel`` instance.

    """
    assert isinstance(
        base_model, BaseModel
    ), f"Expected a pydantic BaseModel instance, got {type(base_model)} instead."

    for field_name, model_field in base_model.__fields__.items():
        factory = model_field.default_factory

        if factory is not None:
            # An explicit factory always wins, even for model-typed fields.
            value = factory()
        elif type(model_field.type_) is ModelMetaclass:
            # Nested pydantic model without a factory: build and scaffold it.
            value = scaffold_model(model_field.type_())
        else:
            value = model_field.default

        setattr(base_model, field_name, value)

    return base_model

In [None]:
# Create an empty model card and fill all of its fields with defaults in one step
# (scaffold_model returns the instance it was given).
mc = scaffold_model(ModelCard())

## Populate model card fields

In [None]:
# model details
# Long texts use implicit string concatenation instead of a backslash inside the
# literal, which would embed the next line's indentation into the stored text.
mc.model_details.name = "Credit Card Fraud Detection"

mc.model_details.overview = (
    "This model predicts whether a credit card transaction is fraudulent or not."
)

# The last sentence states the split used in this notebook: a class-balanced
# resample divided in half by train_test_split(test_size=0.5).
mc.model_details.documentation = (
    "This model is trained on the Credit Card Fraud Detection dataset from "
    "Kaggle. The dataset contains transactions made by credit cards in September "
    "2013 by European cardholders. This dataset presents transactions that "
    "occurred in two days, where we have 492 frauds out of 284,807 transactions. "
    "The dataset is highly unbalanced, the positive class (frauds) account for "
    "0.172% of all transactions. The model is trained on 50% of a class-balanced "
    "resample of the data and tested on the remaining 50%."
)

mc.model_details.owners.append(Owner(name="John Doe", contact="", role="Researcher"))

mc.model_details.version.name = "1.0"
mc.model_details.version.date = "2021-01-01"
mc.model_details.version.diff = "Initial release"

mc.model_details.licenses.append(
    License(
        identifier="Apache 2.0",
        custom_text="https://www.apache.org/licenses/LICENSE-2.0",
    )
)
mc.model_details.references.append(
    Reference(reference="https://www.kaggle.com/mlg-ulb/creditcardfraud")
)
mc.model_details.citations.append(
    Citation(
        style="APA",
        citation=(
            "Dua, D. and Graff, C. (2019). UCI Machine Learning Repository "
            "[http://archive.ics.uci.edu/ml]. Irvine, CA: University of "
            "California, School of Information and Computer Science."
        ),
    )
)

In [None]:
# model parameters
# Long texts use implicit string concatenation instead of a backslash inside the
# literal, which would embed the next line's indentation into the stored text.
mc.model_parameters.model_architecture = "Random Forest"
mc.model_parameters.data.append(  # training set
    Dataset(
        name="Credit Card Fraud Detection",
        split="train",
        size=len(x_train),
        attributes=x.columns.tolist(),
        sensitive=SensitiveData(
            sensitive_data=["gender", "age"],
            sensitive_data_used=["gender", "age"],
            justification=(
                "Gender and age of the cardholder may be informative of "
                "the likelihood of fraud."
            ),
        ),
    )
)
mc.model_parameters.data.append(  # test set
    Dataset(
        name="Credit Card Fraud Detection",
        split="test",
        size=len(x_test),
        attributes=x.columns.tolist(),
    )
)
mc.model_parameters.input_format = "NumPy array"
mc.model_parameters.input_format_map.extend(
    [
        KeyVal(
            key="x",
            value=(
                "NumPy array of shape (n_samples, n_features) containing the "
                "input features."
            ),
        ),
        KeyVal(
            key="y",
            value="NumPy array of shape (n_samples,) containing the target values.",
        ),
    ]
)
mc.model_parameters.output_format = "NumPy array"

In [None]:
# considerations
# Long texts use implicit string concatenation instead of a backslash inside the
# literal, which would embed the next line's indentation into the stored text.
mc.considerations.users.append(User(description="Data Scientists"))
mc.considerations.use_cases.append(
    UseCase(
        description=(
            "This model predicts whether a credit card transaction is "
            "fraudulent or not."
        )
    )
)
mc.considerations.limitations.append(
    Limitation(
        description=(
            "The model is trained on a dataset that is highly unbalanced, "
            "the positive class (frauds) account for 0.172% of all transactions."
        )
    )
)
mc.considerations.tradeoffs.append(
    Tradeoff(
        description=(
            "The tradeoffs of using this model are that it can help banks "
            "to detect fraudulent transactions, but it can lead to false positives, "
            "which can lead to inconvenience for customers."
        )
    )
)
mc.considerations.ethical_considerations.append(
    Risk(
        name=(
            "The model is trained on a dataset that is highly unbalanced, "
            "the positive class (frauds) account for 0.172% of all transactions."
        ),
        mitigation_strategy=(
            "We can mitigate this by using a different dataset "
            "that is more balanced."
        ),
    )
)
mc.considerations.fairness_assessment.append(
    FairnessAssessment(
        group_at_risk="Fraudulent transactions",
        benefits="The model can help banks to detect fraudulent transactions.",
        harms=(
            "The model can lead to false positives, which can lead to inconvenience "
            "for customers."
        ),
    )
)

In [None]:
# quantitative analysis

# Marker for "do not assign this attribute; keep the PerformanceMetric default".
_KEEP_DEFAULT = object()

# One row per PerformanceMetric entry: (type, value, slice, graphic image).
# Six entries in total: four scalar metrics followed by two curve-only entries.
metric_specs = [
    ("Recall", recall_train, "Training Set", _KEEP_DEFAULT),
    ("Precision", precision_train, "Training Set", _KEEP_DEFAULT),
    ("Recall", recall_test, "Test Set", _KEEP_DEFAULT),
    ("Precision", precision_test, "Test Set", metrics_str),
    ("Precision Recall Curve", _KEEP_DEFAULT, "Test Set", prc_str),
    ("ROC Curve", _KEEP_DEFAULT, "Test Set", roc_str),
]

# Attach the empty entries to the card first, then fill them in through the card
# so the populated objects are the ones the card actually holds.
mc.quantitative_analysis.performance_metrics = [
    PerformanceMetric() for _ in metric_specs
]

for position, (metric_type, metric_value, metric_slice, metric_image) in enumerate(
    metric_specs
):
    entry = mc.quantitative_analysis.performance_metrics[position]
    entry.type = metric_type
    if metric_value is not _KEEP_DEFAULT:
        entry.value = metric_value
    entry.slice = metric_slice
    if metric_image is not _KEEP_DEFAULT:
        entry.graphics = GraphicsCollection()
        entry.graphics.collection = [Graphic(image=metric_image)]

## Model Card Display

In [None]:
# Templates are looked up on disk relative to the notebook's working directory.
template_loader = jinja2.FileSystemLoader("../model_card/template/")

# cache_size=0 together with auto_reload=True means templates are not cached, so
# edits to the template file are picked up on the next get_template call.
jinja_env = jinja2.Environment(
    cache_size=0,
    auto_reload=True,
    autoescape=True,
    loader=template_loader,
)

In [None]:
# Load the model card template by file name through the environment's loader.
TEMPLATE_NAME = "cyclops_template.jinja"
template = jinja_env.get_template(TEMPLATE_NAME)

In [None]:
# Fetch the plotly.js source from the installed plotly package; it is handed to
# the template as `plotlyjs` when the report is rendered.
plotlyjs = py.get_plotlyjs()

In [None]:
# Each model card section is passed to the template under its own attribute name.
SECTION_NAMES = (
    "model_details",
    "model_parameters",
    "quantitative_analysis",
    "explainability_analysis",
    "fairness_analysis",
    "considerations",
)
card_sections = {section: getattr(mc, section) for section in SECTION_NAMES}

content = template.render(plotlyjs=plotlyjs, **card_sections)

In [None]:
# Write the rendered report next to the notebook, replacing any previous one.
# The encoding is pinned to UTF-8 so the output does not depend on the platform's
# locale default, which may be unable to encode non-ASCII characters in the
# rendered content. Plain "w" is enough: the file is never read back here.
with open("report.html", "w", encoding="utf-8") as f:
    f.write(content)