In [None]:
import inspect

import category_encoders as ce
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    ConfusionMatrixDisplay,
    RocCurveDisplay,
    precision_score,
    recall_score,
)
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline

from cyclops.report import ModelCardReport

In [None]:
# Create the model card report object; the cells below populate it through
# its `log_*` methods and the final cell exports it.
report = ModelCardReport()

In [None]:
# Free-text summary of the model, logged under the "Model Details" section.
model_description = inspect.cleandoc(
    """
    Sample example of a risk assessment of a credit card fraud model.
    Binary prediction problem (fraud or no fraud). Customers flagged as
    potentially fraudulent will be passed to internal investigation team
    for follow-up."""
)

# Citation text logged alongside the model details.
dataset_citation = inspect.cleandoc(
    """
    Markelle Kelly, Rachel Longjohn, Kolby Nottingham, The UCI Machine Learning
    Repository, https://archive.ics.uci.edu
    """
)

model_details = {
    "description": model_description,
    "references": [{"link": "https://www.kaggle.com/mlg-ulb/creditcardfraud"}],
}

# Section content first, then the individual metadata entries
# (citation, license, owner, version, regulation) in the same order as before.
report.log_from_dict(model_details, "Model Details")
report.log_citation(citation=dataset_citation)
report.log_license(identifier="MIT", some_other_license="some other license")
report.log_owner(name="John Doe", contact="john.doe@email.com", role="Researcher")
report.log_version("1.0.0", "2021-01-01", "Initial release")
report.log_regulation("Accountability and Transparency (FEAT) principles")

In [None]:
# Intended users and the use case the model serves.
report.log_user(description="Credit card fraud team and credit card holders")
report.log_use_case(
    description=inspect.cleandoc(
        """
        Increase accuracy of predicting credit card fraud over the existing rule-based
        model, saving the bank time and energy for each false positive case and
        avoiding reputation harm from false negative cases."""
    )
)

# The single-line texts below are built from adjacent string literals.
# A backslash-newline *inside* a string literal keeps the following line's
# leading indentation as part of the string, which would put long runs of
# spaces in the middle of the logged sentences.
class_imbalance_note = (
    "The model is trained on a dataset that is highly unbalanced, "
    "the positive class (frauds) account for 0.172% of all transactions."
)

# Known limitation and tradeoff, both filed under the "considerations" section.
report.log_descriptor(
    name="limitation",
    description=class_imbalance_note,
    section_name="considerations",
)
report.log_descriptor(
    name="tradeoff",
    description=(
        "The tradeoffs of using this model are that it can help banks "
        "to detect fraudulent transactions, but it can lead to false positives, "
        "which can lead to inconvenience for customers."
    ),
    section_name="considerations",
)

# The class imbalance is also logged as a risk, with its mitigation.
report.log_risk(
    risk=class_imbalance_note,
    mitigation_strategy=(
        "We can mitigate this by using a different dataset "
        "that is more balanced."
    ),
)

# Fairness assessment: who is affected, the benefit, the potential harm and
# how that harm is mitigated.
report.log_fairness_assessment(
    affected_group="Race, age, gender",
    benefit=inspect.cleandoc(
        """
        A more precise model will reduce the number of customers being mistakenly
        labelled as fraudulent in the existing rules based model, which takes 7
        man-days to resolve before a credit card could be unfrozen."""
    ),
    harm=inspect.cleandoc(
        """
        Customers who are in the false-positive category will have their credit card
        frozen and may be excluded from the financial services of the bank for up
        to 7 days."""
    ),
    mitigation_strategy=inspect.cleandoc(
        """
        Because there is less data for certain demographic groups (e.g. youth, elderly),
        the model can have much higher/lower false-positive rates for that segment
        than that of others. We will prioritize such cases after the initial model
        score to add a 2nd level of check and minimise disruption to the customer."""
    ),
)

In [None]:
# Credit card fraud Dataset
df = pd.read_csv("data/fraud.csv")

# Seed for the class-balancing resample. The split and the classifier below
# are already seeded; seeding the resample as well makes a fresh
# "Restart & Run All" reproduce the same metrics.
SAMPLE_SEED = 50

# get 5000 samples of fraud and 5000 samples of non-fraud
# NOTE(review): rows are drawn with replacement *before* the train/test split,
# so copies of the same row can land in both sets and the test metrics may be
# optimistic. Consider splitting first and resampling only the training part.
df = pd.concat(
    [
        df.loc[df.is_fraud == 1].sample(
            5000, replace=True, random_state=SAMPLE_SEED
        ),
        df.loc[df.is_fraud == 0].sample(
            5000, replace=True, random_state=SAMPLE_SEED
        ),
    ]
)

# split out features and target
x = df.drop("is_fraud", axis=1)
y = df["is_fraud"]

# Train-Test data Split
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.5, random_state=50
)


# Build ML model with protected attributes as model features

# Apply one hot encoding to categorical columns (auto-detect object columns)
# and random forest model in the pipeline
estimator = Pipeline(
    steps=[
        ("onehot", ce.OneHotEncoder(use_cat_names=True)),
        (
            "classifier",
            RandomForestClassifier(
                n_estimators=4, max_features="sqrt", random_state=882
            ),
        ),
    ]
)

# Record the classifier's hyper-parameters on the model card.
report.log_model_parameters(estimator[-1].get_params())

# Fit, predict and compute performance metrics
estimator.fit(x_train, y_train)

output = x_test.copy()  # x_test df with output columns, to be appended later
y_pred = estimator.predict(x_test)
y_probas = estimator.predict_proba(x_test)[:, 1]  # second predict_proba column

# Predict on the training set once and reuse it for both train metrics.
y_pred_train = estimator.predict(x_train)

precision_train = round(precision_score(y_train, y_pred_train), 3)
recall_train = round(recall_score(y_train, y_pred_train), 3)
precision_test = round(precision_score(y_test, y_pred), 3)
recall_test = round(recall_score(y_test, y_pred), 3)


# Add output columns to this dataframe, to be used as a input for feat tests
output["truth"] = y_test
output["prediction"] = y_pred
output["prediction_probas"] = y_probas


# Dataframe with categorical features encoded
x_train_encoded = estimator[0].transform(x_train)
x_test_encoded = estimator[0].transform(x_test)


# Get feature importance values
df_importance = pd.DataFrame(
    {"features": x_test_encoded.columns, "value": estimator[-1].feature_importances_}
)

In [None]:
# Collect the rounded train/test precision and recall under "<split>/<metric>"
# keys, then log them on the report in a single call.
performance_metrics = {
    "train/precision": precision_train,
    "train/recall": recall_train,
    "test/precision": precision_test,
    "test/recall": recall_test,
}
report.log_performance_metrics(performance_metrics)

## Get confusion matrix and ROC curve on train/test set

In [None]:
# Draw a confusion matrix and a ROC curve for the fitted pipeline, first on
# the training split and then on the test split.
# NOTE(review): the commented-out `plot_to_str()` lines look like placeholders
# for capturing each figure as a string; `plot_to_str` is not imported or
# defined in the visible cells, so they stay disabled.

# Train set
ConfusionMatrixDisplay.from_estimator(estimator, x_train, y_train)
# confusion_matrix_train = plot_to_str()
RocCurveDisplay.from_estimator(estimator, x_train, y_train)
# roc_curve_train = plot_to_str()

# Test set
ConfusionMatrixDisplay.from_estimator(estimator, x_test, y_test)
# confusion_matrix_test = plot_to_str()
RocCurveDisplay.from_estimator(estimator, x_test, y_test)
# roc_curve_test = plot_to_str()

In [None]:
# Export the report populated by the cells above.
# NOTE(review): called with defaults only; output location and format are
# whatever `ModelCardReport.export` chooses — confirm against the cyclops docs.
report.export()