In [None]:
from credoai.integration import *

In [None]:
import numpy as np

from matplotlib import pyplot as plt
from pprint import pprint
from sklearn.model_selection import train_test_split
from sklearn import datasets
from sklearn.svm import SVC
from sklearn.metrics import classification_report
from sklearn.metrics import precision_recall_curve

# Create an example ML Model

### Load data and train model

For the purpose of this demonstration, we will be classifying digits after a large amount of noise has been added to each image.

In [None]:
# Fetch scikit-learn's bundled handwritten-digit dataset.
digits = datasets.load_digits()

# Corrupt every pixel in place with uniform noise drawn from [0, 16), so that
# `digits.data` itself holds the noisy images used by the rest of the notebook.
noise = np.random.rand(*digits.data.shape) * 16
digits.data += noise

# Hold out part of the noisy data for evaluation (default split proportions).
X_train, X_test, y_train, y_test = train_test_split(
    digits.data,
    digits.target,
)

# Build a support vector classifier with probability estimates enabled
# (predict_proba is called later for the precision-recall curves) and fit it.
clf = SVC(probability=True)
clf.fit(X_train, y_train)

### Visualize example images along with predicted label

In [None]:
# Show the first eight held-out images in a 2x4 grid, each titled with the
# digit the classifier predicts for it.
examples_plot = plt.figure()
for i in range(8):
    pixels = X_test[i, :]
    # predict() is given a batch of one: add a leading axis to the flat image
    predicted_class = clf.predict(pixels[None, :])[0]
    prediction = digits.target_names[predicted_class]
    title = f'Pred: "{prediction}"'
    # draw the flat vector as an 8x8 grayscale image with ticks hidden
    ax = plt.subplot(2, 4, i + 1)
    ax.imshow(pixels.reshape(8, 8), cmap='gray')
    ax.set_title(title)
    ax.tick_params(labelbottom=False, labelleft=False, length=0)
plt.suptitle('Example Images and Predictions', fontsize=16)

### Calculate performance metrics and visualize

Because this is a multiclass classification problem, we can calculate metrics per class or overall. We record the overall metrics, but include figures that break down performance for each individual class.

In [None]:
# Score the held-out predictions; output_dict=True returns the report as a
# nested dictionary instead of a formatted string.
metrics = classification_report(
    y_test,
    clf.predict(X_test),
    output_dict=True,
)

# Keep only the 'macro avg' summary and drop its 'support' entry, so that
# just the precision, recall and f1-score values remain to be recorded.
overall_metrics = metrics['macro avg']
overall_metrics.pop('support')
pprint(overall_metrics)

In [None]:
# Per-class probability estimates for the held-out images.
probs = clf.predict_proba(X_test)
pr_curves = plt.figure(figsize=(8,6))

# Plot one PR curve per digit, treating each digit as its own one-vs-rest
# binary problem.
for digit in digits.target_names:
    y_true = y_test == digit
    # NOTE: the digit value is used directly as the column index, which
    # assumes the class labels are 0..9 in the same order as the columns
    # returned by predict_proba.
    y_prob = probs[:, digit]
    precisions, recalls, _ = precision_recall_curve(y_true, y_prob)
    plt.plot(recalls, precisions, lw=3, label=f'Digit: {digit}')
plt.xlabel('Recall', fontsize=16)
plt.ylabel('Precision', fontsize=16)

# Plot iso-F1 lines. Solving F1 = 2PR / (P + R) for precision gives
# P = F1 * R / (2R - F1), evaluated here over a shared grid of recall values.
f_scores = np.linspace(0.2, 0.8, num=4)
x = np.linspace(0.01, 1)
for i, f_score in enumerate(f_scores):
    # only the first line carries a legend label, so the legend lists the
    # iso curves once
    label = 'ISO f1 curves' if i == 0 else ''
    y = f_score * x / (2 * x - f_score)
    # keep only the branch with non-negative precision
    plt.plot(x[y >= 0], y[y >= 0], color='gray', alpha=0.2, label=label)

# Final touches.
plt.xlim([0.5, 1.0])
plt.ylim([0.0, 1.05])
plt.tick_params(labelsize=14)
plt.title('PR Curves per Digit', fontsize=20)
plt.legend(loc='lower left', fontsize=10)

In [None]:
from sklearn.metrics import ConfusionMatrixDisplay

# Row-normalized confusion matrix of the classifier on the held-out data.
confusion_plot = plt.figure(figsize=(6,6))
confusion_kwargs = dict(normalize='true', ax=plt.gca(), colorbar=False)
if hasattr(ConfusionMatrixDisplay, 'from_estimator'):
    # scikit-learn >= 1.0: plot_confusion_matrix was deprecated in 1.0 and
    # removed in 1.2 in favour of this class method.
    ConfusionMatrixDisplay.from_estimator(clf, X_test, y_test, **confusion_kwargs)
else:
    # Older scikit-learn releases only provide the function form.
    from sklearn.metrics import plot_confusion_matrix
    plot_confusion_matrix(clf, X_test, y_test, **confusion_kwargs)
plt.tick_params(labelsize=14)
# Give the axes a title: the figure record created later is named from
# confusion_plot.axes[0].get_title(), which would otherwise be empty.
plt.title('Confusion Matrix', fontsize=20)

# Integration example 
## Sending model information to Credo AI
For governance, Credo AI needs some information about your model.
* Metrics related to performance, fairness, or other governance considerations
* Input features
* Outputs (what does the model produce?)

In addition, figures are often produced that help communicate metrics better, understand the model, or otherwise contextualize the AI system. Credo can ingest those as well.

**Which metrics to record?**

Ideally you will have decided on the most important metrics before building the model. We refer to this stage as `Metric Alignment`. This is the phase where you explicitly determine how you will measure whether your model can be safely deployed. 

You may want to record more metrics than those explicitly determined during `Metric Alignment`.

For instance, in this example let's say that during `Metric Alignment`, the _F1 Score_ was chosen as the primary metric used to evaluate model performance. However, we have decided that recall and precision would be helpful supporting metrics, so we will send all three.


To reiterate: you are always free to send more metrics - Credo AI will ingest them. It is up to you and your team to decide which metrics are tracked specifically for governance purposes.

## Quick reference

Below is all the code needed to record a set of metrics, figures, inputs and outputs and wrap them all up in a model. We will unpack each part below.

In [None]:
# Identifiers attached to the recorded metrics.
model_name = 'SVC_1.0'
dataset_name = 'test'

# --- metrics: records built from the overall-metrics dictionary
metric_records = record_metrics(
    overall_metrics,
    model=model_name,
    dataset=dataset_name,
    user_id='Ada Lovelace',
)

# --- figures: each record is named after the title text read from its plot
example_figure_record = Figure(
    examples_plot._suptitle.get_text(),
    examples_plot,
)
confusion_figure_record = Figure(
    confusion_plot.axes[0].get_title(),
    confusion_plot,
)

pr_curve_caption = (
    "Precision-recall curves are shown for each digit separately.\n"
    "These are calculated by treating each class as a separate\n"
    "binary classification problem. The grey lines are \n"
    "ISO f1 curves - all points on each curve have identical\n"
    "f1 scores.\n"
)
pr_curve_figure_record = Figure(
    pr_curves.axes[0].get_title(),
    figure=pr_curves,
    caption=pr_curve_caption,
)
figure_records = [
    example_figure_record,
    confusion_figure_record,
    pr_curve_figure_record,
]

# --- inputs: a single record describing the whole noisy image
input_description = (
    "8x8 digit image. The original images had 4-bit pixels.\n"
    "Noise (uniformly sampled from real numbers 0-16) was\n"
    "added to each pixel.\n"
)
input_feature_record = Input(
    name='8x8 noisy digit image',
    example=digits.data[0],
    description=input_description,
)

# --- outputs: the predicted digit class
output_description = (
    "Class of the digit corresponding to the highest\n"
    "probability digit according to the model. \n"
    "The probability of each class is also output from the model."
)
output_record = Output(
    name='Digit class',
    example=digits.target_names[0],
    description=output_description,
)

# --- model: bundles the metric, figure and input/output records together
model_record = Model(
    metric_records,
    figure_records,
    [input_feature_record, output_record],
)

# To write the record to a file, uncomment:
# export_record(model_record, 'model_record.json')

## Metric Record

To record a metric you can either record each one manually or ingest a dictionary of metrics.

### Manually entering individual metrics

In [None]:
# Build each metric record by hand. Unlike the bulk helper, every record can
# carry its own user ID and an optional free-text description.
f1_description = (
    "Harmonic mean of precision and recall scores.\n"
    "Ranges from 0-1, with 1 being perfect performance."
)
f1_record = Metric(
    name='f1',
    value=overall_metrics['f1-score'],
    model=model_name,
    dataset=dataset_name,
    user_id='Ada Lovelace',
    description=f1_description,
)

# Recorded under a different user and without a description.
precision_record = Metric(
    name='precision',
    value=overall_metrics['precision'],
    model=model_name,
    dataset=dataset_name,
    user_id='Marvin Minsky',
)

# Recorded without a user ID or a description.
recall_record = Metric(
    name='recall',
    value=overall_metrics['recall'],
    model=model_name,
    dataset=dataset_name,
)

metrics = [f1_record, precision_record, recall_record]

### Convenience to record multiple metrics

**Note:** You cannot customize each metric's model, dataset, or user ID, and you cannot edit the description.

In [None]:
# Record every entry of the metrics dictionary in a single call; the same
# model, dataset and user ID are passed for all of them.
metric_records = record_metrics(
    overall_metrics,
    model=model_name,
    dataset=dataset_name,
    user_id='Ada Lovelace',
)

In [None]:
# Print each recorded metric on its own line for inspection.
for record in metric_records:
    print(record)

## Record figures

Credo can accept a path to an image file or a matplotlib figure. Matplotlib figures are converted to PNG images and saved.


A caption can be included for further description. Including a caption is recommended when the image is not self-explanatory, which is most of the time!

In [None]:
# Wrap each matplotlib figure in a record named after the title text read
# from the plot itself.
example_figure_record = Figure(
    examples_plot._suptitle.get_text(),
    examples_plot,
)
confusion_figure_record = Figure(
    confusion_plot.axes[0].get_title(),
    confusion_plot,
)

# The PR-curve figure additionally gets an explanatory caption.
pr_curve_caption = (
    "Precision-recall curves are shown for each digit separately.\n"
    "These are calculated by treating each class as a separate\n"
    "binary classification problem. The grey lines are \n"
    "ISO f1 curves - all points on each curve have identical\n"
    "f1 scores.\n"
)
pr_curve_figure_record = Figure(
    pr_curves.axes[0].get_title(),
    figure=pr_curves,
    caption=pr_curve_caption,
)

figure_records = [
    example_figure_record,
    confusion_figure_record,
    pr_curve_figure_record,
]

## Input Features

In [None]:
# Describe the model input as one record for the whole image, using the
# first (noisy) sample as the example value.
input_description = (
    "8x8 digit image. The original images had 4-bit pixels.\n"
    "Noise (uniformly sampled from real numbers 0-16) was\n"
    "added to each pixel.\n"
)
input_feature_record = Input(
    name='8x8 noisy digit image',
    example=digits.data[0],
    description=input_description,
)

If many features need to be recorded, the convenience function `create_input_records` can be used. For instance, the code below records each pixel as a separate input.


In [None]:
# Parallel sequences: the per-pixel feature names and the pixel values of the
# first sample, passed together to the bulk helper.
input_args = dict(
    name=digits.feature_names,
    example=digits.data[0],
)
inputs_exhaustive = create_input_records(input_args)

## Outputs

In [None]:
# Describe what the model produces, with one of the digit classes as the
# example value.
output_description = (
    "Class of the digit corresponding to the highest\n"
    "probability digit according to the model. \n"
    "The probability of each class is also output from the model."
)
output_record = Output(
    name='Digit class',
    example=digits.target_names[3],
    description=output_description,
)

## Model

To send all the information, we wrap the records in a model record.

In [None]:
# Bundle the metric records, the figure records and the input/output records
# into a single model record.
model_record = Model(
    metric_records,
    figure_records,
    [input_feature_record, output_record],
)

## Export to Credo AI

The json object of the model record can be created by calling `model_record.jsonify()`. The convenience function `export_record` can be called to export the json record to a file. This file can then be uploaded to Credo AI.

In [None]:
# Destination path for the JSON form of the model record; "XXX.json" is a
# placeholder to be replaced with a real location.
filename = "XXX.json"
export_record(model_record, filename)

All Credo Toolkits also have the ability to send Model Records directly to Credo AI's Governance Platform. Although you generally shouldn't need to call these functions directly, we illustrate how this can be done below. Note that you must know the Model ID associated with the model on the Governance Platform.

In [None]:
from credoai.utils.credo_api_utils import patch_metrics

# "XXX" is a placeholder; substitute the Model ID that the Governance
# Platform associates with this model.
model_id = "XXX"
patch_metrics(model_id, model_record)