# Credit Score prediction with Autopilot

In [None]:
import time
from datetime import datetime
from pathlib import Path

import matplotlib.pyplot as plt
import pandas as pd
import s3fs
import sagemaker as sm
from sagemaker.automl import automl
from sklearn.metrics import (
    average_precision_score,
    f1_score,
    precision_recall_curve,
    roc_auc_score,
    roc_curve,
)

## Import data

<!-- The data is downloaded from Dua, D. and Graff, C. (2019). [UCI Machine Learning Repository](http://archive.ics.uci.edu/ml). Irvine, CA: University of California, School of Information and Computer Science.  -->

In [None]:
# S3 location of the German credit CSV and the name of its label column.
data_url = "s3://sagemaker-sample-files/datasets/tabular/uci_statlog_german_credit_data/german_credit_data.csv"
target_name = "risk"

In [None]:
# Read the CSV directly from its s3:// URL and preview the first rows.
df = pd.read_csv(data_url)
df.head()

In [None]:
# Count how many records fall into each target class.
df[target_name].value_counts()

In [None]:
# value_counts() orders classes by descending frequency, so index[1] is the
# second most frequent label; it is used as the positive ("true") class below.
target_true_value = df[target_name].value_counts().index[1]

### Split `train` and `test` datasets

Reserve a fraction of the records for out-of-band testing, either as a batch transform or as an inference endpoint.

In [None]:
# Hold out a reproducible (seeded) 15% random sample for testing;
# every row not sampled stays in the training set.
df_test = df.sample(frac=0.15, random_state=42)
df_train = df.drop(df_test.index)

In [None]:
# Sanity-check the (rows, columns) sizes of the two splits.
df_train.shape, df_test.shape

## Model Creation: Autopilot

Autopilot is an automated machine learning (commonly referred to as AutoML) solution for tabular datasets.   
We will use the AutoML estimator from SageMaker Python SDK to invoke Autopilot to find the best ML pipeline to train a model on this dataset.

This Notebook is developed to run in Amazon SageMaker Studio. We can get the session, role, and other parameters from the environment using the default values.

In [None]:
# Session-level handles reused by the rest of the notebook: the SageMaker
# session, its low-level client, an S3 filesystem object, the session's
# region, and the execution role.
sm_session = sm.Session()
sm_client = sm_session.sagemaker_client
s3 = s3fs.S3FileSystem()
region = sm_session.boto_region_name
role = sm.get_execution_role()

All the data will be stored in the default bucket, using the folder name as prefix to organize it.

In [None]:
# Every artifact of this run goes to the session's default bucket.
bucket = sm_session.default_bucket()
# Prefix = name of the current working directory + start timestamp
# (minute resolution), which keeps runs started at different times apart.
prefix = f"{Path().resolve().name}-{datetime.now():%Y%m%d%H%M}"
# \033[93m ... \033[0m are ANSI escapes that highlight the values in yellow.
print(
    f"All files will be stored in \033[93m{bucket}\033[0m\n"
    f"with prefix \033[93m{prefix}\033[0m"
)

We start uploading the training dataset to S3.  
Currently Autopilot supports only tabular datasets in CSV format. Either all files should have a header row, or the first file of the dataset, when sorted in alphabetical/lexical order by name, is expected to have a header row.

In [None]:
# Destination of the training CSV inside the run's S3 prefix.
train_dataset_uri = f"s3://{bucket}/{prefix}/train.csv"

# Write the training split to S3 with a header row and without the index column.
df_train.to_csv(train_dataset_uri, index=False)

In the definition of the Autopilot task, we specify the kind of problem, a `BinaryClassification`, and we limit the total number of models considered to 30.

In [None]:
# Autopilot estimator: binary classification on `target_name`, at most 30
# candidate models, artifacts written under the run's S3 prefix.
automl_obj = automl.AutoML(
    role=role,
    target_attribute_name=target_name,
    output_path=f"s3://{bucket}/{prefix}/automl-output",
    problem_type="BinaryClassification",
    max_candidates=30,
    # A job has exactly one objective metric. The previous dict literal
    # repeated the "MetricName" key, so "AUC" was silently dropped and only
    # "F1" took effect; state the effective objective explicitly.
    job_objective={"MetricName": "F1"},
)

We can now launch the Autopilot job by calling the fit method of the AutoML estimator.

In [None]:
# Launch the Autopilot job on the uploaded training data without waiting
# (wait=False) and without streaming logs; progress is polled in a later cell.
automl_obj.fit(inputs=train_dataset_uri, wait=False, logs=False)

SageMaker Autopilot job consists of the following high-level steps :

- Analyzing Data, where the dataset is analyzed and Autopilot comes up with a list of ML pipelines that should be tried out on the dataset. The dataset is also split into train and validation sets.
- Feature Engineering, where Autopilot performs feature transformation on individual features of the dataset as well as at an aggregate level.
- Model Tuning, where the top performing pipeline is selected along with the optimal hyperparameters for the training algorithm (the last stage of the pipeline).

In [None]:
# Show the name of the Autopilot job tracked by the estimator.
automl_obj.current_job_name

We can use the `describe_auto_ml_job` method to check the status of our SageMaker Autopilot job.

In [None]:
# Poll the job once a minute, printing each observed status, and stop at the
# first status other than "InProgress".
status = automl_obj.describe_auto_ml_job()["AutoMLJobStatus"]
print(status)
while status == "InProgress":
    time.sleep(60)
    status = automl_obj.describe_auto_ml_job()["AutoMLJobStatus"]
    print(status)

## Model candidates
The Autopilot job is completed, and we now have a set of models with their associated performance metric.
Let's consider the top 10.

In [None]:
# Retrieve up to 10 candidates, sorted by their final objective metric value.
candidates_list = automl_obj.list_candidates(
    max_results=10, sort_by="FinalObjectiveMetricValue"
)

In [None]:
# Nested candidate fields to keep, mapped to short column names
# (the dict order also fixes the column order of the resulting frame).
column_names = {
    "CandidateName": "candidate_name",
    "FinalAutoMLJobObjectiveMetric.Value": "metric_value",
    "FinalAutoMLJobObjectiveMetric.MetricName": "metric_name",
}

# Flatten the candidate descriptions, keep the selected fields, rename them.
candidates_frame = pd.json_normalize(candidates_list)
models = candidates_frame[list(column_names)].rename(columns=column_names)
models

## Evaluate Top Candidates
We can start running inference on the top candidates. In SageMaker, you can perform inference in two ways: online endpoint inference or batch transform inference. Let's focus on batch transform inference.

We'll perform batch transform on our top candidates and analyze some custom metrics from our top candidates' prediction results.

In [None]:
# Number of leading entries of candidates_list to evaluate with batch transform.
top_n_candidates = 5

We'll use the  test dataset we set aside previously, `df_test`. We need to upload this data to S3.   
For Batch prediction jobs, the input data must be without headers, and the order and number of features columns must match that of the training dataset.

In [None]:
# Upload the held-out features for batch inference: the target column is
# dropped and the file is written without header and without index.
test_dataset_uri = f"s3://{bucket}/{prefix}/test.csv"
df_test.drop(columns=target_name).to_csv(test_dataset_uri, index=False, header=False)

In [None]:
def create_transformers(
    candidate: dict, automl_instance: automl.AutoML, s3_transform_output_path, **kwarg
):
    """Create a batch transformer from an AutoML model candidate.

    Args:
        candidate: Candidate description; must contain the key
            ``"CandidateName"``, which is used both as the model name and as
            the last component of the output location.
        automl_instance: AutoML estimator whose ``create_model`` builds the
            model for the candidate.
        s3_transform_output_path: S3 URI prefix under which the results are
            written. It may be given with or without a trailing ``/``.
        **kwarg: Extra keyword arguments forwarded unchanged to
            ``automl_instance.create_model``.

    Returns:
        The object returned by ``model.transformer(...)``, configured for one
        ``ml.m5.xlarge`` instance, line-assembled output, and the output path
        ``<s3_transform_output_path>/<CandidateName>/``.
    """
    candidate_name = candidate["CandidateName"]
    model = automl_instance.create_model(
        name=candidate_name, candidate=candidate, **kwarg
    )

    # Normalise the separator so a prefix without a trailing slash does not
    # get glued directly onto the candidate name.
    output_path = f"{s3_transform_output_path.rstrip('/')}/{candidate_name}/"

    return model.transformer(
        instance_count=1,
        instance_type="ml.m5.xlarge",
        assemble_with="Line",
        output_path=output_path,
    )

For classification problem types, the inference containers generated by SageMaker Autopilot allow you to select the response content for predictions. Valid inference response content are defined below for binary classification and multiclass classification problem types.

- 'predicted_label' - predicted class
- 'probability' - In binary classification, the probability that the result is predicted as the second or True class in the target column. In multiclass classification, the probability of the winning class.
- 'labels' - list of all possible classes
- 'probabilities' - list of all probabilities for all classes (order corresponds with 'labels')

By default the inference containers are configured to generate the 'predicted_label'.

In this example we use ‘predicted_label’ and ‘probability’ to demonstrate how to evaluate the models with custom metrics. 
For the German Credit Score dataset, the second or True class is the value `2`.

In [None]:
# Response fields requested from the inference containers, in this order.
inference_response_keys = ["predicted_label", "probability"]

We are now ready to create the transformers

In [None]:
# Common S3 prefix for all batch results; create_transformers appends the
# candidate name to it for each transformer.
batch_output = f"s3://{bucket}/{prefix}/inference-results/"
# One transformer per candidate, for the first `top_n_candidates` entries.
transformers_list = [
    create_transformers(
        c, automl_obj, batch_output, inference_response_keys=inference_response_keys
    )
    for c in candidates_list[:top_n_candidates]
]

We can now start the transform jobs.

In [None]:
def batch_predict(transformer, input_dataset_uri):
    """Launch a non-blocking Batch Transform job on a CSV dataset.

    Args:
        transformer: Object exposing ``transform(...)`` and the
            ``_current_job_name`` attribute.
        input_dataset_uri: Location of the CSV input, passed as ``data``.

    Returns:
        The transformer's current job name when the job was started, or
        ``None`` when starting it raised an exception (the error is printed
        instead of being propagated).
    """
    try:
        transformer.transform(
            data=input_dataset_uri,
            data_type="S3Prefix",
            content_type="text/csv",
            split_type="Line",
            wait=False,
        )
    except Exception as e:
        # Deliberately broad: this also covers failures caused by
        # account-level service limits, so one failed launch does not stop
        # the remaining ones.
        print(f"{transformer._current_job_name} failed with error {e}")
        return None
    else:
        print(f"Starting transform job {transformer._current_job_name}")
        return transformer._current_job_name


# Remember when the launches began: the monitoring cell uses this timestamp
# to list only transform jobs created afterwards.
start_time = time.time()

# Start one job per transformer and keep only the names of the jobs that
# were actually started (batch_predict returns None on failure).
batch_predictions_names = []
for transformer in transformers_list:
    job_name = batch_predict(transformer, test_dataset_uri)
    if job_name is not None:
        batch_predictions_names.append(job_name)

Now we wait for our transform jobs to finish.

In [None]:
# Names of the jobs we launched, as a set for fast membership tests.
tracked_jobs = set(batch_predictions_names)
# Paginate the listing: a single list_transform_jobs call returns only one
# page of results, which could miss some of our jobs.
transform_jobs_paginator = sm_client.get_paginator("list_transform_jobs")

while True:
    # Count our jobs that are still running. Iterating over the raw summaries
    # also works when no job at all was created after start_time (an empty
    # DataFrame would have no "TransformJobName" column and raise KeyError).
    num_transform_jobs = sum(
        1
        for page in transform_jobs_paginator.paginate(CreationTimeAfter=start_time)
        for job in page["TransformJobSummaries"]
        if job["TransformJobName"] in tracked_jobs
        and job["TransformJobStatus"] == "InProgress"
    )

    print(
        f"{num_transform_jobs} out of {len(batch_predictions_names)} transform jobs are running."
    )
    if num_transform_jobs == 0:
        break
    time.sleep(30)

## Evaluate the Inference Results

Now we analyze our inference results. The batch transform results are stored in S3, we load them into a dictionary, using the model name as key.

In [None]:
# Load each transformer's result file (headerless CSV) keyed by model name,
# skipping transformers whose output file does not exist in S3.
predictions_dict = {}
for transformer in transformers_list:
    result_uri = transformer.output_path + "test.csv.out"
    if s3.exists(result_uri):
        predictions_dict[transformer.model_name] = pd.read_csv(
            transformer.output_path + "test.csv.out", header=None
        )

In [None]:
# No-op placeholder cell.
pass

Define an array of the ground truth labels for convenience.

In [None]:
# Boolean ground truth for the test split: True where the target equals the positive class.
labels = df_test[target_name] == target_true_value

We can now calculate two common metrics for classification problems, the *Area Under the Receiver Operating Characteristic Curve*, or `ROC AUC`, and the *Average Precision*, or `AP`, from the prediction probabilities, and the test `F1` score from the predicted label.

We also include the `F1` score pulled from the Autopilot candidate description.

In [None]:
# Per-candidate test metrics. In each prediction frame, column 0 is compared
# with the positive class to get a boolean prediction, and column 1 is used
# as the score for the ranking metrics.
metrics_by_candidate = {}
for candidate, prediction in predictions_dict.items():
    metrics_by_candidate[candidate] = {
        "AUC": roc_auc_score(labels, prediction[1]),
        "AP": average_precision_score(labels, prediction[1]),
        "F1_test": f1_score(labels, prediction[0] == target_true_value),
    }
models_metrics = pd.DataFrame.from_dict(metrics_by_candidate, orient="index")

# Append the objective metric value reported by Autopilot, matched on candidate name.
autopilot_metric = models.set_index("candidate_name").metric_value.rename(
    "F1_autopilot"
)
models_metrics = models_metrics.join(autopilot_metric)
models_metrics

## Plots

In [None]:
# ROC curve per candidate; roc_curve returns (fpr, tpr, thresholds).
roc_curve_dict = {
    candidate: roc_curve(labels, prediction[1])
    for candidate, prediction in predictions_dict.items()
}

plt.figure(num=None, figsize=(16, 9), dpi=160, facecolor="w", edgecolor="k")
# Plain loop instead of a list comprehension: plotting is a side effect and
# the list of returned line objects was never used.
for candidate, (fpr, tpr, _thresholds) in roc_curve_dict.items():
    plt.plot(fpr, tpr, label=candidate)
plt.xlabel("False Positive Rate")
plt.ylabel("True Positive Rate")
plt.title("ROC Curve")
plt.legend(loc="lower right");

In [None]:
# Precision-recall curve per candidate; precision_recall_curve returns
# (precision, recall, thresholds).
precision_recall_dict = {
    candidate: precision_recall_curve(labels, prediction[1])
    for candidate, prediction in predictions_dict.items()
}

plt.figure(num=None, figsize=(16, 9), dpi=160, facecolor="w", edgecolor="k")
# Plain loop instead of a list comprehension: plotting is a side effect and
# the list of returned line objects was never used.
for candidate, (precision, recall, _thresholds) in precision_recall_dict.items():
    plt.plot(recall, precision, label=candidate)
plt.xlabel("Recall")
plt.ylabel("Precision")
plt.title("Precision-Recall Curve")
plt.legend();

## Bias and Explainability

In [None]:
from sagemaker import clarify

### Bias - Age

In [None]:
# Clarify processor (one ml.m5.xlarge instance) reused by both the bias and
# the explainability runs below.
clarify_processor = clarify.SageMakerClarifyProcessor(
    role=role,
    instance_count=1,
    instance_type="ml.m5.xlarge",
    sagemaker_session=sm_session,
)

In [None]:
# Output location of the bias report; written as an f-string for consistency
# with the other S3 paths in this notebook (same resulting value).
bias_report_output_path = f"s3://{bucket}/{prefix}/clarify-bias"
# The bias analysis reads the training CSV; headers are taken from df_train
# and the label column is `target_name`.
bias_data_config = clarify.DataConfig(
    s3_data_input_path=train_dataset_uri,
    s3_output_path=bias_report_output_path,
    label=target_name,
    headers=df_train.columns.to_list(),
    dataset_type="text/csv",
)

In [None]:
# Use the first candidate in candidates_list for the Clarify analyses.
selected_model_name = candidates_list[0]["CandidateName"]

In [None]:
# Build a model object for the first candidate, named after the candidate and
# configured with the same response keys as the batch transformers.
# NOTE(review): `model_selected` is not referenced again in the visible cells;
# the Clarify ModelConfig refers to the model by name only.
model_selected = automl_obj.create_model(
    name=selected_model_name,
    candidate=candidates_list[0],
    inference_response_keys=inference_response_keys,
)

In [None]:
# Model settings for Clarify: the model is identified by name, hosted on one
# ml.m5.xlarge instance, and exchanges CSV in both directions.
model_config = clarify.ModelConfig(
    model_name=selected_model_name,
    instance_type="ml.m5.xlarge",
    instance_count=1,
    accept_type="text/csv",
    content_type="text/csv",
)

In [None]:
# label=0: presumably the position of `predicted_label` in the model output
# (first entry of inference_response_keys) -- TODO confirm against Clarify docs.
predictions_config = clarify.ModelPredictedLabelConfig(label=0)

In [None]:
# Bias analysis setup: the positive label value is `target_true_value`, the
# facet is the "age" column with threshold 35, and results are grouped by
# the "status_sex" column.
bias_config = clarify.BiasConfig(
    label_values_or_threshold=[target_true_value],
    facet_name="age",
    facet_values_or_threshold=[35],
    group_name="status_sex",
)

In [None]:
# Run the bias analysis with all pre-training and all post-training methods,
# using the data, bias, model and predicted-label configurations defined above.
clarify_processor.run_bias(
    data_config=bias_data_config,
    bias_config=bias_config,
    model_config=model_config,
    model_predicted_label_config=predictions_config,
    pre_training_methods="all",
    post_training_methods="all",
)

In [None]:
# Tell the user where the bias report was written (message grammar fixed and
# stray leading space removed).
print(
    f"The bias reports in HTML, Jupyter notebook, and PDF formats are at {bias_report_output_path}"
)

### Prediction Explanation

In [None]:
# SHAP settings: the baseline is the first training row (features only,
# values cast to str), 15 samples, mean-absolute aggregation, and local
# (per-record) SHAP values are saved.
shap_config = clarify.SHAPConfig(
    baseline=[df_train.drop(columns=target_name).astype(str).iloc[0].values.tolist()],
    num_samples=15,
    agg_method="mean_abs",
    save_local_shap_values=True,
)

# The explainability run reads the same training CSV as the bias run but
# writes to its own output prefix.
explainability_output_path = f"s3://{bucket}/{prefix}/clarify-explainability"
explainability_data_config = clarify.DataConfig(
    s3_data_input_path=train_dataset_uri,
    s3_output_path=explainability_output_path,
    label=target_name,
    headers=df_train.columns.to_list(),
    dataset_type="text/csv",
)

In [None]:
# Run the SHAP explainability analysis.
# model_scores=1: presumably the position of `probability` in the model
# output (second entry of inference_response_keys) -- TODO confirm.
clarify_processor.run_explainability(
    data_config=explainability_data_config,
    model_config=model_config,
    explainability_config=shap_config,
    model_scores=1,
)

In [None]:
# Load the per-record SHAP values written by the explainability run.
shap_values_uri = explainability_output_path + "/explanations_shap/out.csv"
local_explanations_out = pd.read_csv(shap_values_uri)

# Strip the "_label0" marker from the column names to recover the feature names.
feature_names = [c.replace("_label0", "") for c in local_explanations_out.columns]
local_explanations_out.columns = feature_names

# Inspect a single record: print whether its SHAP values sum to a positive
# number, print the matching training row, and plot the per-feature values.
selected_example = 111
example_explanation = local_explanations_out.iloc[selected_example]
print(
    "Example number:",
    selected_example,
    "\nwith model prediction:",
    sum(example_explanation) > 0,
)
print("\nFeature values -- Label", df_train.iloc[selected_example])
example_explanation.plot(
    kind="bar",
    title="Local explanation for the example number " + str(selected_example),
    rot=90,
)

## Cleanup

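Remove all created models. **Warning:** the commented-out command below deletes every model returned by `list_models()` for the account and region, not only the models created by this notebook; review the list first.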

In [None]:
# [sm_client.delete_model(ModelName=k['ModelName']) for k in sm_client.list_models()['Models']]

In [None]:
# [k['ModelName'] for k in sm_client.list_models()['Models']]

Remove all files and artifacts

In [None]:
# s3.rm(f"s3://{bucket}/{prefix}", recursive=True)

In [None]:
# !black-nb .