# Predicting Credit Card Fraud

In this activity, you will gain hands-on experience in deploying a machine learning model into the cloud to predict whether or not a credit card transaction is fraudulent.

## Instructions



In [1]:
# Initial imports
import pandas as pd
from pathlib import Path
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

### Load the Data into Pandas

> **Note:** Since the features are all numerical, there is no need to perform any data encoding tasks.

In [2]:
# Load the CSV data into a DataFrame
# YOUR CODE HERE!

# Display sample data
# YOUR CODE HERE!

Unnamed: 0,Time,V1,V2,V3,V4,V5,V6,V7,V8,V9,...,V21,V22,V23,V24,V25,V26,V27,V28,Amount,Class
0,24803.0,1.139012,0.017618,1.268095,1.158486,-0.946785,-0.526911,-0.450542,-0.169922,1.91078,...,-0.042502,0.359293,-0.084736,0.75118,0.454871,0.445367,-0.018297,0.022022,29.99,0
1,3745.0,-0.873846,0.244037,2.00592,-1.767428,-0.131642,-0.54834,0.010448,0.058545,-0.243056,...,-0.189932,-0.542487,-0.121148,-0.11388,0.304427,-0.582626,0.223403,0.109915,25.74,0
2,16550.0,0.942873,0.217302,1.567674,2.692986,0.149267,2.244082,-0.93481,0.639751,1.264804,...,-0.108693,0.277727,0.175825,-1.014114,-0.015221,0.04444,0.092609,0.016055,0.0,0
3,1606.0,1.239456,0.225786,0.404418,0.422563,-0.214612,-0.464966,-0.011435,-0.067165,-0.27119,...,-0.221057,-0.650838,0.075829,0.006808,0.24913,0.095922,-0.03006,0.006747,2.69,0
4,20780.0,1.210441,-0.395225,0.842773,-0.638103,-0.866254,-0.120016,-0.798781,0.097261,3.265186,...,-0.162858,-0.004778,-0.037364,-0.347863,0.417151,-0.671253,0.074868,0.021739,11.85,0


## Preprocess Data

### Create the features and target sets

The features set will be all the columns from the original DataFrame except the `Class` column that constitutes the target set.

In [3]:
# Creating the features set X
# YOUR CODE HERE!

# Display sample data
# YOUR CODE HERE!

Unnamed: 0,Time,V1,V2,V3,V4,V5,V6,V7,V8,V9,...,V20,V21,V22,V23,V24,V25,V26,V27,V28,Amount
0,24803.0,1.139012,0.017618,1.268095,1.158486,-0.946785,-0.526911,-0.450542,-0.169922,1.91078,...,-0.044323,-0.042502,0.359293,-0.084736,0.75118,0.454871,0.445367,-0.018297,0.022022,29.99
1,3745.0,-0.873846,0.244037,2.00592,-1.767428,-0.131642,-0.54834,0.010448,0.058545,-0.243056,...,0.275532,-0.189932,-0.542487,-0.121148,-0.11388,0.304427,-0.582626,0.223403,0.109915,25.74
2,16550.0,0.942873,0.217302,1.567674,2.692986,0.149267,2.244082,-0.93481,0.639751,1.264804,...,-0.296585,-0.108693,0.277727,0.175825,-1.014114,-0.015221,0.04444,0.092609,0.016055,0.0
3,1606.0,1.239456,0.225786,0.404418,0.422563,-0.214612,-0.464966,-0.011435,-0.067165,-0.27119,...,-0.036046,-0.221057,-0.650838,0.075829,0.006808,0.24913,0.095922,-0.03006,0.006747,2.69
4,20780.0,1.210441,-0.395225,0.842773,-0.638103,-0.866254,-0.120016,-0.798781,0.097261,3.265186,...,-0.225921,-0.162858,-0.004778,-0.037364,-0.347863,0.417151,-0.671253,0.074868,0.021739,11.85


In [4]:
# Creating the target set y
# YOUR CODE HERE!

# Display sample data
# YOUR CODE HERE!

0    0
1    0
2    0
3    0
4    0
Name: Class, dtype: int64

### Split the features and target sets into training and testing datasets

In [5]:
# Split the preprocessed data into training and testing datasets
# YOUR CODE HERE!

### Use Scikit-Learn’s `StandardScaler` to scale the features data

In [6]:
# Create a StandardScaler instance
# YOUR CODE HERE!

# Fit the StandardScaler
# YOUR CODE HERE!

# Scale the data
# YOUR CODE HERE!

## Create a Machine Learning Model in SageMaker Studio

### Importing the required libraries

In [7]:
# Import Amazon SageMaker libraries and modules
import sagemaker
import sagemaker.amazon.common as smac
from sagemaker import get_execution_role
from sagemaker.predictor import csv_serializer, json_deserializer

# Import AWS Python SDK
import boto3

# Import support libraries
import io
import os
import json
import numpy as np

### Configure general settings for the SageMaker model

In [8]:
# Set the S3 bucket name
bucket = # YOUR CODE HERE!

In [9]:
# Set a prefix for the data files
prefix = # YOUR CODE HERE!

In [10]:
# Set the IAM execution role
role = # YOUR CODE HERE!

### Upload the training and testing data to Amazon S3

#### Encode and upload the training data

In [11]:
# Encode the training data as Protocol Buffer (written into an in-memory buffer)
buf = io.BytesIO()
vectors = np.array(X_train).astype("float32")
labels = np.array(y_train).astype("float32")
smac.write_numpy_to_dense_tensor(buf, vectors, labels)
# Rewind the buffer so the upload reads it from the first byte
buf.seek(0)

# Upload encoded training data to Amazon S3.
# S3 object keys always use "/" as the separator, so the key is built with
# string formatting rather than os.path.join, whose separator depends on the OS.
# Building the key once also guarantees the printed URI matches the uploaded object.
key = "linear_train.data"
train_object_key = "{}/train/{}".format(prefix, key)
boto3.resource("s3").Bucket(bucket).Object(train_object_key).upload_fileobj(buf)
s3_train_data = "s3://{}/{}".format(bucket, train_object_key)
print("Training data uploaded to: {}".format(s3_train_data))

Training data uploaded to: s3://fintech-bootcamp-activities-jams-2021-02-11/credit-card-risk/train/linear_train.data


#### Encode and upload the testing data

In [12]:
# Encode the testing data as Protocol Buffer (written into an in-memory buffer)
buf = io.BytesIO()
vectors = np.array(X_test).astype("float32")
labels = np.array(y_test).astype("float32")
smac.write_numpy_to_dense_tensor(buf, vectors, labels)
# Rewind the buffer so the upload reads it from the first byte
buf.seek(0)

# Upload encoded testing data to Amazon S3.
# S3 object keys always use "/" as the separator, so the key is built with
# string formatting rather than os.path.join, whose separator depends on the OS.
# Building the key once also guarantees the printed URI matches the uploaded object.
key = "linear_test.data"
test_object_key = "{}/test/{}".format(prefix, key)
boto3.resource("s3").Bucket(bucket).Object(test_object_key).upload_fileobj(buf)
s3_test_data = "s3://{}/{}".format(bucket, test_object_key)
print("Testing data uploaded to: {}".format(s3_test_data))

Testing data uploaded to: s3://fintech-bootcamp-activities-jams-2021-02-11/credit-card-risk/test/linear_test.data


### Specify the Amazon SageMaker session to use

In [13]:
# Save the current session in a variable
sess = # YOUR CODE HERE!

### Create an instance of the machine learning model

In [14]:
# Import the get_image_uri function from the sagemaker library
from sagemaker.amazon.amazon_estimator import get_image_uri

In [15]:
# Import the container image
# YOUR CODE HERE!

The method get_image_uri has been renamed in sagemaker>=2.
See: https://sagemaker.readthedocs.io/en/stable/v2.html for details.
Defaulting to the only supported framework/algorithm version: 1. Ignoring framework/algorithm version: 1.


In [16]:
# Create an instance of the machine learning model
# YOUR CODE HERE!

train_instance_count has been renamed in sagemaker>=2.
See: https://sagemaker.readthedocs.io/en/stable/v2.html for details.
train_instance_type has been renamed in sagemaker>=2.
See: https://sagemaker.readthedocs.io/en/stable/v2.html for details.


### Define Linear Learner hyperparameters

In [17]:
# Get the dimension of the feature-input vector
# YOUR CODE HERE!

In [18]:
# Define linear learner hyperparameters
# YOUR CODE HERE!

## Fit the Machine Learning Model in SageMaker Studio

Use the `fit` function of the model to train it using the training and testing data stored in the Amazon S3 bucket.

In [19]:
# Fitting the linear learner model
# YOUR CODE HERE!

2021-02-24 18:40:09 Starting - Starting the training job...
2021-02-24 18:40:32 Starting - Launching requested ML instancesProfilerReport-1614192008: InProgress
......
2021-02-24 18:41:33 Starting - Preparing the instances for training.........
2021-02-24 18:42:54 Downloading - Downloading input data...
2021-02-24 18:43:38 Training - Training image download completed. Training in progress..[34mDocker entrypoint called with argument(s): train[0m
[34mRunning default environment configuration script[0m
[34m[02/24/2021 18:43:44 INFO 139778145212224] Reading default configuration from /opt/amazon/lib/python2.7/site-packages/algorithm/resources/default-input.json: {u'loss_insensitivity': u'0.01', u'epochs': u'15', u'feature_dim': u'auto', u'init_bias': u'0.0', u'lr_scheduler_factor': u'auto', u'num_calibration_samples': u'10000000', u'accuracy_top_k': u'3', u'_num_kv_servers': u'auto', u'use_bias': u'true', u'num_point_for_scaler': u'10000', u'_log_level': u'info', u'quantile': u'0.5', 

## Make Predictions With the Model in SageMaker Studio

### Deploy the model

In [20]:
# Deploy an instance of the linear learner model to create a predictor
# YOUR CODE HERE!

-----------------!

### Setting configurations for the predictor

In [21]:
# Linear predictor configurations
# The module-level `csv_serializer` / `json_deserializer` objects are deprecated
# in sagemaker>=2 (the SDK prints a rename warning when they are used), so the
# serializer classes that replaced them are used instead.
from sagemaker.serializers import CSVSerializer
from sagemaker.deserializers import JSONDeserializer

# Send request payloads as CSV and parse the endpoint's JSON response
linear_predictor.serializer = CSVSerializer()
linear_predictor.deserializer = JSONDeserializer()

### Make Predictions Using Testing Data

#### Use the `predict` function of the predictor to make predictions using the testing data stored in Pandas

In [22]:
# Making some predictions using the test data
# YOUR CODE HERE!

The csv_serializer has been renamed in sagemaker>=2.
See: https://sagemaker.readthedocs.io/en/stable/v2.html for details.
The json_deserializer has been renamed in sagemaker>=2.
See: https://sagemaker.readthedocs.io/en/stable/v2.html for details.


In [23]:
# Display sample predictions
# YOUR CODE HERE!

[{'score': 0.0006074642296880484, 'predicted_label': 0},
 {'score': 0.0003968806122429669, 'predicted_label': 0},
 {'score': 0.0009853204246610403, 'predicted_label': 0}]

#### Creating a list of the predicted values

In [24]:
# Extract the predicted label of every prediction returned by the endpoint,
# cast each one to an unsigned 8-bit integer, and collect them into a NumPy array
y_predictions = np.array(
    [
        np.uint8(prediction["predicted_label"])
        for prediction in model_predictions["predictions"]
    ]
)

# Show the first ten predicted labels
y_predictions[:10]

array([0, 0, 0, 0, 1, 0, 0, 0, 0, 0], dtype=uint8)

## Evaluate the Machine Learning Model

Use the `classification_report` function from Scikit-learn to assess how well the model predicts fraudulent credit card transactions.

In [25]:
# Import the classification report from Scikit-learn
from sklearn.metrics import classification_report

In [26]:
# Display classification report
# YOUR CODE HERE!

Classification report
              precision    recall  f1-score   support

           0       0.98      1.00      0.99      4877
           1       0.56      0.17      0.25       121

    accuracy                           0.98      4998
   macro avg       0.77      0.58      0.62      4998
weighted avg       0.97      0.98      0.97      4998



## Delete the Endpoint to Avoid Additional AWS Resource Usage and Billing

Make sure that you delete all the Amazon SageMaker endpoints to prevent unwanted charges.

In [27]:
# Delete Amazon SageMaker endpoint
# The predictor's `endpoint` attribute is deprecated in sagemaker>=2 (the SDK
# prints a rename warning when it is read); `endpoint_name` is its replacement.
sagemaker.Session().delete_endpoint(linear_predictor.endpoint_name)

The endpoint attribute has been renamed in sagemaker>=2.
See: https://sagemaker.readthedocs.io/en/stable/v2.html for details.
