# Example using the Amazon Fraud Detector SDK for Python
Demonstration of how the SDK for Python simplifies using the Amazon Fraud Detector service.

In [None]:
# Set authentication to AWS via temporary access tokens
import json
import os
from getpass import getpass

*Either* set up authentication to your AWS cloud environment before starting this notebook, *or* use the next section to enter the access keys and session token of the AWS cloud account to run it in.

In [None]:
# Optional: prompt (without echo) for AWS access keys and a session token,
# then export them as the AWS_* environment variables for this process.
ACCESS_KEY = getpass("Enter the AWS Access Key:")
SECRET_KEY = getpass("Enter the AWS Secret Key:")
SESSION_TOKEN = getpass("Enter the AWS Session Token to use:")

os.environ.update(
    {
        'AWS_ACCESS_KEY_ID': ACCESS_KEY,
        'AWS_SECRET_ACCESS_KEY': SECRET_KEY,
        'AWS_SESSION_TOKEN': SESSION_TOKEN,
    }
)

In [None]:
# set the import path relative to the notebook examples location
import sys
sys.path.append("../")

In [None]:
!pip install frauddetector

### Amazon Fraud Detector SDK for Python Imports

In [None]:
import pandas as pd
from frauddetector import frauddetector, profiler

### Global variables

In [None]:
# Notebook-wide settings. Replace each placeholder value with your own before
# running the cells that follow.
INPUT_BUCKET = "YOUR_S3_BUCKET_FOR_TRAINING" # S3 bucket holding the training data
DETECTOR_NAME = "YOUR_DETECTOR_NAME"
MODEL_NAME = "YOUR_MODEL_NAME"
ENTITY_TYPE = "YOUR_ENTITY_TYPE" # e.g. "transaction"
EVENT_TYPE = "YOUR_EVENT_TYPE" # e.g. "credit-card-transaction"
MODEL_TYPE = "ONLINE_FRAUD_INSIGHTS"
MODEL_VERSION = "1" # leave this as one if you start right at the beginning
DETECTOR_VERSION = "1" # leave this as one if you start right at the beginning
REGION = "THE_REGION" # AWS region name, e.g. "us-east-1"

# Profile Data with `Profiler()`; generate Data Schema, Variables Definition, Label Definitions
A representative set of data needs to be loaded into a Pandas data-frame for the profiler to generate the FraudDetector definitions from.

In [None]:
# Keep the Profiler instance under its own name: assigning it to `profiler`
# would rebind the imported `profiler` module, so re-running this cell would
# fail with AttributeError on `profiler.Profiler`.
data_profiler = profiler.Profiler()
df = pd.read_csv("training_data/registration_data_20K_minimum.csv")
# The profiler derives the three inputs that `fit()` takes from the sample data.
data_schema, variables, labels = data_profiler.get_frauddetector_inputs(data=df)

In [None]:
variables

In [None]:
labels

# Instantiate a `FraudDetector()` with variables and labels

In [None]:
# Build the detector object from the notebook-wide settings, grouped by
# what they describe: region, event/entity, model, detector.
detector = frauddetector.FraudDetector(
    region=REGION,
    entity_type=ENTITY_TYPE,
    event_type=EVENT_TYPE,
    model_name=MODEL_NAME,
    model_type=MODEL_TYPE,
    model_version=MODEL_VERSION,
    detector_name=DETECTOR_NAME,
    detector_version=DETECTOR_VERSION,
)


# Train a Model - `fit()` stage
Supply an IAM role ARN as well as the training data location in S3.  The role should have AmazonFraudDetectorFullAccessPolicy attached to it.

In [None]:
# https://docs.aws.amazon.com/frauddetector/latest/ug/security-iam.html
# Placeholder: replace with the ARN of your own IAM role. AWS account IDs are
# 12 digits long, so the example uses a 12-digit ID to stay a well-formed ARN.
role_arn = "arn:aws:iam::999999999999:role/MyFraudDetectorRole"

In [None]:
# Train the model. The training CSV is read from the bucket configured in
# INPUT_BUCKET (global-variables cell) instead of a second hard-coded bucket
# placeholder, so the bucket only has to be set in one place.
detector.fit(
    data_schema=data_schema,
    data_location=f"s3://{INPUT_BUCKET}/training/registration_data_20K_minimum.csv",
    role=role_arn,
    variables=variables,
    labels=labels,
)

# Create a Detector - `activate()` stage
Provide a list of outcomes to create an active model associated with FraudDetector "outcomes", ready for Amazon Fraud Detector rules to be associated with the outcomes.

In [None]:
# get the model status - should be TRAINING_COMPLETE before starting the activate stage.
print(detector.model_status)

In [None]:
print(detector.model_version)

In [None]:
# Outcome name -> description; passed to `activate()` as a list of
# (name, description) tuples in this order.
outcomes = list({
    "review_outcome": "Start a review process workflow",
    "verify_outcome": "Sideline event for review",
    "approve_outcome": "Approve the event",
}.items())

In [None]:
detector.activate(outcomes_list=outcomes)

In [None]:
# get the model status - should be ACTIVE after the activate stage (will be ACTIVATE_IN_PROGRESS for some time)
print(detector.model_status)

# Deploy a Fraud Detector - `deploy()` stage
Provide a list of rules that map to rule-outcomes to deploy the detector-version ready for fraud prediction actions.
Each rule item in the list is a dictionary of
```
{
    'ruleId': 'name_of_rule',
    'expression': 'rule_expression_for_evaluating_rule', 
    'outcomes': [list_of, outcomes_for, matching_rule]
}
```
See documentation here: https://docs.aws.amazon.com/frauddetector/latest/ug/rule-language-reference.html

In [None]:
print(detector.model_name)

The model name with `_insightscore` appended to it is used to reference the output value from the model

In [None]:
# The model's score is referenced in rule expressions as
# "$<model name>_insightscore". Derive it from the detector's model name so the
# rules stay valid for whatever MODEL_NAME was configured, instead of
# hard-coding "registration_model".
score_variable = f"${detector.model_name}_insightscore"

# Three score bands, each mapped to one of the outcomes created in the
# `activate()` stage.
rules = [
    {
        'ruleId': 'high_fraud_risk',
        'expression': f'{score_variable} > 900',
        'outcomes': ['verify_outcome'],
    },
    {
        'ruleId': 'low_fraud_risk',
        'expression': f'{score_variable} <= 900 and {score_variable} > 700',
        'outcomes': ['review_outcome'],
    },
    {
        'ruleId': 'no_fraud_risk',
        'expression': f'{score_variable} <= 700',
        'outcomes': ['approve_outcome'],
    },
]

In [None]:
response = detector.deploy(rules_list=rules)

In [None]:
# Optional
print(response)

# Get Predictions - `predict()` stage
Use the `predict()` method to predict for a single event, passed in as a dictionary, or the `batch_predict()` method to predict for a batch of events, passed in as a list of dictionaries or as a dataframe.

**Example** Single event that triggers the `no_fraud_risk` rule resulting in an `approve_outcome`

In [None]:
# Variables of the single event to score. The IP address is written in
# dotted-decimal IPv4 notation; the colon-separated form is not a valid address.
event_variables = {
    'email_address': 'johndoe@gmail.com',
    'ip_address': '82.24.61.42',
}
detector.predict(
    event_timestamp='2021-11-14T12:18:21Z',
    event_variables=event_variables)

#### Batch predict with list of events

In [None]:
# Batch of events as a list of dictionaries; each carries its own timestamp
# under the 'EVENT_TIMESTAMP' key. IP addresses use dotted-decimal IPv4
# notation; the colon-separated form is not a valid address.
events = [
    {
        'EVENT_TIMESTAMP': '2021-11-14T12:18:21Z',
        'email_address': 'johndoe@gmail.com',
        'ip_address': '82.24.61.42',
    },
    {
        'EVENT_TIMESTAMP': '2021-11-15T11:18:20Z',
        'email_address': 'janedoe@yahoo.com',
        'ip_address': '82.24.61.41',
    },
]
detector.batch_predict(
    timestamp='EVENT_TIMESTAMP',
    events=events)

#### Batch predict with Pandas DataFrame

In [None]:
# Score the first 100 observations of the example DataFrame in one call.
events = df.head(100)
detector.batch_predict(timestamp='EVENT_TIMESTAMP', df=events)

# Destroy resources

In [None]:
# NOTE(review): the Amazon Fraud Detector DeleteDetector API requires all
# detector versions and rule versions to be deleted first. Confirm whether
# this SDK call handles that, or run it only after the rule and
# detector-version cells below.
detector.delete_detector()

In [None]:
detector.rules

In [None]:
rule_ids = [r['ruleId'] for r in detector.rules]

In [None]:
print(rule_ids)

In [None]:
detector.delete_rules(detector.rules)

In [None]:
detector.rules

In [None]:
detector.delete_detector_version()

In [None]:
#de-activate the model
detector.set_model_version_inactive()

In [None]:
print(detector.model_name)

In [None]:
# get the model status - should be INACTIVE or TRAINING_COMPLETE before deleting it
print(detector.model_status)

In [None]:
#delete the model
detector.delete_model()

In [None]:
detector.delete_detector_version()

In [None]:
# Calls delete_detector_version on `detector.fd` directly, passing the
# detector's name and version, instead of going through
# detector.delete_detector_version() as the earlier cells do.
# `detector.fd` is presumably the underlying service client - TODO confirm.
detector.fd.delete_detector_version(detectorId=detector.detector_name, detectorVersionId=detector.detector_version)