In [9]:
# Import libraries

import pandas as pd 
import json 
import boto3 
import pathlib 
import io 
import sagemaker 

from sagemaker.deserializers import CSVDeserializer 
from sagemaker.serializers import CSVSerializer 


from sagemaker.xgboost.estimator import XGBoost
from sagemaker.sklearn.processing import SKLearnProcessor
from sagemaker.processing import (ProcessingInput,ProcessingOutput,ScriptProcessor)
from sagemaker.inputs import TrainingInput

from sagemaker.workflow.pipeline import Pipeline 
from sagemaker.workflow.steps import (ProcessingStep,TrainingStep,CreateModelStep)
from sagemaker.workflow.check_job_config import CheckJobConfig
from sagemaker.workflow.parameters import (ParameterInteger,ParameterFloat,ParameterString,ParameterBoolean)
from sagemaker.workflow.clarify_check_step import (ModelBiasCheckConfig,ClarifyCheckStep,ModelExplainabilityCheckConfig)
from sagemaker.workflow.step_collections import RegisterModel
from sagemaker.workflow.conditions import ConditionGreaterThanOrEqualTo
from sagemaker.workflow.properties import PropertyFile
from sagemaker.workflow.condition_step import ConditionStep 
from sagemaker.workflow.functions import JsonGet 

from sagemaker.workflow.lambda_step import (LambdaStep,LambdaOutputTypeEnum,LambdaOutput)
from sagemaker.model_metrics import (MetricsSource,ModelMetrics,FileSource)
from sagemaker.drift_check_baselines import DriftCheckBaselines
from sagemaker.image_uris import retrieve

In [None]:
# Initialize the AWS session and client objects used throughout the notebook.
# (`sagemaker`, `boto3` and `retrieve` are imported in the imports cell at the top.)

# SageMaker session; its default bucket is where this notebook writes its artifacts.
sess = sagemaker.Session()
write_bucket = sess.default_bucket()
write_prefix = "fraud-detect-demo"

# Pin every boto3 client to the session's region so they all target the same region.
region = sess.boto_region_name
s3_client = boto3.client("s3", region_name=region)
sm_client = boto3.client("sagemaker", region_name=region)
sm_runtime_client = boto3.client("sagemaker-runtime", region_name=region)

# Fetch the SageMaker execution role.
# NOTE(review): get_execution_role() is expected to resolve the role when running
# inside SageMaker (notebook instance / Studio); when running elsewhere, supply
# your execution role ARN explicitly instead -- confirm for your environment.
sagemaker_role = sagemaker.get_execution_role()

# S3 location of the raw input data.
read_bucket = "sagemaker-sample-files"
# NOTE(review): corrected from "synthetic_autombile_claims" (missing "o");
# confirm the prefix against the bucket listing.
read_prefix = "datasets/tabular/synthetic_automobile_claims"

# Full S3 URI prefix the raw data is read from.
raw_data_key = f"s3://{read_bucket}/{read_prefix}"

# Key prefixes (relative to write_bucket) for processed data and the data splits.
processed_data_key = f"{write_prefix}/processed"
train_data_key = f"{write_prefix}/train"
validation_data_key = f"{write_prefix}/validation"
test_data_key = f"{write_prefix}/test"

# Training container image URI for the built-in XGBoost framework.
# Fixed: the original called the undefined name `retrive`, raising NameError.
training_image = retrieve(framework="xgboost", region=region, version="1.3-1")

# Full S3 paths
claims_data_uri = f"{raw_data_key}/claims.csv"
customers_data_uri = f"{raw_data_key}/customers.csv"
output_data_uri = f"s3://{write_bucket}/{write_prefix}/"
scripts_uri = f"s3://{write_bucket}/{write_prefix}/scripts"
estimator_output_uri = f"s3://{write_bucket}/{write_prefix}/training_jobs"
processing_output_uri = f"s3://{write_bucket}/{write_prefix}/processing_jobs"
model_eval_output_uri = f"s3://{write_bucket}/{write_prefix}/model_eval"
clarify_bias_config_output_uri = f"s3://{write_bucket}/{write_prefix}/model_monitor/bias_config"
clarify_explainability_config_output_uri = f"s3://{write_bucket}/{write_prefix}/model_monitor/explainability_config"
bias_report_output_uri = f"s3://{write_bucket}/{write_prefix}/clarify_output/pipeline/bias"
explainability_report_output_uri = f"s3://{write_bucket}/{write_prefix}/clarify_output/pipeline/explainability"

In [12]:
# Names of the pipeline and of the resources derived from it.
pipeline_name = "FraudDetectXGBPipeline"
pipeline_model_name = "fraud-detect-xgb-pipeline"
model_package_group_name = "fraud-detect-xgb-model-group"
base_job_name_prefix = "fraud-detect"

# Endpoint names share the pipeline model name as a common stem.
endpoint_name = pipeline_model_name + "-endpoint"
endpoint_config_name = pipeline_model_name + "-endpoint-config"

# Name of the target column.
target_col = "fraud"

# Instance types and counts, grouped by the stage that uses them.
# Processing
process_instance_type = "ml.c5.xlarge"
# Training
train_instance_type = "ml.m4.xlarge"
train_instance_count = 1
# Prediction / deployment
predictor_instance_type = "ml.m4.xlarge"
predictor_instance_count = 1
# Clarify checks
clarify_instance_type = "ml.m4.xlarge"
clarify_instance_count = 1

In [None]:
# Declare the pipeline's input parameters. Each one has a name and a default
# value; defaults for the instance settings come from the configuration
# variables defined earlier in the notebook.

# --- Instance settings -------------------------------------------------------
# Processing
process_instance_type_param = ParameterString(name="ProcessingInstanceType", default_value=process_instance_type)

# Training
train_instance_type_param = ParameterString(name="TrainingInstanceType", default_value=train_instance_type)
train_instance_count_param = ParameterInteger(name="TrainingInstanceCount", default_value=train_instance_count)

# Deployment
deploy_instance_type_param = ParameterString(name="DeployInstanceType", default_value=predictor_instance_type)
deploy_instance_count_param = ParameterInteger(name="DeployInstanceCount", default_value=predictor_instance_count)

# Clarify checks
clarify_instance_type_param = ParameterString(name="ClarifyInstanceType", default_value=clarify_instance_type)

# --- Model bias check --------------------------------------------------------
skip_check_model_bias_param = ParameterBoolean(name="SkipModelBiasCheck", default_value=False)
register_new_baseline_model_bias_param = ParameterBoolean(name="RegisterNewModelBiasBaseline", default_value=False)
supplied_baseline_constraints_model_bias_param = ParameterString(name="ModelBiasSuppliedBaselineConstraints", default_value="")

# --- Model explainability check ----------------------------------------------
skip_check_model_explainability_param = ParameterBoolean(name="SkipModelExplainabilityCheck", default_value=False)
register_new_baseline_model_explainability_param = ParameterBoolean(name="RegisterNewModelExplainabilityBaseline", default_value=False)
supplied_baseline_constraints_model_explainability_param = ParameterString(name="ModelExplainabilitySuppliedBaselineConstraints", default_value="")

# --- Model approval ----------------------------------------------------------
model_approval_status_param = ParameterString(name="ModelApprovalStatus", default_value="Approved")