In [3]:
import pandas as pd

## Preprocessing container for stats calculation

In [2]:
import boto3

# SageMaker API client used by every create_processing_job call in this notebook.
client = boto3.client(service_name='sagemaker')

# VPC placement handed to NetworkConfig.VpcConfig by the first processing job.
sec_groups = ["sg-044e0e7ce4f5721c0"]
subnets = [
    "subnet-0cf0e3f46326aa259",
    "subnet-0156b7f5500cf0b78",
    "subnet-032420199163cff9b",
]



In [4]:
# Launch a one-off Model Monitor analyzer job over one hour of captured endpoint
# traffic, inside the VPC described by `sec_groups` / `subnets`.
#
# Fix: the analyzer is told where the captured data lives through the
# `dataset_source` environment variable, so `input_1` has to be downloaded
# underneath that directory. It previously landed in
# /opt/ml/processing/input/monitoring_dataset_input/..., which is outside
# dataset_source (/opt/ml/processing/input/endpoint).
#
# NOTE: SageMaker rejects a ProcessingJobName that already exists, so change the
# name below before re-running this cell.
response = client.create_processing_job(
    ProcessingInputs=[
        {
            # Captured traffic; the sub-path mirrors the
            # <endpoint>/<variant>/yyyy/mm/dd/hh layout used by the other cells
            # and matches start_time / end_time below.
            'InputName': 'input_1',
            'AppManaged': False,
            'S3Input': {
                'LocalPath': '/opt/ml/processing/input/endpoint/poc/AllTraffic/2021/08/11/10',
                'S3Uri': 's3://wipcoe-ml-s3-data-bucket/tempdatacap/',
                'S3DataDistributionType': 'FullyReplicated',
                'S3DataType': 'S3Prefix',
                'S3InputMode': 'File'
            }
        },
        {
            # Baseline statistics; referenced by `baseline_statistics` below.
            'InputName': 'baseline',
            'AppManaged': False,
            'S3Input': {
                'LocalPath': '/opt/ml/processing/baseline/stats',
                'S3Uri': 's3://wipcoe-ml-s3-config-bucket/monitoring/statistics.json',
                'S3DataDistributionType': 'FullyReplicated',
                'S3DataType': 'S3Prefix',
                'S3InputMode': 'File'
            }
        },
        {
            # Baseline constraints; referenced by `baseline_constraints` below.
            'InputName': 'constraints',
            'AppManaged': False,
            'S3Input': {
                'LocalPath': '/opt/ml/processing/baseline/constraints',
                'S3Uri': 's3://wipcoe-ml-s3-config-bucket/monitoring/constraints.json',
                'S3DataDistributionType': 'FullyReplicated',
                'S3DataType': 'S3Prefix',
                'S3InputMode': 'File'
            }
        },
        {
            # Script referenced by `post_analytics_processor_script` below.
            'InputName': 'post_processor_script',
            'AppManaged': False,
            'S3Input': {
                'LocalPath': '/opt/ml/processing/code/postprocessing',
                'S3Uri': 's3://sagemaker-us-east-1-525102048888/sagemaker/DEMO-ModelMonitor/code/postprocessor.py',
                'S3DataDistributionType': 'FullyReplicated',
                'S3DataType': 'S3Prefix',
                'S3InputMode': 'File'
            }
        },
    ],
    ProcessingOutputConfig={
        'Outputs': [
            {
                'OutputName': 'result',
                'AppManaged': False,
                'S3Output': {
                    # Must stay equal to `output_path` in Environment.
                    'LocalPath': '/opt/ml/processing/output',
                    'S3Uri': 's3://wipcoe-ml-s3-config-bucket/monitoring/customresult/',
                    'S3UploadMode': 'Continuous'
                },
            }
        ],
    },
    ProcessingJobName='model-monitoring-notebook-25',
    ProcessingResources={
        'ClusterConfig': {
            'InstanceCount': 1,
            'InstanceType': 'ml.m5.xlarge',
            'VolumeSizeInGB': 100
        }
    },
    StoppingCondition={
        'MaxRuntimeInSeconds': 3600
    },
    AppSpecification={
        'ImageUri': '156813124566.dkr.ecr.us-east-1.amazonaws.com/sagemaker-model-monitor-analyzer'
    },
    Environment={
        # Container-side paths; each one must point inside the LocalPath of the
        # matching ProcessingInput / ProcessingOutput above.
        'baseline_constraints': '/opt/ml/processing/baseline/constraints/constraints.json',
        'baseline_statistics': '/opt/ml/processing/baseline/stats/statistics.json',
        'dataset_format': '{"sagemakerCaptureJson":{"captureIndexNames":["endpointInput","endpointOutput"]}}',
        'dataset_source': '/opt/ml/processing/input/endpoint',
        "start_time": "2021-08-11T10:00:00Z",
        "end_time": "2021-08-11T11:00:00Z",
        "metric_time": "2021-08-11T10:00:00Z",
        'output_path': '/opt/ml/processing/output',
        'post_analytics_processor_script': '/opt/ml/processing/code/postprocessing/postprocessor.py',
        'publish_cloudwatch_metrics': 'Disabled',
        'sagemaker_endpoint_name': 'poc',
        'sagemaker_monitoring_schedule_name': 'poc'
    },
    NetworkConfig={
        "EnableInterContainerTrafficEncryption": False,
        "EnableNetworkIsolation": False,
        "VpcConfig": {
            "SecurityGroupIds": sec_groups,
            "Subnets": subnets
        }
    },
    RoleArn='arn:aws:iam::525102048888:role/wipcoe-pricing-real-time-moni-MonitorSageMakerRole-11WVCBI6XNZO1'
)

In [6]:
# Start an analyzer processing job for the LR endpoint capture. Inputs are the
# captured data, the baseline statistics/constraints files and a post-processor
# script; results are uploaded under .../customresult/lr/.
response = client.create_processing_job(
    ProcessingJobName='model-monitoring-poc-18sepLRv2',
    RoleArn='arn:aws:iam::525102048888:role/service-role/AmazonSageMaker-ExecutionRole-20191105T125227',
    AppSpecification={
        'ImageUri': '156813124566.dkr.ecr.us-east-1.amazonaws.com/sagemaker-model-monitor-analyzer',
    },
    ProcessingResources={
        'ClusterConfig': {'InstanceCount': 1, 'InstanceType': 'ml.m5.xlarge', 'VolumeSizeInGB': 100},
    },
    StoppingCondition={'MaxRuntimeInSeconds': 3600},
    Environment={
        'baseline_constraints': '/opt/ml/processing/baseline/constraints/constraints.json',
        'baseline_statistics': '/opt/ml/processing/baseline/stats/statistics.json',
        'dataset_format': '{"sagemakerCaptureJson":{"captureIndexNames":["endpointInput","endpointOutput"]}}',
        'dataset_source': '/opt/ml/processing/input/endpoint',
        'end_time': '2021-04-18T05:00:00Z',
        'metric_time': '2021-04-18T04:00:00Z',
        'output_path': '/opt/ml/processing/output',
        # Disabled: 'record_preprocessor_script' ->
        #   /opt/ml/processing/code/preprocessing/inferpreprocessingscript.py
        'post_analytics_processor_script': '/opt/ml/processing/code/postprocessing/postprocessor.py',
        'publish_cloudwatch_metrics': 'Disabled',
        'sagemaker_endpoint_name': 'poc',
        'sagemaker_monitoring_schedule_name': 'poc',
        'start_time': '2021-04-18T04:00:00Z',
    },
    ProcessingInputs=[
        {
            # Captured endpoint data, placed below dataset_source.
            'InputName': 'input_1',
            'AppManaged': False,
            'S3Input': {
                'LocalPath': '/opt/ml/processing/input/endpoint/poc/AllTraffic/2021/04/18/04',
                'S3Uri': 's3://wi-cred-datalake-dev-raw/capture/wipuat-pricing-ml-training-pipeline-lr-endpoint/AllTraffic/2021/09/19/06/',
                'S3DataDistributionType': 'FullyReplicated', 'S3DataType': 'S3Prefix', 'S3InputMode': 'File',
            },
        },
        {
            'InputName': 'baseline',
            'AppManaged': False,
            'S3Input': {
                'LocalPath': '/opt/ml/processing/baseline/stats',
                'S3Uri': 's3://wi-cred-datalake-dev-s3-mlops-config/custommonitor/statistics.json',
                'S3DataDistributionType': 'FullyReplicated', 'S3DataType': 'S3Prefix', 'S3InputMode': 'File',
            },
        },
        {
            'InputName': 'constraints',
            'AppManaged': False,
            'S3Input': {
                'LocalPath': '/opt/ml/processing/baseline/constraints',
                'S3Uri': 's3://wi-cred-datalake-dev-s3-mlops-config/custommonitor/constraints.json',
                'S3DataDistributionType': 'FullyReplicated', 'S3DataType': 'S3Prefix', 'S3InputMode': 'File',
            },
        },
        # Disabled: 'pre_processor_script' input
        #   s3://wipcoe-ml-s3-config-bucket/monitoring/inferpreprocessingscript.py
        #   -> /opt/ml/processing/code/preprocessing
        {
            'InputName': 'post_processor_script',
            'AppManaged': False,
            'S3Input': {
                'LocalPath': '/opt/ml/processing/code/postprocessing',
                'S3Uri': 's3://sagemaker-us-east-1-525102048888/sagemaker/DEMO-ModelMonitor/code/postprocessor.py',
                'S3DataDistributionType': 'FullyReplicated', 'S3DataType': 'S3Prefix', 'S3InputMode': 'File',
            },
        },
    ],
    ProcessingOutputConfig={
        'Outputs': [
            {
                'OutputName': 'result',
                'AppManaged': False,
                'S3Output': {
                    'LocalPath': '/opt/ml/processing/output',
                    'S3Uri': 's3://wi-cred-datalake-dev-s3-mlops-config/custommonitor/customresult/lr/',
                    'S3UploadMode': 'Continuous',
                },
            },
        ],
    },
)

SyntaxError: invalid syntax (<ipython-input-6-53664e2b03ac>, line 13)

In [8]:
# Start an analyzer processing job for the XGBoost endpoint capture. Inputs are
# the captured data, the baseline statistics/constraints files and a
# post-processor script; results are uploaded under .../customresult/xg/.
response = client.create_processing_job(
    ProcessingJobName='model-monitoring-poc-18sepxgv22',
    RoleArn='arn:aws:iam::525102048888:role/service-role/AmazonSageMaker-ExecutionRole-20191105T125227',
    AppSpecification={
        'ImageUri': '156813124566.dkr.ecr.us-east-1.amazonaws.com/sagemaker-model-monitor-analyzer',
    },
    ProcessingResources={
        'ClusterConfig': {'InstanceCount': 1, 'InstanceType': 'ml.m5.xlarge', 'VolumeSizeInGB': 100},
    },
    StoppingCondition={'MaxRuntimeInSeconds': 3600},
    Environment={
        'baseline_constraints': '/opt/ml/processing/baseline/constraints/constraints.json',
        'baseline_statistics': '/opt/ml/processing/baseline/stats/statistics.json',
        'dataset_format': '{"sagemakerCaptureJson":{"captureIndexNames":["endpointInput","endpointOutput"]}}',
        'dataset_source': '/opt/ml/processing/input/endpoint',
        'end_time': '2021-04-18T05:00:00Z',
        'metric_time': '2021-04-18T04:00:00Z',
        'output_path': '/opt/ml/processing/output',
        # Disabled: 'record_preprocessor_script' ->
        #   /opt/ml/processing/code/preprocessing/inferpreprocessingscript.py
        'post_analytics_processor_script': '/opt/ml/processing/code/postprocessing/postprocessor.py',
        'publish_cloudwatch_metrics': 'Disabled',
        'sagemaker_endpoint_name': 'poc',
        'sagemaker_monitoring_schedule_name': 'poc',
        'start_time': '2021-04-18T04:00:00Z',
    },
    ProcessingInputs=[
        {
            # Captured endpoint data, placed below dataset_source.
            'InputName': 'input_1',
            'AppManaged': False,
            'S3Input': {
                'LocalPath': '/opt/ml/processing/input/endpoint/poc/AllTraffic/2021/04/18/04',
                'S3Uri': 's3://wi-cred-datalake-dev-raw/capture/wipuat-pricing-ml-training-pipeline-xgboost-endpoint/AllTraffic/2021/09/19/06/',
                'S3DataDistributionType': 'FullyReplicated', 'S3DataType': 'S3Prefix', 'S3InputMode': 'File',
            },
        },
        {
            'InputName': 'baseline',
            'AppManaged': False,
            'S3Input': {
                'LocalPath': '/opt/ml/processing/baseline/stats',
                'S3Uri': 's3://wi-cred-datalake-dev-s3-mlops-config/custommonitor/statistics.json',
                'S3DataDistributionType': 'FullyReplicated', 'S3DataType': 'S3Prefix', 'S3InputMode': 'File',
            },
        },
        {
            'InputName': 'constraints',
            'AppManaged': False,
            'S3Input': {
                'LocalPath': '/opt/ml/processing/baseline/constraints',
                'S3Uri': 's3://wi-cred-datalake-dev-s3-mlops-config/custommonitor/constraints.json',
                'S3DataDistributionType': 'FullyReplicated', 'S3DataType': 'S3Prefix', 'S3InputMode': 'File',
            },
        },
        # Disabled: 'pre_processor_script' input
        #   s3://wipcoe-ml-s3-config-bucket/monitoring/inferpreprocessingscript.py
        #   -> /opt/ml/processing/code/preprocessing
        {
            'InputName': 'post_processor_script',
            'AppManaged': False,
            'S3Input': {
                'LocalPath': '/opt/ml/processing/code/postprocessing',
                'S3Uri': 's3://sagemaker-us-east-1-525102048888/sagemaker/DEMO-ModelMonitor/code/postprocessor.py',
                'S3DataDistributionType': 'FullyReplicated', 'S3DataType': 'S3Prefix', 'S3InputMode': 'File',
            },
        },
    ],
    ProcessingOutputConfig={
        'Outputs': [
            {
                'OutputName': 'result',
                'AppManaged': False,
                'S3Output': {
                    'LocalPath': '/opt/ml/processing/output',
                    'S3Uri': 's3://wi-cred-datalake-dev-s3-mlops-config/custommonitor/customresult/xg/',
                    'S3UploadMode': 'Continuous',
                },
            },
        ],
    },
)

In [9]:
# Recorded description of a scheduled model-quality monitoring processing job,
# kept as a reference for the parameters used when launching jobs by hand.
# (Looks like a pasted describe_processing_job response - TODO confirm source.)
k = {
    'ProcessingJobArn': 'arn:aws:sagemaker:us-east-1:525102048888:processing-job/model-quality-monitoring-202205250900-899266e8f390c3654ea9f2d6',
    'ProcessingJobName': 'model-quality-monitoring-202205250900-899266e8f390c3654ea9f2d6',
    'Environment': {
        'analysis_type': 'MODEL_QUALITY',
        'baseline_constraints': '/opt/ml/processing/baseline/constraints/constraints.json',
        'dataset_format': '{"sagemakerMergeJson":{"captureIndexNames":["endpointOutput"]}}',
        'dataset_source': '/opt/ml/processing/input_data',
        'end_time': '2022-05-25T09:00:00Z',
        'metric_time': '2022-05-25T08:00:00Z',
        'output_path': '/opt/ml/processing/output',
        'probability_attribute': '0',
        'probability_threshold_attribute': '0.500000',
        'problem_type': 'BinaryClassification',
        'publish_cloudwatch_metrics': 'Enabled',
        'sagemaker_endpoint_name': 'DEMO-xgb-churn-model-quality-monitor-2022-05-25-0520',
        'sagemaker_monitoring_schedule_name': 'DEMO-xgb-churn-monitoring-schedule-2022-05-25-0559',
        'start_time': '2022-05-25T08:00:00Z',
    },
    'AppSpecification': {
        'ImageUri': '156813124566.dkr.ecr.us-east-1.amazonaws.com/sagemaker-model-monitor-analyzer',
        'ContainerEntrypoint': None,
        'ContainerArguments': None,
    },
    'ProcessingInputs': [
        {
            'InputName': 'constraints',
            'AppManaged': False,
            'S3Input': {
                'LocalPath': '/opt/ml/processing/baseline/constraints',
                'S3Uri': 's3://sagemaker-us-east-1-525102048888/sagemaker/Churn-ModelQualityMonitor-20201201/baselining/results/constraints.json',
                'S3DataDistributionType': 'FullyReplicated',
                'S3DataType': 'S3Prefix',
                'S3InputMode': 'File',
                # A real None here, unlike the 'None' string on the next input.
                'S3CompressionType': None,
                'S3DownloadMode': 'StartOfJob',
            },
            'DatasetDefinition': None,
        },
        {
            'InputName': 'endpoint_input_1',
            'AppManaged': False,
            'S3Input': {
                'LocalPath': '/opt/ml/processing/input_data/DEMO-xgb-churn-model-quality-monitor-2022-05-25-0520/AllTraffic/2022/05/25/08',
                'S3Uri': 's3://sagemaker-us-east-1-525102048888/sagemaker/Churn-ModelQualityMonitor-20201201/baselining/results/merge/DEMO-xgb-churn-model-quality-monitor-2022-05-25-0520/AllTraffic/2022/05/25/08',
                'S3DataDistributionType': 'FullyReplicated',
                'S3DataType': 'S3Prefix',
                'S3InputMode': 'File',
                'S3CompressionType': 'None',
                'S3DownloadMode': 'StartOfJob',
            },
            'DatasetDefinition': None,
        },
    ],
    'ProcessingOutputConfig': {
        'Outputs': [
            {
                'OutputName': 'result',
                'AppManaged': False,
                'S3Output': {
                    'LocalPath': '/opt/ml/processing/output',
                    'S3Uri': 's3://sagemaker-us-east-1-525102048888/sagemaker/Churn-ModelQualityMonitor-20201201/baselining/results/DEMO-xgb-churn-model-quality-monitor-2022-05-25-0520/DEMO-xgb-churn-monitoring-schedule-2022-05-25-0559/2022/05/25/09',
                    'S3UploadMode': 'Continuous',
                },
                'FeatureStoreOutput': None,
            },
        ],
        'KmsKeyId': None,
    },
    'ProcessingResources': {
        'ClusterConfig': {
            'InstanceCount': 1,
            'InstanceType': 'ml.m5.xlarge',
            'VolumeSizeInGB': 20,
            'VolumeKmsKeyId': None,
        },
    },
    'RoleArn': 'arn:aws:iam::525102048888:role/wipuattt-pricing-real-time-mo-MonitorSageMakerRole-1NTM2IM8BWN6M',
    'StoppingCondition': {'MaxRuntimeInSeconds': 1800},
}
# Last expression of the cell, so the dict is rendered as the cell output.
k

{'ProcessingJobArn': 'arn:aws:sagemaker:us-east-1:525102048888:processing-job/model-quality-monitoring-202205250900-899266e8f390c3654ea9f2d6',
 'ProcessingJobName': 'model-quality-monitoring-202205250900-899266e8f390c3654ea9f2d6',
 'Environment': {'analysis_type': 'MODEL_QUALITY',
  'baseline_constraints': '/opt/ml/processing/baseline/constraints/constraints.json',
  'dataset_format': '{"sagemakerMergeJson":{"captureIndexNames":["endpointOutput"]}}',
  'dataset_source': '/opt/ml/processing/input_data',
  'end_time': '2022-05-25T09:00:00Z',
  'metric_time': '2022-05-25T08:00:00Z',
  'output_path': '/opt/ml/processing/output',
  'probability_attribute': '0',
  'probability_threshold_attribute': '0.500000',
  'problem_type': 'BinaryClassification',
  'publish_cloudwatch_metrics': 'Enabled',
  'sagemaker_endpoint_name': 'DEMO-xgb-churn-model-quality-monitor-2022-05-25-0520',
  'sagemaker_monitoring_schedule_name': 'DEMO-xgb-churn-monitoring-schedule-2022-05-25-0559',
  'start_time': '2022-

In [10]:
# Environment variables of a model-quality monitoring job (binary classification),
# written out one key per line for easier comparison with the jobs launched below.
bs = {
    'analysis_type': 'MODEL_QUALITY',
    'baseline_constraints': '/opt/ml/processing/baseline/constraints/constraints.json',
    'dataset_format': '{"sagemakerMergeJson":{"captureIndexNames":["endpointOutput"]}}',
    'dataset_source': '/opt/ml/processing/input_data',
    'end_time': '2022-05-25T09:00:00Z',
    'metric_time': '2022-05-25T08:00:00Z',
    'output_path': '/opt/ml/processing/output',
    'probability_attribute': '0',
    'probability_threshold_attribute': '0.500000',
    'problem_type': 'BinaryClassification',
    'publish_cloudwatch_metrics': 'Enabled',
    'sagemaker_endpoint_name': 'DEMO-xgb-churn-model-quality-monitor-2022-05-25-0520',
    'sagemaker_monitoring_schedule_name': 'DEMO-xgb-churn-monitoring-schedule-2022-05-25-0559',
    'start_time': '2022-05-25T08:00:00Z',
}

In [13]:
# Bare expression: renders the `bs` environment dict as this cell's output.
bs

{'analysis_type': 'MODEL_QUALITY',
 'baseline_constraints': '/opt/ml/processing/baseline/constraints/constraints.json',
 'dataset_format': '{"sagemakerMergeJson":{"captureIndexNames":["endpointOutput"]}}',
 'dataset_source': '/opt/ml/processing/input_data',
 'end_time': '2022-05-25T09:00:00Z',
 'metric_time': '2022-05-25T08:00:00Z',
 'output_path': '/opt/ml/processing/output',
 'probability_attribute': '0',
 'probability_threshold_attribute': '0.500000',
 'problem_type': 'BinaryClassification',
 'publish_cloudwatch_metrics': 'Enabled',
 'sagemaker_endpoint_name': 'DEMO-xgb-churn-model-quality-monitor-2022-05-25-0520',
 'sagemaker_monitoring_schedule_name': 'DEMO-xgb-churn-monitoring-schedule-2022-05-25-0559',
 'start_time': '2022-05-25T08:00:00Z'}

# Model Quality Monitoring

In [15]:
# Model-quality (regression) analyzer job launched by hand.
# Baseline artifacts are read from
#   s3://wi-cred-datalake-dev-raw/vehicle/usedcars/feature/lr/ModelDrift-BaselineOutput
response = client.create_processing_job(
    ProcessingJobName='model-monitoring-poc-07777',
    RoleArn='arn:aws:iam::525102048888:role/service-role/AmazonSageMaker-ExecutionRole-20191105T125227',
    AppSpecification={
        'ImageUri': '156813124566.dkr.ecr.us-east-1.amazonaws.com/sagemaker-model-monitor-analyzer',
    },
    ProcessingResources={
        'ClusterConfig': {'InstanceCount': 1, 'InstanceType': 'ml.m5.xlarge', 'VolumeSizeInGB': 20},
    },
    StoppingCondition={'MaxRuntimeInSeconds': 3600},
    Environment={
        'analysis_type': 'MODEL_QUALITY',
        'baseline_constraints': '/opt/ml/processing/baseline/constraints/constraints.json',
        'dataset_format': '{"sagemakerMergeJson":{"captureIndexNames":["endpointOutput"]}}',
        'dataset_source': '/opt/ml/processing/input_data',
        'end_time': '2022-05-25T09:00:00Z',
        'metric_time': '2022-05-25T08:00:00Z',
        'output_path': '/opt/ml/processing/output',
        'problem_type': 'Regression',
        'publish_cloudwatch_metrics': 'Enabled',
        'sagemaker_endpoint_name': 'DEMO-xgb-churn-model-quality-monitor-2022-05-25-0520',
        'sagemaker_monitoring_schedule_name': 'DEMO-xgb-churn-monitoring-schedule-2022-05-25-0559',
        'start_time': '2022-05-25T08:00:00Z',
        'inference_attribute': '0',
    },
    ProcessingInputs=[
        {
            # Dataset to analyze, placed below dataset_source.
            'InputName': 'endpoint_input_1',
            'AppManaged': False,
            'S3Input': {
                'LocalPath': '/opt/ml/processing/input_data/DEMO-xgb-churn-model-quality-monitor-2022-05-25-0520/AllTraffic/2022/05/25/08',
                'S3Uri': 's3://sagemaker-us-east-1-525102048888/sagemaker/Churn-ModelQualityMonitor-20201201/baselining/DEMO-xgb-churn-model-quality-monitor-2022-05-25-1232/AllTraffic/2022/05/23/16',
                'S3DataDistributionType': 'FullyReplicated', 'S3DataType': 'S3Prefix', 'S3InputMode': 'File',
                'S3CompressionType': 'None',
            },
        },
        {
            'InputName': 'baseline',
            'AppManaged': False,
            'S3Input': {
                'LocalPath': '/opt/ml/processing/baseline/stats',
                'S3Uri': 's3://wi-cred-datalake-dev-raw/vehicle/usedcars/feature/lr/ModelDrift-BaselineOutput/statistics.json',
                'S3DataDistributionType': 'FullyReplicated', 'S3DataType': 'S3Prefix', 'S3InputMode': 'File',
            },
        },
        {
            'InputName': 'constraints',
            'AppManaged': False,
            'S3Input': {
                'LocalPath': '/opt/ml/processing/baseline/constraints',
                'S3Uri': 's3://wi-cred-datalake-dev-raw/vehicle/usedcars/feature/lr/ModelDrift-BaselineOutput/constraints.json',
                'S3DataDistributionType': 'FullyReplicated', 'S3DataType': 'S3Prefix', 'S3InputMode': 'File',
            },
        },
        # Disabled: 'pre_processor_script' input
        #   s3://wipcoe-ml-s3-config-bucket/monitoring/inferpreprocessingscript.py
        #   -> /opt/ml/processing/code/preprocessing
        # Disabled: 'post_processor_script' input
        #   s3://sagemaker-us-east-1-525102048888/sagemaker/DEMO-ModelMonitor/code/postprocessor.py
        #   -> /opt/ml/processing/code/postprocessing
    ],
    ProcessingOutputConfig={
        'Outputs': [
            {
                'OutputName': 'result',
                'AppManaged': False,
                'S3Output': {
                    'LocalPath': '/opt/ml/processing/output',
                    'S3Uri': 's3://sagemaker-us-east-1-525102048888/sagemaker/Churn-ModelQualityMonitor-20201201/baselining',
                    'S3UploadMode': 'Continuous',
                },
            },
        ],
    },
)

# Ground Truth Merge

In [6]:
# Recorded description of a scheduled ground-truth merge processing job, kept as
# a reference for the hand-launched merge jobs further down.
# (Looks like a pasted describe_processing_job response - TODO confirm source.)
gt = {
    'ProcessingJobArn': 'arn:aws:sagemaker:us-east-1:525102048888:processing-job/groundtruth-merge-202205251400-640ad982460f7b19302909e6',
    'ProcessingJobName': 'groundtruth-merge-202205251400-640ad982460f7b19302909e6',
    'Environment': {
        'dataset_source': '/opt/ml/processing/input_data',
        'ground_truth_source': '/opt/ml/processing/groundtruth',
        'output_path': '/opt/ml/processing/output',
    },
    'AppSpecification': {
        'ImageUri': '156813124566.dkr.ecr.us-east-1.amazonaws.com/sagemaker-model-monitor-groundtruth-merger',
        'ContainerEntrypoint': None,
        'ContainerArguments': None,
    },
    'ProcessingInputs': [
        {
            'InputName': 'groundtruth_input_1',
            'AppManaged': False,
            'S3Input': {
                'LocalPath': '/opt/ml/processing/groundtruth/2022/05/25/13',
                'S3Uri': 's3://sagemaker-us-east-1-525102048888/sagemaker/Churn-ModelQualityMonitor-20201201/ground_truth_data/2022-05-25-05-20-25/2022/05/25/13',
                'S3DataDistributionType': 'FullyReplicated',
                'S3DataType': 'S3Prefix',
                'S3InputMode': 'File',
                'S3CompressionType': 'None',
                'S3DownloadMode': 'StartOfJob',
            },
            'DatasetDefinition': None,
        },
        {
            'InputName': 'endpoint_input_1',
            'AppManaged': False,
            'S3Input': {
                'LocalPath': '/opt/ml/processing/input_data/DEMO-xgb-churn-model-quality-monitor-2022-05-25-1232/AllTraffic/2022/05/25/13',
                'S3Uri': 's3://sagemaker-us-east-1-525102048888/sagemaker/Churn-ModelQualityMonitor-20201201/datacapture/DEMO-xgb-churn-model-quality-monitor-2022-05-25-1232/AllTraffic/2022/05/25/13',
                'S3DataDistributionType': 'FullyReplicated',
                'S3DataType': 'S3Prefix',
                'S3InputMode': 'File',
                'S3CompressionType': 'None',
                'S3DownloadMode': 'StartOfJob',
            },
            'DatasetDefinition': None,
        },
    ],
    'ProcessingOutputConfig': {
        'Outputs': [
            {
                'OutputName': 'result',
                'AppManaged': False,
                'S3Output': {
                    'LocalPath': '/opt/ml/processing/output',
                    'S3Uri': 's3://sagemaker-us-east-1-525102048888/sagemaker/Churn-ModelQualityMonitor-20201201/baselining/results/merge',
                    'S3UploadMode': 'EndOfJob',
                },
                'FeatureStoreOutput': None,
            },
        ],
        'KmsKeyId': None,
    },
    'ProcessingResources': {
        'ClusterConfig': {
            'InstanceCount': 1,
            'InstanceType': 'ml.m5.xlarge',
            'VolumeSizeInGB': 20,
            'VolumeKmsKeyId': None,
        },
    },
    'RoleArn': 'arn:aws:iam::525102048888:role/wipuattt-pricing-real-time-mo-MonitorSageMakerRole-1NTM2IM8BWN6M',
    'StoppingCondition': {'MaxRuntimeInSeconds': 1800},
}

In [7]:
# Bare expression: renders the recorded `gt` job description as this cell's output.
gt

{'ProcessingJobArn': 'arn:aws:sagemaker:us-east-1:525102048888:processing-job/groundtruth-merge-202205251400-640ad982460f7b19302909e6',
 'ProcessingJobName': 'groundtruth-merge-202205251400-640ad982460f7b19302909e6',
 'Environment': {'dataset_source': '/opt/ml/processing/input_data',
  'ground_truth_source': '/opt/ml/processing/groundtruth',
  'output_path': '/opt/ml/processing/output'},
 'AppSpecification': {'ImageUri': '156813124566.dkr.ecr.us-east-1.amazonaws.com/sagemaker-model-monitor-groundtruth-merger',
  'ContainerEntrypoint': None,
  'ContainerArguments': None},
 'ProcessingInputs': [{'InputName': 'groundtruth_input_1',
   'AppManaged': False,
   'S3Input': {'LocalPath': '/opt/ml/processing/groundtruth/2022/05/25/13',
    'S3Uri': 's3://sagemaker-us-east-1-525102048888/sagemaker/Churn-ModelQualityMonitor-20201201/ground_truth_data/2022-05-25-05-20-25/2022/05/25/13',
    'S3DataDistributionType': 'FullyReplicated',
    'S3DataType': 'S3Prefix',
    'S3InputMode': 'File',
    'S

In [15]:
# Hand-launched ground-truth merge job: one ground-truth input and one
# endpoint-data input go to the groundtruth-merger image, and the result is
# uploaded when the job ends.
response = client.create_processing_job(
    ProcessingJobName='MM-groundtruth-T27',
    RoleArn='arn:aws:iam::525102048888:role/service-role/AmazonSageMaker-ExecutionRole-20191105T125227',
    AppSpecification={
        'ImageUri': '156813124566.dkr.ecr.us-east-1.amazonaws.com/sagemaker-model-monitor-groundtruth-merger',
    },
    ProcessingResources={
        'ClusterConfig': {'InstanceCount': 1, 'InstanceType': 'ml.m5.xlarge', 'VolumeSizeInGB': 20},
    },
    StoppingCondition={'MaxRuntimeInSeconds': 3600},
    Environment={
        'dataset_source': '/opt/ml/processing/input_data',
        'ground_truth_source': '/opt/ml/processing/groundtruth',
        'output_path': '/opt/ml/processing/output',
    },
    ProcessingInputs=[
        {
            'InputName': 'groundtruth_input_1',
            'AppManaged': False,
            'S3Input': {
                # Commented-out alternative path suffix: /2022/05/25/13
                'LocalPath': '/opt/ml/processing/groundtruth/jk/dk/sk/tk',
                # Commented-out object name: grountruthjsonl.jsonl
                'S3Uri': 's3://wi-cred-datalake-dev-raw/transformed/monitoring/inbound/currentrun/groundtruth/realtime/ll/2022/06/10/09/',
                'S3DataDistributionType': 'FullyReplicated', 'S3DataType': 'S3Prefix', 'S3InputMode': 'File',
                'S3CompressionType': 'None',
            },
        },
        {
            'InputName': 'endpoint_input_1',
            'AppManaged': False,
            'S3Input': {
                # Commented-out alternative path suffix: 2022/05/25/13
                'LocalPath': '/opt/ml/processing/input_data/DEMO/anything/jk/dk/sk/tk',
                # Commented-out object name: run-1654505031251-part-r-00000.jsonl
                'S3Uri': 's3://wi-cred-datalake-dev-raw/transformed/monitoring/inbound/realtime/ll/2022/06/10/09/',
                'S3DataDistributionType': 'FullyReplicated', 'S3DataType': 'S3Prefix', 'S3InputMode': 'File',
                'S3CompressionType': 'None',
            },
        },
    ],
    ProcessingOutputConfig={
        'Outputs': [
            {
                'OutputName': 'result',
                'AppManaged': False,
                'S3Output': {
                    'LocalPath': '/opt/ml/processing/output',
                    'S3Uri': 's3://ajay-vishwakarma-useast1/config/',
                    'S3UploadMode': 'EndOfJob',
                },
            },
        ],
    },
)

In [10]:
# Recorded description of a second scheduled ground-truth merge job (2022-06-07),
# kept as a reference. Note that this rebinds `k`, which an earlier cell used
# for a model-quality job description.
# (Looks like a pasted describe_processing_job response - TODO confirm source.)
k = {
    'ProcessingJobArn': 'arn:aws:sagemaker:us-east-1:525102048888:processing-job/groundtruth-merge-202206070500-e79666b37043fc212e07a7c1',
    'ProcessingJobName': 'groundtruth-merge-202206070500-e79666b37043fc212e07a7c1',
    'Environment': {
        'dataset_source': '/opt/ml/processing/input/endpoint',
        'ground_truth_source': '/opt/ml/processing/groundtruth',
        'output_path': '/opt/ml/processing/output',
    },
    'AppSpecification': {
        'ImageUri': '156813124566.dkr.ecr.us-east-1.amazonaws.com/sagemaker-model-monitor-groundtruth-merger',
        'ContainerEntrypoint': None,
        'ContainerArguments': None,
    },
    'ProcessingInputs': [
        {
            'InputName': 'groundtruth_input_1',
            'AppManaged': False,
            'S3Input': {
                'LocalPath': '/opt/ml/processing/groundtruth/2022/06/07/04',
                'S3Uri': 's3://sagemaker-us-east-1-525102048888/sagemaker/DEMO-ClarifyModelMonitor-20200901/ground_truth_data/2022-06-07-04-37-00/2022/06/07/04',
                'S3DataDistributionType': 'FullyReplicated',
                'S3DataType': 'S3Prefix',
                'S3InputMode': 'File',
                'S3CompressionType': 'None',
                'S3DownloadMode': 'StartOfJob',
            },
            'DatasetDefinition': None,
        },
        {
            'InputName': 'endpoint_input_1',
            'AppManaged': False,
            'S3Input': {
                'LocalPath': '/opt/ml/processing/input/endpoint/DEMO-xgb-churn-model-monitor-2022-06-07-0437/AllTraffic/2022/06/07/04',
                'S3Uri': 's3://sagemaker-us-east-1-525102048888/sagemaker/DEMO-ClarifyModelMonitor-20200901/datacapture/DEMO-xgb-churn-model-monitor-2022-06-07-0437/AllTraffic/2022/06/07/04',
                'S3DataDistributionType': 'FullyReplicated',
                'S3DataType': 'S3Prefix',
                'S3InputMode': 'File',
                'S3CompressionType': 'None',
                'S3DownloadMode': 'StartOfJob',
            },
            'DatasetDefinition': None,
        },
    ],
    'ProcessingOutputConfig': {
        'Outputs': [
            {
                'OutputName': 'result',
                'AppManaged': False,
                'S3Output': {
                    'LocalPath': '/opt/ml/processing/output',
                    'S3Uri': 's3://sagemaker-us-east-1-525102048888/sagemaker/DEMO-ClarifyModelMonitor-20200901/reports/merge',
                    'S3UploadMode': 'EndOfJob',
                },
                'FeatureStoreOutput': None,
            },
        ],
        'KmsKeyId': None,
    },
    'ProcessingResources': {
        'ClusterConfig': {
            'InstanceCount': 1,
            'InstanceType': 'ml.m5.xlarge',
            'VolumeSizeInGB': 30,
            'VolumeKmsKeyId': None,
        },
    },
    'RoleArn': 'arn:aws:iam::525102048888:role/wipuattt-pricing-real-time-mo-MonitorSageMakerRole-1NTM2IM8BWN6M',
    'StoppingCondition': {'MaxRuntimeInSeconds': 1800},
}
# Last expression of the cell, so the dict is rendered as the cell output.
k

{'ProcessingJobArn': 'arn:aws:sagemaker:us-east-1:525102048888:processing-job/groundtruth-merge-202206070500-e79666b37043fc212e07a7c1',
 'ProcessingJobName': 'groundtruth-merge-202206070500-e79666b37043fc212e07a7c1',
 'Environment': {'dataset_source': '/opt/ml/processing/input/endpoint',
  'ground_truth_source': '/opt/ml/processing/groundtruth',
  'output_path': '/opt/ml/processing/output'},
 'AppSpecification': {'ImageUri': '156813124566.dkr.ecr.us-east-1.amazonaws.com/sagemaker-model-monitor-groundtruth-merger',
  'ContainerEntrypoint': None,
  'ContainerArguments': None},
 'ProcessingInputs': [{'InputName': 'groundtruth_input_1',
   'AppManaged': False,
   'S3Input': {'LocalPath': '/opt/ml/processing/groundtruth/2022/06/07/04',
    'S3Uri': 's3://sagemaker-us-east-1-525102048888/sagemaker/DEMO-ClarifyModelMonitor-20200901/ground_truth_data/2022-06-07-04-37-00/2022/06/07/04',
    'S3DataDistributionType': 'FullyReplicated',
    'S3DataType': 'S3Prefix',
    'S3InputMode': 'File',
   

# Trying a new ground truth merge job

In [3]:
# Ground-truth merge job for the NLP batch data of 2022-10-26, hour 10: one
# ground-truth input and one endpoint-data input go to the groundtruth-merger
# image, and the result is uploaded when the job ends.
response = client.create_processing_job(
    ProcessingJobName='NLP-GT-New-T1',
    RoleArn='arn:aws:iam::525102048888:role/wipuattt-pricing-real-time-mo-MonitorSageMakerRole-1NTM2IM8BWN6M',
    AppSpecification={
        'ImageUri': '156813124566.dkr.ecr.us-east-1.amazonaws.com/sagemaker-model-monitor-groundtruth-merger',
    },
    ProcessingResources={
        'ClusterConfig': {'InstanceCount': 1, 'InstanceType': 'ml.m5.xlarge', 'VolumeSizeInGB': 100},
    },
    StoppingCondition={'MaxRuntimeInSeconds': 3600},
    Environment={
        'dataset_source': '/opt/ml/processing/input/endpoint',
        'ground_truth_source': '/opt/ml/processing/groundtruth',
        'output_path': '/opt/ml/processing/output',
        "start_time": "2022-10-26T10:00:00Z",
        "end_time": "2022-10-26T11:00:00Z",
    },
    ProcessingInputs=[
        {
            # Ground-truth records, placed below ground_truth_source.
            'InputName': 'groundtruth_input_1',
            'AppManaged': False,
            'S3Input': {
                'LocalPath': '/opt/ml/processing/groundtruth/gtinput',
                'S3Uri': "s3://wi-cred-datalake-dev-raw/transformed/nlp/monitoring/inbound/currentrun/groundtruth/batch/turi/2022/10/26/10/",
                'S3DataDistributionType': 'FullyReplicated', 'S3DataType': 'S3Prefix', 'S3InputMode': 'File',
            },
        },
        {
            # Inference data, placed below dataset_source.
            'InputName': 'endpoint_input_1',
            'AppManaged': False,
            'S3Input': {
                'LocalPath': "/opt/ml/processing/input/endpoint/poc/AllTraffic/2022/10/26/10/",
                'S3Uri': "s3://wi-cred-datalake-dev-raw/transformed/nlp/monitoring/inbound/batch/turi/2022/10/26/10/",
                'S3DataDistributionType': 'FullyReplicated', 'S3DataType': 'S3Prefix', 'S3InputMode': 'File',
            },
        },
        # Disabled: 'pre_processor_script' input
        #   s3://wipcoe-ml-s3-config-bucket/monitoring/inferpreprocessingscript.py
        #   -> /opt/ml/processing/code/preprocessing
        # Disabled: 'post_processor_script' input
        #   s3://sagemaker-us-east-1-525102048888/sagemaker/DEMO-ModelMonitor/code/postprocessor.py
        #   -> /opt/ml/processing/code/postprocessing
    ],
    ProcessingOutputConfig={
        'Outputs': [
            {
                'OutputName': 'result',
                'AppManaged': False,
                'S3Output': {
                    'LocalPath': '/opt/ml/processing/output',
                    'S3Uri': "s3://wi-cred-datalake-dev-raw/transformed/monitoring/inbound/groundtruthmerge/batch/turi",
                    'S3UploadMode': 'EndOfJob',
                },
            },
        ],
    },
)