# Experiments for service monitoring


# API Test

In [None]:
from pprint import pprint
import boto3

# Service name handed to boto3 when the client is created.
sagemaker_label = 'sagemaker'

# Low-level SageMaker client obtained from a freshly created boto3 session.
sm = boto3.Session().client(service_name=sagemaker_label)

## Domain

In [None]:
# Call list_domains on the `sm` client and pretty-print the raw response.
domains = sm.list_domains()
pprint(domains)

## Service Quotas

In [None]:
# bring your own access key and secret key (secure_config must define access_key and secret_key)
import secure_config

In [None]:
# Session used for the Service Quotas calls: fixed to eu-west-1 and
# authenticated with the key pair read from the `secure_config` module.
sq_session = boto3.Session(
    aws_access_key_id=secure_config.access_key,
    aws_secret_access_key=secure_config.secret_key,
    region_name='eu-west-1',
)

In [None]:
# Service Quotas client created from `sq_session`.
sq_client = sq_session.client(service_name='service-quotas')

In [None]:
# Print "code -> name" for the services returned by list_services,
# following NextToken continuation and stopping after at most 10 requests.
print("Services:")
parameters = dict(MaxResults=99)
page, next_page = 0, True
while page < 10 and next_page:
    response = sq_client.list_services(**parameters)
    for service in response['Services']:
        print(f"{service['ServiceCode']} -> {service['ServiceName']}")
    # Another request is made only when the response carries a NextToken.
    next_page = 'NextToken' in response
    if next_page:
        parameters['NextToken'] = response['NextToken']
        page += 1

In [None]:
# Print "name: value unit" for the AWS default quotas of the 'ec2' service
# code, following NextToken continuation and stopping after at most 10 requests.
print("Quotas:")
parameters = dict(ServiceCode='ec2', MaxResults=99)
page, next_page = 0, True
while page < 10 and next_page:
    response = sq_client.list_aws_default_service_quotas(**parameters)
    print(f"{page=}")
    for quota in response['Quotas']:
        name, value = quota['QuotaName'], quota['Value']
        # A unit equal to the string 'None' is shown as an empty string.
        unit = "" if quota['Unit'] == 'None' else quota['Unit']
        print(f"{name}: {value} {unit}")
    # Another request is made only when the response carries a NextToken.
    next_page = 'NextToken' in response
    if next_page:
        parameters['NextToken'] = response['NextToken']
        page += 1

In [None]:
# Fetch a single quota through the Service Quotas client (`sq_client`).
# The codes are an example pair (EC2, "Running On-Demand Standard instances");
# replace them with the ServiceCode / QuotaCode of interest.
response = sq_client.get_service_quota(
    ServiceCode='ec2',
    QuotaCode='L-1216C47A'
)

sagemaker pipeline requires

[ml.trn1.32xlarge, ml.p2.xlarge, ml.m5.4xlarge, ml.m4.16xlarge, ml.p4d.24xlarge, ml.g5.2xlarge, ml.c5n.xlarge, ml.p3.16xlarge, ml.m5.large, ml.p2.16xlarge, ml.g5.4xlarge, ml.c4.2xlarge, ml.c5.2xlarge, ml.c4.4xlarge, ml.g5.8xlarge, ml.c5.4xlarge, ml.c5n.18xlarge, ml.g4dn.xlarge, ml.g4dn.12xlarge, ml.c4.8xlarge, ml.g4dn.2xlarge, ml.c5.9xlarge, ml.g4dn.4xlarge, ml.c5.xlarge, ml.g4dn.16xlarge, ml.c4.xlarge, ml.g4dn.8xlarge, ml.g5.xlarge, ml.c5n.2xlarge, ml.g5.12xlarge, ml.g5.24xlarge, ml.c5n.4xlarge, ml.trn1.2xlarge, ml.c5.18xlarge, ml.p3dn.24xlarge, ml.g5.48xlarge, ml.g5.16xlarge, ml.p3.2xlarge, ml.m5.xlarge, ml.m4.10xlarge, ml.c5n.9xlarge, ml.m5.12xlarge, ml.m4.xlarge, ml.m5.24xlarge, ml.m4.2xlarge, ml.p2.8xlarge, ml.m5.2xlarge, ml.p4de.24xlarge, ml.p3.8xlarge, ml.m4.4xlarge]

# Multiregion

In [None]:
from pprint import pprint
from boto3.session import Session
import botocore

# Service name used for the region lookup and for the per-region clients.
sagemaker_label = 'sagemaker'

# Ask a default session for the regions available for that service and show them.
session = Session()
sagemaker_regions = session.get_available_regions(service_name=sagemaker_label)

pprint(sagemaker_regions)

In [None]:
# Keep only the regions whose name starts with the chosen prefix.
region_filter = 'eu'
selected_regions = [
    name
    for name in sagemaker_regions
    if name.startswith(region_filter)
]

pprint(selected_regions)

In [None]:
# One SageMaker client per selected region, stored as (region, client) pairs.
# A list (instead of a set) keeps the pairs in the order of
# `selected_regions`, so every loop over them visits regions in the same,
# reproducible order.
sagemaker_clients = [
    (region, Session(region_name=region).client(sagemaker_label))
    for region in selected_regions
]

pprint(sagemaker_clients)

## Domains

In [None]:
# Build domain_ref: region -> list of the SageMaker domain ids found there.
# Regions without any domain get no entry.
domain_ref = {}
for region, client in sagemaker_clients:
    domain_ids = []
    try:
        # The paginator follows NextToken, so domains beyond the first
        # result page of list_domains are included as well.
        for result_page in client.get_paginator('list_domains').paginate():
            for domain in result_page['Domains']:
                domain_id = domain['DomainId']
                domain_name = domain['DomainName']
                domain_status = domain['Status']
                print(f"{region=} {domain_id} {domain_name} {domain_status}")
                domain_ids.append(domain_id)
    except botocore.exceptions.ClientError as exc:
        # Best effort: a region that rejects the call is reported and
        # skipped instead of aborting the scan or failing silently.
        error_code = exc.response.get('Error', {}).get('Code', 'Unknown')
        print(f"{region=} skipped ({error_code})")
    if domain_ids:
        domain_ref[region] = domain_ids

print("\ndomain_ref")
pprint(domain_ref)

### Domain - User Profiles

API reference
```python
response = client.list_user_profiles(
    NextToken='string',
    MaxResults=123,
    SortOrder='Ascending'|'Descending',
    SortBy='CreationTime'|'LastModifiedTime',
    DomainIdEquals='string',
    UserProfileNameContains='string'
)
```

In [None]:
# Print name and status of every user profile in every domain of domain_ref.
for region, client in sagemaker_clients:
    # Regions without a recorded domain contribute nothing.
    for domain_id in domain_ref.get(region, []):
        try:
            # Paginate so profiles beyond the first result page are not dropped.
            pages = client.get_paginator('list_user_profiles').paginate(
                DomainIdEquals=domain_id
            )
            for result_page in pages:
                for user_profile in result_page['UserProfiles']:
                    user_profile_name = user_profile['UserProfileName']
                    user_profile_status = user_profile['Status']
                    print(f"{region=} {domain_id} {user_profile_name} {user_profile_status}")
        except botocore.exceptions.ClientError as exc:
            # Report which domain was skipped and why, then carry on.
            error_code = exc.response.get('Error', {}).get('Code', 'Unknown')
            print(f"skipped {region=} {domain_id} ({error_code})")

### Domain - Spaces

API Reference
```python
response = client.list_spaces(
    NextToken='string',
    MaxResults=123,
    SortOrder='Ascending'|'Descending',
    SortBy='CreationTime'|'LastModifiedTime',
    DomainIdEquals='string',
    SpaceNameContains='string'
)
```

In [None]:
# Print name and status of every space in every domain of domain_ref.
for region, client in sagemaker_clients:
    # Regions without a recorded domain contribute nothing.
    for domain_id in domain_ref.get(region, []):
        try:
            # Paginate so spaces beyond the first result page are not dropped.
            pages = client.get_paginator('list_spaces').paginate(
                DomainIdEquals=domain_id
            )
            for result_page in pages:
                for space in result_page['Spaces']:
                    space_name = space['SpaceName']
                    space_status = space['Status']
                    print(f"{region=} {domain_id} {space_name} {space_status}")
        except botocore.exceptions.ClientError as exc:
            # Report which domain was skipped and why, then carry on.
            error_code = exc.response.get('Error', {}).get('Code', 'Unknown')
            print(f"skipped {region=} {domain_id} ({error_code})")

### Domain - Apps

API Reference
```python
response = client.list_apps(
    NextToken='string',
    MaxResults=123,
    SortOrder='Ascending'|'Descending',
    SortBy='CreationTime',
    DomainIdEquals='string',
    UserProfileNameEquals='string',
    SpaceNameEquals='string'
)
```

In [None]:
# Print owner, type, name and status of every app in every domain of domain_ref.
for region, client in sagemaker_clients:
    # Regions without a recorded domain contribute nothing.
    for domain_id in domain_ref.get(region, []):
        try:
            # Paginate so apps beyond the first result page are not dropped.
            pages = client.get_paginator('list_apps').paginate(
                DomainIdEquals=domain_id
            )
            for result_page in pages:
                for app in result_page['Apps']:
                    app_name = app['AppName']
                    app_type = app['AppType']
                    # UserProfileName is not guaranteed to be present (e.g.
                    # apps that belong to a space); fall back to the space
                    # name instead of raising KeyError.
                    owner = app.get('UserProfileName') or f"space:{app.get('SpaceName')}"
                    app_status = app['Status']
                    print(f"{region=} {domain_id} {owner} {app_type} {app_name} {app_status}")
        except botocore.exceptions.ClientError as exc:
            # Report which domain was skipped and why, then carry on.
            error_code = exc.response.get('Error', {}).get('Code', 'Unknown')
            print(f"skipped {region=} {domain_id} ({error_code})")

### Domain - Instances

API Reference
```python
response = client.list_apps(
    NextToken='string',
    MaxResults=123,
    SortOrder='Ascending'|'Descending',
    SortBy='CreationTime',
    DomainIdEquals='string',
    UserProfileNameEquals='string',
    SpaceNameEquals='string'
)
```

```python
response = client.describe_app(
    DomainId='string',
    UserProfileName='string',
    AppType='JupyterServer'|'KernelGateway'|'TensorBoard'|'RStudioServerPro'|'RSessionGateway',
    AppName='string',
    SpaceName='string'
)
```

In [None]:
# Print the instance type of every app in every domain of domain_ref.
# list_apps supplies the apps; describe_app supplies ResourceSpec.InstanceType.
for region, client in sagemaker_clients:
    # Regions without a recorded domain contribute nothing.
    for domain_id in domain_ref.get(region, []):
        try:
            # Paginate so apps beyond the first result page are not dropped.
            pages = client.get_paginator('list_apps').paginate(
                DomainIdEquals=domain_id
            )
            apps = [app for result_page in pages for app in result_page['Apps']]
        except botocore.exceptions.ClientError as exc:
            error_code = exc.response.get('Error', {}).get('Code', 'Unknown')
            print(f"skipped {region=} {domain_id} ({error_code})")
            continue

        for app in apps:
            app_name = app['AppName']
            app_type = app['AppType']
            app_status = app['Status']

            # describe_app is addressed by user profile or by space; an app
            # listed without UserProfileName is looked up via its SpaceName.
            user_profile_name = app.get('UserProfileName')
            space_name = app.get('SpaceName')
            if user_profile_name:
                owner = user_profile_name
                owner_args = {'UserProfileName': user_profile_name}
            elif space_name:
                owner = f"space:{space_name}"
                owner_args = {'SpaceName': space_name}
            else:
                print(f"skipped {region=} {domain_id} {app_name} (no user profile or space)")
                continue

            # A failure for one app must not hide the remaining apps.
            try:
                details = client.describe_app(
                    DomainId=domain_id,
                    AppType=app_type,
                    AppName=app_name,
                    **owner_args
                )
            except botocore.exceptions.ClientError as exc:
                error_code = exc.response.get('Error', {}).get('Code', 'Unknown')
                print(f"skipped {region=} {domain_id} {app_name} ({error_code})")
                continue

            # Tolerate a response without ResourceSpec / InstanceType.
            instance_type = details.get('ResourceSpec', {}).get('InstanceType', 'unknown')
            print(f"{region=} {domain_id} {owner} {app_type} {instance_type} {app_status}")



### Domain - Configuration

esp. instance quotas

API Reference

```python
response = client.describe_domain(
    DomainId='string'
)
```


In [None]:
# Pretty-print the full describe_domain response of every domain in domain_ref.
for region, client in sagemaker_clients:
    if region in domain_ref:
        print(f"\n {region=}")
        for domain_id in domain_ref[region]:
            print(f"\n {domain_id=}")
            try:
                response = client.describe_domain(
                    DomainId=domain_id
                )
                pprint(response)
            except botocore.exceptions.ClientError as exc:
                # Report why the domain could not be described, then carry on.
                error_code = exc.response.get('Error', {}).get('Code', 'Unknown')
                print(f"skipped ({error_code})")


### Additional API for Domain

```python
response = client.describe_domain(
    DomainId='string'
)
```

```python
response = client.describe_user_profile(
    DomainId='string',
    UserProfileName='string'
)
```

```python
response = client.describe_space(
    DomainId='string',
    SpaceName='string'
)
```

```python
response = client.describe_app(
    DomainId='string',
    UserProfileName='string',
    AppType='JupyterServer'|'KernelGateway'|'TensorBoard'|'RStudioServerPro'|'RSessionGateway',
    AppName='string',
    SpaceName='string'
)
```

# References


- [SageMaker API Documentation](https://sagemaker.readthedocs.io/en/stable/api/index.html)
- [SageMaker Boto3 API Documentation](https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/sagemaker.html)


import_hub_content()
list_actions()
list_algorithms()
list_aliases()
list_app_image_configs()
list_artifacts()
list_associations()
list_auto_ml_jobs()
list_candidates_for_auto_ml_job()
list_code_repositories()
list_compilation_jobs()
list_contexts()
list_data_quality_job_definitions()
list_device_fleets()
list_devices()
list_domains()
list_edge_deployment_plans()
list_edge_packaging_jobs()
list_endpoint_configs()
list_endpoints()
list_experiments()
list_feature_groups()
list_flow_definitions()
list_hub_content_versions()
list_hub_contents()
list_hubs()
list_human_task_uis()
list_hyper_parameter_tuning_jobs()
list_image_versions()
list_images()
list_inference_experiments()
list_inference_recommendations_job_steps()
list_inference_recommendations_jobs()
list_labeling_jobs()
list_labeling_jobs_for_workteam()
list_lineage_groups()
list_model_bias_job_definitions()
list_model_card_export_jobs()
list_model_card_versions()
list_model_cards()
list_model_explainability_job_definitions()
list_model_metadata()
list_model_package_groups()
list_model_packages()
list_model_quality_job_definitions()
list_models()
list_monitoring_alert_history()
list_monitoring_alerts()
list_monitoring_executions()
list_monitoring_schedules()
list_notebook_instance_lifecycle_configs()
list_notebook_instances()
list_pipeline_execution_steps()
list_pipeline_executions()
list_pipeline_parameters_for_execution()
list_pipelines()
list_processing_jobs()
list_projects()
list_stage_devices()
list_studio_lifecycle_configs()
list_subscribed_workteams()
list_tags()
list_training_jobs()
list_training_jobs_for_hyper_parameter_tuning_job()
list_transform_jobs()
list_trial_components()
list_trials()
list_workforces()
list_workteams()


describe_action()
describe_algorithm()
describe_app_image_config()
describe_artifact()
describe_auto_ml_job()
describe_code_repository()
describe_compilation_job()
describe_context()
describe_data_quality_job_definition()
describe_device()
describe_device_fleet()
describe_edge_deployment_plan()
describe_edge_packaging_job()
describe_endpoint()
describe_endpoint_config()
describe_experiment()
describe_feature_group()
describe_feature_metadata()
describe_flow_definition()
describe_hub()
describe_hub_content()
describe_human_task_ui()
describe_hyper_parameter_tuning_job()
describe_image()
describe_image_version()
describe_inference_experiment()
describe_inference_recommendations_job()
describe_labeling_job()
describe_lineage_group()
describe_model()
describe_model_bias_job_definition()
describe_model_card()
describe_model_card_export_job()
describe_model_explainability_job_definition()
describe_model_package()
describe_model_package_group()
describe_model_quality_job_definition()
describe_monitoring_schedule()
describe_notebook_instance()
describe_notebook_instance_lifecycle_config()
describe_pipeline()
describe_pipeline_definition_for_execution()
describe_pipeline_execution()
describe_processing_job()
describe_project()
describe_studio_lifecycle_config()
describe_subscribed_workteam()
describe_training_job()
describe_transform_job()
describe_trial()
describe_trial_component()
describe_workforce()
describe_workteam()

Client:

this is the original boto3 API abstraction
it provides low-level AWS service access
all AWS service operations are supported by clients
it exposes botocore client to the developer
it typically maps 1:1 with the AWS service API
it exposes snake-cased method names (e.g. ListBuckets API => list_buckets method)
typically yields primitive, non-marshalled data (e.g. DynamoDB attributes are dicts representing primitive DynamoDB values)
requires you to code result pagination
it is generated from an AWS service description

Here's an example of client-level access to an S3 bucket's objects:

```python
import boto3

client = boto3.client('s3')

response = client.list_objects_v2(Bucket='mybucket')

for content in response['Contents']:
    obj_dict = client.get_object(Bucket='mybucket', Key=content['Key'])
    print(content['Key'], obj_dict['LastModified'])
```

Note: this client-level code is limited to listing at most 1000 objects. If there are more than 1000 objects, you have to use a paginator, or implement your own loop that calls list_objects_v2() repeatedly with a continuation marker.

Resource:

this is the newer boto3 API abstraction
it provides a high-level, object-oriented API
it does not provide 100% API coverage of AWS services
it uses identifiers and attributes
it has actions (operations on resources)
it exposes sub-resources and collections of AWS resources
typically yields marshalled data, not primitive AWS data (e.g. DynamoDB attributes are native Python values representing primitive DynamoDB values)
does result pagination for you
it is generated from an AWS resource description
Here's the equivalent example using resource-level access to an S3 bucket's objects:

```python
import boto3

s3 = boto3.resource('s3')

bucket = s3.Bucket('mybucket')

for obj in bucket.objects.all():
    print(obj.key, obj.last_modified)
```

Note: in this case you do not have to make a second API call to get the objects; they're available to you as a collection on the bucket. These collections of sub-resources are lazily-loaded.

You can see that the Resource version of the code is much simpler, more compact, and has more capability (for example it does pagination for you and it exposes properties instead of a raw dictionary). The Client version of the code would actually be more complicated than shown above if you wanted to include pagination.

Finally, onto Session which is fundamental to both Client and Resource and how both get access to AWS credentials, for example.