In [None]:
import boto3
from sagemaker import get_execution_role
import time
from datetime import datetime

In [None]:
# AWS service clients used throughout the notebook.
comprehend = boto3.client(service_name='comprehend')
s3 = boto3.client(service_name='s3')

# Execution role attached to this SageMaker notebook; shown as the cell output.
role = get_execution_role()
role

In [None]:
# Local training file and the S3 location it is copied to.
file_path = 'reports.csv'
bucket_name = 'comprehend-classification-datasets'
key = 'reports.csv'

# NOTE(review): boto3's upload_file is documented to return None, so the
# displayed `response` is expected to be empty; failures surface as exceptions.
response = s3.upload_file(
    Filename=file_path,
    Bucket=bucket_name,
    Key=key,
)
response

In [None]:
# IAM role passed to Comprehend as DataAccessRoleArn for reading the training data.
comprehend_role_arn = 'arn:aws:iam::590184001591:role/comprehend-role'

# S3 location of the training file, built from the bucket and key set earlier.
training_data_uri = 's3://{}/{}'.format(bucket_name, key)

# Submit the classifier training job; the response is displayed as the cell output.
classifier_response = comprehend.create_document_classifier(
    DocumentClassifierName='reports-classifier',
    LanguageCode='en',
    InputDataConfig={'S3Uri': training_data_uri},
    DataAccessRoleArn=comprehend_role_arn,
)
classifier_response

In [None]:
# Poll the classifier once a minute until training leaves the in-progress
# states, then fail loudly if it did not finish successfully. Without the
# explicit checks a timeout or a failed training job would fall through
# silently and only surface later as a confusing error in a downstream cell.
classifier_arn = classifier_response['DocumentClassifierArn']
max_time = time.time() + 3*60*60  # give up after three hours
while time.time() < max_time:
    classifier_description = comprehend.describe_document_classifier(
        DocumentClassifierArn=classifier_arn
    )
    classifier_properties = classifier_description['DocumentClassifierProperties']
    status = classifier_properties['Status']
    current_time = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    print('Status: ' + status, current_time)

    # Any status other than SUBMITTED/TRAINING means the job is no longer in progress.
    if status != 'TRAINING' and status != 'SUBMITTED':
        break

    time.sleep(60)
else:
    # The loop condition expired without ever hitting `break`.
    raise TimeoutError(
        'Classifier ' + classifier_arn + ' was still in progress after 3 hours'
    )

# NOTE(review): success statuses taken from the Comprehend API reference
# (TRAINED, TRAINED_WITH_WARNING) - confirm against the installed boto3 version.
if status not in ('TRAINED', 'TRAINED_WITH_WARNING'):
    raise RuntimeError(
        'Classifier training ended with status ' + status + ': '
        + str(classifier_properties.get('Message', 'no message returned'))
    )

In [None]:
# Parameters for the real-time endpoint that serves the classifier
# identified by classifier_arn.
endpoint_request = {
    'EndpointName': 'report-classification-endpoint',
    'ModelArn': classifier_arn,
    'DesiredInferenceUnits': 1,
}

# Request endpoint creation; the response is displayed as the cell output.
endpoint_response = comprehend.create_endpoint(**endpoint_request)
endpoint_response

In [None]:
# Poll the endpoint once a minute until it leaves CREATING, then fail loudly
# if it did not come up. Without the explicit checks a timeout or a failed
# deployment would pass silently and the notebook would look successful.
endpoint_arn = endpoint_response['EndpointArn']

max_time = time.time() + 3*60*60  # give up after three hours
while time.time() < max_time:
    endpoint_description = comprehend.describe_endpoint(
        EndpointArn=endpoint_arn
    )
    endpoint_properties = endpoint_description['EndpointProperties']
    status = endpoint_properties['Status']
    current_time = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    print('Status: ' + status, current_time)

    # Any status other than CREATING means provisioning has stopped.
    if status != 'CREATING':
        break

    time.sleep(60)
else:
    # The loop condition expired without ever hitting `break`.
    raise TimeoutError(
        'Endpoint ' + endpoint_arn + ' was still being created after 3 hours'
    )

# NOTE(review): IN_SERVICE is the ready state per the Comprehend API
# reference - confirm against the installed boto3 version.
if status != 'IN_SERVICE':
    raise RuntimeError(
        'Endpoint creation ended with status ' + status + ': '
        + str(endpoint_properties.get('Message', 'no message returned'))
    )