# Set Up Comprehend Through the CLI/API:
https://docs.aws.amazon.com/comprehend/latest/dg/get-started-customclass.html

# Set Up Comprehend Through the AWS Console

https://docs.aws.amazon.com/comprehend/latest/dg/getting-started-document-classification.html

Good example of using Comprehend for Positive/Negative Sentiment:  https://github.com/aws-samples/amazon-comprehend-custom-entity/blob/master/3-AWS-Comprehend-Negative-Custom-Classifier.ipynb

# Make sure this SageMakerNotebookExecutionRole has access to Comprehend

In [None]:
import boto3
import sagemaker
import pandas as pd

# SageMaker session, its default S3 bucket, and the execution role
# reported by the SageMaker SDK.
sess = sagemaker.Session()
bucket = sess.default_bucket()
role = sagemaker.get_execution_role()

# Region configured for the default boto3 session.
region = boto3.Session().region_name

# Low-level SageMaker client pinned to that region.
sm = boto3.Session().client(
    service_name='sagemaker',
    region_name=region,
)

In [None]:
# Boto3 client for Amazon Comprehend; no region is passed, so boto3's
# default region/credential resolution applies.
comprehend = boto3.client('comprehend')

In [None]:
# Restore `noheader_train_s3_uri` from IPython's %store database
# (presumably saved by an earlier notebook -- TODO confirm which one).
%store -r noheader_train_s3_uri

# Echo the restored URI so a missing or stale value is obvious.
print(noheader_train_s3_uri)

In [None]:
# List what is stored at the training-data S3 URI using the AWS CLI.
!aws s3 ls $noheader_train_s3_uri

# Setup IAM Roles and Policies
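TODO: Fix this section. The S3 and Comprehend policy documents in the cells below are commented out and are not applied anywhere in this notebook.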

In [None]:
# Show the role ARN that is later passed to Comprehend as DataAccessRoleArn.
print(role)

In [None]:
# s3_policy = """
# {
#     "Version": "2012-10-17",
#     "Statement": [
#         {
#             "Action": [
#                 "s3:GetObject"
#             ],
#             "Resource": [
#                 "arn:aws:s3:::{}/*"
#             ],
#             "Effect": "Allow"
#         },
#         {
#             "Action": [
#                 "s3:ListBucket"
#             ],
#             "Resource": [
#                 "arn:aws:s3:::{}"
#             ],
#             "Effect": "Allow"
#         },
#         {
#             "Action": [
#                 "s3:PutObject"
#             ],
#             "Resource": [
#                 "arn:aws:s3:::{}/*"
#             ],
#             "Effect": "Allow"
#         }
#     ]
# }
# """.format(bucket, bucket, bucket)

# print(s3_policy)


In [None]:
# comprehend_policy = """
# {
#     "Version": "2012-10-17",
#     "Statement": [
#         {
#             "Action": [
#                 "comprehend:DetectDominantLanguage",
#                 "comprehend:BatchDetectDominantLanguage",
#                 "comprehend:DetectEntities",
#                 "comprehend:BatchDetectEntities",
#                 "comprehend:DetectKeyPhrases",
#                 "comprehend:BatchDetectKeyPhrases",
#                 "comprehend:DetectSentiment",
#                 "comprehend:BatchDetectSentiment",
#                 "comprehend:DetectSyntax",
#                 "comprehend:BatchDetectSyntax",
#                 "comprehend:ClassifyDocument",
#                 "comprehend:DescribeTopicsDetectionJob",
#                 "comprehend:ListTopicsDetectionJobs",
#                 "comprehend:DescribeDominantLanguageDetectionJob",
#                 "comprehend:ListDominantLanguageDetectionJobs",
#                 "comprehend:DescribeEntitiesDetectionJob",
#                 "comprehend:ListEntitiesDetectionJobs",
#                 "comprehend:DescribeKeyPhrasesDetectionJob",
#                 "comprehend:ListKeyPhrasesDetectionJobs",
#                 "comprehend:DescribeSentimentDetectionJob",
#                 "comprehend:ListSentimentDetectionJobs",
#                 "comprehend:DescribeDocumentClassifier",
#                 "comprehend:ListDocumentClassifiers",
#                 "comprehend:DescribeDocumentClassificationJob",
#                 "comprehend:ListDocumentClassificationJobs",
#                 "comprehend:DescribeEntityRecognizer",
#                 "comprehend:ListEntityRecognizers",
#                 "comprehend:ListTagsForResource",
#                 "comprehend:DescribeEndpoint",
#                 "comprehend:ListEndpoints"
#             ],
#             "Effect": "Allow",
#             "Resource": "*"
#         }
#     ]
# }
# """

# print(comprehend_policy)

# Train our Model

In [None]:
# Key prefix under the default bucket where model artifacts are kept.
prefix = 'models'

# Destination S3 URI for the Comprehend training job's output.
s3_output_job = 's3://{}/{}/{}'.format(
    bucket,
    prefix,
    'comprehend/output',
)
print(s3_output_job)

In [None]:
import datetime

# Epoch-seconds suffix so each run creates a uniquely named classifier.
# `.timestamp()` replaces `strftime("%s")`, which is a non-portable platform
# extension, and the value is no longer bound to `id`, which shadowed the
# builtin of the same name.
timestamp = str(int(datetime.datetime.now().timestamp()))

# Start training a custom document classifier. The call returns immediately
# with the classifier's ARN.
training_job = comprehend.create_document_classifier(
    DocumentClassifierName='Amazon-Customer-Reviews-Classifier-' + timestamp,
    DataAccessRoleArn=role,  # role Comprehend uses to access the S3 data
    InputDataConfig={
        'S3Uri': noheader_train_s3_uri,
    },
    OutputDataConfig={
        'S3Uri': s3_output_job,
    },
    LanguageCode='en',
)

In [None]:
# `time` must be imported here: this cell runs before any other cell that
# imports it, so without this line the loop fails with NameError.
import time

# ARN of the classifier returned by create_document_classifier.
jobArn = training_job['DocumentClassifierArn']

# Poll the classifier status until it is TRAINED or IN_ERROR, giving up
# after a 3-hour deadline.
max_time = time.time() + 3 * 60 * 60  # 3 hours
while time.time() < max_time:
    describe_custom_classifier = comprehend.describe_document_classifier(
        DocumentClassifierArn=jobArn
    )
    status = describe_custom_classifier["DocumentClassifierProperties"]["Status"]
    print("Custom classifier: {}".format(status))

    if status in ("TRAINED", "IN_ERROR"):
        break

    # Wait between polls to avoid hammering the Comprehend API.
    time.sleep(5)

# Get Endpoint

In [None]:
# Alternative when the endpoint was created in this session:
#endpoint_arn = inference_endpoint_response["EndpointArn"]

# Look up the caller's AWS account ID via STS; previously `account_id` was
# never defined, so building the ARN raised NameError.
account_id = boto3.client('sts').get_caller_identity()['Account']

# NOTE(review): assumes a Comprehend endpoint named 'reviews-star-rating'
# already exists in this account and region -- nothing visible in this
# notebook creates it; confirm before running.
endpoint_arn = 'arn:aws:comprehend:{}:{}:document-classifier-endpoint/reviews-star-rating'.format(region, account_id)


In [None]:
# Fetch the current description of the endpoint identified by `endpoint_arn`.
describe_response = comprehend.describe_endpoint(
    EndpointArn = endpoint_arn
)

In [None]:
import time

# Poll the endpoint status until it reaches a terminal state, giving up
# after a 3-hour deadline.
max_time = time.time() + 3 * 60 * 60  # 3 hours
while time.time() < max_time:
    describe_response = comprehend.describe_endpoint(
        EndpointArn=endpoint_arn
    )
    status = describe_response["EndpointProperties"]["Status"]
    print("Endpoint: {}".format(status))

    # Comprehend reports a failed endpoint as "FAILED"; "IN_ERROR" is kept
    # only for backward compatibility with the original check.
    if status in ("IN_SERVICE", "FAILED", "IN_ERROR"):
        break

    # Wait between polls; without this the loop spins on the API and risks
    # throttling (matches the 5-second interval of the classifier loop).
    time.sleep(5)

In [None]:
import json

# Classify a clearly positive review with the custom endpoint and
# pretty-print the raw response.
txt = "This product is awesome."

response = comprehend.classify_document(EndpointArn=endpoint_arn, Text=txt)

print(json.dumps(response, default=str, indent=2))

In [None]:
import json

# Classify a neutral review with the custom endpoint and pretty-print the
# raw response.
txt = "This product is ok."

response = comprehend.classify_document(EndpointArn=endpoint_arn, Text=txt)

print(json.dumps(response, default=str, indent=2))

In [None]:
import json

# Classify a clearly negative review with the custom endpoint and
# pretty-print the raw response.
txt = "This product is terrible."

response = comprehend.classify_document(EndpointArn=endpoint_arn, Text=txt)

print(json.dumps(response, default=str, indent=2))