# Make sure this SageMakerNotebookExecutionRole has access to Kendra

In [None]:
import boto3
import sagemaker
import pandas as pd

# SageMaker session plus the values derived from it that later cells reuse:
# the session's default S3 bucket, the notebook's execution role, and the
# region configured for the default boto3 session.
sess = sagemaker.Session()
bucket = sess.default_bucket()
role = sagemaker.get_execution_role()
region = boto3.Session().region_name

# Low-level SageMaker API client bound to the region resolved above.
sm = boto3.Session().client(
    region_name=region,
    service_name='sagemaker',
)

In [None]:
# Kendra API client created from the default boto3 session; every Kendra
# call in the cells below goes through this object.
kendra = boto3.client(service_name='kendra')

In [None]:
# Restore `noheader_train_s3_uri` from IPython's %store persistence
# (presumably saved by an earlier notebook -- confirm it has been run).
%store -r noheader_train_s3_uri

# Echo the restored S3 URI so a missing or stale value is easy to spot.
print(noheader_train_s3_uri)

In [None]:
# Shell out to the AWS CLI to list what is stored at the restored S3 URI.
!aws s3 ls $noheader_train_s3_uri

# Create Data Access Role for Kendra

## Create Policy

In [None]:
# assume_role_policy_doc = {
#   "Version": "2012-10-17",
#   "Statement": [
#     {
#       "Effect": "Allow",
#       "Principal": {
#         "Service": "kendra.amazonaws.com"
#       },
#       "Action": "sts:AssumeRole"
#     }
#   ]
# } 

## Create Role and Attach Policies

In [None]:
# iam_kendra_role_name = 'DSOAWS_Kendra'

In [None]:
# import json
# import boto3
# from botocore.exceptions import ClientError

# try:
#     iam = boto3.client('iam')

#     iam_role_kendra = iam.create_role(
#         RoleName=iam_kendra_role_name,
#         AssumeRolePolicyDocument=json.dumps(assume_role_policy_doc),
#         Description='DSOAWS Kendra Role'
#     )
# except ClientError as e:
#     if e.response['Error']['Code'] == 'EntityAlreadyExists':
#         iam_role_kendra = iam.get_role(RoleName=iam_kendra_role_name)
#         print("Role already exists")
#     else:
#         print("Unexpected error: %s" % e)

In [None]:
# kendra_s3_policy_doc = {
#     "Version": "2012-10-17",
#     "Statement": [
#         {
#             "Action": [
#                 "s3:GetObject"
#             ],
#             "Resource": [
#                 "arn:aws:s3:::{}/*".format(bucket)
#             ],
#             "Effect": "Allow"
#         },
#         {
#             "Action": [
#                 "s3:ListBucket"
#             ],
#             "Resource": [
#                 "arn:aws:s3:::{}".format(bucket)
#             ],
#             "Effect": "Allow"
#         },
#         {
#             "Action": [
#                 "s3:PutObject"
#             ],
#             "Resource": [
#                 "arn:aws:s3:::{}/*".format(bucket)
#             ],
#             "Effect": "Allow"
#         }
#     ]
# }

# print(kendra_s3_policy_doc)


# Attach Policy to Role

In [None]:
# response = iam.put_role_policy(
#     RoleName=iam_kendra_role_name,
#     PolicyName='DSOAWS_KendraPolicyToS3',
#     PolicyDocument=json.dumps(kendra_s3_policy_doc)
# )

# Add S3 Data Source

In [None]:
# Key prefix under the default bucket reserved for this notebook.
prefix = 'kendra'

# Full S3 URI of the "output" folder beneath that prefix.
s3_output_job = f's3://{bucket}/{prefix}/output'
print(s3_output_job)

In [None]:
print("Create an S3 data source")

# Existing Kendra index and the IAM role passed to Kendra for data access.
# NOTE(review): both values are hard-coded for one specific AWS account and
# index -- replace them with your own before running.
index_id = 'e9d93f01-5fd3-46ba-bc73-41fae0185d3a'
kendra_role_arn = 'arn:aws:iam::835319576252:role/service-role/AmazonKendra-us-east-1-dsoaws'

# Display name and description of the new data source.
name = 'amazon-reviews'
description = 'amazon-reviews'

# Restrict the data source to objects in the default bucket whose keys
# start with the prefix below. The prefix is cut off after "nohe" because
# the length is limited (per the original author's note).
configuration = {
    'S3Configuration': {
        'BucketName': bucket,
        'InclusionPrefixes': [
            'data/amazon_reviews_us_Digital_Software_v1_00_nohe',
        ],
    },
}

# Register the S3 data source with the index; the response is kept so that
# later cells can read the new data source's Id from it.
data_source_response = kendra.create_data_source(
    IndexId=index_id,
    Name=name,
    Description=description,
    Type='S3',
    RoleArn=kendra_role_arn,
    Configuration=configuration,
)

In [None]:
# Show the raw create_data_source response (the wait cell reads 'Id' from it).
print(data_source_response)

# Wait for Kendra Data Source Creation

In [None]:
import time

print(data_source_response)

# Id of the data source created above; the sync cell further down reuses it.
data_source_id = data_source_response['Id']

# Poll every 30 seconds until the data source leaves the CREATING state.
while True:
    data_source_description = kendra.describe_data_source(
        Id=data_source_id,
        IndexId=index_id
    )
    status = data_source_description['Status']
    print('Creating data source. Status: ' + status)
    if status != 'CREATING':
        break
    time.sleep(30)

# The loop also ends when creation fails. Surface the reason instead of
# leaving it to show up later as a confusing error in the sync cell.
if status == 'FAILED':
    print('Data source creation failed: '
          + data_source_description.get('ErrorMessage', 'no error message returned'))
    


# Train the FAQ

In [None]:
# S3 location (default bucket + object key) of the CSV handed to Kendra as
# the FAQ source.
# NOTE(review): this key points at the reviews file "with header"; confirm
# the file matches the FAQ CSV layout Kendra expects for the default format.
faq_path = {
    'Bucket': bucket,
    'Key': 'data/amazon_reviews_us_Digital_Software_v1_00_header.csv',
}

# Create the FAQ on the index. The variable keeps its historical name
# `training_job` because the following cell prints it; the value is simply
# the create_faq response.
training_job = kendra.create_faq(
    IndexId=index_id,
    Name='amazon-reviews-faq',
    RoleArn=kendra_role_arn,
    S3Path=faq_path,
)

# training_job = comprehend.create_document_classifier(
#     DocumentClassifierName='Amazon-Customer-Reviews-Classifier-'+ id,
#     DataAccessRoleArn=iam_role_comprehend_arn,
#     InputDataConfig={
#         'S3Uri': noheader_train_s3_uri
#     },
#     OutputDataConfig={
#         'S3Uri': s3_output_job
#     },
#     LanguageCode='en'
# )

In [None]:
# Show the raw create_faq response stored in `training_job`.
print(training_job)

In [None]:
print("Synchronize the data source.")

# Start a sync job for the data source created earlier in this notebook.
# The call only starts the job and returns its response; nothing here waits
# for the sync to finish.
sync_response = kendra.start_data_source_sync_job(
    IndexId=index_id,
    Id=data_source_id,
)

In [None]:
# Show the raw start_data_source_sync_job response.
print(sync_response)

# _Please wait until the data source sync started above has finished before running the query below._

In [None]:
# Free-text query sent to the index.
query = '5'

# Run the search; the raw response is kept for the result-printing cell.
response = kendra.query(IndexId=index_id, QueryText=query)

print(response)

In [None]:
# Pretty-print every item of the Kendra query response produced above.
print ('\nSearch results for query: ' + query + '\n')
for query_result in response['ResultItems']:
    print('-------------------')
    print('Type: ' + str(query_result['Type']))

    # FAQ matches come back as QUESTION_ANSWER and extracted answers as
    # ANSWER; both carry their text in DocumentExcerpt. Previously only
    # ANSWER was handled, so FAQ hits printed nothing but their type.
    if query_result['Type'] in ('ANSWER', 'QUESTION_ANSWER'):
        answer_text = query_result['DocumentExcerpt']['Text']
        print(answer_text)

    if query_result['Type']=='DOCUMENT':
        # The title is only printed when the item has one.
        if 'DocumentTitle' in query_result:
            document_title = query_result['DocumentTitle']['Text']
            print('Title: ' + document_title)
        document_text = query_result['DocumentExcerpt']['Text']
        print(document_text)
    print ('------------------\n\n')