In [9]:
# Reference (creating an Amazon Kendra index): https://docs.aws.amazon.com/kendra/latest/dg/create-index.html

In [1]:
!pip install sagemaker boto3 --upgrade
# Restart kernel after install

Looking in indexes: https://pypi.org/simple, https://pip.repos.neuron.amazonaws.com
Collecting sagemaker
  Downloading sagemaker-2.169.0.tar.gz (851 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m851.8/851.8 kB[0m [31m22.0 MB/s[0m eta [36m0:00:00[0m00:01[0m
[?25h  Preparing metadata (setup.py) ... [?25ldone
Collecting boto3
  Downloading boto3-1.26.165-py3-none-any.whl (135 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m135.9/135.9 kB[0m [31m29.0 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting attrs<24,>=23.1.0 (from sagemaker)
  Using cached attrs-23.1.0-py3-none-any.whl (61 kB)
Collecting PyYAML==6.0 (from sagemaker)
  Using cached PyYAML-6.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (682 kB)
Collecting botocore<1.30.0,>=1.29.165 (from boto3)
  Downloading botocore-1.29.165-py3-none-any.whl (11.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m11.0/11.0 MB[0m [3

In [2]:
import sagemaker
import boto3

# Create the SageMaker session and read the region from its boto session.
sess = sagemaker.Session()
region = sess.boto_session.region_name

# Bucket used for uploading data, models and logs. Leave it as None to fall
# back to the session's default bucket (per the original note, SageMaker
# creates that bucket automatically when it does not exist yet).
default_bucket = None
if default_bucket is None:
    default_bucket = sess.default_bucket()

# Resolve the execution role ARN. When get_execution_role() raises ValueError,
# look the role up by its name through IAM instead.
try:
    role = sagemaker.get_execution_role()
except ValueError:
    iam = boto3.client('iam')
    role = iam.get_role(RoleName='sagemaker_execution_role')['Role']['Arn']

# Echo the resolved settings, one per line.
print(
    f"sagemaker role arn: {role}",
    f"sagemaker bucket: {default_bucket}",
    f"sagemaker session region: {region}",
    sep="\n",
)

sagemaker role arn: arn:aws:iam::808577411626:role/Slab01AIGCPracticerRole
sagemaker bucket: sagemaker-us-west-2-808577411626
sagemaker session region: us-west-2


In [3]:
# Upload the local Q&A CSV to the session bucket under the "kendra/qa" prefix.
default_bucket_prefix = "kendra/qa"
qa_source_path = sess.upload_data(
    "cleanroom-emr-msk-qa.csv",
    bucket=default_bucket,
    key_prefix=default_bucket_prefix,
)
# This S3 path is pasted into the Kendra FAQ form in the steps below.
print(qa_source_path)

s3://sagemaker-us-west-2-808577411626/kendra/qa/cleanroom-emr-msk-qa.csv


### Build Kendra Index

- On the Amazon Kendra page of the AWS console, locate the 'Create Index' button.

![图片.png](attachment:07f3e10e-2007-4e70-8127-205d741ad127.png)

- Specify a custom index 'Name', 'Description'
- 'Create new role' and specify a custom 'Role Name'
- Leave all other settings at their defaults and click 'Next' until the wizard finishes

![图片.png](attachment:34248047-a9ce-4d7a-bbf0-8febc01a8696.png)

- Check if the index status is 'Active'

![图片.png](attachment:48d6f456-e178-4d33-a701-b1263a332212.png)

- Copy the 'Index ID' to any text editor
- Click 'FAQs' in the left panel

![图片.png](attachment:a6e3df86-4380-4ea7-8d77-ce1e7133fe45.png)

![图片.png](attachment:dcae1d0d-48d3-4731-bafd-d93ef7d601d7.png)

- Specify names and descriptions, etc
- Choose the language code; this demo uses 'Chinese/zh'
- Choose 'csv-Basic' as file format
- Copy the above generated S3 file path (s3://xxx/xxx/abc.csv) and paste it here
- Choose the IAM role that was created above

**The Amazon Kendra role needs an S3 policy that allows it to access the data in the bucket**

In [None]:
{
    "Version": "2012-10-17",
    "Statement": [
        {
            "Sid": "Statement1",
            "Effect": "Allow",
            "Action": "s3:*",
            "Resource": [
                "arn:aws:s3:::sagemaker-us-west-2-808577411626",
                "arn:aws:s3:::sagemaker-us-west-2-808577411626/kendra/qa/*"
            ]
        }
    ]
}

**The sagemaker_execution_role role needs a Kendra policy that allows it to access the indexes**

In [None]:
{
    "Version": "2012-10-17",
    "Statement": [
        {
            "Sid": "VisualEditor0",
            "Effect": "Allow",
            "Action": "kendra:*",
            "Resource": "*"
        }
    ]
}

![图片.png](attachment:2b0e4a41-5498-4b50-8c2f-32dddc4e49c7.png)

- Wait until the FAQ source is in the 'Active' state