# <B> Setup for Amazon Textract examples </B>

## AutoReload

In [1]:
# Load IPython's autoreload extension and select mode 2, so that edits to imported
# modules (such as the `utils` helpers imported below) are picked up without
# restarting the kernel.
%load_ext autoreload
%autoreload 2

## 1. Set roles

In [2]:
from sagemaker import get_execution_role

In [3]:
# Keep only the text after the last "/" of the value returned by
# get_execution_role(); that trailing segment is used as the role name below.
strSageMakerRoleName = get_execution_role().rpartition('/')[2]
print(f"SageMaker Execution Role Name: {strSageMakerRoleName}")

SageMaker Execution Role Name: AmazonSageMaker-ExecutionRole-20230307T130501


### 1.1 Attach IAM policy to SageMaker execution role (<b>with console</b>)
> step 1. IAM console 로 이동    
>
> step 2. 왼쪽 네비게이터에서  "Role" 선택  
> ![nn](images/Role.png)  
>
> step 3. SageMaker Execution Role 검색 후 role 선택 (상위 cell output 참조)  
> ![nn](images/search-by-rolename.png)  
>
> step 4. "attach policies" 메뉴 선택 
> ![nn](images/attach-policy-menu.png)  
>
> step 5. "IAMFullAccess" policy 검색 후 attach 
> ![nn](images/attach-policy.png) 

In [4]:
from utils.iam import iam_handler

In [5]:
# Instantiate the IAM helper imported from utils.iam; it is used below to attach
# managed policies to the execution role.
iam = iam_handler()

### 1.2 Attach policy - SageMaker execution role
> SSM <br>
> S3 <br>
> Textract <br>
> Comprehend

In [6]:
# AWS managed policies to attach to the SageMaker execution role:
# Systems Manager (SSM), S3, Textract and Comprehend.
listPolicyArn = [
    "arn:aws:iam::aws:policy/AmazonSSMFullAccess",
    "arn:aws:iam::aws:policy/AmazonS3FullAccess",
    "arn:aws:iam::aws:policy/AmazonTextractFullAccess",
    "arn:aws:iam::aws:policy/ComprehendFullAccess",
]

# Attach the policies one at a time, in list order.
for strPolicyArn in listPolicyArn:
    iam.attach_policy(strSageMakerRoleName, strPolicyArn)

## 2. Set default parameters

In [7]:
import boto3
import sagemaker
from utils.ssm import parameter_store

In [8]:
# Region name of the default boto3 session.
# NOTE(review): boto3 reports None here when no default region is configured —
# confirm a region is always set in the environment this notebook runs in.
strRegionName=boto3.Session().region_name
# Parameter-store helper from utils.ssm, constructed with that region.
pm = parameter_store(strRegionName)

In [10]:
# Prefix prepended to the parameter keys written by the following cells; the
# prefix itself is also stored under the key "PREFIX".
strPrefix = "LGE-PoC-"

In [11]:
# Persist the shared settings through the parameter-store helper. The prefix is
# stored under the fixed key "PREFIX"; every other key is namespaced with it.
pm.put_params(key="PREFIX", value=strPrefix, overwrite=True)
pm.put_params(key=strPrefix + "REGION", value=strRegionName, overwrite=True)
pm.put_params(
    key=strPrefix + "BUCKET",
    value=sagemaker.Session().default_bucket(),
    overwrite=True,
)
pm.put_params(
    key=strPrefix + "SAGEMAKER-ROLE-ARN",
    value=get_execution_role(),
    overwrite=True,
)
# Kept as the final expression of the cell so its return value is still displayed.
pm.put_params(
    key=strPrefix + "ACCOUNT-ID",
    value=boto3.client("sts").get_caller_identity().get("Account"),
    overwrite=True,
)

'Store suceess'

In [21]:
# PoC data: store the data bucket name under the "<prefix>DATA-BUCKET" key.
pm.put_params(key="".join([strPrefix, "DATA-BUCKET"]), value="lge-nerp-dx-poc-textract", overwrite=True)

'Store suceess'

## 3. Install packages

In [12]:
!sudo yum install poppler-utils -y
!pip install textract-trp pdf2image

Looking in indexes: https://pypi.org/simple, https://pip.repos.neuron.amazonaws.com
Collecting textract-trp
  Downloading textract_trp-0.1.3-py3-none-any.whl (5.8 kB)
Installing collected packages: textract-trp
Successfully installed textract-trp-0.1.3
