# <B> Setup for Anomaly Detection with SageMaker </B>
* Container: conda_pytorch_p310

## AutoReload

In [1]:
# Load IPython's autoreload extension; mode 2 reloads all imported modules
# before each cell runs, so edits to local .py files are picked up without a
# kernel restart.
%load_ext autoreload
%autoreload 2

In [2]:
import os
import sys

# Make the parent directory importable so the `utils` package used by later
# cells can be found.
module_path = ".."
# Guard against duplicates: re-running this cell would otherwise append the
# same absolute path to sys.path every time.
if os.path.abspath(module_path) not in sys.path:
    sys.path.append(os.path.abspath(module_path))

## 0. Install packages

In [3]:
# Controls the one-time setup cell below, which installs packages and then
# restarts the kernel. Leave True for the first run only; switch to the
# commented-out False afterwards so re-runs skip the install and restart.
install_needed = True  # should only be True once
# install_needed = False

In [4]:
%%bash
# Point Docker's data-root at the notebook volume and raise the default
# shared-memory size, then restart Docker. The change is skipped when
# daemon.json already contains a "data-root" key.

DAEMON_PATH="/etc/docker"
DAEMON_JSON="$DAEMON_PATH/daemon.json"
MEMORY_SIZE=10G

# jq prints "true"/"false" depending on whether the key is present.
FLAG=$(jq 'has("data-root")' "$DAEMON_JSON")

if [ "$FLAG" == true ]; then
    echo "Already revised"
else
    echo "Add data-root and default-shm-size=$MEMORY_SIZE"
    # Keep a backup of the original config before overwriting it.
    sudo cp "$DAEMON_JSON" "$DAEMON_JSON.bak"
    # Pass the size via --arg instead of splicing it into the jq program text,
    # so the value is always treated as a JSON string.
    sudo cat "$DAEMON_JSON.bak" \
        | jq --arg shm "$MEMORY_SIZE" '. += {"data-root":"/home/ec2-user/SageMaker/.container/docker","default-shm-size":$shm}' \
        | sudo tee "$DAEMON_JSON" > /dev/null
    sudo service docker restart
    echo "Docker Restart"
fi

Already revised


In [5]:
import sys
import IPython

if install_needed:
    print("installing deps and restarting kernel")
    !{sys.executable} -m pip install -U pip
    !{sys.executable} -m pip install -U sagemaker
    #!{sys.executable} -m pip install -U smdebug sagemaker-experiments-logger sagemaker-experiments awswrangler==2.20.1
    !{sys.executable} -m pip install -U sagemaker
    !{sys.executable} -m pip install -U datasets transformers
    !sudo curl -L "https://github.com/docker/compose/releases/download/v2.27.1/docker-compose-$(uname -s)-$(uname -m)" -o /usr/local/bin/docker-compose
    !sudo chmod +x /usr/local/bin/docker-compose
        
    IPython.Application.instance().kernel.do_shutdown(True)

installing deps and restarting kernel
Collecting sagemaker
  Downloading sagemaker-2.232.3-py3-none-any.whl.metadata (16 kB)
Downloading sagemaker-2.232.3-py3-none-any.whl (1.6 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.6/1.6 MB[0m [31m7.7 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: sagemaker
  Attempting uninstall: sagemaker
    Found existing installation: sagemaker 2.232.2
    Uninstalling sagemaker-2.232.2:
      Successfully uninstalled sagemaker-2.232.2
Successfully installed sagemaker-2.232.3
Collecting transformers
  Downloading transformers-4.46.1-py3-none-any.whl.metadata (44 kB)
Downloading transformers-4.46.1-py3-none-any.whl (10.0 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m10.0/10.0 MB[0m [31m14.5 MB/s[0m eta [36m0:00:00[0m:00:01[0m
[?25hInstalling collected packages: transformers
  Attempting uninstall: transformers
    Found existing installation: transformers 4.46.0
    Uninstalling transfor

## 1. Set roles

In [3]:
from sagemaker import get_execution_role

sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/ec2-user/.config/sagemaker/config.yaml


In [4]:
# Keep only the text after the last "/" of the execution role ARN, i.e. the
# bare role name.
strSageMakerRoleName = get_execution_role().rpartition('/')[2]
print (f"SageMaker Execution Role Name: {strSageMakerRoleName}")

SageMaker Execution Role Name: AmazonSageMaker-ExecutionRole-20221206T163436


## 1.1 Attach IAM policy to sagemaker execution role (<b>with console</b>)
> **S3**: "arn:aws:iam::aws:policy/AmazonS3FullAccess"

### 1.1 Attach IAM policy to sagemaker execution role (<b>with console</b>)
> step 1. IAM console 로 이동    
>
> step 2. 왼쪽 네비게이터에서  "Role" 선택  
> ![nn](../images/Role.png)  
>
> step 3. SageMaker Execution Role 검색 후 role 선택 (상위 cell output 참조)  
> ![nn](../images/search-by-rolename.png)  
>
> step 4. "attach policies" 메뉴 선택 
> ![nn](../images/attach-policy-menu.png)  
>
> step 5. "IAMFullAccess" policy 검색 후 attach 
> ![nn](../images/attach-policy.png) 

In [5]:
from utils.iam import iam_handler

In [6]:
# Instantiate the project's IAM helper (utils.iam); it is used below to attach
# managed policies to the SageMaker execution role.
iam = iam_handler()

### 1.2 Attach policy - sagemaker execution role
> AmazonEC2ContainerRegistryFullAccess <BR>
> SSM <BR>
> S3 <BR>

In [7]:
# AWS managed policies to attach to the SageMaker execution role:
# ECR, Systems Manager (SSM) and S3 full access.
listPolicyArn = [
    "arn:aws:iam::aws:policy/AmazonEC2ContainerRegistryFullAccess",
    "arn:aws:iam::aws:policy/AmazonSSMFullAccess",
    "arn:aws:iam::aws:policy/AmazonS3FullAccess",
]

# Attach each policy in turn to the role resolved earlier.
for strPolicyArn in listPolicyArn:
    iam.attach_policy(strSageMakerRoleName, strPolicyArn)

## 2. Set default parameters

In [8]:
import boto3
import sagemaker

### Bucket / Prefix 설정

In [19]:
# Region configured for the default boto3 session, and the AWS account ID of
# the calling identity (looked up through STS).
strRegionName = boto3.Session().region_name
strAccountId = boto3.client("sts").get_caller_identity().get("Account")
strBucketName = 'sm-anomaly-detection' # <-- Set the name of the bucket to use, e.g. sagemaker-us-east-1-123456789123, sm-nemo-bucket
strPrefix = 'ad-ts' ## <-- Set the working prefix, e.g. nemo-test, nemo-asr

## 3. Extract & Upload dataset

In [20]:
import os
from utils.s3 import s3_handler

In [21]:
strLocalDataPath = "./data"
!tar -zxvf $strLocalDataPath/data.tar.gz --directory=data

./
./fault_label_10T.csv
./clicks.csv
./clicks_10T.csv
./fault_label_1T.csv
./clicks_1T.csv


* create bucket

In [22]:
# Create the project's S3 helper (utils.s3) for the region selected above.
s3 = s3_handler(region_name=strRegionName)

This is a S3 handler with [us-west-2] region.


In [23]:
# Create the bucket that will hold the dataset.
# NOTE(review): behavior when the bucket already exists is defined in utils.s3
# and is not visible here - confirm before re-running.
s3.create_bucket(strBucketName)

CREATE:[sm-anomaly-detection] Bucket was created successfully


True

* upload data to s3

In [24]:
# Upload the extracted local data directory to the "data" prefix of the bucket.
source_dir = strLocalDataPath
target_bucket = strBucketName
target_dir = "data"
s3.upload_dir(source_dir, target_bucket, target_dir)

Upload:[./data] was uploaded to [s3://sm-anomaly-detection/data]successfully


## 4. [Optional] AWS Systems Manager Parameter Store 를 이용한 파라미터 저장/활용
- [AWS Systems Manager Parameter Store](https://docs.aws.amazon.com/systems-manager/latest/userguide/systems-manager-parameter-store.html)
- Attach IAM policy to sagemaker execution role (<b>with console</b>)
> **SSM**: "arn:aws:iam::aws:policy/AmazonSSMFullAccess"<BR>

In [25]:
from utils.ssm import parameter_store

In [26]:
# Create the project's Parameter Store helper (utils.ssm) for the region
# selected above; used below to store and read the shared settings.
pm = parameter_store(strRegionName)

In [27]:
# Persist the shared settings in Parameter Store. "PREFIX" is stored under a
# fixed key; every other key is namespaced as "<prefix>-<NAME>".
# The final call is left as the cell's last expression so its return value is
# still displayed.
pm.put_params(key="PREFIX", value=strPrefix, overwrite=True)
pm.put_params(key=f"{strPrefix}-REGION", value=strRegionName, overwrite=True)
pm.put_params(key=f"{strPrefix}-BUCKET", value=strBucketName, overwrite=True)
pm.put_params(key=f"{strPrefix}-SAGEMAKER-ROLE-ARN", value=get_execution_role(), overwrite=True)
pm.put_params(key=f"{strPrefix}-ACCOUNT-ID", value=strAccountId, overwrite=True)
pm.put_params(key=f"{strPrefix}-S3-DATA-PATH", value=f"s3://{strBucketName}/data", overwrite=True)

'Store suceess'

In [28]:
# Read the stored values back to confirm they were written. "PREFIX" has a
# fixed key; the rest are looked up as "<prefix>-<NAME>" in the same order
# they were stored.
print (f'PREFIX: {pm.get_params(key="PREFIX")}')
for strParamName in ("REGION", "BUCKET", "SAGEMAKER-ROLE-ARN", "ACCOUNT-ID", "S3-DATA-PATH"):
    strParamKey = "-".join([strPrefix, strParamName])
    print (f'{strParamName}: {pm.get_params(key=strParamKey)}')

PREFIX: ad-ts
REGION: us-west-2
BUCKET: sm-anomaly-detection
SAGEMAKER-ROLE-ARN: arn:aws:iam::419974056037:role/service-role/AmazonSageMaker-ExecutionRole-20221206T163436
ACCOUNT-ID: 419974056037
S3-DATA-PATH: s3://sm-anomaly-detection/data
