# [모듈 1.0] 컨피그 파일 생성

이 노트북은 아래 두 가지 설정 파일을 생성합니다. 이후 노트북에서는 이 설정 파일을 로딩해서 사용합니다.
- Code Pipeline Series (CodeCommit, CodeBuild, CodePipeline)
- SageMaker Pipeline 



# 1. 환경 확인

In [1]:
# Enable IPython's autoreload extension (mode 2) so edited modules are
# re-imported automatically before each cell runs.
%load_ext autoreload
%autoreload 2

import sys
import sagemaker
import boto3
# Add ./src to the import path; a later cell imports `common_utils` by bare
# name (presumably it lives in ./src -- confirm).
sys.path.append('./src')



### 입력 데이터 경로 변수(`s3_input_data_uri`)는 사전에 `%store` 로 저장되어 있어야 합니다.

In [2]:
# Restore `s3_input_data_uri` from IPython's %store database; it must have
# been saved with `%store` beforehand for the print below to find it.
%store -r s3_input_data_uri
print("s3_input_data_uri: \n", s3_input_data_uri)

s3_input_data_uri: 
 s3://sagemaker-ap-northeast-2-057716757052/NCFModel/data


# 2. 역할 정의
## 2.1. Code Build 역할 ARN 가져오기 
- 워크샵의 셋업 단계에서 CodeBuild 역할(CodeBuild_Role)은 아래와 같이 미리 생성됨
    -  "arn:aws:iam::XXXXXX:role/MLOps-CodeBuildRole"

In [3]:
import boto3

# Fetch the pre-created CodeBuild role from IAM and keep only its ARN.
_code_build_role = boto3.client("iam").get_role(RoleName="MLOps-CodeBuildRole")
code_build_service_arn = _code_build_role.get("Role").get("Arn")
print("code_build_service_arn: \n", code_build_service_arn)

code_build_service_arn: 
 arn:aws:iam::057716757052:role/MLOps-CodeBuildRole


## 2.2. Code Pipeline 역할 ARN 가져오기 
- 워크샵의 셋업 단계에서 CodePipeline 역할(CodePipeline_Role)은 아래와 같이 미리 생성됨
    -  "arn:aws:iam::XXXXXX:role/MLOps-CodePipelineRole"


In [4]:
# Fetch the pre-created CodePipeline role from IAM and keep only its ARN.
_code_pipeline_role = boto3.client("iam").get_role(RoleName="MLOps-CodePipelineRole")
code_pipeline_role_arn = _code_pipeline_role.get("Role").get("Arn")
print("code_pipeline_role_arn: \n", code_pipeline_role_arn)

code_pipeline_role_arn: 
 arn:aws:iam::057716757052:role/MLOps-CodePipelineRole


In [5]:
# Optional manual override, kept disabled: uncomment one of the assignments
# below to use a CodePipeline role other than the one looked up from IAM.
# # code_pipeline_role_arn = "arn:aws:iam::XXXXXXXX:role/code-pipeline"
# code_pipeline_role_arn = 'arn:aws:iam::057716757052:role/code-pipeline-gsmoon'


# 3. Code Pipeline 시리즈 파라미터 세팅

## 3.1. 변수 설정 및 dict 로 저장

In [6]:
# Basic information.
# Build the SageMaker session once and reuse it for both the region and the
# default-bucket lookups instead of constructing a new session for each.
sagemaker_session = sagemaker.Session()
region = sagemaker_session.boto_region_name
account_id = boto3.client("sts").get_caller_identity().get("Account")
project_prefix = "CodePipeline-Train-NCF"
bucket = sagemaker_session.default_bucket()

# CodeCommit repository name.
train_code_repo_name = "ncf-training-repo"

# CodeBuild project name.
train_code_build_project_name = "ncf-code-build-training-sm-pipeline"

# CodePipeline project variables.
train_code_pipeline_name = "ncf-code-pipeline-training-sm-pipeline"
branch_name = "master"
model_package_group_name = "NCF-Model-CodePipeline"


# Path of the generated JSON config file (a plain literal; nothing to interpolate).
code_pipeline_train_config_json_path = "src/code_pipeline_train_config.json"


In [7]:
from datetime import datetime

# Capture "now" once and render it as a sortable, dash-separated stamp; the
# string is written into the JSON configs as their "update_time".
currentDateAndTime = datetime.now()
currentTime = f"{currentDateAndTime:%Y-%m-%d-%H-%M-%S}"
print("The current time is", currentTime)

The current time is 2022-12-21-05-57-13


In [8]:
# Settings for the Code* services (CodeCommit, CodeBuild, CodePipeline).
# Key order is kept as-is because it determines the layout of the saved JSON.
code_pipeline_dict = {
    # IAM roles
    "code_pipeline_role_arn": code_pipeline_role_arn,
    "code_build_service_arn": code_build_service_arn,
    # Account / project identity
    "project_prefix": project_prefix,
    "region": region,
    "account_id": account_id,
    # Repository, build project, bucket, pipeline and model registry names
    "train_code_repo_name": train_code_repo_name,
    "code_build_project_name": train_code_build_project_name,
    "bucket": bucket,
    "code_pipeline_name": train_code_pipeline_name,
    "model_package_group_name": model_package_group_name,
    "branch_name": branch_name,
    # Timestamp of when this config was generated
    "update_time": currentTime,
}

## 3.2. 설정 딕셔너리를 Json 파일로 저장 후 테스트를 위해 로딩

In [9]:
from common_utils import load_json, save_json

# Write the CodePipeline settings to disk, then read the file straight back
# so the following cell can display what was actually stored.
save_json(code_pipeline_train_config_json_path, code_pipeline_dict)
code_pipeline_train_dict = load_json(code_pipeline_train_config_json_path)


src/code_pipeline_train_config.json is saved


In [10]:
import json

# Pretty-print the config that was read back from the JSON file.
print(json.dumps(code_pipeline_train_dict, indent=2))

{
  "code_pipeline_role_arn": "arn:aws:iam::057716757052:role/MLOps-CodePipelineRole",
  "code_build_service_arn": "arn:aws:iam::057716757052:role/MLOps-CodeBuildRole",
  "project_prefix": "CodePipeline-Train-NCF",
  "region": "ap-northeast-2",
  "account_id": "057716757052",
  "train_code_repo_name": "ncf-training-repo",
  "code_build_project_name": "ncf-code-build-training-sm-pipeline",
  "bucket": "sagemaker-ap-northeast-2-057716757052",
  "code_pipeline_name": "ncf-code-pipeline-training-sm-pipeline",
  "model_package_group_name": "NCF-Model-CodePipeline",
  "branch_name": "master",
  "update_time": "2022-12-21-05-57-13"
}


# 4. SageMaker Pipeline 파라미터 세팅

## 4.1. 변수 설정 및 dict 로 저장

In [11]:
# Naming and storage for the SageMaker Pipeline config.
# NOTE: this rebinds `project_prefix` and `bucket`, which were already set for
# the CodePipeline config earlier in the notebook.
project_prefix = "SageMaker-Train-NCF"
bucket = sagemaker.Session().default_bucket()
train_sm_pipeline_name = "ncf-training-code-pipeline-sm-pipeline"

# Training resources.
training_instance_type = "ml.p3.2xlarge"
training_instance_count = 1

# Model registration settings; the inference image URI is built for the
# `region` resolved earlier in the notebook.
ModelApprovalStatus = "PendingManualApproval"
inference_image_uri = (
    f"763104351884.dkr.ecr.{region}.amazonaws.com/"
    "pytorch-inference:1.8.1-gpu-py3"
)

# Path of the generated JSON config file.
sm_pipeline_train_config_json_path = "src/sm_pipeline_train_config.json"


In [12]:
# Settings for the SageMaker training pipeline.
# Key order is kept as-is because it determines the layout of the saved JSON.
sm_train_pipeline_dict = {
    # Project identity, input data and pipeline name
    "project_prefix": project_prefix,
    "s3_input_data_uri": s3_input_data_uri,
    "sm_pipeline_name": train_sm_pipeline_name,
    # Training resources
    "training_instance_type": training_instance_type,
    "training_instance_count": training_instance_count,
    # Model registration
    "ModelApprovalStatus": ModelApprovalStatus,
    "inference_image_uri": inference_image_uri,
    "bucket": bucket,
    # Timestamp of when this config was generated
    "update_time": currentTime,
}

## 4.2. 설정 딕셔너리를 Json 파일로 저장 후 테스트를 위해 로딩

In [13]:
from common_utils import load_json, save_json

# Write the SageMaker Pipeline settings to disk, read the file straight back,
# and pretty-print the reloaded content as a round-trip check.
# `json` is the module imported by an earlier cell of this notebook.
save_json(sm_pipeline_train_config_json_path, sm_train_pipeline_dict)
sm_pipeline_train_dict = load_json(sm_pipeline_train_config_json_path)
print(json.dumps(sm_pipeline_train_dict, indent=2))

src/sm_pipeline_train_config.json is saved
{
  "project_prefix": "SageMaker-Train-NCF",
  "s3_input_data_uri": "s3://sagemaker-ap-northeast-2-057716757052/NCFModel/data",
  "sm_pipeline_name": "ncf-training-code-pipeline-sm-pipeline",
  "training_instance_type": "ml.p3.2xlarge",
  "training_instance_count": 1,
  "ModelApprovalStatus": "PendingManualApproval",
  "inference_image_uri": "763104351884.dkr.ecr.ap-northeast-2.amazonaws.com/pytorch-inference:1.8.1-gpu-py3",
  "bucket": "sagemaker-ap-northeast-2-057716757052",
  "update_time": "2022-12-21-05-57-13"
}


# 5. Config 파일 복사

## 5.1. code_pipeline_train_config_json 파일

In [14]:
import shutil

# Copy the generated CodePipeline config into codecommit/pipelines/ncf/src/.
# shutil.copy replaces the `! cp` shell escape: a missing source file or
# target directory now raises here instead of only printing a shell error
# while the notebook carries on, and paths are no longer split by the shell.
source_path = code_pipeline_train_config_json_path
target_path = 'codecommit/pipelines/ncf/src/'

copied_to = shutil.copy(source_path, target_path)
print("copied to:", copied_to)

## 5.2. sm_pipeline_train_config_json 파일

In [15]:
import shutil

# Copy the generated SageMaker Pipeline config into codecommit/pipelines/ncf/src/.
# shutil.copy replaces the `! cp` shell escape: a missing source file or
# target directory now raises here instead of only printing a shell error
# while the notebook carries on, and paths are no longer split by the shell.
source_path = sm_pipeline_train_config_json_path
target_path = 'codecommit/pipelines/ncf/src/'

copied_to = shutil.copy(source_path, target_path)
print("copied to:", copied_to)

# 6. 변수 저장

In [16]:
# Persist both config-file paths with IPython's %store so they can be
# restored later via `%store -r`.
%store code_pipeline_train_config_json_path
%store sm_pipeline_train_config_json_path

Stored 'code_pipeline_train_config_json_path' (str)
Stored 'sm_pipeline_train_config_json_path' (str)
