# Training Pipeline 재학습

## 1. 환경설정

### 1.1 프로젝트 기본정보

In [46]:
import sys
import boto3
import sagemaker
import json
import pprint as pp

In [103]:
# Project namespace; every resource name and parameter key below starts with it.
# NOTE(review): the recorded cell outputs further down show names beginning
# with 'cjproto' and 'ncf-sample' -- confirm these two values match the target
# environment before re-running.
project_prefix = "proto"

# Model group identifier; changes with the model version.
model_group = "ncf-sample-1"

# Model package group name, and the training pipeline name derived from it.
model_package_group_name = "-".join((project_prefix, model_group))
training_pipeline_name = model_package_group_name + "-training"

# Parameter Store keys for the repackage Lambda, the metric Lambda and the
# MLOps bucket name.
key_repackage_lambda_arn = f"/{project_prefix}/MlOps/Lambda/Function/ModelRepackage"
key_metric_lambda_arn = f"/{project_prefix}/MlOps/Lambda/Function/ModelMetric"
key_s3_mlops_bucket_name = f"/{project_prefix}/MlOps/S3/Bucket/Name/s3MlOpsBucket"

# File name of the training pipeline manifest.
pipeline_manifest_file = "pipeline_config.json"

In [61]:
# Resolve the MLOps bucket name from SSM Parameter Store.
ssm = boto3.client('ssm')
paramRes = ssm.get_parameter(
    Name=key_s3_mlops_bucket_name,
    WithDecryption=False,
)
bucket = paramRes["Parameter"]["Value"]

# Per-model key prefixes for data, code and model artifacts inside the bucket.
data_prefix, code_prefix, model_prefix = (
    f"{kind}/{model_group}" for kind in ("data", "code", "model")
)

# Full S3 URIs for the input data and the model output location.
s3_input_data_uri = f"s3://{bucket}/{data_prefix}"
s3_model_output_uri = f"s3://{bucket}/{model_prefix}"

In [62]:
# Region of the default boto3 session, plus the SageMaker session, execution
# role and SageMaker boto3 client used by the rest of the notebook.
region = boto3.Session().region_name
sagemaker_session = sagemaker.session.Session()
role = sagemaker.get_execution_role()
sm_client = boto3.client('sagemaker', region_name=region)

# Echo the resolved bucket and role so a wrong environment is easy to spot.
print("bucket: ", bucket)
print("role: ", role)

sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/sagemaker-user/.config/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/sagemaker-user/.config/sagemaker/config.yaml
bucket:  cjproto-975050344093-ap-northeast-2-mlops-bucket
role:  arn:aws:iam::975050344093:role/cjprotoDev-cjprotoExperim-SageMakerStudioSageMakerE-ovIXL1hq7LV4


### 1.2 기존 파이프라인 파라미터

In [63]:
# Read the existing pipeline manifest from the code prefix of the bucket.
s3 = boto3.client('s3')
_res = s3.get_object(
    Bucket=bucket,
    Key=f'{code_prefix}/{pipeline_manifest_file}',
)
_body = _res['Body'].read()

# The body is read as bytes; decode it as UTF-8 before parsing the JSON.
pipeline_config = json.loads(str(_body, encoding="utf-8"))

pp.pprint(pipeline_config)

{'PipelineName': 'cjproto-ncf-sample-training',
 'PipelineParameters': [{'Name': 'InputData',
                         'Value': 's3://cjproto-975050344093-ap-northeast-2-mlops-bucket/data/ncf-sample'},
                        {'Name': 'TrainCode',
                         'Value': 's3://cjproto-975050344093-ap-northeast-2-mlops-bucket/code/ncf-sample/source.tar.gz'},
                        {'Name': 'ModelApprovalStatus',
                         'Value': 'PendingManualApproval'}]}


## 2. 파이프라인 업데이트

### 2.1 학습/추론 코드 업데이트

In [64]:
# Location of the new training / inference code.

# Directory name of the code repository, relative to this notebook.
model_code_dir = "src_v1"

# Put that source directory on the module search path.
sys.path.append("./" + model_code_dir)

In [65]:
import os

# Local staging directory for the packaged source; created if it is missing.
package_dir = 'code_pkg'
os.makedirs(package_dir, exist_ok=True)

# Source directory expressed relative to the staging directory: the packaging
# cell changes into package_dir before it copies from this path.
code_dir = "../" + model_code_dir

In [122]:
%%sh -s {package_dir} {code_dir}

# Package the model source files into source_v6.tar.gz inside the staging directory.
#   $1  staging directory; its contents are deleted before packaging
#   $2  source directory; a relative path is resolved from inside $1 (after the cd)

package_dir=$1
code_dir=$2

# Refuse to continue without a staging directory: a bare `cd` would move to
# $HOME and the `rm -rf` below would then delete its contents.
if [ -z "$package_dir" ]; then
    echo "package_dir argument is required" >&2
    exit 1
fi

# Enter the staging directory; stop if that fails so the cleanup below never
# runs in the notebook's own working directory.
cd "$package_dir" || exit 1
echo "$PWD"
rm -rf ./*
cp -r "$code_dir"/*.py .    # copy all Python source files
cp -r "$code_dir"/*.txt .   # copy all .txt files
cp -r "$code_dir"/*.json .  # copy all JSON files
tar -czvf source_v6.tar.gz *  # create the source_v6.tar.gz archive

/home/sagemaker-user/cjproto-mlops-repo/mlpipelines/sample_recomm_model/code_pkg
common_utils.py
config.py
data_utils.py
evaluate.py
inference.py
model.py
model_config.json
requirements.txt
train.py
train_lib.py


In [123]:
# Upload the packaged source archive under the code prefix of the bucket.
source_path = os.path.join(package_dir, 'source_v6.tar.gz')
source_artifact = sagemaker_session.upload_data(
    path=source_path,
    bucket=bucket,
    key_prefix=code_prefix,
)
print("source_artifact: \n", source_artifact)

source_artifact: 
 s3://cjproto-975050344093-ap-northeast-2-mlops-bucket/code/ncf-sample/source_v6.tar.gz


### 2.2 파이프라인 파라미터 업데이트

In [124]:
# Pipeline parameters as Name/Value pairs: the input data location, the newly
# uploaded training code archive, and the model approval status.
params = [
    {'Name': param_name, 'Value': param_value}
    for param_name, param_value in (
        ('InputData', f"s3://{bucket}/{data_prefix}"),
        ('TrainCode', source_artifact),
        ('ModelApprovalStatus', 'PendingManualApproval'),
    )
]

In [125]:
# Replace the parameter list in the loaded manifest and display the result.
pipeline_config.update(PipelineParameters=params)

pp.pprint(pipeline_config)

{'PipelineName': 'cjproto-ncf-sample-training',
 'PipelineParameters': [{'Name': 'InputData',
                         'Value': 's3://cjproto-975050344093-ap-northeast-2-mlops-bucket/data/ncf-sample'},
                        {'Name': 'TrainCode',
                         'Value': 's3://cjproto-975050344093-ap-northeast-2-mlops-bucket/code/ncf-sample/source_v6.tar.gz'},
                        {'Name': 'ModelApprovalStatus',
                         'Value': 'PendingManualApproval'}]}


## 3. 파이프라인 다시 실행

In [126]:
# 파일로 기록
# Write the updated manifest to a local UTF-8 JSON file, keeping non-ASCII
# characters unescaped.
with open(pipeline_manifest_file, 'w', encoding='utf-8') as f:
    f.write(json.dumps(pipeline_config, ensure_ascii=False, indent=4))

In [129]:
 # Upload the manifest file under the code prefix of the bucket.
manifest_artifact = sagemaker_session.upload_data(
    path=pipeline_manifest_file,
    bucket=bucket,
    key_prefix=code_prefix,
)