# [모듈 3.1] Triton Docker 에 두 개의 NCF 모델 서빙

# 1. 환경 셋업

## 1.1. 기본 세팅
사용하는 패키지는 import 시점에 다시 재로딩 합니다.

In [1]:
%load_ext autoreload
%autoreload 2

import sys
sys.path.append('./src')

In [2]:
import sagemaker

# Create a SageMaker session and look up the execution role of this environment.
# NOTE(review): neither name is referenced again in the visible cells of this notebook.
sagemaker_session = sagemaker.Session()
role = sagemaker.get_execution_role()

## 1.2. 배포 준비

### 이전 노트북에서 훈련된 모델의 S3 경로 확인

In [3]:
%store -r artifact_path

In [4]:
print("model artifact is assigend from : ", artifact_path)

model artifact is assigend from :  s3://sagemaker-us-east-1-057716757052/pytorch-training-2022-12-11-07-36-01-118/output/model.tar.gz


### 추론을 위한  데이터 세트 로딩
- 전체 데이터를 로딩할 필요는 없지만, 여기서는 기존에 사용한 함수를 그대로 이용하기 위해서 전체 데이터를 로드합니다.


In [5]:
import data_utils 
# Load the full data set through the existing helper. In the visible cells only
# user_num and item_num are used afterwards (they are written to model_config.json).
train_data, test_data, user_num ,item_num, train_mat = data_utils.load_all(test_num=100)

### 파라미터 생성
- 모델 로딩 시에 아래 파라미터 사용

In [6]:
class Params:
    """Fixed settings used by this notebook, exposed as plain instance attributes.

    ``factor_num``, ``num_layers`` and ``dropout`` are written to
    ``model_config.json`` further down; ``batch_size`` is only printed.
    The remaining fields are not read in the visible cells.
    The training-only settings ``epochs``, ``lr`` and ``gpu`` are
    intentionally left out.
    """

    # (attribute name, value) pairs, applied in this order by __init__.
    _DEFAULTS = (
        ("num_ng", 4),
        ("batch_size", 256),
        ("test_num_ng", 99),
        ("factor_num", 32),
        ("num_layers", 3),
        ("dropout", 0.0),
        ("top_k", 10),
        ("out", True),
    )

    def __init__(self):
        for attr_name, attr_value in self._DEFAULTS:
            setattr(self, attr_name, attr_value)
                        
args = Params()
print("# of batch_size: ", args.batch_size)


# of batch_size:  256


# 2. 훈련된 모델 아티펙트 다운로드 및 압축해제
- 모델 아티펙트를 다운로드 합니다.
- 다운로드 받은 모델 아티펙트의 압축을 해제하고 모델 가중치인 models/model.pth 파일을 얻습니다.

In [7]:
import os
import config

# Local directory that receives the downloaded model artifact; created if it is missing.
model_data_dir = config.model_path
os.makedirs(model_data_dir, exist_ok=True)
print("model_data_dir: ", model_data_dir)

model_data_dir:  ./models/


In [8]:
%%sh -s {artifact_path} {model_data_dir}

# Fail fast: stop at the first failing command or on an unset variable, so a
# failed download or "cd" never leads to extracting a stale/missing archive.
set -eu

# $1: S3 URI of model.tar.gz, $2: local directory to download and extract into
artifact_path=$1
model_data_dir=$2

echo "$artifact_path"
echo "$model_data_dir"

# Remove previously downloaded data.
# ":?" aborts when the variable is empty, so this can never expand to "rm -rf /*".
rm -rf "${model_data_dir:?}"/*

# Download the model artifact from S3 to the local directory
aws s3 cp "$artifact_path" "$model_data_dir"

# Move into the download directory
cd "$model_data_dir"

# Extract the archive
tar -xvf model.tar.gz

s3://sagemaker-us-east-1-057716757052/pytorch-training-2022-12-11-07-36-01-118/output/model.tar.gz
./models/
download: s3://sagemaker-us-east-1-057716757052/pytorch-training-2022-12-11-07-36-01-118/output/model.tar.gz to models/model.tar.gz
NeuMF-end.pth


# 3. 훈련된 모델 로딩


## 3.1. 모델 네트워크 설정 저장
- 모델 네트워크를 생성시에 설정값을 model_config.json 로 저장함.
- model_fn() 함수에서 모델 네트워크를 생성시에 사용 함.

In [9]:
import json
from common_utils import save_json, load_json

# Network settings are stored as strings; model_type is taken from config unchanged.
stringified_fields = (
    ('user_num', user_num),
    ('item_num', item_num),
    ('factor_num', args.factor_num),
    ('num_layers', args.num_layers),
    ('dropout', args.dropout),
)
model_config_dict = {field: str(value) for field, value in stringified_fields}
model_config_dict['model_type'] = config.model

# The settings file is written under src/.
model_config_file = 'model_config.json'
model_config_file_path = os.path.join('src', model_config_file)

save_json(model_config_file_path, model_config_dict)


src/model_config.json is saved


'src/model_config.json'

## 3.2. 두 개의 모델 생성
- 복수개의 모델로 진행하기 위해서, 편의상 동일한 모델에서 생성 함.


In [10]:
from inference import model_fn

# Both models are built by model_fn() from the same directory (config.model_path);
# the "food" and "fashion" names only exist to demonstrate serving several models.
ncf_food_model = model_fn(config.model_path)
ncf_fashion_model = model_fn(config.model_path)


######## Staring model_fn() ###############
--> model_dir : ./models/
model_config_path: :  /home/ec2-user/SageMaker/Neural-Collaborative-Filtering-On-SageMaker/2_Triton_Inference/./src/model_config.json
--> model network is loaded
model_file_path: :  {model_file_path}
####### Model is loaded #########
######## Staring model_fn() ###############
--> model_dir : ./models/
model_config_path: :  /home/ec2-user/SageMaker/Neural-Collaborative-Filtering-On-SageMaker/2_Triton_Inference/./src/model_config.json
--> model network is loaded
model_file_path: :  {model_file_path}
####### Model is loaded #########


In [11]:
ncf_food_model

NCF(
  (embed_user_GMF): Embedding(6040, 32)
  (embed_item_GMF): Embedding(3706, 32)
  (embed_user_MLP): Embedding(6040, 128)
  (embed_item_MLP): Embedding(3706, 128)
  (linear): Linear(in_features=4, out_features=4, bias=True)
  (MLP_layers): Sequential(
    (0): Dropout(p=0.0, inplace=False)
    (1): Linear(in_features=256, out_features=128, bias=True)
    (2): ReLU()
    (3): Dropout(p=0.0, inplace=False)
    (4): Linear(in_features=128, out_features=64, bias=True)
    (5): ReLU()
    (6): Dropout(p=0.0, inplace=False)
    (7): Linear(in_features=64, out_features=32, bias=True)
    (8): ReLU()
  )
  (predict_layer): Linear(in_features=64, out_features=1, bias=True)
)

# 4. Triton 서빙 준비

## 4.1. 샘플 입력 생성

In [12]:
import numpy as np
import torch

# Use the GPU when torch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print("Using {} device".format(device))

# One row of 100 (user, item) pairs: user id 0 repeated, item ids drawn from [1, 1000).
sample_shape = (1, 100)
user_np = np.zeros(sample_shape).astype(np.int32)
item_np = np.random.randint(low=1, high=1000, size=sample_shape).astype(np.int32)

# Tensors on the selected device, in (user, item) order.
dummy_inputs = [torch.from_numpy(array).to(device) for array in (user_np, item_np)]
print("dummy_inputs: \n", dummy_inputs)
dummy_user, dummy_item = dummy_inputs

# dummy_inputs

Using cuda device
dummy_inputs: 
 [tensor([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0]], device='cuda:0', dtype=torch.int32), tensor([[478, 360, 373, 466, 131, 140, 524, 728, 400, 651, 371,  73, 904, 140,
         991, 213, 459, 802,  15, 100, 758, 867, 644, 243, 920, 586,  29, 747,
         247, 142, 710, 905, 425, 598, 478,  25, 139, 148, 229, 953, 100, 324,
         732,  80, 323, 959, 473, 372,  81, 467, 290, 934, 500,  65, 832, 132,
         144, 380, 471, 609,  34, 779,  99, 800,  40, 258, 781, 331, 530, 987,
         978, 331, 401, 520, 922, 891, 915, 100, 352, 554, 726, 821, 864, 316,
         310, 504,  90, 633, 614,  39, 876, 226,  28, 614, 137, 838, 200, 316,
         829, 474]], device=

## 4.2. 샘플 입력으로 모델 추론 테스트

In [13]:
# Run the same dummy batch through both models and report each output shape.
for ncf_model in (ncf_food_model, ncf_fashion_model):
    result = ncf_model(dummy_user, dummy_item)
    print("result shape: ", result.shape)

result shape:  torch.Size([1, 100, 1])
result shape:  torch.Size([1, 100, 1])


## 4.3. TorchScript 로 변환

In [14]:
is_trace = True
is_script = False

In [15]:
def trace_model(mode, device, model, dummy_inputs, trace_model_name):
    """Convert ``model`` to TorchScript, save it, and smoke-test the saved file.

    Args:
        mode: ``'trace'`` to convert with ``torch.jit.trace`` using
            ``dummy_inputs`` as example inputs, or ``'script'`` to convert
            with ``torch.jit.script``.
        device: Device the model is moved to before conversion.
        model: Module to convert; it is switched to eval mode.
        dummy_inputs: Sequence whose first two elements are the user and item
            tensors. Used for tracing and for the load test.
        trace_model_name: Path of the TorchScript file to write.

    Raises:
        ValueError: If ``mode`` is neither ``'trace'`` nor ``'script'``.
    """
    # Validate up front: previously an unknown mode left the converted model
    # unbound and failed later with an UnboundLocalError.
    if mode not in ('trace', 'script'):
        raise ValueError(
            f"Unsupported mode {mode!r}; expected 'trace' or 'script'"
        )

    model = model.eval()
    model.to(device)

    if mode == 'trace':
        ir_model = torch.jit.trace(model, dummy_inputs)
    else:
        ir_model = torch.jit.script(model)

    # Report success only after the file has actually been written.
    torch.jit.save(ir_model, trace_model_name)
    print(f"As {mode} : Model is saved {trace_model_name}")

    # Reload the saved file and run it once to confirm it is usable.
    print("#### Load Test ####")
    loaded_m = torch.jit.load(trace_model_name)
    print(loaded_m.code)
    dummy_user = dummy_inputs[0]
    dummy_item = dummy_inputs[1]

    result = loaded_m(dummy_user, dummy_item)
    print("Result shape: ", result.shape)

    
    
# Pick the conversion mode from the flags; tracing wins when both are set.
if is_trace:
    mode = 'trace'
elif is_script:
    mode = 'script'
else:
    # Without this branch `mode` would be undefined (NameError) or silently
    # keep the value left over from an earlier run of this cell.
    raise ValueError("Set is_trace or is_script to True before converting the models")

# food
trace_food_model_name = 'ncf_food_model.pt'
trace_model(mode, device, ncf_food_model, dummy_inputs, trace_food_model_name)
# fashion
trace_fashion_model_name = 'ncf_fashion_model.pt'
trace_model(mode, device, ncf_fashion_model, dummy_inputs, trace_fashion_model_name)


As trace : Model is saved ncf_food_model.pt
#### Load Test ####
def forward(self,
    input: Tensor,
    input0: Tensor) -> Tensor:
  _0 = self.predict_layer
  _1 = self.MLP_layers
  _2 = self.embed_item_MLP
  _3 = self.embed_user_MLP
  _4 = self.embed_item_GMF
  _5 = (self.embed_user_GMF).forward(input, )
  output_GMF = torch.mul(_5, (_4).forward(input0, ))
  _6 = [(_3).forward(input, ), (_2).forward(input0, )]
  input1 = torch.cat(_6, -1)
  input2 = torch.cat([output_GMF, (_1).forward(input1, )], -1)
  return (_0).forward(input2, )

Result shape:  torch.Size([1, 100, 1])
As trace : Model is saved ncf_fashion_model.pt
#### Load Test ####
def forward(self,
    input: Tensor,
    input0: Tensor) -> Tensor:
  _0 = self.predict_layer
  _1 = self.MLP_layers
  _2 = self.embed_item_MLP
  _3 = self.embed_user_MLP
  _4 = self.embed_item_GMF
  _5 = (self.embed_user_GMF).forward(input, )
  output_GMF = torch.mul(_5, (_4).forward(input0, ))
  _6 = [(_3).forward(input, ), (_2).forward(input0, )]
 

## 4.4. config.pbtxt 생성

### ncf_food_config 생성

In [16]:
%%writefile ncf_food_config.pbtxt
# Triton model configuration for the TorchScript NCF "food" model.
name: "ncf_food_model"
platform: "pytorch_libtorch"
# NOTE(review): presumably the batch dimension is implicit because
# max_batch_size > 0, so the dims below describe a single request row —
# confirm against the Triton model configuration docs.
max_batch_size: 128
input [
  {
    # User ids; the notebook's client sends an INT32 tensor of shape [1, 100].
    name: "INPUT__0"
    data_type: TYPE_INT32
    dims: [100]
  },
  {
    # Item ids; the notebook's client sends an INT32 tensor of shape [1, 100].
    name: "INPUT__1"
    data_type: TYPE_INT32
    dims: [100]
  }
]
output [
  {
    # NOTE(review): the test inference in this notebook returned shape
    # (1, 100, 1), i.e. [100, 1] per row, while a single variable dimension is
    # declared here — confirm whether dims should be [100, 1].
    name: "OUTPUT__0"
    data_type: TYPE_FP32
    dims: [-1]
  }
]


Overwriting ncf_food_config.pbtxt


### ncf_fashion_config 생성

In [17]:
%%writefile ncf_fashion_config.pbtxt
# Triton model configuration for the TorchScript NCF "fashion" model.
name: "ncf_fashion_model"
platform: "pytorch_libtorch"
# NOTE(review): presumably the batch dimension is implicit because
# max_batch_size > 0, so the dims below describe a single request row —
# confirm against the Triton model configuration docs.
max_batch_size: 128
input [
  {
    # User ids; the notebook's client sends an INT32 tensor of shape [1, 100].
    name: "INPUT__0"
    data_type: TYPE_INT32
    dims: [100]
  },
  {
    # Item ids; the notebook's client sends an INT32 tensor of shape [1, 100].
    name: "INPUT__1"
    data_type: TYPE_INT32
    dims: [100]
  }
]
output [
  {
    # NOTE(review): the test inference in this notebook returned shape
    # (1, 100, 1), i.e. [100, 1] per row, while a single variable dimension is
    # declared here — confirm whether dims should be [100, 1].
    name: "OUTPUT__0"
    data_type: TYPE_FP32
    dims: [-1]
  }
]


Overwriting ncf_fashion_config.pbtxt


# 5. 아티펙트 패키징

## 복수 모델
```
model_serving_folder
    - model_name
        - version_number
            - model file
        - config file
    - model_name
        - version_number
            - model file
        - config file
        
triton-multi-docker-serve-pt
    - ncf_food_model
        - 1
            - model.pt
        - config.pbtxt
    - ncf_fashion_model
        - 1
            - model.pt
        - config.pbtxt

```


## 5.1. ncf_food_model 폴더 생성 및 아티펙트 카피

In [18]:
import os
from triton_util import make_folder_structure, copy_artifact, remove_folder


In [19]:
# Create the ncf_food_model folder inside the model repository
model_serving_folder = 'triton-multi-docker-serve-pt'
food_model_name = 'ncf_food_model'
make_folder_structure(model_serving_folder, food_model_name)

# Place the TorchScript file and its Triton config into that folder
food_config = 'ncf_food_config.pbtxt'
fodd_config = food_config  # original misspelled name, kept as an alias
copy_artifact(model_serving_folder, food_model_name, trace_food_model_name, food_config)

triton-multi-docker-serve-pt:
ncf_fashion_model
ncf_food_model

triton-multi-docker-serve-pt/ncf_fashion_model:
1
config.pbtxt

triton-multi-docker-serve-pt/ncf_fashion_model/1:
model.pt

triton-multi-docker-serve-pt/ncf_food_model:
1
config.pbtxt

triton-multi-docker-serve-pt/ncf_food_model/1:
model.pt


## 5.2. ncf_fashion_model 폴더 생성 및 아티펙트 카피

In [20]:
# Create the ncf_fashion_model folder inside the same model repository
# (model_serving_folder is set in the ncf_food_model cell).
fashion_model_name = 'ncf_fashion_model'
make_folder_structure(model_serving_folder, fashion_model_name)

# Presumably copies the TorchScript file and config into the folder as 1/model.pt and
# config.pbtxt, matching the directory listing printed by this cell — verify in triton_util.
fashion_config = 'ncf_fashion_config.pbtxt'
copy_artifact(model_serving_folder, fashion_model_name, trace_fashion_model_name, fashion_config)

triton-multi-docker-serve-pt:
ncf_fashion_model
ncf_food_model

triton-multi-docker-serve-pt/ncf_fashion_model:
1
config.pbtxt

triton-multi-docker-serve-pt/ncf_fashion_model/1:
model.pt

triton-multi-docker-serve-pt/ncf_food_model:
1
config.pbtxt

triton-multi-docker-serve-pt/ncf_food_model/1:
model.pt


### 폴더 삭제
- 필요시 주석 제거하고 사용하세요.

In [21]:
# model_serving_folder = 'triton-multi-docker-serve-pt'
# remove_folder(model_serving_folder)

# 6. 로컬 도커에서 실행 테스트

In [22]:
from triton_util import setup_triton_client
triton_client, grpcclient = setup_triton_client()

In [23]:
def create_client_payload():
    """Build the two gRPC input tensors for a single inference request.

    Uses the module-level ``grpcclient`` returned by ``setup_triton_client()``.
    Both generated arrays are printed before returning.

    Returns:
        list: ``[INPUT__0, INPUT__1]`` as ``grpcclient.InferInput`` objects,
        each declared as INT32 with shape [1, 100]. INPUT__0 holds the user
        ids (all zeros); INPUT__1 holds item ids drawn at random from
        [1, 1000).
    """
    request_shape = (1, 100)

    # user
    input0_data = np.zeros(request_shape).astype(np.int32)
    user_input = grpcclient.InferInput('INPUT__0', list(request_shape), "INT32")
    user_input.set_data_from_numpy(input0_data)

    # item
    input1_data = np.random.randint(low=1, high=1000, size=request_shape).astype(np.int32)
    item_input = grpcclient.InferInput('INPUT__1', list(request_shape), "INT32")
    item_input.set_data_from_numpy(input1_data)

    print("input0_data: \n",input0_data)
    print("input1_data: \n",input1_data)

    return [user_input, item_input]



In [24]:
inputs = create_client_payload()


input0_data: 
 [[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
  0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
  0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]]
input1_data: 
 [[193 708 780 727 630 962 372 664 892 431 848 824 870 341 448   6 742 795
  293 815 586 708 605 852 985 441 702 744 406 581 386 866 184   9 337 368
  115 559 918 478 922 825 798 505 785 511 144 186 134 711 689 571 208 304
  368 319 457 788 403 625 770 258 486 737 523 133 841  28  54  63 729 229
   97  89 196 485 493 463 785 353 764 701 326 769 603 194 125 396 316 559
  776 950 241 895  97 729 224 570  68 429]]


In [25]:
outputs = []
outputs.append(grpcclient.InferRequestedOutput('OUTPUT__0'))

In [26]:

def infer_triton_client(model_name, inputs, outputs):
    """Send one inference request to ``model_name`` and print the output shape.

    Uses the module-level ``triton_client``. A later cell imports
    ``infer_triton_client`` from ``triton_util`` and rebinds this name; that
    version is called with the client as its first argument.

    Args:
        model_name: Name of the model to query.
        inputs: Input objects for the request.
        outputs: Requested-output objects; the response is read back under
            the name ``OUTPUT__0``.

    Returns:
        None. The shape of ``OUTPUT__0`` is printed.
    """
    response = triton_client.infer(
        model_name=model_name,
        inputs=inputs,
        outputs=outputs,
        headers={'test': '1'},
    )

    # Read OUTPUT__0 from the response as a numpy array.
    output_array = response.as_numpy('OUTPUT__0')
    print("#### output #####")
    print(output_array.shape)



In [27]:
# model_name = "ncf_food_model"
infer_triton_client(food_model_name, inputs, outputs)

#### output #####
(1, 100, 1)


In [28]:
# model_name = "ncf_fashion_model"
infer_triton_client(fashion_model_name, inputs, outputs)

#### output #####
(1, 100, 1)


## 6.0. 도커에서의 실행 테스트는 아래와 같은 순서로 진행 함.

#### (0) Triton Client 초기화
```
from triton_util import setup_triton_client
triton_client, grpcclient = setup_triton_client()
```

#### (1) 터미널 실행
![terminal.png](img/terminal.png)

#### (2) Triton 도커 컨테이너 실행
- 위의 터미널에 아래와 같이 명령어를 하나씩 실행 하세요.
```
cd /home/ec2-user/SageMaker/Neural-Collaborative-Filtering-On-SageMaker/2_Triton_Inference

docker run --gpus=1 --rm -p8000:8000 -p8001:8001 -p8002:8002 -v `pwd`/triton-multi-docker-serve-pt:/models nvcr.io/nvidia/tritonserver:22.08-py3 tritonserver --model-repository=/models --log-verbose=3 --log-info=1 --log-warning=1 --log-error=1
```
#### (3) Triton 클라이언트로 추론 실행
#### (4) 도커 중단 및 삭제
- 주의: 아래 명령은 Triton 컨테이너뿐 아니라 이 호스트의 모든 도커 컨테이너(중지된 컨테이너 포함)를 강제로 삭제합니다.
```
docker rm -f $(docker ps -qa)
```

## 6.1. Triton Client 초기화

In [29]:
from triton_util import setup_triton_client
triton_client, grpcclient = setup_triton_client()

## 6.2. !!! 터미널에서 "Triton 도커 컨테이너 실행" 을 해주세요 !!!

## 6.3. 입력 payload 생성

In [30]:
def create_client_payload():
    """Build the two gRPC input tensors for a single inference request.

    This repeats the definition from the earlier local-test cell, so the
    section can be run on its own. Uses the module-level ``grpcclient``
    returned by ``setup_triton_client()`` and prints both generated arrays.

    Returns:
        list: ``[INPUT__0, INPUT__1]`` as ``grpcclient.InferInput`` objects,
        each declared as INT32 with shape [1, 100]. INPUT__0 holds the user
        ids (all zeros); INPUT__1 holds item ids drawn at random from
        [1, 1000).
    """
    request_shape = (1, 100)

    # user
    input0_data = np.zeros(request_shape).astype(np.int32)
    user_input = grpcclient.InferInput('INPUT__0', list(request_shape), "INT32")
    user_input.set_data_from_numpy(input0_data)

    # item
    input1_data = np.random.randint(low=1, high=1000, size=request_shape).astype(np.int32)
    item_input = grpcclient.InferInput('INPUT__1', list(request_shape), "INT32")
    item_input.set_data_from_numpy(input1_data)

    print("input0_data: \n",input0_data)
    print("input1_data: \n",input1_data)

    return [user_input, item_input]



In [31]:
inputs = create_client_payload()


input0_data: 
 [[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
  0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
  0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]]
input1_data: 
 [[109 627 929 672 292 694 498 605 741 222 857 476 475 144 826 537 188 615
  619 105 735 618 320 205 689 324 123  46 675 279 457 718 270 126  34 277
  867  96 534 784 215 648 321 303 973 482 591 901 341 519  85 681 670 480
  762 692 896 803 902 269 337 349 964 922 563 864 636 584 567  82 729 609
  853 479 239 196 218 661 501  72 406 192 656 287 825  60 622 790  61 283
  839 563 430 885 157  99 947 617 643 358]]


## 6.4. 출력 변수 생성

In [32]:
outputs = []
outputs.append(grpcclient.InferRequestedOutput('OUTPUT__0'))


## 6.5. Triton에 추론 요청


In [33]:
from triton_util import infer_triton_client

infer_triton_client(triton_client, food_model_name, inputs, outputs)

#### output #####
(1, 100, 1)


In [34]:
infer_triton_client(triton_client, fashion_model_name, inputs, outputs)

#### output #####
(1, 100, 1)


# 9. 변수 저장

In [35]:
%store model_serving_folder
%store food_model_name
%store fashion_model_name


Stored 'model_serving_folder' (str)
Stored 'food_model_name' (str)
Stored 'fashion_model_name' (str)
