# Serve ResNet50 on Triton Docker

## 1.1. 기본 세팅
사용하는 패키지는 import 시점에 자동으로 다시 로딩합니다.

In [12]:
%load_ext autoreload
%autoreload 2

import sys
sys.path.append('./utils')

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


# 2. Resnet50 Model 준비

In [3]:
import torch
import torchvision.models as models
import os

# Pick the GPU when one is visible to PyTorch, otherwise fall back to CPU
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print("Using {} device".format(device))




Using cuda device




Saved workspace/resnet50-jit.pt


In [10]:
import torch


def trace_model(model, dummy_inputs, trace_model_name, workspace_folder):
    """Convert ``model`` to TorchScript, save it, and smoke-test the saved file.

    Despite the function name, the model is compiled with
    ``torch.jit.script`` (not ``torch.jit.trace``); ``dummy_inputs`` is only
    used to run the reloaded model once as a load test.

    Args:
        model: ``torch.nn.Module`` to convert.
        dummy_inputs: Tensor fed to the reloaded model in the load test.
        trace_model_name: File name of the saved TorchScript artifact.
        workspace_folder: Directory to save into; created if missing.

    Returns:
        Path of the saved TorchScript file
        (``workspace_folder`` joined with ``trace_model_name``).
    """
    device = "cuda" if torch.cuda.is_available() else "cpu"
    print("Using {} device".format(device))

    os.makedirs(workspace_folder, exist_ok=True)
    model_path = os.path.join(workspace_folder, trace_model_name)

    # Compile the model to TorchScript and save it
    model_jit = torch.jit.script(model)
    model_jit.save(model_path)
    print("Saved {}".format(model_path))

    print("#### Load Test ####")
    # Load onto the same device the dummy input is moved to below; without
    # map_location a model saved from CPU would stay on CPU while the input
    # goes to CUDA, and the forward pass would fail with a device mismatch.
    loaded_m = torch.jit.load(model_path, map_location=device)
    print(loaded_m.code)

    dummy_inputs = dummy_inputs.to(device)
    # Inference-only check: no need to build an autograd graph
    with torch.no_grad():
        result = loaded_m(dummy_inputs)
    print("Result : \n", result.shape)

    return model_path


# Load the pretrained ResNet50, switch it to eval mode, and move it to `device`
resnet50 = models.resnet50(pretrained=True).eval()
resnet50.to(device)

# Folder that receives the saved TorchScript file
workspace_folder = "workspace"

# Dummy input: one random tensor of shape (1, 3, 224, 224)
dummy_input = torch.randn(1, 3, 224, 224)

# Convert and save the model; keep the returned path of the saved file
model_path = trace_model(
    model=resnet50,
    dummy_inputs=dummy_input,
    trace_model_name="resnet50-jit.pt",
    workspace_folder=workspace_folder,
)




Using cuda device
Saved workspace/resnet50-jit.pt
#### Load Test ####
def forward(self,
    x: Tensor) -> Tensor:
  return (self)._forward_impl(x, )

Result : 
 torch.Size([1, 1000])


# 3. Triton 서빙 준비

## 3.1. TorchScript로 변환

## 3.2. config.pbtxt 생성

In [11]:
%%writefile workspace/config.pbtxt
name: "resnet"
platform: "pytorch_libtorch"
max_batch_size: 128
input {
  name: "INPUT__0"
  data_type: TYPE_FP32
  dims: 3
  dims: 224
  dims: 224
}
output {
  name: "OUTPUT__0"
  data_type: TYPE_FP32
  dims: 1000
}

Writing workspace/config.pbtxt


# 4. 아티팩트 패키징

## 모델 리포지토리 폴더 구조
```
model_serving_folder
    - model_name
        - version_number
            - model file
        - config file

# Example
hello-serve-pt
    - hello
        - 1
            - model.pt
        - config.pbtxt

```


## 4.1. 아티팩트 폴더 생성

In [16]:
import os
from utils.triton_util import make_folder_structure, copy_artifact, remove_folder

# Create the model repository folder structure under triton-serve-jit
model_serving_folder = 'triton-serve-jit'
model_name = "resnet"
make_folder_structure(model_serving_folder, model_name=model_name)

# Hand the saved model file and the generated config.pbtxt to copy_artifact
model_config_path = os.path.join(workspace_folder, 'config.pbtxt')
copy_artifact(
    model_serving_folder=model_serving_folder,
    model_name=model_name,
    model_artifact=model_path,
    config=model_config_path,
)

triton-serve-jit:
resnet
resnet50-jit.pt

triton-serve-jit/resnet:
1
config.pbtxt

triton-serve-jit/resnet/1:
model.pt

triton-serve-jit/resnet50-jit.pt:
1

triton-serve-jit/resnet50-jit.pt/1:


### 폴더 삭제
- 필요시 주석 제거하고 사용하세요.

In [17]:
# model_serving_folder = 'triton-serve-jit'
# remove_folder(model_serving_folder)

# 5. 로컬 도커에서 실행 테스트

## 5.0. 도커에서의 실행 테스트는 아래와 같은 순서로 진행합니다.

#### (0) Triton Client 초기화
```
from triton_util import setup_triton_client
triton_client, grpcclient = setup_triton_client()
```

#### (1) 터미널 실행

#### (2) Triton 도커 컨테이너 실행
- 위의 터미널에서 아래 명령어를 하나씩 실행하세요.
```
# Move to current folder (e.g.: /home/ec2-user/SageMaker/lab/00-trition-tts-vits/ )

docker run --gpus=1 --rm -p8000:8000 -p8001:8001 -p8002:8002 -v `pwd`/triton-serve-jit:/models nvcr.io/nvidia/tritonserver:22.08-py3 tritonserver --model-repository=/models --log-verbose=3 --log-info=1 --log-warning=1 --log-error=1
```
#### (3) Triton 클라이언트로 추론 실행
#### (4) 도커 관련 유용한 명령어
```
docker rm -f $(docker ps -qa)

# 도커 실행하여 들어가 보기
docker run -it --entrypoint /bin/bash nvcr.io/nvidia/tritonserver:22.08-py3
# 실행중인 도커 컨테이너 들어가기
docker exec -it <container_name_or_id> /bin/bash


```

## 5.1. Triton Client 초기화

In [19]:
from triton_util import setup_triton_client
# Unpack the two objects returned by setup_triton_client(): `triton_client`
# is passed to infer_triton_client() later, and `grpcclient` is used to build
# the InferInput / InferRequestedOutput objects for the request.
triton_client, grpcclient = setup_triton_client()

## 5.2. !!! 터미널에서 "(2) Triton 도커 컨테이너 실행"을 먼저 수행해 주세요 !!!

## 5.3. 입력 payload 생성

In [20]:
import numpy as np 

def create_client_payload(shape=(1, 3, 224, 224)):
    """Build the Triton gRPC input list for one random FP32 request.

    Relies on the notebook-level ``grpcclient`` and ``np`` names.

    Args:
        shape: Shape of the random tensor bound to ``INPUT__0``. Defaults to
            ``(1, 3, 224, 224)``, the value that was previously hard-coded.

    Returns:
        A one-element list holding the ``InferInput`` for ``INPUT__0``,
        already populated with the random data.
    """
    input0_data = np.random.randn(*shape).astype(np.float32)

    # Declare the input with the array's own shape so that the declared
    # shape and the actual payload cannot drift apart.
    infer_input = grpcclient.InferInput('INPUT__0', list(input0_data.shape), "FP32")
    infer_input.set_data_from_numpy(input0_data)

    print("input0_data: \n", input0_data.shape)

    return [infer_input]



In [21]:
# Build the request inputs: a one-element list with the 'INPUT__0' tensor
# filled with random float32 data.
inputs = create_client_payload()


input0_data: 
 (1, 3, 224, 224)


## 5.4. 출력 변수 생성

In [22]:
# Request a single output tensor, 'OUTPUT__0', from the server
outputs = [grpcclient.InferRequestedOutput('OUTPUT__0')]


## 5.5. Triton에 추론 요청


In [23]:
from triton_util import infer_triton_client

# Run inference for `model_name` with the prepared inputs/outputs.
# NOTE(review): judging by the cell output, the helper prints the output
# shape and values — confirm against utils/triton_util.py.
infer_triton_client(triton_client, model_name, inputs, outputs)

#### output #####
(1, 1000)
#### output values #####
[[-4.53453898e-01  1.44875988e-01  6.53280094e-02 -7.35980272e-01
   6.77011192e-01 -1.40235722e+00 -2.31226757e-02 -3.20033848e-01
   3.56744707e-01 -1.38171327e+00 -1.98832288e-01  1.18865001e+00
  -1.37397915e-01 -2.82973945e-01 -1.20420933e+00 -9.63679492e-01
  -1.16036463e+00 -9.04680610e-01  5.16964912e-01  2.78983235e-01
  -7.12347865e-01  8.58680233e-02 -4.24934715e-01  2.47784704e-01
  -2.43859857e-01 -1.19270217e+00 -1.75863624e+00 -8.29721570e-01
  -1.41527653e+00 -1.92971873e+00 -2.24917459e+00 -9.59906876e-02
  -3.48867059e+00 -2.00563169e+00 -5.23417711e-01 -2.02088261e+00
   5.43032765e-01 -2.48599696e+00  5.34985140e-02  4.10656452e-01
   8.83822665e-02 -2.25385737e+00 -3.69369648e-02 -1.41592175e-01
   9.30102170e-03  7.20523670e-02  3.58229160e-01 -2.18433952e+00
   1.63390779e+00 -1.97294235e+00  1.44157231e+00 -2.91571498e+00
  -1.74786329e+00 -4.51060295e-01  1.06025004e+00 -5.80334663e-01
   3.11141014e-01 -2.25