# [모듈 1.1] Inference NCF on INF2

# 1. 환경 셋업

## 1.1. 기본 세팅
사용하는 패키지는 import 시점에 자동으로 재로딩합니다.

In [1]:
# Enable IPython's autoreload extension; mode 2 reloads all modules before
# each execution, so edits to local modules are picked up without a restart.
%load_ext autoreload
%autoreload 2

import sys
# Make the modules under ./src importable from this notebook.
sys.path.append('./src')

필요한 torch_neuronx 를 로딩 합니다.

In [2]:
import torch
import torch_neuronx

# 2. 훈련된 모델 로딩

## 훈련 모델 아티팩트 확인

- 파이토치로 이미 훈련된 모델 아티팩트의 경로를 지정합니다.

In [3]:
# Relative path of the trained model artifact (presumably PyTorch weights,
# judging by the .pth suffix — confirm).
# NOTE(review): in the visible part of the notebook this variable is only
# printed; the model is loaded via config.model_path instead — confirm that
# the two locations agree.
artifact_path = 'models/NeuMF-end.pth'
print("model artifact is assigend from : ", artifact_path)

model artifact is assigend from :  models/NeuMF-end.pth


## 모델 로딩에 필요한 설정 파일 생성

- 모델 로딩시에 필요한 파라미터 사용 (기존의 값을 사용 함)

In [4]:
import json
import os
import config
from common_utils import save_json, load_json

class Params:
    """Fixed hyperparameter values used to describe the trained model.

    Attributes:
        factor_num: Set to 32.
        num_layers: Set to 3.
        dropout: Set to 0.0.
    """

    def __init__(self):
        # Single unpacking assignment; attributes are created left to right.
        self.factor_num, self.num_layers, self.dropout = 32, 3, 0.0
                        
# Instantiate the fixed hyperparameters and report the num_layers setting.
args = Params()
print("# of num_layers: ", args.num_layers)

# Number of users and items that were fixed when the model was trained.
user_num, item_num = 6040, 3706
print("user_num: ", user_num, " item_num: ", item_num)

# Numeric settings are written as strings; 'model_type' is taken from
# config.model as-is.
model_config_dict = dict(
    user_num=str(user_num),
    item_num=str(item_num),
    factor_num=str(args.factor_num),
    num_layers=str(args.num_layers),
    dropout=str(args.dropout),
    model_type=config.model,
)

model_config_file = 'model_config.json'
model_config_file_path = os.path.join('src', model_config_file)

# To verify the written file, reload it with load_json(model_config_file_path).
# The save call stays the last expression of the cell so that the notebook
# echoes its return value.
save_json(model_config_file_path, model_config_dict)

# of num_layers:  3
user_num:  6040  item_num:  3706
src/model_config.json is saved


'src/model_config.json'

## 모델 로딩
- 모델 로딩 함수 model_fn() 를 통하여 모델 로딩


In [5]:
from inference import model_fn

# Load the trained model through the project's model_fn() loader.
# NOTE(review): config.model_path comes from the project's config module,
# which is not shown here — confirm what location it points to.
ncf_model = model_fn(config.model_path)

######## Staring model_fn() ###############
device:  cpu


# 3. 모델 컴파일

## 샘플 입력 생성

In [6]:
import numpy as np
import torch

def create_dummy_input(batch_size, num_candidates=100, item_id_high=1000):
    """Build a sample ``(user, item)`` input pair for tracing the model.

    A single row of ``num_candidates`` ids is generated and then repeated
    ``batch_size`` times along dimension 0, so every row of a returned
    tensor is identical.

    Args:
        batch_size: Number of rows in each returned tensor.
        num_candidates: Number of ids per row. Defaults to 100, the value
            that was previously hard-coded.
        item_id_high: Exclusive upper bound of the random item ids, which
            are drawn from ``[1, item_id_high)``. Defaults to 1000, the
            value that was previously hard-coded.

    Returns:
        A tuple ``(user, item)`` of int32 tensors, each of shape
        ``(batch_size, num_candidates)``. ``user`` is all zeros and ``item``
        holds the random ids.
    """
    # The device name is only reported: torch.from_numpy() creates CPU
    # tensors and nothing below moves them to another device.
    device = "cuda" if torch.cuda.is_available() else "cpu"
    print("Using {} device".format(device))

    row_shape = (1, num_candidates)
    user_np = np.zeros(row_shape, dtype=np.int32)
    # Sampled with the default integer dtype and cast afterwards, as before,
    # so seeded runs keep producing the same ids.
    item_np = np.random.randint(low=1, high=item_id_high, size=row_shape).astype(np.int32)

    return (
        torch.repeat_interleave(torch.from_numpy(user_np), batch_size, 0),
        torch.repeat_interleave(torch.from_numpy(item_np), batch_size, 0),
    )

# Build a batch-size-1 sample input; it is reused below for compilation and
# for the inference example.
dummy_inputs = create_dummy_input(batch_size=1)

# Inspect the container type and the number of tensors that were returned.
print("type: ", type(dummy_inputs))
print("len: ", len(dummy_inputs))


Using cpu device
type:  <class 'tuple'>
len:  2


## TorchScript로 변환 (컴파일)

In [7]:
def convert_torch_script(model, dummy_inputs):
    """Compile ``model`` for Neuron by tracing it with sample inputs.

    Args:
        model: The model handed to ``torch_neuronx.trace``.
        dummy_inputs: Example inputs handed to ``torch_neuronx.trace``.

    Returns:
        Whatever ``torch_neuronx.trace`` returns for the given arguments.
    """
    return torch_neuronx.trace(model, dummy_inputs)

# Trace the loaded model with the sample inputs to obtain the Neuron model.
model_neuron = convert_torch_script(ncf_model, dummy_inputs)

#### prediction: 
 tensor([[[-2.8306],
         [-1.6553],
         [ 2.5198],
         [ 0.1828],
         [-1.6887],
         [-1.7654],
         [-0.2166],
         [-2.7551],
         [-2.4077],
         [-3.2328],
         [-1.1433],
         [-2.6161],
         [-3.3304],
         [ 0.2665],
         [-0.8025],
         [-1.6792],
         [-0.3755],
         [-0.1721],
         [-1.1990],
         [ 3.0670],
         [-1.9710],
         [ 1.1168],
         [-1.0715],
         [-2.0493],
         [-2.0830],
         [-1.3871],
         [-2.8947],
         [ 0.2034],
         [-1.7649],
         [ 1.8365],
         [-3.6234],
         [-2.7542],
         [-1.3663],
         [-0.1977],
         [-1.7464],
         [-3.9345],
         [-2.0422],
         [-0.5732],
         [-1.8859],
         [-1.6468],
         [-3.0406],
         [ 0.3931],
         [-2.0422],
         [ 0.4182],
         [-2.5493],
         [ 1.4564],
         [-4.1203],
         [ 2.5553],
         [-0.6861],
 

Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  sys.exit(main())



# 4. 모델 추론

In [8]:
def extract_top_k(prediction, top_k=10):
    """Return the indices of the highest-scoring entries of ``prediction``.

    Every size-1 dimension of ``prediction`` is squeezed away first; the
    ranking is then taken along the last remaining dimension.

    Args:
        prediction: Tensor of scores.
        top_k: Maximum number of indices to return. When fewer scores are
            available, all of them are ranked instead of raising an error.

    Returns:
        Integer tensor of indices ordered from the highest to the lowest
        score, with ``min(top_k, number of scores)`` entries along the last
        dimension.
    """
    scores = torch.squeeze(prediction)  # drop every size-1 dimension

    # torch.topk raises when k exceeds the size of the ranked dimension, so
    # cap k at the number of available scores. A 0-dim tensor (one single
    # score) has no last dimension and is counted as one candidate.
    available = scores.shape[-1] if scores.dim() > 0 else 1
    k = min(top_k, available)

    return torch.topk(scores, k).indices

# Run the compiled model on the sample tensors; dummy_inputs is a 2-tuple,
# so unpacking it passes the same two positional arguments.
prediction = model_neuron(*dummy_inputs)
print("type:prediction ", type(prediction))
print("type:prediction[0] ", type(prediction[0]))

# Rank the first element of the model output and keep the ten best indices.
recommended_item_index = extract_top_k(prediction[0], top_k=10)
print("recommended_item_index: \n", recommended_item_index)

type:prediction  <class 'tuple'>
type:prediction[0]  <class 'torch.Tensor'>
recommended_item_index: 
 tensor([19, 47,  2, 49, 29, 52, 78, 45, 62, 95])
