## 위로봇 오복이 모델 프로세스

### Base Model Load
 - 출처 : https://github.com/snunlp/KR-SBERT

In [7]:
import pandas as pd
import numpy as np
from sentence_transformers import SentenceTransformer, util
# Load the KR-SBERT sentence-embedding model by its published model name.
model = SentenceTransformer(model_name_or_path='snunlp/KR-SBERT-V40K-klueNLI-augSTS')

### 데이터 불러오기

In [8]:
# Read the base dataset; later cells use its 'user' and 'system' columns.
df = pd.read_excel(io="../data/base_datasets.xlsx")

### 챗봇 테스트 (원본 SBERT 모델)

In [None]:
# Embed every 'user' utterance with the base model. normalize_embeddings=True
# requests unit-length vectors, and the result is returned as a NumPy array.
query_embeddings = model.encode(
    sentences=df['user'].tolist(),
    convert_to_numpy=True,
    normalize_embeddings=True,
    show_progress_bar=True,
)

In [None]:
# Look up the stored 'user' sentences most similar to a sample query and print
# the 'system' reply paired with each of them.
query = "너무 힘들다 요즘"
query_embedding = model.encode(query, normalize_embeddings=True)

# Never ask for more neighbours than there are rows in the dataset.
top_k = min(5, len(df))
cos_scores = util.pytorch_cos_sim(query_embedding, query_embeddings)[0]
# Call topk on the score tensor itself: `torch` is not imported at this point of
# the notebook, so `torch.topk(...)` raises NameError on a fresh kernel.
top_results = cos_scores.topk(k=top_k)

print(f"입력 문장: {query}")
print(f"<입력 문장과 유사한 {top_k} 개의 문장>")

for rank, (score, idx) in enumerate(zip(top_results.values, top_results.indices), start=1):
    # Indices are row positions in `query_embeddings`, so index `df` positionally.
    answer = df['system'].iloc[int(idx)]
    print(f"{rank}: {answer} (유사도: {float(score):.4f})")

### SBERT 모델 ONNX 양자화(quantization)

#### ONNX 모델로 변환

In [4]:
from pathlib import Path

from transformers.convert_graph_to_onnx import convert
# Export the model to ONNX only once. `convert` aborts when the output folder
# already exists and is not empty, so an unconditional call fails as soon as
# this cell is re-run after a successful export.
onnx_model_path = Path("onnx_models/sbert-model.onnx")
if onnx_model_path.exists():
    print(f"{onnx_model_path} already exists, skipping ONNX export")
else:
    convert(framework="pt", model="snunlp/KR-SBERT-V40K-klueNLI-augSTS", output=onnx_model_path, opset=11)



ONNX opset version set to: 11
Loading pipeline (model: snunlp/KR-SBERT-V40K-klueNLI-augSTS, tokenizer: snunlp/KR-SBERT-V40K-klueNLI-augSTS)
Creating folder onnx_models
Using framework PyTorch: 2.0.0
Found input input_ids with shape: {0: 'batch', 1: 'sequence'}
Found input token_type_ids with shape: {0: 'batch', 1: 'sequence'}
Found input attention_mask with shape: {0: 'batch', 1: 'sequence'}
Found output output_0 with shape: {0: 'batch', 1: 'sequence'}
Found output output_1 with shape: {0: 'batch'}
Ensuring inputs are in correct order
position_ids is not present in the generated input list.
Generated inputs order: ['input_ids', 'attention_mask', 'token_type_ids']
verbose: False, log level: Level.ERROR



#### ONNX 모델 Uint8(0~255)로 가중치(Weight) 양자화

In [5]:
from onnxruntime.quantization import quantize_dynamic, QuantType
# Dynamically quantize the exported ONNX graph with unsigned 8-bit weights and
# write the quantized model next to the original file.
quantize_dynamic(
    model_input="onnx_models/sbert-model.onnx",
    model_output="onnx_models/sbert-model_uint8.onnx",
    weight_type=QuantType.QUInt8,
)

Ignore MatMul due to non constant B: /[/encoder/layer.0/attention/self/MatMul]
Ignore MatMul due to non constant B: /[/encoder/layer.0/attention/self/MatMul_1]
Ignore MatMul due to non constant B: /[/encoder/layer.1/attention/self/MatMul]
Ignore MatMul due to non constant B: /[/encoder/layer.1/attention/self/MatMul_1]
Ignore MatMul due to non constant B: /[/encoder/layer.2/attention/self/MatMul]
Ignore MatMul due to non constant B: /[/encoder/layer.2/attention/self/MatMul_1]
Ignore MatMul due to non constant B: /[/encoder/layer.3/attention/self/MatMul]
Ignore MatMul due to non constant B: /[/encoder/layer.3/attention/self/MatMul_1]
Ignore MatMul due to non constant B: /[/encoder/layer.4/attention/self/MatMul]
Ignore MatMul due to non constant B: /[/encoder/layer.4/attention/self/MatMul_1]
Ignore MatMul due to non constant B: /[/encoder/layer.5/attention/self/MatMul]
Ignore MatMul due to non constant B: /[/encoder/layer.5/attention/self/MatMul_1]
Ignore MatMul due to non constant B: /[/

### ONNX 모델로 "user" 질문 임베딩

In [1]:
from onnxruntime import InferenceSession
from transformers import AutoTokenizer
import torch
from tqdm import tqdm

# Tokenizer of the original checkpoint; its output is fed to the ONNX graph.
tokenizer = AutoTokenizer.from_pretrained(
    pretrained_model_name_or_path="snunlp/KR-SBERT-V40K-klueNLI-augSTS"
)
# Run the uint8-quantized model with ONNX Runtime on the CPU provider.
sess = InferenceSession(
    path_or_bytes="./onnx_models/sbert-model_uint8.onnx",
    providers=["CPUExecutionProvider"],
)

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
def mean_pooling(model_output, attention_mask):
    """Average token embeddings over the positions selected by the attention mask.

    Args:
        model_output: Sequence whose first element is a NumPy array of token
            embeddings (assumed to be shaped (batch, sequence, hidden) -- the
            sum below runs over axis 1).
        attention_mask: NumPy array (assumed (batch, sequence)) holding 1 for
            real tokens and 0 for padding.

    Returns:
        A 3-tuple of torch tensors: the pooled embeddings, the mask expanded to
        the shape of the token embeddings, and the per-feature mask sums used
        as the divisor.
    """
    token_vectors = torch.from_numpy(model_output[0])
    mask = torch.from_numpy(attention_mask)

    # Broadcast the mask across the hidden dimension so it can weight every feature.
    input_mask_expanded = mask.unsqueeze(-1).expand(token_vectors.size())

    # Lower-bound the divisor so a fully masked row does not divide by zero.
    sum_mask = torch.clamp(input_mask_expanded.sum(1), min=1e-9)
    sum_embeddings = torch.sum(token_vectors * input_mask_expanded, 1)

    return sum_embeddings / sum_mask, input_mask_expanded, sum_mask

In [5]:
def embedding_query(query: str, normalize_embeddings=False) -> np.ndarray:
    """Embed a single sentence with the quantized ONNX model.

    The sentence is tokenized, run through the ONNX session, and its token
    embeddings are mean-pooled into one vector.

    Args:
        query: The sentence to embed.
        normalize_embeddings: When True, scale the vector to unit L2 norm.
            An all-zero vector is returned unchanged instead of becoming NaN.

    Returns:
        A 1-D NumPy array holding the sentence embedding.
    """
    # truncation=True cuts an over-long query to the tokenizer's configured
    # maximum length instead of passing an unbounded sequence to the model.
    model_inputs = tokenizer(query, return_tensors="pt", truncation=True)
    inputs_onnx = {name: tensor.cpu().detach().numpy()
                   for name, tensor in model_inputs.items()}
    sequence = sess.run(None, inputs_onnx)

    # mean_pooling returns (pooled, expanded_mask, mask_sum). Keep the pooled
    # vector of the only sentence in the batch and continue in NumPy, so the
    # normalisation below does not mix torch tensors with NumPy scalars.
    query_embedding = mean_pooling(
        sequence, inputs_onnx["attention_mask"])[0][0].numpy()

    if normalize_embeddings:
        norm = np.linalg.norm(query_embedding)
        # Dividing by a zero norm would fill the vector with NaNs.
        if norm > 0:
            query_embedding = query_embedding / norm

    return query_embedding

In [9]:
# Embed every 'user' utterance with the quantized ONNX model, one sentence per
# call; tqdm reports progress because the full pass takes a long time.
onnx_embeddings = [
    embedding_query(sentence, normalize_embeddings=True)
    for sentence in tqdm(df['user'].tolist())
]

100%|█████████████████████████████████████| 99203/99203 [22:18<00:00, 74.13it/s]


In [10]:
# Persist the embeddings so the slow encoding pass does not have to be repeated.
np.save(file="onnx_embeddings.npy", arr=onnx_embeddings)

### Faiss 벡터 양자화(PQ)

In [11]:
import faiss

In [12]:
# Load the saved sentence embeddings. Faiss works on C-contiguous float32
# matrices, so enforce that here instead of relying on how the file was written.
embeddings = np.ascontiguousarray(np.load("./onnx_embeddings.npy"), dtype=np.float32)

# Product-quantizer (IndexPQ) parameters.
d = embeddings.shape[1]  # embedding dimensionality
nbits = 8  # bits per sub-vector code
# Number of sub-vectors. This was hard-coded to 768, which only works when that
# value divides `d`. Deriving it from `d` keeps one sub-quantizer per dimension
# (identical to the previous setting when d is 768) for any embedding size.
m = d

# PQ index that ranks results by inner product.
index = faiss.IndexPQ(d, m, nbits, faiss.METRIC_INNER_PRODUCT)  # create the PQ index
index.train(embeddings)  # train the index on the embeddings
index.add(embeddings)  # add the embeddings to the index

In [13]:
# Save the index built above to the file "faiss_onnx_uint8".
faiss.write_index(index, "faiss_onnx_uint8")

### 챗봇 테스트 (ONNX 양자화 모델 + Faiss)

In [16]:
def reply(query: str, top_k: int = 5):
    """Return the 'system' replies whose 'user' sentence best matches `query`.

    Args:
        query: The user's sentence.
        top_k: Maximum number of replies to return (defaults to 5, the value
            that was previously hard-coded).

    Returns:
        A pandas Series taken from the 'system' column of `df`, ordered as the
        Faiss index returned the matches.
    """
    # Faiss searches a 2-D batch of queries, so add a leading batch axis.
    embedding = np.expand_dims(embedding_query(query, normalize_embeddings=True), axis=0)
    _, neighbour_ids = index.search(embedding, top_k)

    # Faiss pads the result with -1 when it finds fewer than top_k neighbours;
    # drop those, because a -1 label raises KeyError in a label-based lookup.
    positions = neighbour_ids[0]
    positions = positions[positions >= 0]

    # Faiss ids are insertion positions, so select rows positionally.
    return df['system'].iloc[positions]

In [17]:
# Demo: show the replies retrieved for a sample user sentence.
reply(query="아 너무 힘들다 쉬고 싶어")

91502                 00님이 제대로 휴식을 취하지 못하고 계신 거 같아 걱정스러워요.
90957    피곤해 보여서 걱정이에요. 고른 영양 섭취와 운동 등으로 기초 체력을 길러 보시는 ...
88786    피로감이 있으시군요. 잘 쉬는 게 생각보다 참 쉽지 않은 것 같아요. 마음을 조금 ...
88240                        피로하면 몸뿐만 아니라 마음도 괴로워지는 거 같아요.
17730                        00님이 기운이 없어 보여서 저도 기분이 가라앉아요.
Name: system, dtype: object