## 번역 성능 평가 

Kor - Eng - Jpn

In [1]:
import pandas as pd
import numpy as np
from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction
from nltk.translate.meteor_score import meteor_score
from bert_score import score

## 번역 시스템 준비 

In [2]:
import os
from furiosa_llm import LLM, SamplingParams

    PyTorch 2.5.1+cu121 with CUDA 1201 (you have 2.1.0+cu121)
    Python  3.10.15 (you have 3.10.15)
  Please reinstall xformers (see https://github.com/facebookresearch/xformers#installing-xformers)
  Memory-efficient attention, SwiGLU, sparse and more won't be available.
  Set XFORMERS_MORE_DETAILS=1 for more details
Xformers is not installed correctly. If you want to use memory_efficient_attention to accelerate training use the following command to install Xformers
pip install xformers.


In [3]:
os.environ["RUST_BACKTRACE"] = "full"
# Loading an artifact of Llama 3.1 8B Instruct model
path = "/home/elicer/nopro/gitRepo/shinhan/renegade/Llama-3.1-8B-Instruct"
llm = LLM.from_artifacts(path, devices="npu:1:*")

# You can specify various parameters for text generation
sampling_params = SamplingParams(temperature=0, max_tokens=200)

INFO:2024-12-28 23:06:02 Prefill buckets: [Bucket(batch_size=1, attention_size=512), Bucket(batch_size=1, attention_size=1024)]
INFO:2024-12-28 23:06:02 Decode buckets: [Bucket(batch_size=64, attention_size=2048), Bucket(batch_size=128, attention_size=2048)]
INFO:2024-12-28 23:06:02 For some LLaMA V1 models, initializing the fast tokenizer may take a long time. To reduce the initialization time, consider using 'hf-internal-testing/llama-tokenizer' instead of the original tokenizer.


In [4]:
def apply_template(prompt):
    return f"""<|begin_of_text|><|start_header_id|>system<|end_header_id|>

You are a helpful assistant<|eot_id|><|start_header_id|>user<|end_header_id|>

{prompt}<|eot_id|><|start_header_id|>assistant<|end_header_id|>"""

In [5]:
prompt = apply_template("What is quantization in LLM models?")
async for output_txt in llm.stream_generate(prompt, sampling_params):
    print(output_txt, end="", flush=True)



In Large Language Models (LLM), "quantization" refers to the process of reducing the precision of model weights from 32-bit floating-point numbers (e.g., float32) to lower-precision data types, such as 8-bit integers (e.g., int8) or 16-bit integers (e.g., int16). This technique is often used to reduce the memory footprint of large LLMs and accelerate inference on hardware with limited precision arithmetic capabilities.

Quantization involves reducing the number of bits used to represent each model weight, effectively reducing the number of possible values each weight can take. This process relies on techniques such as:

1. **Weight clustering**: Combining similar weights into a single set of representative values, which are then scaled and shifted to minimize the loss of accuracy.
2. **Quantization aware training**: Training the model to learn the optimal values for quantized weights, rather than relying on a post-training quantization step.

There are several quantization techniques

In [7]:
def apply_translation_template(source_lang, target_lang, source_text):
    prompt = f"""This is an {source_lang} to {target_lang} translation, please provide the {target_lang} translation for this text in as polite a tone as possible. \
Do not provide any explanations or text apart from the translation.
The translation result must be written in {target_lang}.

{source_lang}: {source_text}

{target_lang}:"""
    return f"""<|begin_of_text|><|start_header_id|>system<|end_header_id|>

You are a helpful assistant specialized in translation tasks.<|eot_id|><|start_header_id|>user<|end_header_id|>

{prompt}<|eot_id|><|start_header_id|>assistant<|end_header_id|>"""

# 프롬프트 생성
source_lang = "English"
target_lang = "Korean"
source_text = "What is quantization in LLM models?"

prompt = apply_translation_template(source_lang, target_lang, source_text)

# 실행 코드
async for output_txt in llm.stream_generate(prompt, sampling_params):
    print(output_txt, end="", flush=True)



LLM 모델의 quantization란 무엇입니까?

In [16]:
output_txt = llm.generate(prompt, sampling_params)
print(output_txt.outputs[0].text)



LLM 모델의 quantization란 무엇입니까?


In [33]:
def translate(source_text, source_lang, target_lang):
    prompt = apply_translation_template(source_lang, target_lang, source_text)
    output_txt = llm.generate(prompt, sampling_params)
    return output_txt.outputs[0].text[2:]

## 평가 시스템 준비

In [20]:
# 언어별 토크나이저
from nltk.tokenize import word_tokenize
from janome.tokenizer import Tokenizer
from kiwipiepy import Kiwi

def tokenize(text, lang):
    if lang == "eng":
        # 영어: NLTK word_tokenize
        return word_tokenize(text)
    elif lang == "kor":
        # 한국어: Kiwi 형태소 분석기
        kiwi = Kiwi()
        tokens = kiwi.tokenize(text)
        return [token.form for token in tokens]  # 형태소만 추출
    elif lang == "jpn":
        # 일본어: Janome 형태소 분석기
        tokenizer = Tokenizer()
        return [token.surface for token in tokenizer.tokenize(text)]
    else:
        raise ValueError(f"Unsupported language: {lang}")

In [18]:
from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction
from nltk.translate.meteor_score import meteor_score
from bert_score import score
import numpy as np

# 평가 함수
def evaluate(
    reference: str,
    candidate: str,
    tgt_lang: str,
    bert_model="microsoft/deberta-xlarge-mnli",
):
    # 텍스트를 토큰화
    reference_tokens = tokenize(reference, tgt_lang)
    candidate_tokens = tokenize(candidate, tgt_lang)

    # BLEU 점수 계산 (스무딩 적용)
    bleu = sentence_bleu(
        [reference_tokens],
        candidate_tokens,
        smoothing_function=SmoothingFunction().method1,
    )

    # METEOR 점수 계산 (리스트 형태로 전달)
    meteor = meteor_score([reference_tokens], candidate_tokens)

    # BERTScore 계산
    P, R, F1 = score(
        [candidate],
        [reference],
        lang=tgt_lang,
        model_type=bert_model,
        device="cuda",  # GPU 사용
    )

    return {
        "BLEU": round(bleu, 4),
        "METEOR": round(meteor, 4),
        "BERT": round(F1.item(), 4),
    }

데이터셋 FLORES-Plus 준비

In [19]:
# 텍스트 파일 로더
def load_text_file(file_path):
    with open(file_path, "r", encoding="utf-8") as f:
        lines = f.readlines()
    return [line.strip() for line in lines]

# 데이터 위치 
data_dir = "/home/elicer/Jun/llm-rag-chatbot/data/flores/"

data_eng = load_text_file(f"{data_dir}/devtest.eng_Latn")
data_kor = load_text_file(f"{data_dir}/devtest.kor_Hang")
data_jpn = load_text_file(f"{data_dir}/devtest.jpn_Jpan")

print(data_eng[0])
print(data_kor[0])
print(data_jpn[0])

"We now have 4-month-old mice that are non-diabetic that used to be diabetic," he added.
"그는 ""현재 4개월 된 당뇨병에서 치료된 생쥐가 있다""고 덧붙였다."
「我々が飼っている生後4か月のマウスはかつて糖尿病でしたが現在は糖尿病ではない、」と彼は付け加えました。


In [25]:
test_idx = 1
print(f'original: {data_kor[test_idx]}')
result = translate(data_kor[test_idx], 'Korean', 'Enlgish')
print(f'translated: {result}')

# eval_result = evaluate(data_eng[test_idx], result, 'eng')
# print(eval_result)

original: 노바스코샤주 핼리팩스의 댈하우지대학교 의과 교수이자 캐나다 당뇨 협회 임상과학부 의장인 Ehud Ur 박사는 이 연구가 아직 초기 단계라고 경고했습니다.
translated: 

Dr. Ehud Ur, a professor of medicine at Dalhousie University in Halifax, Nova Scotia, and a member of the Clinical Science Committee of the Canadian Diabetes Association, cautioned that the study is still in its early stages.


In [29]:
result[2:]

'Dr. Ehud Ur, a professor of medicine at Dalhousie University in Halifax, Nova Scotia, and a member of the Clinical Science Committee of the Canadian Diabetes Association, cautioned that the study is still in its early stages.'

In [32]:
print(f'original: {data_eng[1]}')
result = translate(data_eng[1], 'Enlgish', 'Korean')
print(f'translated: {result}')

# eval_result = evaluate(data_kor[1], result, 'Korean')
# print(eval_result)

original: Dr. Ehud Ur, professor of medicine at Dalhousie University in Halifax, Nova Scotia and chair of the clinical and scientific division of the Canadian Diabetes Association cautioned that the research is still in its early days.
translated: 

Dr. Ehud Ur, Halifax의 Dalhousie University의 의학 교수와 캐나다糖尿病協會의临床과과 과학부의 의장은 연구는 아직 초기 단계에에에에에에에에에에에에에에에에에에에에에에에에에에에에에에에에에에에에에에에에에에에에에에에에에에에에에에에에에에에에에에에에에에에에에에에에에에에에에에에에에에에에에에에에에에에에에에에에에에에에에에에에에에에에에에에에에에에에에에에에에에에에에에에에에에에에에에에에에에에에에에에에에에에에에에에에에에에에


In [15]:
print(f'original: {data_eng[1]}')
result = translate(data_eng[1], 'eng', 'jpn', prompt)
print(f'translated: {result}')

eval_result = evaluate(data_jpn[1], result, 'jpn')
print(eval_result)

original: Dr. Ehud Ur, professor of medicine at Dalhousie University in Halifax, Nova Scotia and chair of the clinical and scientific division of the Canadian Diabetes Association cautioned that the research is still in its early days.
translated: ドクター・エフード・ウール博士は、ハリファックスにあるノバスコティア州のダルハウス大学医学部教授であり、カナダ糖尿病協会臨床科学部門委員長として、研究がまだ初期段階であることを注意した。
{'BLEU': 0.2401, 'METEOR': 0.5821, 'BERT': 0.867}


## 번역 평가 코드 

In [34]:
import time
import pandas as pd

nTest = 200

# 언어 쌍 데이터 (명확히 영어, 일본어, 한국어로 명시)
pairs = [
    ("English", "Korean", data_eng[:nTest], data_kor[:nTest]),
    ("English", "Japanese", data_eng[:nTest], data_jpn[:nTest]),
    ("Korean", "English", data_kor[:nTest], data_eng[:nTest]),
    ("Korean", "Japanese", data_kor[:nTest], data_jpn[:nTest]),
    ("Japanese", "English", data_jpn[:nTest], data_eng[:nTest]),
    ("Japanese", "Korean", data_jpn[:nTest], data_kor[:nTest]),
]

# 평가 수행 및 저장 디렉토리
save_dir = "../../data/translate_rngd_flores"
all_results = []  # 모든 결과를 모아서 저장할 리스트

for source_lang, target_lang, sources, references in pairs:
    pair_results = []  # 현재 언어 쌍의 결과 저장
    ic = 0
    for source in sources:
        if ic % 20 == 0:
            print(ic)
        ic += 1

        # 번역 및 시간 측정
        start_time = time.time()
        candidate = translate(source, source_lang, target_lang)
        elapsed_time = time.time() - start_time  # 번역 소요 시간

        # 번역 결과 저장
        pair_results.append({
            "Source Language": source_lang,
            "Target Language": target_lang,
            "Source": source,
            "Candidate": candidate,
            "Translation Time (s)": round(elapsed_time, 4),  # 번역 소요 시간 추가
        })

    # 현재 언어 쌍 결과를 데이터프레임으로 변환 및 저장
    pair_df = pd.DataFrame(pair_results)
    pair_filename = f"{save_dir}/Translate_results_{source_lang}_to_{target_lang}.csv"
    pair_df.to_csv(pair_filename, index=False, encoding="utf-8")
    print(f"Results for {source_lang} to {target_lang} saved: {pair_filename}")

    # 전체 결과 통합
    all_results.extend(pair_results)


# 모든 결과를 통합한 데이터프레임 생성 및 저장
final_df = pd.DataFrame(all_results)
final_filename = f"{save_dir}/Translate_results_flores_all.csv"
final_df.to_csv(final_filename, index=False, encoding="utf-8")
print(f"All results saved: {final_filename}")

KeyboardInterrupt: 