In [1]:
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline, AutoModelForSeq2SeqLM, BitsAndBytesConfig
import os, torch

In [2]:
# Configure CUDA through environment variables: synchronous kernel launches
# (easier-to-read error traces) and only GPU 0 visible to this process.
# These must be in place before CUDA is first initialised to take effect.
os.environ.update(
    {
        "CUDA_LAUNCH_BLOCKING": "1",
        "CUDA_VISIBLE_DEVICES": "0",
    }
)

In [3]:
# Load the saved fine-tuned model and its tokenizer from the same local checkpoint directory.
FINE_TUNED_DIR = "LLama_fine_tuned_model"
tokenizer = AutoTokenizer.from_pretrained(FINE_TUNED_DIR)
model = AutoModelForCausalLM.from_pretrained(FINE_TUNED_DIR)

In [4]:
# Source: Chosun Ilbo top pick.
# Paste the Korean article between the triple quotes. This is a plain string
# (not an f-string) so braces inside the pasted text are never interpreted
# as format placeholders.
document = """

"""

# Text-generation pipeline around the already-loaded fine-tuned model.
pipe = pipeline("text-generation", 
                model=model, 
                tokenizer=tokenizer, 
                max_new_tokens=256,
                device_map="auto",
                )

# Single-turn chat: ask the model to translate the document and rewrite it
# as a concise English prompt.
messages = [
    {"role": "user", "content": f"""
    Below is a Korean document. Your task is to:
    1. Translate it into English.
    2. Reformat it into a concise and AI-friendly English prompt.

    Document:
    {document}
    """},
]

# Render the chat into the model's prompt format as a string, ending with
# the assistant header so the model starts its reply.
prompt = pipe.tokenizer.apply_chat_template(
        messages, 
        tokenize=False, 
        add_generation_prompt=True
)

outputs = pipe(
    prompt,
    do_sample=True,
    temperature=0.4,
    top_k=50,
    top_p=0.95,
    # The chat template has already inserted the special tokens (including
    # BOS) into `prompt`; letting the tokenizer add them again would
    # duplicate them, so special-token insertion is disabled here.
    add_special_tokens=False,
    eos_token_id = [ # without explicit eos_token_id the model keeps repeating tokens
        pipe.tokenizer.eos_token_id,
        pipe.tokenizer.convert_tokens_to_ids("<|eot_id|>")
    ]
)

Device set to use cuda:0


In [8]:
# Ask PyTorch to release cached GPU memory that is currently unused,
# freeing room on the device after the generation run.
torch.cuda.empty_cache()

In [9]:
# The pipeline output echoes the prompt at the start of the text;
# slice it off so only the model's completion remains, then show it.
full_text = outputs[0]["generated_text"]
generated_text = full_text[len(prompt):]
print(generated_text)

Here are the translations and reformatted prompts:

**Translation:**

The South Korean government's detention order for President Moon Jae-in and related law enforcement agencies, the National Intelligence Service Bureau Special Investigation Unit, the Presidential Security Office, and the Presidential Protection Office, held a meeting on 14th to discuss the impact of the detention order on the execution of the plan. The meeting concluded that the meeting with the Presidential Protection Office did not affect the execution of the detention order.

The head of the Public Security Office stated that the meeting with the Presidential Protection Office was to request cooperation to ensure safe and peaceful execution of the detention order, but the Presidential Protection Office did not provide a clear answer to the request. The meeting did not discuss the timing of the second detention order or the duration of its execution. The head of the Public Security Office also stated that the meeti

아래는 번역 전용 NLP 코드입니다. (아직 학습 중)

In [6]:
TRANS_MODEL = "Helsinki-NLP/opus-mt-ko-en"
# BitsAndBytes configuration: load the model onto the GPU in quantized form.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,  # 4-bit quantization
    bnb_4bit_quant_type="nf4",  # NF4 quantization type
    bnb_4bit_use_double_quant=True,  # use double quantization
    bnb_4bit_compute_dtype=torch.bfloat16,  # compute in bfloat16
    llm_int8_enable_fp32_cpu_offload=True  # enable CPU offload
)

Transtokenizer = AutoTokenizer.from_pretrained(TRANS_MODEL)

# Use EOS as the padding token only when the translation tokenizer has none.
# This operates on `Transtokenizer`, not on the LLaMA `tokenizer` from the
# earlier cell, so running this cell does not mutate unrelated state.
if Transtokenizer.pad_token is None:
    Transtokenizer.pad_token = Transtokenizer.eos_token

# Load the Helsinki-NLP model. The checkpoint is an encoder-decoder
# translation model, so it must be loaded with the seq2seq auto class;
# a causal-LM class would not give a usable translator.
Transmodel = AutoModelForSeq2SeqLM.from_pretrained(
    TRANS_MODEL,
    quantization_config=bnb_config,
)

`low_cpu_mem_usage` was None, now default to True since model is quantized.


In [None]:
# Set up the translation pipeline.
# No `device` argument is passed: the model was loaded with a quantization
# config and is already placed on its device, and the pipeline refuses an
# explicit `device` for such a model.
translator = pipeline("translation", model=Transmodel, tokenizer=Transtokenizer)

# Run the translation.
generated_text = "예시 텍스트를 번역합니다."  # actual input text
translation = translator(generated_text, max_length=256, batch_size=1)

# Print the result.
print(translation)