In [37]:
from langchain_community.vectorstores import FAISS
from langchain.docstore.document import Document
import bs4
import ssl
import urllib3
import pandas as pd

In [2]:
file_path = "./data/idiom_dict.txt"
with open(file_path, 'r', encoding='utf-8') as file:
    lines = file.readlines()
lines = [line.strip() for line in lines]

docs = [Document(page_content=f"{line}") for line in lines]

In [3]:
print(len(docs))

22138


In [4]:
print(docs[0].page_content)

가슴이 뜨겁다 > Have a passionate heart


In [5]:
from langchain.embeddings import HuggingFaceEmbeddings

embeddings = HuggingFaceEmbeddings(model_name="intfloat/multilingual-e5-large")

  embeddings = HuggingFaceEmbeddings(model_name="intfloat/multilingual-e5-large")
  from .autonotebook import tqdm as notebook_tqdm


In [6]:
import os
faiss_index_path = "./embed_vector/multilingual-e5-large"
if os.path.exists(faiss_index_path):
    print("캐시된 벡터 로드...")
    vectorstore = FAISS.load_local(faiss_index_path, embeddings, allow_dangerous_deserialization=True)
else:
    print("새로운 벡터를 생성하고 저장...")
    vectorstore = FAISS.from_documents(documents=docs, embedding=embeddings)
    vectorstore.save_local(faiss_index_path)

캐시된 벡터 로드...


In [7]:
# ✅ FAISS 인덱스를 GPU로 변환 (공통 적용)
import faiss
try:
    # FAISS 인덱스를 GPU로 변환
    res = faiss.StandardGpuResources()
    gpu_index = faiss.index_cpu_to_gpu(res, 0, vectorstore.index)
    vectorstore.index = gpu_index
    print("GPU로 성공적으로 인덱스를 변환했습니다!")
    print(type(gpu_index))
except faiss.FaissException as e:
    print(f"FAISS 에러 발생: {e}")
    print("GPU 메모리가 부족하거나, 다른 이유로 변환에 실패했습니다.")

if isinstance(vectorstore.index, faiss.GpuIndex):  # GPU 인덱스이면
    print("🚀 FAISS 인덱스가 GPU에서 실행 중입니다!")
else:
    print("💻 FAISS 인덱스가 CPU에서 실행 중입니다!")

GPU로 성공적으로 인덱스를 변환했습니다!
<class 'faiss.swigfaiss.GpuIndexFlat'>
🚀 FAISS 인덱스가 GPU에서 실행 중입니다!


In [10]:
print(vectorstore.index.ntotal, vectorstore.index.d)

22138 1024


In [11]:
retriever = vectorstore.as_retriever(search_type='similarity', search_kwargs={'k':10})

In [12]:
print(retriever)

tags=['FAISS', 'HuggingFaceEmbeddings'] vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x7fc05998d0f0> search_kwargs={'k': 10}


In [13]:
SYSTEM_PROMPT = {
            "role": "system",
            "content": [
                {
                    "type": "text",
                    "text": """You're an expert translator who translates Korean webtoon in English. Make sure the number of target sentences matches the number of source sentences. The result should be TSV formatted. 
            • Find a balance between staying true to the Korean meaning and keeping a natural flow. Don't be afraid to add to the text. Embellish it. 
            • Avoid translating word-for-word. Keep the general feeling and translate the text accordingly. 
            • Translate with an American audience in mind. This means easy-to-read, conversational English.""",
                }
            ],
        }

In [15]:
import re
def instruct_structure(prompt):
    input_text, output_text = prompt.split('### target')
    input_text = input_text.replace('### glossaries', '### glossary').replace('\n* ', '\n• ')
    input_text = re.sub(r"\[[^\]]+\] ", "[UNK] ", input_text)
    return input_text

In [16]:
project_id = "prod-ai-project"

from google.cloud import bigquery
client = bigquery.Client(project=project_id)
sql = """select series_id, episode_id, org_input_text, org_output_text, prompt 
        from webtoon_translation.structured_240820_ep_line
        where data_split = 'romance_valid'"""
df = client.query(sql).result().to_dataframe()
from tqdm import tqdm
tqdm.pandas()
df['prompt'] = df['prompt'].progress_apply(lambda x: instruct_structure(x))

100%|████████████████████████████████████████████████████████████████████████████████████████████████| 74/74 [00:00<00:00, 18487.02it/s]


In [47]:
import torch
from transformers import LlamaForCausalLM, LlamaTokenizer, BitsAndBytesConfig, AutoConfig
from accelerate import Accelerator, infer_auto_device_map

LLAMA_PATH = 'model/dpo-240820-ep-line-merged'

# 4bit 퀀타이제이션 설정
bnb_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.bfloat16,
        bnb_4bit_use_double_quant=True,
        bnb_4bit_quant_storage=torch.bfloat16,
        llm_int8_enable_fp32_cpu_offload=True  # CPU 오프로딩 활성화
)
# 각 GPU에 최대 메모리 설정 (예시로 40GB씩 할당)
max_memory = {i: "40GB" for i in range(torch.cuda.device_count())}

# 모델의 설정을 먼저 불러오기
config = AutoConfig.from_pretrained(LLAMA_PATH)

from transformers.modeling_utils import init_empty_weights  # ✅ 올바른 import

# 아직 모델을 선언하지 않은 상태에서 `config`를 기반으로 빈 모델 생성
# 🚀 가중치를 전혀 로드하지 않는 완전 빈(empty) 모델 생성
with init_empty_weights():
    empty_model = LlamaForCausalLM(config)
print("빈 모델 선언 완료")

num_gpus = torch.cuda.device_count()

# **🚀 GPU 8대에 균등하게 분배하는 수동 device_map 생성**
device_map = {}

# 임베딩 & 출력 레이어는 GPU 0에 배치
device_map["model.embed_tokens"] = 1
device_map["lm_head"] = 2
# 126개 LlamaDecoderLayer를 8개의 GPU에 균등 분배
for i, layer in enumerate(empty_model.model.layers):
    assigned_gpu = i % num_gpus  # GPU 인덱스 0부터 7까지 순차적으로 할당
    device_map[f"model.layers.{i}"] = assigned_gpu

# 마지막 RMSNorm도 마지막 GPU로 배치
device_map["model.norm"] = 3
device_map["model.rotary_emb"] = 4  # Rotary Embedding도 마지막 GPU로

# 모델 로드 (`device_map` 적용)
model = LlamaForCausalLM.from_pretrained(
    LLAMA_PATH,
    quantization_config=bnb_config,
    device_map=device_map,  # 자동 분배된 device_map 적용
    attn_implementation="flash_attention_2"
)

from transformers import AutoTokenizer

TOKENIZER_PATH = 'model/dpo-240820-ep-line-merged'

# Tokenizer 로드
tokenizer = AutoTokenizer.from_pretrained(TOKENIZER_PATH)
tokenizer.pad_token = tokenizer.eos_token
# A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.
tokenizer.padding_side = "left"

#tokenizer.save_pretrained('model/llama_405b_quantized')


빈 모델 선언 완료


Loading checkpoint shards: 100%|██████████████████████████████████████████████████████████████████████| 191/191 [21:01<00:00,  6.60s/it]


In [64]:
data_idx = 3
data = df['prompt'][data_idx]
example = data.split("### source")[1].strip()
#print(example)

def retrieve(question):
    docs = retriever.invoke(question) #retriever로 문서 가져오고
    return docs #context와 question 포함한 딕셔너리 반환

findings = retrieve(example)
hint = data.split("### source")[0].strip()
for i in range(10):
    hint += '\n• '+findings[i].page_content
input_text = hint + '\n\n###source\n' + example
print(input_text)

### glossary
• 시그렌 (M): siegren
• 아벨 (M): abel
• 평민: commoner
• 제국: empire
• 피오나 (F): fiona
• 천년만년 살 것 같다 > Feel invincible
• 끝이 보이지 않다 > Seem endless
• 목숨이 경각에 달려 있다 > Be on the verge of death
• 쓸개가 빠졌다 > Be spineless
• 바늘 끝에 찔러도 피 한 방울 안 나겠다 > Cold-hearted
• 여우 같은 여자, 곰 같은 남자 > Clever woman, naive man
• 참을 인 세 번이면 살인도 면한다 > Patience solves everything
• 후환이 두렵다 > Fear future consequences
• 시소게임을 하다 > Be neck and neck
• 싸울아비 같은 > Brave and fearless

###source
000	none 이 녀석, 남자 주인공인 시그렌 아냐?!
001	none 원작 시작까지는 아직 6년이나 남았는데
002	none 왜 남주가 벌써 내 코앞에서 굴러다니고 있는 거지?
003	none 얘가 이 시기에 아벨을 만나는 거던가?
004	none 아니, 지금 이런 생각 할 때가 아니지!
005	none 이 녀석이 이 세계를 구원할 남주가 맞다면
006	none 이대로 날 구하고 사망하면 세계멸망이잖아!
007	none 뭘 해도 내가 최종보스야?!
008	none 여, 여기 빨리 의원 좀 불러 주세요!
009	none 이틀 뒤
010	none 흐음…
011	none 심각한 부상이지만….
012	none 다행히 큰 고비는 넘겼습니다.
013	none 정말요!
014	none 무슨 일 있으면 불러 주세요.
015	none 네!
016	none 다행이다~
017	none 세계 멸망의 위기는 넘겼어~!
018	none 그나저나 이렇게 보면
019	none 세계를 구할 영웅이라며 칭송받을 거라곤 안 믿기네.
020	none 이 시기면 시그렌 나이가 열

In [65]:
import re
def instruct_structure(prompt,system_prompt=
                       """You're an expert translator who translates Korean webtoon in English. Make sure the number of target sentences matches the number of source sentences. The result should be TSV formatted. 
    • Find a balance between staying true to the Korean meaning and keeping a natural flow. Don't be afraid to add to the text. Embellish it. 
    • Avoid translating word-for-word. Keep the general feeling and translate the text accordingly. 
    • Translate with an American audience in mind. This means easy-to-read, conversational English."""):

    return f"""<|begin_of_text|><|start_header_id|>system<|end_header_id|>
{system_prompt}<|eot_id|><|start_header_id|>user<|end_header_id|>
{input_text.strip()}<|eot_id|><|start_header_id|>assistant<|end_header_id|>"""
    
sample = instruct_structure(input_text)
print(sample)

<|begin_of_text|><|start_header_id|>system<|end_header_id|>
You're an expert translator who translates Korean webtoon in English. Make sure the number of target sentences matches the number of source sentences. The result should be TSV formatted. 
    • Find a balance between staying true to the Korean meaning and keeping a natural flow. Don't be afraid to add to the text. Embellish it. 
    • Avoid translating word-for-word. Keep the general feeling and translate the text accordingly. 
    • Translate with an American audience in mind. This means easy-to-read, conversational English.<|eot_id|><|start_header_id|>user<|end_header_id|>
### glossary
• 시그렌 (M): siegren
• 아벨 (M): abel
• 평민: commoner
• 제국: empire
• 피오나 (F): fiona
• 천년만년 살 것 같다 > Feel invincible
• 끝이 보이지 않다 > Seem endless
• 목숨이 경각에 달려 있다 > Be on the verge of death
• 쓸개가 빠졌다 > Be spineless
• 바늘 끝에 찔러도 피 한 방울 안 나겠다 > Cold-hearted
• 여우 같은 여자, 곰 같은 남자 > Clever woman, naive man
• 참을 인 세 번이면 살인도 면한다 > Patience solves everything
• 후

In [67]:
# 입력 변환 및 토큰화
inputs = tokenizer(sample, return_tensors="pt").to(model.device)

# 모델 추론
with torch.no_grad():
    output = model.generate(**inputs, 
                            max_length=4096,
                            do_sample=True,
                            temperature=0.1,
                            top_p=0.9,
                            top_k=30,
                            repetition_penalty=1.2
                           )

# 결과 출력
response = tokenizer.decode(output[0], skip_special_tokens=False)
print(response)

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


<|begin_of_text|><|begin_of_text|><|start_header_id|>system<|end_header_id|>
You're an expert translator who translates Korean webtoon in English. Make sure the number of target sentences matches the number of source sentences. The result should be TSV formatted. 
    • Find a balance between staying true to the Korean meaning and keeping a natural flow. Don't be afraid to add to the text. Embellish it. 
    • Avoid translating word-for-word. Keep the general feeling and translate the text accordingly. 
    • Translate with an American audience in mind. This means easy-to-read, conversational English.<|eot_id|><|start_header_id|>user<|end_header_id|>
### glossary
• 시그렌 (M): siegren
• 아벨 (M): abel
• 평민: commoner
• 제국: empire
• 피오나 (F): fiona
• 천년만년 살 것 같다 > Feel invincible
• 끝이 보이지 않다 > Seem endless
• 목숨이 경각에 달려 있다 > Be on the verge of death
• 쓸개가 빠졌다 > Be spineless
• 바늘 끝에 찔러도 피 한 방울 안 나겠다 > Cold-hearted
• 여우 같은 여자, 곰 같은 남자 > Clever woman, naive man
• 참을 인 세 번이면 살인도 면한다 > Patience solv

In [69]:
print(sample)

<|begin_of_text|><|start_header_id|>system<|end_header_id|>
You're an expert translator who translates Korean webtoon in English. Make sure the number of target sentences matches the number of source sentences. The result should be TSV formatted. 
    • Find a balance between staying true to the Korean meaning and keeping a natural flow. Don't be afraid to add to the text. Embellish it. 
    • Avoid translating word-for-word. Keep the general feeling and translate the text accordingly. 
    • Translate with an American audience in mind. This means easy-to-read, conversational English.<|eot_id|><|start_header_id|>user<|end_header_id|>
### glossary
• 시그렌 (M): siegren
• 아벨 (M): abel
• 평민: commoner
• 제국: empire
• 피오나 (F): fiona
• 천년만년 살 것 같다 > Feel invincible
• 끝이 보이지 않다 > Seem endless
• 목숨이 경각에 달려 있다 > Be on the verge of death
• 쓸개가 빠졌다 > Be spineless
• 바늘 끝에 찔러도 피 한 방울 안 나겠다 > Cold-hearted
• 여우 같은 여자, 곰 같은 남자 > Clever woman, naive man
• 참을 인 세 번이면 살인도 면한다 > Patience solves everything
• 후