# 1. polyglot-ko-1.3b를 squarelike/sharegpt_deepl_ko_translation로 파인 튜닝한 모델

In [1]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig

# Hugging Face Hub checkpoint that the tokenizer and model are loaded from.
model_id = "aeolian83/Gugugo_for_DnD_v0.62"

# Quantization settings handed to from_pretrained: 4-bit loading with the
# "nf4" quant type, double quantization enabled, bfloat16 compute dtype.
bnb_config = BitsAndBytesConfig(
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    load_in_4bit=True,
)

tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",
    quantization_config=bnb_config,
)

Downloading (…)okenizer_config.json:   0%|          | 0.00/210 [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/1.65M [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/185 [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/730 [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/2.66G [00:00<?, ?B/s]

Downloading (…)neration_config.json:   0%|          | 0.00/111 [00:00<?, ?B/s]

In [2]:
# Put the model into evaluation mode before generating.
model.eval()
# The cache is switched ON here, so nothing needs to be re-enabled before inference.
model.config.use_cache = True

In [3]:
from transformers import StoppingCriteria, StoppingCriteriaList

class StoppingCriteriaSub(StoppingCriteria):
    """Stop generation once the sequence ends with one of the stop sequences.

    Only the first row of ``input_ids`` is inspected, so the criterion is
    meant for single-sequence (batch size 1) generation.

    Args:
        stops: Iterable of token-id sequences (tensors or lists of ints).
            ``None`` or an empty iterable means the criterion never fires.
        encounters: Accepted for backward compatibility; currently unused.
    """

    def __init__(self, stops=None, encounters=1):
        super().__init__()
        # Flatten every stop sequence to 1-D so that a single-token stop
        # (a 0-d tensor after ``squeeze()``) still has a usable length.
        self.stops = [
            torch.as_tensor(stop).reshape(-1)
            for stop in ([] if stops is None else stops)
        ]

    def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> bool:
        """Return True when the first sequence ends with any stop sequence."""
        sequence = input_ids[0]
        for stop in self.stops:
            stop_len = stop.numel()
            # Nothing to match, or the sequence is still shorter than the stop.
            if stop_len == 0 or stop_len > sequence.numel():
                continue
            tail = sequence[-stop_len:]
            # Compare on the sequence's device/dtype to avoid mismatch errors.
            if torch.equal(tail, stop.to(device=tail.device, dtype=tail.dtype)):
                return True

        return False

# Strings that should end generation; "</끝>" is the marker the prompt
# template in this file appends after each text segment.
stop_words = ["</끝>"]
# Take row 0 of the (1, n) id tensor instead of squeeze(): squeeze() would turn
# a single-token stop word into a 0-d tensor that has no len().
# add_special_tokens=False keeps the sequence to the marker's own tokens even
# if the tokenizer is configured to add BOS/EOS tokens.
stop_words_ids = [
    tokenizer(stop_word, return_tensors='pt', add_special_tokens=False)['input_ids'][0]
    for stop_word in stop_words
]
stopping_criteria = StoppingCriteriaList([StoppingCriteriaSub(stops=stop_words_ids)])

In [4]:
def gen(lan="en", x=""):
    """Translate ``x`` between English and Korean with the loaded model.

    Args:
        lan: Language of ``x``. ``"ko"`` builds a Korean-to-English prompt;
            any other value treats ``x`` as English and asks for Korean.
        x: Text to translate.

    Returns:
        The decoded text generated after the prompt, with one leading space
        removed. The ``</끝>`` stop marker is kept when the model emits it.
    """
    if lan == "ko":
        prompt = f"### 한국어: {x}</끝>\n### 영어:"
    else:
        prompt = f"### 영어: {x}</끝>\n### 한국어:"

    # Inputs stay on the tokenizer's device, the same device as the
    # precomputed stop-word ids used by the stopping criteria.
    inputs = tokenizer(
        prompt,
        return_tensors='pt',
        return_token_type_ids=False
    )
    prompt_len = inputs["input_ids"].shape[1]

    gened = model.generate(
        **inputs,
        max_new_tokens=2048,
        # Greedy decoding, stated explicitly: the former temperature and
        # early_stopping arguments had no effect without sampling / beam search.
        do_sample=False,
        no_repeat_ngram_size=10,
        eos_token_id=2,
        # Same value generate() falls back to; passing it avoids the
        # "Setting `pad_token_id` to `eos_token_id`" message on every call.
        pad_token_id=2,
        stopping_criteria=stopping_criteria
    )
    # Drop the prompt by token position instead of string replacement, which
    # fails silently whenever the decoded text does not reproduce the prompt
    # exactly and would also delete any later echo of the prompt.
    completion = tokenizer.decode(gened[0][prompt_len:])
    return completion[1:] if completion.startswith(" ") else completion

In [5]:
# English -> Korean: plain sentence without any markup.
gen(lan="en", x="The Steward informed me that a mysterious package has somehow arrived on my ship. I should speak with her to learn more.")

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


'관리자는 내게 신비한 상자가 내 배에 도착했다고 알려주었소. 더 자세히 알아보기 위해 그녀와 이야기해봐야겠소.</끝>'

In [6]:
# English -> Korean: input whose bold tags are HTML-escaped (&lt;b&gt; ... &lt;/b&gt;).
gen(lan="en", x="I've found myself in some sort of tournament, replete with spectators. I'm told that &lt;b&gt;Humaire&lt;/b&gt; is the custodian of this place, and may be able to provide answers regarding the strange invitation. She is currently &lt;b&gt;topside,&lt;/b&gt; watching the events.")

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


'나는 어떤 식으로든 구경거리를 찾고 있습니다. 나는 &lt;b&gt;호모페어&lt;/b&gt)는 이 장소의 관리인이라고 하며, 이상한 초대에 대한 해답을 제공할 수 있을 것입니다. 그녀는 현재 &lt;b&gt;면의,&lt;/b&gtp;에 있습니다.</끝>'

In [7]:
# English -> Korean: second HTML-escaped example, with several escaped bold spans in one sentence.
gen(lan="en", x="&lt;b&gt;Humaire&lt;/b&gt; told me to speak with her in the &lt;b&gt;Hall of Memories&lt;/b&gt; on the &lt;b&gt;west side of the temple&lt;/b&gt; beneath the arena.")

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


'&lt;b&gt>Humaire&lt;/B&gt; told me to see her in the &lt;b;Hall of Memories&Lt;/b&gt; on at the &lt;b&gt.</끝>'

In [8]:
# English -> Korean: the tournament sentence again, this time with raw <b> tags instead of escaped entities.
gen(lan="en", x="I've found myself in some sort of tournament, replete with spectators. I'm told that <b>Humaire</b> is the custodian of this place, and may be able to provide answers regarding the strange invitation. She is currently <b>topside,</b> watching the events.")

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


'나는 어떤 식으로든 구경꾼들과 함께 여행을 하고 있는 것 같습니다. <b>Humaire</B>는 이곳의 관리인이라고 하더군요. 이상한 초대에 대한 답을 얻을 수 있을지도 모릅니다. 그녀는 현재 동쪽에 있습니다.</끝>'

In [9]:
# English -> Korean: the Hall of Memories sentence again, this time with raw <b> tags instead of escaped entities.
gen(lan="en", x="<b>Humaire</b> told me to speak with her in the <b>Hall of Memories</b> on the <b>west side of the temple</b> beneath the arena.")

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


'<b>휴마레</b>는 <b>서실</b>사원의 서쪽에 있는 <b>서실</b>, 말하도록 했다</끝>'