In [1]:
!pip install -q -U bitsandbytes
!pip install -q -U git+https://github.com/huggingface/transformers.git
!pip install -q -U git+https://github.com/huggingface/accelerate.git

In [2]:
!nvidia-smi

Wed Aug 23 08:52:07 2023       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 525.89.02    Driver Version: 528.49       CUDA Version: 12.0     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  NVIDIA GeForce ...  On   | 00000000:01:00.0  On |                  N/A |
|  0%   40C    P8    16W / 320W |   1078MiB / 16376MiB |     30%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [3]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from peft import PeftModel, PeftConfig

peft_model_id = "outputs/checkpoint-196000"
config = PeftConfig.from_pretrained(peft_model_id)
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16
)
model = AutoModelForCausalLM.from_pretrained(config.base_model_name_or_path, quantization_config=bnb_config, device_map={"":0})
model = PeftModel.from_pretrained(model, peft_model_id)
tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)


  from .autonotebook import tqdm as notebook_tqdm
Loading checkpoint shards: 100%|██████████| 3/3 [00:04<00:00,  1.43s/it]


In [4]:
model.eval()
model.config.use_cache = True  # silence the warnings. Please re-enable for inference!

In [5]:
from transformers import StoppingCriteria, StoppingCriteriaList

class StoppingCriteriaSub(StoppingCriteria):

    def __init__(self, stops = [], encounters=1):
        super().__init__()
        self.stops = [stop for stop in stops]

    def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor):
        for stop in self.stops:
            if torch.all((stop == input_ids[0][-len(stop):])).item():
                return True

        return False

stop_words = ["</끝>"]
stop_words_ids = [tokenizer(stop_word, return_tensors='pt')['input_ids'].squeeze() for stop_word in stop_words]
stopping_criteria = StoppingCriteriaList([StoppingCriteriaSub(stops=stop_words_ids)])

In [6]:
def gen_sentence(lan="en", x=""):
    if (lan == "ko"):
        prompt = f"### 한국어: {x}</끝>\n### 영어:"
    else:
        prompt = f"### 영어: {x}</끝>\n### 한국어:"
    gened = model.generate(
        **tokenizer(
            prompt,
            return_tensors='pt',
            return_token_type_ids=False
        ),
        max_new_tokens=2048,
        temperature=0.001,
        no_repeat_ngram_size=10,
        early_stopping=True,
        eos_token_id=2,
        stopping_criteria=stopping_criteria
    )
    return tokenizer.decode(gened[0]).replace(prompt+" ", "")

In [18]:
def gen_word(lan="en", x=""):
    if (lan == "ko"):
        prompt = f"### 한국어: {x}</끝>\n### 영어:"
    else:
        prompt = f"### 영어: {x}</끝>\n### 한국어:"
    gened = model.generate(
        **tokenizer(
            prompt,
            return_tensors='pt',
            return_token_type_ids=False
        ),
        # max_new_tokens=2048,
        temperature=0.001,
        no_repeat_ngram_size=10,
        early_stopping=True,
        eos_token_id=2,
        stopping_criteria=stopping_criteria
    )
    return tokenizer.decode(gened[0]).replace(prompt+" ", "")

In [34]:
gen_word(lan="en", x="me .")

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


'### 영어: me.</끝>\n### 한국어: 저는'

In [33]:
gen_sentence(lan="en", x="You’re misunderstanding right now.")

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


'당신은 지금 오해하고 있어요.</끝>'

In [32]:
gen_word(lan="en", x="name .")


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


'### 영어: name.</끝>\n### 한국어: 이름을'

In [31]:
gen_word(lan="en", x="is")


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


'그것은 ~입니다'

In [30]:
gen_word(lan="en", x="want.")


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


'나는'

In [39]:
gen_word(lan="en", x="now")

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


'이제부터는'

In [40]:
gen_word(lan="en", x="ignorance")

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


'무지'

In [41]:
gen_word(lan="en", x="confidence")

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


'자신감'

In [43]:
gen_sentence(lan="en", x="You’re misunderstanding right now. Hear me out.")

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


'지금 오해하고 있는 것 같은데,  내 말을 잘 들어봐.</끝>'

In [44]:
gen_sentence(lan="en", x="All you need in this life is ignorance and confidence, then success is sure.")


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


'인생에서 필요한 것은 무지와 자신감뿐이에요,  그럼 성공은 확실해요.</끝>'