In [None]:
import torch
import transformers
from transformers import AutoTokenizer

In [None]:
# Local checkpoint directory of the model used throughout this section.
model_id = "../model/kanana-1.5-8b-instruct-2505"

# Load the tokenizer once; later cells use it to look up terminator token ids.
tokenizer = AutoTokenizer.from_pretrained(model_id, padding_side="right", trust_remote_code=True)

# Build a text-generation pipeline in bfloat16 on the first GPU.
# The tokenizer is passed explicitly so the pipeline reuses the instance above
# rather than loading a second copy; token ids looked up on `tokenizer` are
# then guaranteed to match what the pipeline encodes and decodes with.
pipeline = transformers.pipeline(
    "text-generation",
    model=model_id,
    tokenizer=tokenizer,
    model_kwargs={"dtype": torch.bfloat16},
    device="cuda:0",
    trust_remote_code=True,
)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Device set to use cuda:0


In [8]:
# Prompt that stops right after the user-turn header, so the model itself
# writes the user's message (i.e. it generates an instruction).
# The turn markers follow this tokenizer's chat template
# (<|start_header_id|>role<|end_header_id|> ... <|eot_id|>); the previous
# "[|system|]" / "[|endofturn|]" markers are not part of that template and
# were treated as plain text by the model.
pre_query_template = (
    "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\n"
    "You are a helpful assistant.<|eot_id|>"
    "<|start_header_id|>Korean user<|end_header_id|>\n\n"
)

# Stop at the regular EOS token or at the end-of-turn marker.
terminators = [
    tokenizer.eos_token_id,
    tokenizer.convert_tokens_to_ids("<|eot_id|>"),
]

# Pure sampling (temperature=1, top_p=1) so repeated runs yield varied instructions.
# add_special_tokens=False: the prompt already begins with <|begin_of_text|>,
# so the tokenizer must not prepend another BOS token.
instruction = pipeline(
    pre_query_template,
    max_new_tokens=2048,
    eos_token_id=terminators,
    do_sample=True,
    temperature=1,
    top_p=1,
    add_special_tokens=False,
)

# Drop the prompt prefix from the returned text, then keep only the first line.
# Leading whitespace is stripped first so a completion that opens with a blank
# line does not produce an empty instruction.
completion = instruction[0]['generated_text'][len(pre_query_template):]
sanitized_instruction = completion.strip().split("\n")[0]
print(sanitized_instruction)

Could you give me some tips on how to prevent gum disease?


In [1]:
from transformers import TextIteratorStreamer, BitsAndBytesConfig, AutoModelForCausalLM, AutoTokenizer
import threading
import torch

In [2]:
# Local checkpoint directory of the model to load.
local_dir = "../model/kanana-1.5-8b-instruct-2505"

# 4-bit NF4 quantization with double quantization; matmuls compute in bfloat16.
bnb_config = BitsAndBytesConfig(
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    load_in_4bit=True,
)

# Quantized weights, placed on the available device(s) automatically.
model = AutoModelForCausalLM.from_pretrained(
    local_dir,
    trust_remote_code=True,
    device_map="auto",
    quantization_config=bnb_config,
)

# Tokenizer from the same checkpoint, padding on the right.
tokenizer = AutoTokenizer.from_pretrained(local_dir, trust_remote_code=True, padding_side="right")

# Put the model in evaluation mode; as the cell's last expression this also
# displays the module tree.
model.eval()

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

LlamaForCausalLM(
  (model): LlamaModel(
    (embed_tokens): Embedding(128259, 4096, padding_idx=128001)
    (layers): ModuleList(
      (0-31): 32 x LlamaDecoderLayer(
        (self_attn): LlamaAttention(
          (q_proj): Linear4bit(in_features=4096, out_features=4096, bias=False)
          (k_proj): Linear4bit(in_features=4096, out_features=1024, bias=False)
          (v_proj): Linear4bit(in_features=4096, out_features=1024, bias=False)
          (o_proj): Linear4bit(in_features=4096, out_features=4096, bias=False)
        )
        (mlp): LlamaMLP(
          (gate_proj): Linear4bit(in_features=4096, out_features=14336, bias=False)
          (up_proj): Linear4bit(in_features=4096, out_features=14336, bias=False)
          (down_proj): Linear4bit(in_features=14336, out_features=4096, bias=False)
          (act_fn): SiLUActivation()
        )
        (input_layernorm): LlamaRMSNorm((4096,), eps=1e-05)
        (post_attention_layernorm): LlamaRMSNorm((4096,), eps=1e-05)
      )
    )

In [5]:
# Render a single user turn as text (no tokenization) to inspect the prompt
# format this tokenizer's chat template produces.
probe_messages = [{"role": "user", "content": "안녕?"}]
tokenizer.apply_chat_template(probe_messages, tokenize=False)

'<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\n\n<|eot_id|><|start_header_id|>user<|end_header_id|>\n\n안녕?<|eot_id|>'

In [7]:
# Prompt in the tokenizer's chat format that ends right after the
# "Korean user" header, so the model writes the user's message itself.
pre_query_template = "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\n\n<|eot_id|><|start_header_id|>Korean user<|end_header_id|>\n\n"

# Stop at the regular EOS token or at the end-of-turn marker.
terminators = [
    tokenizer.eos_token_id,
    tokenizer.convert_tokens_to_ids("<|eot_id|>"),
]

# Yields decoded text chunks as they are generated; the prompt and special
# tokens are left out of the stream.
streamer = TextIteratorStreamer(
    tokenizer,
    skip_prompt=True,
    skip_special_tokens=True,
)

# add_special_tokens=False: the template already starts with <|begin_of_text|>,
# so the tokenizer must not prepend a second BOS token.
encoded = tokenizer(
    pre_query_template,
    return_tensors="pt",
    add_special_tokens=False,
).to(model.device)
input_ids = encoded["input_ids"]

# Pure sampling (temperature=1, top_p=1). The attention mask is passed
# explicitly so generate() does not have to infer it.
generation_kwargs = dict(
    input_ids=input_ids,
    attention_mask=encoded["attention_mask"],
    max_new_tokens=2048,
    eos_token_id=terminators,
    do_sample=True,
    temperature=1,
    top_p=1,
    streamer=streamer,
)

# generate() blocks until it finishes, so it runs in a worker thread while
# this thread consumes the streamer.
thread = threading.Thread(
    target=model.generate,
    kwargs=generation_kwargs,
)
thread.start()

generated_text = ""

# Echo each chunk as it arrives and accumulate the full generation.
for new_text in streamer:
    print(new_text, end="", flush=True)
    generated_text += new_text

# Wait for the worker so generation has fully finished before the next cell runs.
thread.join()

# Problem:
Implement a function that takes in a list of integers and returns a dictionary where the keys are the input integers and the values are their corresponding squares.

# Example:
For the input `[1, 2, 3]`, the output should be `{1: 1, 2: 4, 3: 9}`.

# Constraints:
# - Input can contain duplicates.
# - The function should handle both positive and negative integers.

# Solution in Python:
```python
def square_dictionary(numbers):
    return {num: num ** 2 for num in numbers}
```

# Explanation:
This solution uses a dictionary comprehension to efficiently create the desired dictionary. For each number in the input list, it adds an entry to the dictionary with the number as the key and its square as the value. Duplicates are handled naturally, as each number is only added once with its square value.

The solution is efficient in both time and space complexity, as it iterates through the list once and uses a constant amount of additional space per element.

In [6]:
# Keep only the first line of the streamed generation as the instruction.
# Leading whitespace is stripped first so a generation that opens with a blank
# line does not produce an empty instruction.
sanitized_instruction = generated_text.strip().split("\n")[0]
print("\n\n[FINAL]")
print(sanitized_instruction)



[FINAL]
안녕하세요! 인공지능에 대해 관심이 많습니다. 특히 인공지능의 발전 과정을 이해하고 싶은데, 그 발전 과정을 간략히 요약해주실 수 있을까요?
