In [1]:
from datasets import Dataset
import pandas as pd
from transformers import AutoTokenizer, AutoModelForCausalLM, DataCollatorForSeq2Seq, TrainingArguments, Trainer, GenerationConfig
import torch

In [2]:
torch.cuda.is_available()
torch.cuda.current_device()

0

# Load dataset

In [3]:
df = pd.read_csv("../datasets/kaggle_metadata.csv")
df.head()

Unnamed: 0,title,subtitle,description,keyword 1,keyword 2,keyword 3,keyword 4,keyword 5
0,Basic Arabic Vocal Emotions Dataset,ŸÖÿ¨ŸÖŸàÿπÿ© ÿ®ŸäÿßŸÜÿßÿ™ ÿßŸÑÿπŸàÿßÿ∑ŸÅ ÿßŸÑÿµŸàÿ™Ÿäÿ© ÿßŸÑÿπÿ±ÿ®Ÿäÿ©,Don't forget to upvote the dataset. Thank you....,music,education,audio,online communities,
1,Military Aircraft Detection Dataset,military aircraft images with aircraft type an...,## Overview\nThis dataset is designed for obje...,arts and entertainment,business,military,aviation,computer vision
2,Fashion Dataset UK-US,A Comprehensive Dataset for Informed Decision-...,The Fashion Sales Dataset is a comprehensive a...,clothing and accessories,data visualization,data analytics,,
3,CoNIC Challenge Dataset,Patch-level LIZARD dataset for CoNIC Challenge,The dataset consists of Haematoxylin and Eosin...,healthcare,earth and nature,biology,health,medicine
4,Lizard dataset,The largest known nuclear instance segmentatio...,The development of deep segmentation models fo...,biology,,,,


In [4]:
ds = Dataset.from_pandas(df)
ds

Dataset({
    features: ['title', 'subtitle', 'description', 'keyword 1', 'keyword 2', 'keyword 3', 'keyword 4', 'keyword 5'],
    num_rows: 6817
})

# Setup dataset

In [5]:
MODEL_NAME = "IlyaGusev/saiga_llama3_8b"

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)
tokenizer.pad_token, tokenizer.eos_token

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


('<|begin_of_text|>', '<|eot_id|>')

In [6]:
generation_config = GenerationConfig.from_pretrained(MODEL_NAME)
print(generation_config)

GenerationConfig {
  "bos_token_id": 128000,
  "do_sample": true,
  "eos_token_id": 128009,
  "max_new_tokens": 1536,
  "pad_token_id": 128000,
  "repetition_penalty": 1.12,
  "temperature": 0.2,
  "top_k": 30,
  "top_p": 0.9
}



In [7]:
def gen_batches_train():
    for sample in iter(ds):
        # Extract instruction and input from the sample
        system_prompt = "–¢—ã –ø–æ–ª—å–∑–æ–≤–∞—Ç–µ–ª—å, –∫–æ—Ç–æ—Ä—ã–π —Ä–µ—à–∏–ª –∑–∞–≥—Ä—É–∑–∏—Ç—å –¥–∞—Ç–∞—Å–µ—Ç –Ω–∞ –ø–ª–∞—Ç—Ñ–æ—Ä–º—É —Å –¥–∞—Ç–∞—Å–µ—Ç–∞–º–∏. –¢–≤–æ—è –∑–∞–¥–∞—á–∞ –ø—Ä–∏–¥—É–º–∞—Ç—å —Ç–µ–≥–∏ –¥–ª—è –¥–∞–Ω–Ω–æ–≥–æ –¥–∞—Ç–∞—Å–µ—Ç–∞, —á—Ç–æ–±—ã –µ–≥–æ –±—ã–ª–æ –ª–µ–≥–∫–æ –Ω–∞–π—Ç–∏ –Ω–∞ –æ—Å–Ω–æ–≤–µ –∑–∞–≥–æ–ª–æ–≤–∫–∞, –ø–æ–¥–∑–∞–≥–æ–ª–æ–≤–∫–∞ –∏ –æ–ø–∏—Å–∞–Ω–∏—è –¥–∞—Ç–∞—Å–µ—Ç–∞. –¢—ã –≤—ã–≤–æ–¥–∏—à—å —Ç–æ–ª—å–∫–æ —Ç–µ–≥–∏ —á–µ—Ä–µ–∑ –∑–∞–ø—è—Ç—É—é."
        input_text = f"–ü—Ä–∏–¥—É–º–∞–π —Ç–µ–≥–∏ –¥–ª—è –¥–∞–Ω–Ω–æ–≥–æ –¥–∞—Ç–∞—Å–µ—Ç–∞:\n# –ó–∞–≥–æ–ª–æ–≤–æ–∫: {sample['title']}"
        if sample['subtitle'] != '':
            input_text += f"\n# –ü–æ–¥–∑–∞–≥–æ–ª–æ–≤–æ–∫: {sample['subtitle']}"
        input_text += f"\n# –û–ø–∏—Å–∞–Ω–∏–µ: {sample['description']}"
        out_text = f"{sample['keyword 1']}"
        if sample['keyword 2'] != '':
            out_text += f", {sample['keyword 2']}"
        if sample['keyword 3'] != '':
            out_text += f", {sample['keyword 3']}"
        if sample['keyword 4'] != '':
            out_text += f", {sample['keyword 4']}"
        if sample['keyword 5'] != '':
            out_text += f", {sample['keyword 5']}"
        formatted_prompt = None 
            
        formatted_prompt = tokenizer.apply_chat_template([{
                "role": "system",
                "content": system_prompt
            }, {
                "role": "user",
                "content": input_text
            }, {
                "role": "assistant",
                "content": out_text
            }], tokenize=False, add_generation_prompt=False) + '<|end_of_text|>'
        
        yield {'text': formatted_prompt}

next(gen_batches_train())

{'text': "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\n–¢—ã –ø–æ–ª—å–∑–æ–≤–∞—Ç–µ–ª—å, –∫–æ—Ç–æ—Ä—ã–π —Ä–µ—à–∏–ª –∑–∞–≥—Ä—É–∑–∏—Ç—å –¥–∞—Ç–∞—Å–µ—Ç –Ω–∞ –ø–ª–∞—Ç—Ñ–æ—Ä–º—É —Å –¥–∞—Ç–∞—Å–µ—Ç–∞–º–∏. –¢–≤–æ—è –∑–∞–¥–∞—á–∞ –ø—Ä–∏–¥—É–º–∞—Ç—å —Ç–µ–≥–∏ –¥–ª—è –¥–∞–Ω–Ω–æ–≥–æ –¥–∞—Ç–∞—Å–µ—Ç–∞, —á—Ç–æ–±—ã –µ–≥–æ –±—ã–ª–æ –ª–µ–≥–∫–æ –Ω–∞–π—Ç–∏ –Ω–∞ –æ—Å–Ω–æ–≤–µ –∑–∞–≥–æ–ª–æ–≤–∫–∞, –ø–æ–¥–∑–∞–≥–æ–ª–æ–≤–∫–∞ –∏ –æ–ø–∏—Å–∞–Ω–∏—è –¥–∞—Ç–∞—Å–µ—Ç–∞. –¢—ã –≤—ã–≤–æ–¥–∏—à—å —Ç–æ–ª—å–∫–æ —Ç–µ–≥–∏ —á–µ—Ä–µ–∑ –∑–∞–ø—è—Ç—É—é.<|eot_id|><|start_header_id|>user<|end_header_id|>\n\n–ü—Ä–∏–¥—É–º–∞–π —Ç–µ–≥–∏ –¥–ª—è –¥–∞–Ω–Ω–æ–≥–æ –¥–∞—Ç–∞—Å–µ—Ç–∞:\n# –ó–∞–≥–æ–ª–æ–≤–æ–∫: Basic Arabic Vocal Emotions Dataset\n# –ü–æ–¥–∑–∞–≥–æ–ª–æ–≤–æ–∫: ŸÖÿ¨ŸÖŸàÿπÿ© ÿ®ŸäÿßŸÜÿßÿ™ ÿßŸÑÿπŸàÿßÿ∑ŸÅ ÿßŸÑÿµŸàÿ™Ÿäÿ© ÿßŸÑÿπÿ±ÿ®Ÿäÿ©\n# –û–ø–∏—Å–∞–Ω–∏–µ: Don't forget to upvote the dataset. Thank you.üòä \n---\n# Basic Arabic Vocal Emotions Dataset \nBasic Arabic Vocal Emotions Dataset (BAVED) is a datasetthat contains an arabic

# Prepare model

In [8]:
model = AutoModelForCausalLM.from_pretrained(
        MODEL_NAME, 
        device_map={"": 0}, 
        torch_dtype=torch.bfloat16,
    )

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

In [9]:
from peft import LoraConfig, TaskType, get_peft_model

peft_config = LoraConfig(
        lora_alpha=32,
        lora_dropout=0.1,
        r=8,
        bias="none",
        task_type=TaskType.CAUSAL_LM, 
        target_modules=["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"],
    )

In [10]:
tokenizer.pad_token = tokenizer.eos_token

# Training

In [11]:
training_arguments = TrainingArguments(
    output_dir='./saiga_results',
    per_device_train_batch_size=1,
    gradient_accumulation_steps=8,
    optim="adamw_torch",
    save_steps=100,
    logging_steps=5,
    learning_rate=3e-4,
    fp16=False,
    bf16=True,
    num_train_epochs=100,
    report_to="none"
)

train_gen = Dataset.from_generator(gen_batches_train)
tokenizer.padding_side = "right"

In [12]:
from trl import SFTTrainer

trainer = SFTTrainer(
    model=model,
    train_dataset=train_gen,
    peft_config=peft_config,
    dataset_text_field="text",
    max_seq_length=1024,
    tokenizer=tokenizer,
    args=training_arguments,
)


Deprecated positional argument(s) used in SFTTrainer, please use the SFTConfig to set these arguments instead.


In [13]:
trainer.train()

  attn_output = torch.nn.functional.scaled_dot_product_attention(


RuntimeError: CUDA error: out of memory
CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1.
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.


In [17]:
peft_model_id="./saiga_lora2"
trainer.model.save_pretrained(peft_model_id)
tokenizer.save_pretrained(peft_model_id)

('./saiga_lora2/tokenizer_config.json',
 './saiga_lora2/special_tokens_map.json',
 './saiga_lora2/tokenizer.json')

In [18]:
from peft import PeftModel

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, device_map="auto",torch_dtype=torch.bfloat16)

model = PeftModel.from_pretrained(model, model_id=peft_model_id, config=peft_config)

model = model.merge_and_unload()

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Loading checkpoint shards: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 4/4 [00:05<00:00,  1.36s/it]
Some parameters are on the meta device device because they were offloaded to the cpu.


In [19]:
def test(question, correct_answer):
    system_prompt = "–¢—ã –ø—Ä–æ—Ñ–µ—Å—Å–∏–æ–Ω–∞–ª—å–Ω—ã–π —ç–∫–∑–∞–º–µ–Ω–∞—Ç–æ—Ä —Å –≥–ª—É–±–æ–∫–∏–º –∑–Ω–∞–Ω–∏–µ–º –ø—Ä–µ–¥–º–µ—Ç–∞. –¢–≤–æ—è –∑–∞–¥–∞—á–∞ - –ø–æ–º–æ—â—å –≤ —Å–æ—Å—Ç–∞–≤–ª–µ–Ω–∏–∏ –≤–æ–ø—Ä–æ—Å–æ–≤ –¥–ª—è —Å—Ç—É–¥–µ–Ω—Ç—á–µ—Å–∫–æ–≥–æ —ç–∫–∑–∞–º–µ–Ω–∞."
    input_text = f"# –í–æ–ø—Ä–æ—Å: {question}\n# –ü—Ä–∞–≤–∏–ª—å–Ω—ã–π –æ—Ç–≤–µ—Ç: {correct_answer}\n\n–°–æ–∑–¥–∞–π 3 –ø—Ä–∞–≤–¥–æ–ø–æ–¥–æ–±–Ω—ã—Ö, –Ω–æ –Ω–µ–ø—Ä–∞–≤–∏–ª—å–Ω—ã—Ö –æ—Ç–≤–µ—Ç–∞ (–¥–∏—Å—Ç—Ä–∞–∫—Ç–æ—Ä–æ–≤) –¥–ª—è –¥–∞–Ω–Ω–æ–≥–æ –≤–æ–ø—Ä–æ—Å–∞. C–≥–µ–Ω–µ—Ä–∏—Ä—É–π 3 –Ω–µ–ø—Ä–∞–≤–∏–ª—å–Ω—ã—Ö –æ—Ç–≤–µ—Ç–∞ (–¥–∏—Å—Ç—Ä–∞–∫—Ç–æ—Ä–∞) –≤ —Å–ª–µ–¥—É—é—â–µ–º —Ñ–æ—Ä–º–∞—Ç–µ:\n# –î–∏—Å—Ç—Ä–∞–∫—Ç–æ—Ä—ã:\n - <–Ω–µ–ø—Ä–∞–≤–∏–ª—å–Ω—ã–π –æ—Ç–≤–µ—Ç 1>\n - <–Ω–µ–ø—Ä–∞–≤–∏–ª—å–Ω—ã–π –æ—Ç–≤–µ—Ç 2>\n - <–Ω–µ–ø—Ä–∞–≤–∏–ª—å–Ω—ã–π –æ—Ç–≤–µ—Ç 3>.\n–ù–µ –¥–æ–±–∞–≤–ª—è–π –Ω–æ–º–µ—Ä–∞ –∏–ª–∏ –±—É–∫–≤—ã –∫ –æ—Ç–≤–µ—Ç–∞–º."
            
    formatted_prompt = tokenizer.apply_chat_template([{
            "role": "system",
            "content": system_prompt
        }, {
            "role": "user",
            "content": input_text
        }], tokenize=False, add_generation_prompt=True)
    
    print("INPUT:")
    print(formatted_prompt)

    model_inputs = tokenizer([formatted_prompt], return_tensors="pt").to('cuda')

    generated_ids = model.generate(
        input_ids=model_inputs.input_ids,
        max_new_tokens=512,
        do_sample=True,
        top_p=0.9, 
        temperature=0.5, 
        repetition_penalty=1.1,
        eos_token_id=tokenizer.encode('<|eot_id|>')[0],
    )
    generated_ids = [
        output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
    ]

    response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]

    print("\nOUTPUT:")
    print(response)

In [20]:
#from dataset
test("–ö–∞–∫–æ–π –≤–∞—Ä–∏–∞–Ω—Ç –∏–∑ –ø–µ—Ä–µ—á–∏—Å–ª–µ–Ω–Ω—ã—Ö —è–≤–ª—è–µ—Ç—Å—è –æ–ø—Ä–µ–¥–µ–ª–µ–Ω–∏–µ–º –º–µ—Ç–æ–¥–∞ –∞–Ω—Å–∞–º–±–ª—è –º–æ–¥–µ–ª–µ–π?", 
     "–ö–æ–º–±–∏–Ω–∞—Ü–∏—è –Ω–µ—Å–∫–æ–ª—å–∫–∏—Ö –∞–ª–≥–æ—Ä–∏—Ç–º–æ–≤ –æ–±—É—á–µ–Ω–∏—è, –∫–æ—Ç–æ—Ä—ã–µ, —Ä–∞–±–æ—Ç–∞—è –≤–º–µ—Å—Ç–µ, –ø–æ–∑–≤–æ–ª—è—é—Ç –ø–æ—Å—Ç—Ä–æ–∏—Ç—å –º–æ–¥–µ–ª—å –±–æ–ª–µ–µ —ç—Ñ—Ñ–µ–∫—Ç–∏–≤–Ω—É—é –∏ —Ç–æ—á–Ω—É—é, —á–µ–º –ª—é–±–∞—è –∏–∑ –º–æ–¥–µ–ª–µ–π, –ø–æ—Å—Ç—Ä–æ–µ–Ω–Ω–∞—è —Å –ø–æ–º–æ—â—å—é –æ—Ç–¥–µ–ª—å–Ω–æ–≥–æ –∞–ª–≥–æ—Ä–∏—Ç–º–∞.")

The attention mask is not set and cannot be inferred from input because pad token is same as eos token.As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.


INPUT:
<|begin_of_text|><|start_header_id|>system<|end_header_id|>

–¢—ã –ø—Ä–æ—Ñ–µ—Å—Å–∏–æ–Ω–∞–ª—å–Ω—ã–π —ç–∫–∑–∞–º–µ–Ω–∞—Ç–æ—Ä —Å –≥–ª—É–±–æ–∫–∏–º –∑–Ω–∞–Ω–∏–µ–º –ø—Ä–µ–¥–º–µ—Ç–∞. –¢–≤–æ—è –∑–∞–¥–∞—á–∞ - –ø–æ–º–æ—â—å –≤ —Å–æ—Å—Ç–∞–≤–ª–µ–Ω–∏–∏ –≤–æ–ø—Ä–æ—Å–æ–≤ –¥–ª—è —Å—Ç—É–¥–µ–Ω—Ç—á–µ—Å–∫–æ–≥–æ —ç–∫–∑–∞–º–µ–Ω–∞.<|eot_id|><|start_header_id|>user<|end_header_id|>

# –í–æ–ø—Ä–æ—Å: –ö–∞–∫–æ–π –≤–∞—Ä–∏–∞–Ω—Ç –∏–∑ –ø–µ—Ä–µ—á–∏—Å–ª–µ–Ω–Ω—ã—Ö —è–≤–ª—è–µ—Ç—Å—è –æ–ø—Ä–µ–¥–µ–ª–µ–Ω–∏–µ–º –º–µ—Ç–æ–¥–∞ –∞–Ω—Å–∞–º–±–ª—è –º–æ–¥–µ–ª–µ–π?
# –ü—Ä–∞–≤–∏–ª—å–Ω—ã–π –æ—Ç–≤–µ—Ç: –ö–æ–º–±–∏–Ω–∞—Ü–∏—è –Ω–µ—Å–∫–æ–ª—å–∫–∏—Ö –∞–ª–≥–æ—Ä–∏—Ç–º–æ–≤ –æ–±—É—á–µ–Ω–∏—è, –∫–æ—Ç–æ—Ä—ã–µ, —Ä–∞–±–æ—Ç–∞—è –≤–º–µ—Å—Ç–µ, –ø–æ–∑–≤–æ–ª—è—é—Ç –ø–æ—Å—Ç—Ä–æ–∏—Ç—å –º–æ–¥–µ–ª—å –±–æ–ª–µ–µ —ç—Ñ—Ñ–µ–∫—Ç–∏–≤–Ω—É—é –∏ —Ç–æ—á–Ω—É—é, —á–µ–º –ª—é–±–∞—è –∏–∑ –º–æ–¥–µ–ª–µ–π, –ø–æ—Å—Ç—Ä–æ–µ–Ω–Ω–∞—è —Å –ø–æ–º–æ—â—å—é –æ—Ç–¥–µ–ª—å–Ω–æ–≥–æ –∞–ª–≥–æ—Ä–∏—Ç–º–∞.

–°–æ–∑–¥–∞–π 3 –ø—Ä–∞–≤–¥–æ–ø–æ–¥–æ–±–Ω—ã—Ö, –Ω–æ –Ω–µ–ø—Ä–∞–≤–∏–ª—å–Ω—ã—Ö –æ—Ç–≤–µ—Ç–

In [21]:
#new question
test("–ö–∞–∫—É—é —Å—Ç—Ä—É–∫—Ç—É—Ä—É –¥–∞–Ω–Ω—ã—Ö —Å–ª–µ–¥—É–µ—Ç –∏—Å–ø–æ–ª—å–∑–æ–≤–∞—Ç—å –¥–ª—è —ç—Ñ—Ñ–µ–∫—Ç–∏–≤–Ω–æ–≥–æ –∏–Ω–¥–µ–∫—Å–∏—Ä–æ–≤–∞–Ω–∏—è –∏ –ø–æ–∏—Å–∫–∞ –±–ª–∏–∂–∞–π—à–∏—Ö —Å–æ—Å–µ–¥–µ–π –≤ –º–Ω–æ–≥–æ–º–µ—Ä–Ω—ã—Ö –¥–∞–Ω–Ω—ã—Ö, –≥–¥–µ –Ω–µ–æ–±—Ö–æ–¥–∏–º–æ —á–∞—Å—Ç–æ –≤—ã–ø–æ–ª–Ω—è—Ç—å –æ–±–Ω–æ–≤–ª–µ–Ω–∏—è –¥–∞–Ω–Ω—ã—Ö?",
     "R-–¥–µ—Ä–µ–≤–æ")

INPUT:
<|begin_of_text|><|start_header_id|>system<|end_header_id|>

–¢—ã –ø—Ä–æ—Ñ–µ—Å—Å–∏–æ–Ω–∞–ª—å–Ω—ã–π —ç–∫–∑–∞–º–µ–Ω–∞—Ç–æ—Ä —Å –≥–ª—É–±–æ–∫–∏–º –∑–Ω–∞–Ω–∏–µ–º –ø—Ä–µ–¥–º–µ—Ç–∞. –¢–≤–æ—è –∑–∞–¥–∞—á–∞ - –ø–æ–º–æ—â—å –≤ —Å–æ—Å—Ç–∞–≤–ª–µ–Ω–∏–∏ –≤–æ–ø—Ä–æ—Å–æ–≤ –¥–ª—è —Å—Ç—É–¥–µ–Ω—Ç—á–µ—Å–∫–æ–≥–æ —ç–∫–∑–∞–º–µ–Ω–∞.<|eot_id|><|start_header_id|>user<|end_header_id|>

# –í–æ–ø—Ä–æ—Å: –ö–∞–∫—É—é —Å—Ç—Ä—É–∫—Ç—É—Ä—É –¥–∞–Ω–Ω—ã—Ö —Å–ª–µ–¥—É–µ—Ç –∏—Å–ø–æ–ª—å–∑–æ–≤–∞—Ç—å –¥–ª—è —ç—Ñ—Ñ–µ–∫—Ç–∏–≤–Ω–æ–≥–æ –∏–Ω–¥–µ–∫—Å–∏—Ä–æ–≤–∞–Ω–∏—è –∏ –ø–æ–∏—Å–∫–∞ –±–ª–∏–∂–∞–π—à–∏—Ö —Å–æ—Å–µ–¥–µ–π –≤ –º–Ω–æ–≥–æ–º–µ—Ä–Ω—ã—Ö –¥–∞–Ω–Ω—ã—Ö, –≥–¥–µ –Ω–µ–æ–±—Ö–æ–¥–∏–º–æ —á–∞—Å—Ç–æ –≤—ã–ø–æ–ª–Ω—è—Ç—å –æ–±–Ω–æ–≤–ª–µ–Ω–∏—è –¥–∞–Ω–Ω—ã—Ö?
# –ü—Ä–∞–≤–∏–ª—å–Ω—ã–π –æ—Ç–≤–µ—Ç: R-–¥–µ—Ä–µ–≤–æ

–°–æ–∑–¥–∞–π 3 –ø—Ä–∞–≤–¥–æ–ø–æ–¥–æ–±–Ω—ã—Ö, –Ω–æ –Ω–µ–ø—Ä–∞–≤–∏–ª—å–Ω—ã—Ö –æ—Ç–≤–µ—Ç–∞ (–¥–∏—Å—Ç—Ä–∞–∫—Ç–æ—Ä–æ–≤) –¥–ª—è –¥–∞–Ω–Ω–æ–≥–æ –≤–æ–ø—Ä–æ—Å–∞. C–≥–µ–Ω–µ—Ä–∏—Ä—É–π 3 –Ω–µ–ø—Ä–∞–≤–∏–ª—å–Ω—ã—Ö –æ—Ç–≤–µ—Ç–∞ (–¥–∏—Å—Ç—Ä–∞–∫—Ç–æ—Ä–∞) –

In [22]:
test("–ö–∞–∫—É—é —Å—Ç—Ä—É–∫—Ç—É—Ä—É –¥–∞–Ω–Ω—ã—Ö —Å–ª–µ–¥—É–µ—Ç –∏—Å–ø–æ–ª—å–∑–æ–≤–∞—Ç—å –¥–ª—è —ç—Ñ—Ñ–µ–∫—Ç–∏–≤–Ω–æ–≥–æ –∏–Ω–¥–µ–∫—Å–∏—Ä–æ–≤–∞–Ω–∏—è –∏ –ø–æ–∏—Å–∫–∞ –±–ª–∏–∂–∞–π—à–∏—Ö —Å–æ—Å–µ–¥–µ–π –≤ –º–Ω–æ–≥–æ–º–µ—Ä–Ω—ã—Ö –¥–∞–Ω–Ω—ã—Ö, –≥–¥–µ –Ω–µ–æ–±—Ö–æ–¥–∏–º–æ —á–∞—Å—Ç–æ –≤—ã–ø–æ–ª–Ω—è—Ç—å –æ–±–Ω–æ–≤–ª–µ–Ω–∏—è –¥–∞–Ω–Ω—ã—Ö?",
     "R-–¥–µ—Ä–µ–≤–æ")

INPUT:
<|begin_of_text|><|start_header_id|>system<|end_header_id|>

–¢—ã –ø—Ä–æ—Ñ–µ—Å—Å–∏–æ–Ω–∞–ª—å–Ω—ã–π —ç–∫–∑–∞–º–µ–Ω–∞—Ç–æ—Ä —Å –≥–ª—É–±–æ–∫–∏–º –∑–Ω–∞–Ω–∏–µ–º –ø—Ä–µ–¥–º–µ—Ç–∞. –¢–≤–æ—è –∑–∞–¥–∞—á–∞ - –ø–æ–º–æ—â—å –≤ —Å–æ—Å—Ç–∞–≤–ª–µ–Ω–∏–∏ –≤–æ–ø—Ä–æ—Å–æ–≤ –¥–ª—è —Å—Ç—É–¥–µ–Ω—Ç—á–µ—Å–∫–æ–≥–æ —ç–∫–∑–∞–º–µ–Ω–∞.<|eot_id|><|start_header_id|>user<|end_header_id|>

# –í–æ–ø—Ä–æ—Å: –ö–∞–∫—É—é —Å—Ç—Ä—É–∫—Ç—É—Ä—É –¥–∞–Ω–Ω—ã—Ö —Å–ª–µ–¥—É–µ—Ç –∏—Å–ø–æ–ª—å–∑–æ–≤–∞—Ç—å –¥–ª—è —ç—Ñ—Ñ–µ–∫—Ç–∏–≤–Ω–æ–≥–æ –∏–Ω–¥–µ–∫—Å–∏—Ä–æ–≤–∞–Ω–∏—è –∏ –ø–æ–∏—Å–∫–∞ –±–ª–∏–∂–∞–π—à–∏—Ö —Å–æ—Å–µ–¥–µ–π –≤ –º–Ω–æ–≥–æ–º–µ—Ä–Ω—ã—Ö –¥–∞–Ω–Ω—ã—Ö, –≥–¥–µ –Ω–µ–æ–±—Ö–æ–¥–∏–º–æ —á–∞—Å—Ç–æ –≤—ã–ø–æ–ª–Ω—è—Ç—å –æ–±–Ω–æ–≤–ª–µ–Ω–∏—è –¥–∞–Ω–Ω—ã—Ö?
# –ü—Ä–∞–≤–∏–ª—å–Ω—ã–π –æ—Ç–≤–µ—Ç: R-–¥–µ—Ä–µ–≤–æ

–°–æ–∑–¥–∞–π 3 –ø—Ä–∞–≤–¥–æ–ø–æ–¥–æ–±–Ω—ã—Ö, –Ω–æ –Ω–µ–ø—Ä–∞–≤–∏–ª—å–Ω—ã—Ö –æ—Ç–≤–µ—Ç–∞ (–¥–∏—Å—Ç—Ä–∞–∫—Ç–æ—Ä–æ–≤) –¥–ª—è –¥–∞–Ω–Ω–æ–≥–æ –≤–æ–ø—Ä–æ—Å–∞. C–≥–µ–Ω–µ—Ä–∏—Ä—É–π 3 –Ω–µ–ø—Ä–∞–≤–∏–ª—å–Ω—ã—Ö –æ—Ç–≤–µ—Ç–∞ (–¥–∏—Å—Ç—Ä–∞–∫—Ç–æ—Ä–∞) –