In [1]:
# Show the GPU model, driver/CUDA versions and current memory usage before any model is loaded.
!nvidia-smi

Mon Feb 24 14:33:09 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 560.35.02              Driver Version: 560.94         CUDA Version: 12.6     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  NVIDIA GeForce RTX 4060 Ti     On  |   00000000:01:00.0  On |                  N/A |
|  0%   37C    P8              5W /  165W |    5621MiB /  16380MiB |     13%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
                                                

In [2]:
import os
import torch
import transformers
import peft
import datasets
import evaluate
import time
# Stop early on machines without a GPU; the models below are placed on `device`.
has_cuda = torch.cuda.is_available()
assert has_cuda, "you need cuda for this part"
# The CPU branch can only be reached when assertions are disabled (python -O).
device = torch.device('cuda:0' if has_cuda else 'cpu')

In [3]:
# Echo the selected device as a quick sanity check.
device

device(type='cuda', index=0)

In [6]:
# Model id of the base checkpoint: used for the tokenizer and for both quantized base-model loads.
base_model_name = "Qwen/Qwen2.5-Coder-3B-Instruct"
# Model id passed to peft.PeftModel.from_pretrained, i.e. the fine-tuned weights applied on top of the base.
model_name = "hodza/BlackBox-Coder-3B"


In [12]:
# Both models share the tokenizer of the base checkpoint.
tokenizer = transformers.AutoTokenizer.from_pretrained(base_model_name)

# 4-bit NF4 weights with double quantization; bfloat16 is the compute dtype.
bnb_config = transformers.BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
)


def _load_quantized_base():
    """Load a fresh quantized copy of the base checkpoint onto `device`."""
    return transformers.AutoModelForCausalLM.from_pretrained(
        base_model_name,
        device_map=device,
        quantization_config=bnb_config,
    )


# "After" model: the fine-tuned weights from `model_name` wrapped around its own base copy.
model = peft.PeftModel.from_pretrained(_load_quantized_base(), model_name)
# "Before" model: a second, independent base copy.
# NOTE(review): the separate load is presumably needed because PEFT modifies the
# module it wraps, so the first copy cannot double as the baseline - confirm.
base_model = _load_quantized_base()

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [13]:
from IPython.display import HTML, display
# Outer HTML table with a PROMPT / BEFORE / AFTER header row; the single `{}` slot takes the joined rows.
table_template = """<table style="border:1px solid black" >
  <tr>
    <th style="text-align: center; border:1px solid black">PROMPT</th>
    <th style="text-align: center; border:1px solid black">BEFORE</th>
    <th style="text-align: center; border:1px solid black">AFTER</th>
  </tr>
{}
</table>"""

# One table row; the three `{}` slots are filled positionally with the prompt, the baseline
# output and the fine-tuned output. The prompt cell is wrapped in literal backticks.
row_template = '''  <tr>
    <td style="width:20%; border:1px solid black"><pre align="left">`{}`</pre></td>
    <td style="width:40%; border:1px solid black"><pre align="left">{}</pre></td>
    <td style="width:40%; border:1px solid black"><pre align="left">{}</pre></td>
  </tr>'''

def prompt_to_chat(prompt):
    """Render `prompt` as a single-turn user message with the tokenizer's chat template.

    Returns the templated text (a string, not token ids) with the generation
    prompt appended.
    """
    conversation = [dict(role="user", content=prompt)]
    return tokenizer.apply_chat_template(
        conversation,
        tokenize=False,
        add_generation_prompt=True,
    )

def infer(model, prompt, l=100, use_chat = True, temperature=0.4, top_p = 0.8):
    """Sample one completion for `prompt` from `model` and return it as text.

    Args:
        model: object exposing `.device` and `.generate(...)`.
        prompt: raw prompt string.
        l: maximum number of new tokens to generate.
        use_chat: when true, the prompt is first rendered with `prompt_to_chat`.
        temperature: sampling temperature forwarded to `generate`.
        top_p: top-p value forwarded to `generate`.

    Returns:
        The decoded continuation only (the prompt tokens are cut off);
        special tokens are kept in the returned text.
    """
    text = prompt_to_chat(prompt) if use_chat else prompt
    encoded = tokenizer([text], return_tensors="pt").to(model.device)

    sequences = model.generate(
        **encoded,
        max_new_tokens=l,
        temperature=temperature,
        top_p=top_p,
        do_sample=True,
    )

    # The batch always holds exactly one sequence; drop its leading prompt
    # tokens so that only the newly generated part is decoded.
    prompt_length = len(encoded.input_ids[0])
    continuation = sequences[0][prompt_length:]
    return tokenizer.batch_decode([continuation], skip_special_tokens=False)[0]


# Probe prompts (Russian and English) about BlackBox / Component Pascal used for the
# before/after comparison.
prompts =  [
    'Как в BlackBox можно вывести данные на консоль?', 
    'Какой тип данных в Component Pascal используется для хранения целых чисел?', 
    'МОДУЛЬ i21егэDemo2010C4ru;', 
    'Component Pascal is Oberon microsystems refinement of?', 
    'Log.String(', 
    'Типом целой константы является?'
]  # feel free to add a few more prompts here

In [14]:
def draw_diff(use_chat,l):
    """Display a PROMPT / BEFORE / AFTER HTML table comparing both models.

    Every entry of `prompts` is sent through `infer` once with `base_model`
    (BEFORE column) and once with `model` (AFTER column).

    Args:
        use_chat: forwarded to `infer`; whether prompts go through the chat template.
        l: forwarded to `infer` as the maximum number of new tokens.

    Returns:
        None. The table is rendered with `display(HTML(...))`.
    """
    import html  # local import keeps this cell runnable on its own

    print(f"USING CHAT TEMPLATE = {use_chat}")

    baseline = [infer(base_model, p, l = l, use_chat=use_chat) for p in prompts]
    check = [infer(model, p, l = l, use_chat=use_chat) for p in prompts]

    def _cell(text):
        # Prompts and generations are arbitrary text (often code): escape
        # <, > and & so they show up literally instead of being parsed as markup.
        return html.escape(text, quote=False)

    rows = [
        row_template.format(_cell(prompt), _cell(before), _cell(after))
        for prompt, before, after in zip(prompts, baseline, check)
    ]
    display(HTML(table_template.format('\n'.join(rows))))

In [16]:
# Compare both models on chat-templated prompts, capped at 64 new tokens per answer.
draw_diff(True, 64)

USING CHAT TEMPLATE = True


PROMPT,BEFORE,AFTER
`Как в BlackBox можно вывести данные на консоль?`,В BlackBox (также известном как Ansible) вы можете выводить данные на консоль с помощью различных методов. Вот несколько распространенных способов: 1. **Использование `debug` модуля:**  ```yaml  - name: Print message to console,"В BlackBox Component Builder есть несколько способов вывода текста на консоль. Один из них - это процедура StdLog. В частности, можно использовать следующий пример: PROCEDURE Log* (IN s: ARRAY OF CHAR); 	BEGIN  StdLog.String(s); Std"
`Какой тип данных в Component Pascal используется для хранения целых чисел?`,В Component Pascal используется тип данных `Integer` для хранения целых чисел.<|im_end|>,В Component Pascal целые числа представлены типом INTEGER.<|im_end|>
`МОДУЛЬ i21егэDemo2010C4ru;`,"It looks like you've provided a module name in a format that might be specific to a certain programming language or framework. However, without more context, I'm not able to provide a detailed explanation or help with the module. If you're looking for information on a specific module or library, please provide more details about what",Модуль создан для демонстрации возможностей Блэкбокса в учебных целях.<|im_end|>
`Component Pascal is Oberon microsystems refinement of?`,"Component Pascal is a programming language developed by Oberon Microsystems as an extension of the Oberon programming language. It was designed to provide a more structured and object-oriented approach to software development, with features such as components, interfaces, and encapsulation. Component Pascal is not a direct refinement of any other programming language, but rather",Component Pascal is an extension of the language Oberon.<|im_end|>
`Log.String(`,"It looks like you're trying to log a string using the `Log` method. However, without more context, it's not clear what specific library or framework you're using. Could you please provide more information about the context in which you're using this method? This will help me give you a more accurate and helpful",Log is the standard log writer used by BlackBox.<|im_end|>
`Типом целой константы является?`,"Типом целой константы в большинстве языков программирования является: 1. **Целочисленный тип** (например, `int` в C, Java, Python, и т.д.) 2. **Беззнаковый целочисленный тип** (например",Типом целой константы является INTEGER.<|im_end|>
