# Generate Code Using the Big Code Bench Dataset
This experiment is done using:
* 10 small language models (SLMs)
* 3 types of prompt (no prompt, some instructions, detailed instructions)
* 3 SLM temperature settings
* 3 SLM top_p settings

In [1]:
# mount Google Drive
from google.colab import drive

drive.mount('/content/drive')             # Mount Google Drive to Colab session
!ls /content/drive/MyDrive                # Verify that your drive is accessible

Mounted at /content/drive
'16 б.gdoc'			 'Labwork_Andrew_Nedilko_20250219 (1).pdf'
'6 фото.     5 лет блондин.JPG'   Labwork_Andrew_Nedilko_20250219.pdf
 Apple				  rental_houses.gsheet
 Archive.zip			  share
'Colab Notebooks'		 'Tanya Passport.JPG'
 futuristic-office.jpeg		 'лечение травами 3.gdoc'


In [2]:
import sys
sys.path.append('/content/drive/My Drive/Colab Notebooks/')

from prompts import ( complete_code_prompt_basic, complete_code_prompt, complete_code_prompt_full,
                      complete_task_prompt_basic, complete_task_prompt, complete_task_prompt_full, )
from helpers import get_tokenizer, get_model, generate_response, clean_code, write_jsonl, read_problems

In [3]:
import logging
import sys
import time, datetime
import os
import torch
from google.colab import userdata

# get HuggingFace token from secrets
HF_TOKEN = userdata.get('HF_TOKEN')       # Retrieve token from Colab Secrets
os.environ['HF_TOKEN'] = HF_TOKEN         # Set token as env variable (recommended optional)
from huggingface_hub import HfFolder
HfFolder.save_token(HF_TOKEN)             # Store the token locally to avoid re-authentication

In [4]:
# select model, propmt, temperature, top_p
models = [
    ('mlabonne/phixtral-2x2_8', 'phixtral-2x2_8'),
    ('mlabonne/phixtral-4x2_8', 'phixtral-4x2_8'),
    ('NousResearch/Nous-Hermes-2-SOLAR-10.7B', 'Nous-Hermes-2-Solar-10.7B'),
    ('meta-llama/Meta-Llama-3.1-8B-Instruct', 'Meta-Llama-3.1-8B-Instruct'),
    ('google/codegemma-7b-it', 'codegemma-7b-it'),
    ('deepseek-ai/deepseek-coder-6.7b-instruct', 'deepseek-coder-6.7b-instruct'),
    ('m-a-p/OpenCodeInterpreter-DS-6.7B', 'OpenCodeInterpreter-DS-6.7B'),
    ('Artigenz/Artigenz-Coder-DS-6.7B', 'Artigenz-Coder-DS-6.7B'),
    ('Qwen/CodeQwen1.5-7B-Chat', 'CodeQwen1.5-7B-Chat'),
    ('NTQAI/Nxcode-CQ-7B-orpo', 'Nxcode-CQ-7B-orpo'), ]
# actual prompt & prompt name for log file name
prompts_and_names = [
    (complete_code_prompt_basic, 'complete_code_prompt_basic'),
    (complete_code_prompt,       'complete_code_prompt'),
    (complete_code_prompt_full,  'complete_code_prompt_full'),
    (complete_task_prompt_basic, 'complete_task_prompt_basic'),
    (complete_task_prompt,       'complete_task_prompt'),
    (complete_task_prompt_full,  'complete_task_prompt_full'), ]
# temperature and top_k values and labels for file names
temperature_values = [
    (1.0,  'temperature1.0'),
    (0.75, 'temperature0.75'),
    (0.5,  'temperature0.5'), ]
top_p_values = [
    (1.0,  'topP1.0'),
    (0.75, 'topP0.75'),
    (0.5,  'topP0.5'), ]

model_idx       = 7
prompt_idx      = 1
temperature_idx = 0
top_p_idx       = 0

model_name, model_nickname     = models[ model_idx ]
my_prompt, my_prompt_label     = prompts_and_names[ prompt_idx ]
TEMPERATURE, temperature_label = temperature_values[ temperature_idx ]
TOP_P, top_p_label             = top_p_values[ top_p_idx ]

SPECIAL_MESSAGE = f'Model temperature: {TEMPERATURE}. Model top_p: {TOP_P}. Model top_k: 50. Model dtype: torch.float32'

print(f'Model name:     {model_name}')
print(f'Model nickname: {model_nickname}')
print(f'Prompt name:    {my_prompt_label}')
print(f'Prompt:\n{my_prompt.strip()}\n')
print(f'Temperature: {TEMPERATURE}. Temperature label: {temperature_label}')
print(f'Top p: {TOP_P}. Top p label: {top_p_label}')
print(f'Special message: {SPECIAL_MESSAGE}')

Model name:     Artigenz/Artigenz-Coder-DS-6.7B
Model nickname: Artigenz-Coder-DS-6.7B
Prompt name:    complete_code_prompt
Prompt:
1. Act as an experienced Python software developer and complete the code provided below.
2. Enclose your output code in the following code fences:
```python
[code]
```
3. Complete the following Python code:
{}

Temperature: 1.0. Temperature label: temperature1.0
Top p: 1.0. Top p label: topP1.0
Special message: Model temperature: 1.0. Model top_p: 1.0. Model top_k: 50. Model dtype: torch.float32


In [5]:
# save results
time_stamp     = datetime.datetime.now().strftime("%Y%m%d_%H%M%S_%f")[:-2]
results_file   = f'/content/drive/My Drive/Colab Notebooks/logs/{model_nickname}_{my_prompt_label}_{temperature_label}_{top_p_label}_completions_{time_stamp}.jsonl'
log_file       = f'/content/drive/My Drive/Colab Notebooks/logs/logs/{model_nickname}_{my_prompt_label}_{temperature_label}_{top_p_label}_log_{time_stamp}.log'

# log results
for handler in logging.root.handlers[:]:            # overwrite any previous handlers with different formats
    logging.root.removeHandler(handler)
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s  %(levelname)s  %(message)s',
    handlers=[
        logging.FileHandler(log_file),              # Log to a file
        logging.StreamHandler(sys.stdout)           # Log to console (default - sys.stderr (red background)
    ]
)

# print job config
logging.info('*****     JOB CONFIGURATION     *****')
logging.info(f'MODEL: {model_name}')
logging.info(f'MODEL NICKNAME: {model_nickname}')
logging.info(SPECIAL_MESSAGE)
logging.info(f'GENERATED CODE SAVED IN: {results_file}')
logging.info(f'THIS LOG FILE: {log_file}')
if my_prompt in [complete_code_prompt_basic, complete_code_prompt, complete_code_prompt_full,]:
  logging.info(f'PROMPT:\n{my_prompt.lstrip().format("Starter Code")}')
else:
  logging.info(f'\nPROMPT:\n{my_prompt.format("Task Description", "Test Cases")}')
logging.info('*****     END OF JOB CONFIGURATION     *****')

2025-04-03 02:43:30,263  INFO  *****     JOB CONFIGURATION     *****
2025-04-03 02:43:30,266  INFO  MODEL: Artigenz/Artigenz-Coder-DS-6.7B
2025-04-03 02:43:30,266  INFO  MODEL NICKNAME: Artigenz-Coder-DS-6.7B
2025-04-03 02:43:30,267  INFO  Model temperature: 1.0. Model top_p: 1.0. Model top_k: 50. Model dtype: torch.float32
2025-04-03 02:43:30,268  INFO  GENERATED CODE SAVED IN: /content/drive/My Drive/Colab Notebooks/logs/Artigenz-Coder-DS-6.7B_complete_code_prompt_temperature1.0_topP1.0_completions_20250403_024330_0115.jsonl
2025-04-03 02:43:30,269  INFO  THIS LOG FILE: /content/drive/My Drive/Colab Notebooks/logs/logs/Artigenz-Coder-DS-6.7B_complete_code_prompt_temperature1.0_topP1.0_log_20250403_024330_0115.log
2025-04-03 02:43:30,270  INFO  PROMPT:
1. Act as an experienced Python software developer and complete the code provided below.
2. Enclose your output code in the following code fences:
```python
[code]
```
3. Complete the following Python code:
Starter Code

2025-04-03 02:4

In [6]:
# read dataset
file = '/content/drive/My Drive/Colab Notebooks/data/Big_Code_Bench_Test.jsonl.gz'
tasks = read_problems(file)
print(f'Type of tasks: {type(tasks)}\n')

counter = 0
for k,v in tasks.items():
    if counter == 1:
        break
    print(f'task_id: {k}', type(k))
    for k2, v2 in v.items():
        print(f'\n{k2}:\n{v2}')
    print('\n' + '='*100 + '\n')
    counter += 1

Type of tasks: <class 'dict'>

task_id: BigCodeBench/0 <class 'str'>

task_id:
BigCodeBench/0

test:


import unittest
from unittest.mock import patch
from random import seed, shuffle
import itertools
class TestCases(unittest.TestCase):
    def test_default_numbers(self):
        # Test with default number range (1 to 10) to check that the result is a positive float.
        result = task_func()
        self.assertIsInstance(result, float)
        self.assertGreater(result, 0)
    def test_custom_list(self):
        # Test with a custom list of small positive integers to ensure proper handling and positive result.
        result = task_func([1, 2, 3])
        self.assertIsInstance(result, float)
        self.assertGreater(result, 0)
    def test_negative_numbers(self):
        # Test with negative numbers to verify the function handles and returns a positive result.
        result = task_func([-3, -2, -1])
        self.assertIsInstance(result, float)
        self.assertGreater(result, 

In [7]:
torch_dtype = torch.float32
#torch_dtype = torch.bfloat16
tokenizer = get_tokenizer(model_name)
model     = get_model(model_name, torch_dtype=torch_dtype)

tokenizer_config.json:   0%|          | 0.00/4.87k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.37M [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/458 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/482 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/879 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/23.9k [00:00<?, ?B/s]

Fetching 3 files:   0%|          | 0/3 [00:00<?, ?it/s]

model-00001-of-00003.safetensors:   0%|          | 0.00/4.94G [00:00<?, ?B/s]

model-00002-of-00003.safetensors:   0%|          | 0.00/4.95G [00:00<?, ?B/s]

model-00003-of-00003.safetensors:   0%|          | 0.00/3.59G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/119 [00:00<?, ?B/s]

In [8]:
print(f'Model device: {model.device}')
print(f'Model temperature: {model.config.temperature}. Model top_p: {model.config.top_p}. Model top_k: {model.config.top_k}. Model dtype: {model.dtype}')

input_prompt = "What is the capital of California?"
print(input_prompt)
generate_response(input_prompt, tokenizer, model)

The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.


Model device: cuda:0
Model temperature: 1.0. Model top_p: 1.0. Model top_k: 50. Model dtype: torch.float32
What is the capital of California?


'The capital of California is Sacramento.'

In [None]:
num_samples_per_task = 1
completions  = []
is_full_func = []

for task_id, task_body in tasks.items():
    logging.info( task_id )
    #if task_id == 'BigCodeBench/7':
    #        break
    for i in range(num_samples_per_task):
        start_time  = time.time()
        full_prompt = my_prompt.lstrip().format( task_body['prompt'] )
        try:
            completion = generate_response( full_prompt, tokenizer, model )
        except Exception as e:
            completion = f"Error when model predicted a completion:\n{e}"

        #clean_completion = clean_code(completion)
        completions.append( {'task_id': task_id, 'completion': completion} )
        write_jsonl(results_file, completions)
        logging.info('PROMPT:\n' + full_prompt + '\n')
        logging.info('COMPLETION:\n' + completion + '\n')
        logging.info(f"Time elapsed: {(time.time() - start_time):.4f} seconds\n")

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
from collections import Counter

def task_func(file_path):
    """
    This function reads the specified CSV file, counts the frequency of each word, and returns the most common word 
    along with its frequency.

    Parameters:
    - file_path (str): The path to the CSV file.

    Requirements:
    - os
    - csv
    - collections

    Returns:
    - tuple: The most common word and its frequency, or None if the file doesn't exist or is empty.

    Example:
    >>> # Assuming 'example.txt' contains multiple repetitions of the word 'example'
    >>> task_func('example.txt')  # doctest: +SKIP
    ('example', <some_positive_integer>)

    Note:
    - The function specifically reads from the given file path.
    - This example uses +SKIP because it relies on external file content.
    """
    if not os.path.exists(file_path):
        return None

    with open(file_path, 'r') as file:
        reader = csv.reader(file)
     

In [None]:
from google.colab import runtime
runtime.unassign()