# Generate Code with LBPP Dataset
This experiment is done using:
* 10 small language models (SLMs)
* 3 types of prompt (no prompt, some instructions, detailed instructions)
* 3 SLM temperature settings
* 3 SLM top_p settings

In [1]:
# mount Google Drive
from google.colab import drive

drive.mount('/content/drive')             # Mount Google Drive to Colab session
!ls /content/drive/MyDrive                # Verify that your drive is accessible

Mounted at /content/drive
'16 б.gdoc'
'6 фото.     5 лет блондин.JPG'
 Apple
'Archive (1).zip'
 Archive.zip
'Colab Notebooks'
 futuristic-office.jpeg
'Labwork_Andrew_Nedilko_20250219 (1).pdf'
 Labwork_Andrew_Nedilko_20250219.pdf
'ML System Design Sketch.gdoc'
'Most Asked ML Questions.pdf'
 My_Drawing.gdraw
 rag_design.gdraw
 rental_houses.gsheet
 share
'Statistics in Plain English_Chapter Summaries.zip'
'Statistics in Plain English_WHAT I NEED Variance Significance Effect Size etc.pdf'
'Tanya Passport.JPG'
'лечение травами 3.gdoc'


In [2]:
import sys
sys.path.append('/content/drive/My Drive/Colab Notebooks/')

from prompts import ( complete_code_prompt_basic, complete_code_prompt, complete_code_prompt_full,
                      complete_task_prompt_basic, complete_task_prompt, complete_task_prompt_full,
                      reflection_prompt_basic, reflection_prompt, reflection_prompt_full)
from helpers import get_tokenizer, get_model, generate_response, clean_code, clean_code_light, is_function, write_jsonl, read_problems

In [3]:
import logging
import sys
import time, datetime
import os
import torch
from google.colab import userdata

# get HuggingFace token from secrets
HF_TOKEN = userdata.get('HF_TOKEN')       # Retrieve token from Colab Secrets
os.environ['HF_TOKEN'] = HF_TOKEN         # Set token as env variable (recommended optional)
from huggingface_hub import HfFolder
HfFolder.save_token(HF_TOKEN)             # Store the token locally to avoid re-authentication

In [4]:
# select model, propmts, temperature, top_p
models = [
    ('mlabonne/phixtral-2x2_8', 'phixtral-2x2_8'),
    ('meta-llama/Meta-Llama-3.1-8B-Instruct', 'Meta-Llama-3.1-8B-Instruct'),
    ('google/codegemma-7b-it', 'codegemma-7b-it'),
    ('deepseek-ai/deepseek-coder-6.7b-instruct', 'deepseek-coder-6.7b-instruct'),
    ('m-a-p/OpenCodeInterpreter-DS-6.7B', 'OpenCodeInterpreter-DS-6.7B'),
    ('Artigenz/Artigenz-Coder-DS-6.7B', 'Artigenz-Coder-DS-6.7B'),
    ('Qwen/CodeQwen1.5-7B-Chat', 'CodeQwen1.5-7B-Chat'),
    ('NTQAI/Nxcode-CQ-7B-orpo', 'Nxcode-CQ-7B-orpo'),
    #('mlabonne/phixtral-4x2_8', 'phixtral-4x2_8'),    # discarded
    #('NousResearch/Nous-Hermes-2-SOLAR-10.7B', 'Nous-Hermes-2-Solar-10.7B'),        # discarded
]
# actual prompt & prompt name for log file name
prompts_and_names = [
    (complete_code_prompt_basic, 'complete_code_prompt_basic'),
    (complete_code_prompt,       'complete_code_prompt'),
    (complete_code_prompt_full,  'complete_code_prompt_full'),
    (complete_task_prompt_basic, 'complete_task_prompt_basic'),
    (complete_task_prompt,       'complete_task_prompt'),
    (complete_task_prompt_full,  'complete_task_prompt_full'), ]
# reflection prompts & their names for log file name
reflection_prompts_and_names = [
    ( reflection_prompt_basic, 'reflection_prompt_basic' ),
    ( reflection_prompt, 'reflection_prompt' ),
    ( reflection_prompt_full, 'reflection_prompt_full' ), ]
# temperature and top_k values and their labels for file names
temperature_values = [
    (1.0,  'temperature1.0'),
    (0.75, 'temperature0.75'),
    (0.5,  'temperature0.5'),
    (0.005, 'temperature0.005'),]
top_p_values = [
    (1.0,  'topP1.0'),
    (0.75, 'topP0.75'),
    (0.5,  'topP0.5'),]

model_idx            = 6
prompt_idx           = 3
relection_prompt_idx = 0
temperature_idx      = 0
top_p_idx            = 0

model_name, model_nickname     = models[ model_idx ]
my_prompt, my_prompt_label     = prompts_and_names[ prompt_idx ]
my_reflection_prompt, my_reflection_prompt_label = reflection_prompts_and_names[ relection_prompt_idx ]
TEMPERATURE, temperature_label = temperature_values[ temperature_idx ]
TOP_P, top_p_label             = top_p_values[ top_p_idx ]

SPECIAL_MESSAGE = f'Model temperature: {TEMPERATURE}. Model top_p: {TOP_P}. Model top_k: 50. Model dtype: torch.float32'

print(f'Model name:     {model_name}')
print(f'Model nickname: {model_nickname}')
print(f'Prompt name:    {my_prompt_label}')
print(f'Relfection prompt name: {my_reflection_prompt_label}')
print(f'Prompt:\n{my_prompt.strip()}\n')
print(f'Reflection Prompt:\n{my_reflection_prompt.strip()}\n')
print(f'Temperature: {TEMPERATURE}. Temperature label: {temperature_label}')
print(f'Top p: {TOP_P}. Top p label: {top_p_label}')
print(f'Special message: {SPECIAL_MESSAGE}')

Model name:     Qwen/CodeQwen1.5-7B-Chat
Model nickname: CodeQwen1.5-7B-Chat
Prompt name:    complete_task_prompt_basic
Relfection prompt name: reflection_prompt_basic
Prompt:
Complete the following task:
{}.

Your output code must satisfy these tests:
{}

Reflection Prompt:
1. Read and understand the REQUIREMENTS and the PROPOSED SOLUTION listed below.
2. Analyze the PROPOSED SOLUTION for any errors, inefficiencies or inconsistencies.
3. Generate ONE optimized version of the PROPOSED SOLUTION.
4. If the PROPOSED SOLUTION is already optimal, return the PROPOSED SOLUTION.

REQUIREMENTS:
{}

PROPOSED SOLUTION:
{}

Temperature: 1.0. Temperature label: temperature1.0
Top p: 1.0. Top p label: topP1.0
Special message: Model temperature: 1.0. Model top_p: 1.0. Model top_k: 50. Model dtype: torch.float32


In [5]:
# save results
time_stamp     = datetime.datetime.now().strftime("%Y%m%d_%H%M%S_%f")[:-2]
results_file   = f'/content/drive/My Drive/Colab Notebooks/logs/{model_nickname}_{my_prompt_label}_{my_reflection_prompt_label}_{temperature_label}_{top_p_label}_completions_{time_stamp}.jsonl'
log_file       = f'/content/drive/My Drive/Colab Notebooks/logs/logs/{model_nickname}_{my_prompt_label}_{my_reflection_prompt_label}_{temperature_label}_{top_p_label}_log_{time_stamp}.log'

# log results
for handler in logging.root.handlers[:]:            # overwrite any previous handlers with different formats
    logging.root.removeHandler(handler)
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s  %(levelname)s  %(message)s',
    handlers=[
        logging.FileHandler(log_file),              # Log to a file
        logging.StreamHandler(sys.stdout)           # Log to console (default - sys.stderr (red background)
    ]
)

# print job config
logging.info('*****     JOB CONFIGURATION     *****')
logging.info(f'MODEL: {model_name}')
logging.info(f'MODEL NICKNAME: {model_nickname}')
logging.info(SPECIAL_MESSAGE)
logging.info(f'GENERATED CODE SAVED IN: {results_file}')
logging.info(f'THIS LOG FILE: {log_file}')
if my_prompt in [complete_code_prompt_basic, complete_code_prompt, complete_code_prompt_full,]:
  logging.info(f'PROMPT:\n{my_prompt.lstrip().format("Starter Code")}')
else:
  logging.info(f'\nPROMPT:\n{my_prompt.format("Task Description", "Test Cases")}')
logging.info('*****     END OF JOB CONFIGURATION     *****')

2025-06-11 07:24:21,233  INFO  *****     JOB CONFIGURATION     *****
2025-06-11 07:24:21,234  INFO  MODEL: Qwen/CodeQwen1.5-7B-Chat
2025-06-11 07:24:21,236  INFO  MODEL NICKNAME: CodeQwen1.5-7B-Chat
2025-06-11 07:24:21,237  INFO  Model temperature: 1.0. Model top_p: 1.0. Model top_k: 50. Model dtype: torch.float32
2025-06-11 07:24:21,237  INFO  GENERATED CODE SAVED IN: /content/drive/My Drive/Colab Notebooks/logs/CodeQwen1.5-7B-Chat_complete_task_prompt_basic_reflection_prompt_basic_temperature1.0_topP1.0_completions_20250611_072420_9629.jsonl
2025-06-11 07:24:21,238  INFO  THIS LOG FILE: /content/drive/My Drive/Colab Notebooks/logs/logs/CodeQwen1.5-7B-Chat_complete_task_prompt_basic_reflection_prompt_basic_temperature1.0_topP1.0_log_20250611_072420_9629.log
2025-06-11 07:24:21,239  INFO  
PROMPT:

Complete the following task:
Task Description.

Your output code must satisfy these tests:
Test Cases

2025-06-11 07:24:21,240  INFO  *****     END OF JOB CONFIGURATION     *****


In [6]:
# read dataset
file = '/content/drive/My Drive/Colab Notebooks/data/MBPP_Test.jsonl.gz'
tasks = read_problems(file)
print(f'Type of tasks: {type(tasks)}\n')

counter = 0
for k,v in tasks.items():
    if counter == 3:
        break
    print(f'task_id: {k}', type(k))
    for k2, v2 in v.items():
        print(f'\n{k2}:\n{v2}')
    print('\n' + '='*100 + '\n')
    counter += 1

Type of tasks: <class 'dict'>

task_id: 11 <class 'str'>

task_id:
11

test:

assert remove_Occ("hello","l") == "heo"
assert remove_Occ("abcda","a") == "bcd"
assert remove_Occ("PHP","P") == "H"

prompt:
Write a python function to remove first and last occurrence of a given character from the string.

canonical_solution:
def remove_Occ(s,ch): 
    for i in range(len(s)): 
        if (s[i] == ch): 
            s = s[0 : i] + s[i + 1:] 
            break
    for i in range(len(s) - 1,-1,-1):  
        if (s[i] == ch): 
            s = s[0 : i] + s[i + 1:] 
            break
    return s 

entry_point:
None


task_id: 12 <class 'str'>

task_id:
12

test:

assert sort_matrix([[1, 2, 3], [2, 4, 5], [1, 1, 1]])==[[1, 1, 1], [1, 2, 3], [2, 4, 5]]
assert sort_matrix([[1, 2, 3], [-2, 4, -5], [1, -1, 1]])==[[-2, 4, -5], [1, -1, 1], [1, 2, 3]]
assert sort_matrix([[5,8,9],[6,4,3],[2,1,4]])==[[2, 1, 4], [6, 4, 3], [5, 8, 9]]

prompt:
Write a function to sort a given matrix in ascending orde

In [7]:
torch_dtype = torch.float32
#torch_dtype = torch.bfloat16
tokenizer = get_tokenizer(model_name)
model     = get_model(model_name, torch_dtype=torch_dtype)

tokenizer_config.json:   0%|          | 0.00/972 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/4.46M [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/1.42M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/701 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/31.7k [00:00<?, ?B/s]

Fetching 4 files:   0%|          | 0/4 [00:00<?, ?it/s]

model-00001-of-00004.safetensors:   0%|          | 0.00/3.89G [00:00<?, ?B/s]

model-00003-of-00004.safetensors:   0%|          | 0.00/3.95G [00:00<?, ?B/s]

model-00002-of-00004.safetensors:   0%|          | 0.00/3.95G [00:00<?, ?B/s]

model-00004-of-00004.safetensors:   0%|          | 0.00/2.71G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/212 [00:00<?, ?B/s]

In [8]:
print(f'Model device: {model.device}')
print(f'Model temperature: {model.config.temperature}. Model top_p: {model.config.top_p}. Model top_k: {model.config.top_k}. Model dtype: {model.dtype}')

input_prompt = "What is the capital of California?"
print(input_prompt)
generate_response(input_prompt, tokenizer, model, temperature=TEMPERATURE)

The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.


Model device: cuda:0
Model temperature: 1.0. Model top_p: 1.0. Model top_k: 50. Model dtype: torch.float32
What is the capital of California?


'The capital of California is Sacramento.'

In [None]:
num_samples_per_task = 1
completions  = []

for task_id, task_body in tasks.items():
    logging.info( task_id )
    #if task_id == 'lbpp/5':
    #        break
    for i in range(num_samples_per_task):
        start_time  = time.time()

        # generate proposed solution
        full_prompt = my_prompt.lstrip().format( task_body['prompt'], task_body['test'].strip() )
        try:
            proposed_solution = generate_response( full_prompt, tokenizer, model, temperature=TEMPERATURE )
        except Exception as e:
            proposed_solution = f"Error generating a completion:\n{e}"

        # improve proposed solution
        new_full_prompt = my_reflection_prompt.lstrip().format(full_prompt, proposed_solution)
        try:
            improved_solution = generate_response( new_full_prompt, tokenizer, model, temperature=TEMPERATURE )
        except Exception as e:
            improved_solution = f"Error generating a completion:\n{e}"

        # check if improved solution is a Python function. If not, use the previously proposed solution
        cleaned_improved_solution = clean_code_light(improved_solution).strip()
        if is_function(cleaned_improved_solution):
            final_solution = improved_solution
        else:
            final_solution = proposed_solution

        # save and log the results
        completions.append( {'task_id': task_id, 'completion': final_solution} )
        write_jsonl(results_file, completions)
        logging.info('NEW REFLECTION PROMPT:\n' + new_full_prompt + '\n')
        logging.info('IMPROVED COMPLETION:\n' + final_solution + '\n')
        logging.info(f"Time elapsed: {(time.time() - start_time):.4f} seconds\n")

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
        if isinstance(sublist, list):
            if not any(isinstance(item, list) and check_subset_list(sublist, item) for item in main_list):
                return False
        else:
            if sublist not in main_list:
                return False
    return True
```

This function works by iterating over each sublist in the subset. If the sublist is a list itself, it checks if there is any item in the main list that is also a list and contains the same elements as the sublist. If there is no such item, it returns False. If the sublist is not a list, it checks if it is in the main list. If it is not, it returns False. If all sublists pass these checks, it returns True.

This function assumes that the elements in the subset and main list are hashable (i.e., they can be used as keys in a dictionary). If this is not the case, you may need to modify the function to handle these cases.

2025-06-11 08:06:51,198  INFO 

In [None]:
from google.colab import runtime
runtime.unassign()