In [None]:
!pip install -U datasets

Collecting datasets
  Downloading datasets-3.6.0-py3-none-any.whl.metadata (19 kB)
Collecting fsspec<=2025.3.0,>=2023.1.0 (from fsspec[http]<=2025.3.0,>=2023.1.0->datasets)
  Downloading fsspec-2025.3.0-py3-none-any.whl.metadata (11 kB)
Downloading datasets-3.6.0-py3-none-any.whl (491 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m491.5/491.5 kB[0m [31m26.6 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading fsspec-2025.3.0-py3-none-any.whl (193 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m193.6/193.6 kB[0m [31m14.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: fsspec, datasets
  Attempting uninstall: fsspec
    Found existing installation: fsspec 2025.3.2
    Uninstalling fsspec-2025.3.2:
      Successfully uninstalled fsspec-2025.3.2
  Attempting uninstall: datasets
    Found existing installation: datasets 2.14.4
    Uninstalling datasets-2.14.4:
      Successfully uninstalled datasets-2.14.4
[31mERROR: pip's dependency r

In [None]:
# mount Google Drive
from google.colab import drive

drive.mount('/content/drive')             # Mount Google Drive to Colab session
!ls /content/drive/MyDrive                # Verify that your drive is accessible

Mounted at /content/drive
'16 б.gdoc'
'6 фото.     5 лет блондин.JPG'
 Apple
'Archive (1).zip'
 Archive.zip
'Colab Notebooks'
 futuristic-office.jpeg
'Labwork_Andrew_Nedilko_20250219 (1).pdf'
 Labwork_Andrew_Nedilko_20250219.pdf
'ML System Design Sketch.gdoc'
'Most Asked ML Questions.pdf'
 My_Drawing.gdraw
 rag_design.gdraw
 rental_houses.gsheet
 share
'Statistics in Plain English_Chapter Summaries.zip'
'Statistics in Plain English_WHAT I NEED Variance Significance Effect Size etc.pdf'
'Tanya Passport.JPG'
'лечение травами 3.gdoc'


In [None]:
# Make the project-local modules stored on Google Drive importable.
import sys

sys.path.append('/content/drive/My Drive/Colab Notebooks/')

# Prompt templates (used later via str.format) from the project-local `prompts` module.
from prompts import (
    complete_code_prompt_basic,
    complete_code_prompt,
    complete_code_prompt_full,
    complete_task_prompt_basic,
    complete_task_prompt,
    complete_task_prompt_full,
    reflection_prompt_basic,
    reflection_prompt,
    reflection_prompt_full,
)

# Model loading, generation and post-processing helpers from the project-local `helpers` module.
from helpers import (
    get_tokenizer,
    get_model,
    generate_response,
    clean_code,
    clean_code_light,
    is_function,
    write_jsonl,
    read_problems,
)

In [None]:
# Standard library
import datetime
import logging
import os
import sys
import time

# Third-party
import torch
from datasets import load_dataset
from google.colab import userdata
from huggingface_hub import HfFolder

# Read the Hugging Face token from Colab Secrets, so it is never hard-coded here.
HF_TOKEN = userdata.get('HF_TOKEN')

# Expose the token through the environment as well ...
os.environ['HF_TOKEN'] = HF_TOKEN

# ... and store it locally to avoid re-authentication.
HfFolder.save_token(HF_TOKEN)

In [None]:
# Job configuration: select the model, prompt, reflection prompt, temperature and top_p.
# Each list holds (value, label) pairs; the labels are used in file names and log output.
models = [
    ('mlabonne/phixtral-2x2_8', 'phixtral-2x2_8'),
    ('meta-llama/Meta-Llama-3.1-8B-Instruct', 'Meta-Llama-3.1-8B-Instruct'),
    ('google/codegemma-7b-it', 'codegemma-7b-it'),
    ('deepseek-ai/deepseek-coder-6.7b-instruct', 'deepseek-coder-6.7b-instruct'),
    ('m-a-p/OpenCodeInterpreter-DS-6.7B', 'OpenCodeInterpreter-DS-6.7B'),
    ('Artigenz/Artigenz-Coder-DS-6.7B', 'Artigenz-Coder-DS-6.7B'),
    ('Qwen/CodeQwen1.5-7B-Chat', 'CodeQwen1.5-7B-Chat'),
    ('NTQAI/Nxcode-CQ-7B-orpo', 'Nxcode-CQ-7B-orpo'),
    #('mlabonne/phixtral-4x2_8', 'phixtral-4x2_8'),    # discarded
    #('NousResearch/Nous-Hermes-2-SOLAR-10.7B', 'Nous-Hermes-2-Solar-10.7B'),        # discarded
]
# actual prompt & prompt name for log file name
prompts_and_names = [
    (complete_code_prompt_basic, 'complete_code_prompt_basic'),
    (complete_code_prompt,       'complete_code_prompt'),
    (complete_code_prompt_full,  'complete_code_prompt_full'),
    (complete_task_prompt_basic, 'complete_task_prompt_basic'),
    (complete_task_prompt,       'complete_task_prompt'),
    (complete_task_prompt_full,  'complete_task_prompt_full'), ]
# reflection prompts & their names for log file name
reflection_prompts_and_names = [
    ( reflection_prompt_basic, 'reflection_prompt_basic' ),
    ( reflection_prompt, 'reflection_prompt' ),
    ( reflection_prompt_full, 'reflection_prompt_full' ), ]
# temperature and top_p values and labels for file names
temperature_values = [
    (1.0,  'temperature1.0'),
    (0.75, 'temperature0.75'),
    (0.5,  'temperature0.5'), ]
top_p_values = [
    (1.0,  'topP1.0'),
    (0.75, 'topP0.75'),
    (0.5,  'topP0.5'), ]

# Indices into the lists above; change these to configure a run.
model_idx             = 6
prompt_idx            = 0
reflection_prompt_idx = 0
relection_prompt_idx  = reflection_prompt_idx    # misspelled legacy name, kept as an alias
temperature_idx       = 0
top_p_idx             = 0

model_name, model_nickname     = models[ model_idx ]
my_prompt, my_prompt_label     = prompts_and_names[ prompt_idx ]
my_reflection_prompt, my_reflection_prompt_label = reflection_prompts_and_names[ reflection_prompt_idx ]
TEMPERATURE, temperature_label = temperature_values[ temperature_idx ]
TOP_P, top_p_label             = top_p_values[ top_p_idx ]

# NOTE(review): top_k and dtype are hard-coded in this message; keep them in sync
# with the values actually used when the model is loaded.
SPECIAL_MESSAGE = f'Model temperature: {TEMPERATURE}. Model top_p: {TOP_P}. Model top_k: 50. Model dtype: torch.float32'

# Build the prompt preview the same way the logging cell does: the code-completion
# prompts are formatted with one value, the task prompts with two. Formatting a task
# prompt with a single value would raise IndexError if it has a second placeholder.
if my_prompt in [complete_code_prompt_basic, complete_code_prompt, complete_code_prompt_full]:
    prompt_preview = my_prompt.strip().format("Starter Code")
else:
    prompt_preview = my_prompt.strip().format("Task Description", "Test Cases")

print(f'Model name:             {model_name}')
print(f'Model nickname:         {model_nickname}')
print(f'Prompt name:            {my_prompt_label}')
print(f'Reflection prompt name: {my_reflection_prompt_label}')
print(f'\nPrompt:\n{prompt_preview}\n')
print(f'Temperature: {TEMPERATURE}. Temperature label: {temperature_label}')
print(f'Top p: {TOP_P}. Top p label: {top_p_label}')
print(f'Special message: {SPECIAL_MESSAGE}')

Model name:             Qwen/CodeQwen1.5-7B-Chat
Model nickname:         CodeQwen1.5-7B-Chat
Prompt name:            complete_code_prompt_basic
Relfection prompt name: reflection_prompt_basic

Prompt:
Complete the following Python code:
Starter Code

Temperature: 1.0. Temperature label: temperature1.0
Top p: 1.0. Top p label: topP1.0
Special message: Model temperature: 1.0. Model top_p: 1.0. Model top_k: 50. Model dtype: torch.float32


In [None]:
# Output locations. The timestamp keeps the first four microsecond digits so that
# repeated runs with the same configuration get distinct file names.
time_stamp   = datetime.datetime.now().strftime("%Y%m%d_%H%M%S_%f")[:-2]
results_file = f'/content/drive/My Drive/Colab Notebooks/logs/{model_nickname}_{my_prompt_label}_{temperature_label}_{top_p_label}_completions_{time_stamp}.jsonl'
log_file     = f'/content/drive/My Drive/Colab Notebooks/logs/logs/{model_nickname}_{my_prompt_label}_{temperature_label}_{top_p_label}_log_{time_stamp}.log'

# Create the output folders up front: logging.FileHandler opens its file immediately
# and raises FileNotFoundError when the parent folder is missing.
for output_path in (results_file, log_file):
    os.makedirs(os.path.dirname(output_path), exist_ok=True)

# Reset logging so re-running this cell does not stack handlers. Each removed handler
# is also closed, otherwise the previous run's log file would stay open.
for handler in logging.root.handlers[:]:
    logging.root.removeHandler(handler)
    handler.close()
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s  %(levelname)s  %(message)s',
    handlers=[
        logging.FileHandler(log_file),              # Log to a file
        logging.StreamHandler(sys.stdout)           # Log to console via stdout (the default is sys.stderr, shown with a red background)
    ]
)

# print job config
logging.info('*****     JOB CONFIGURATION     *****')
logging.info(f'MODEL: {model_name}')
logging.info(f'MODEL NICKNAME: {model_nickname}')
logging.info(SPECIAL_MESSAGE)
logging.info(f'GENERATED CODE SAVED IN: {results_file}')
logging.info(f'THIS LOG FILE: {log_file}')
# Code-completion prompts are formatted with one value, task prompts with two.
if my_prompt in [complete_code_prompt_basic, complete_code_prompt, complete_code_prompt_full,]:
  logging.info(f'PROMPT:\n{my_prompt.lstrip().format("Starter Code")}')
else:
  logging.info(f'\nPROMPT:\n{my_prompt.format("Task Description", "Test Cases")}')
logging.info('*****     END OF JOB CONFIGURATION     *****')

2025-06-12 11:48:14,645  INFO  *****     JOB CONFIGURATION     *****
2025-06-12 11:48:14,647  INFO  MODEL: Qwen/CodeQwen1.5-7B-Chat
2025-06-12 11:48:14,647  INFO  MODEL NICKNAME: CodeQwen1.5-7B-Chat
2025-06-12 11:48:14,648  INFO  Model temperature: 1.0. Model top_p: 1.0. Model top_k: 50. Model dtype: torch.float32
2025-06-12 11:48:14,649  INFO  GENERATED CODE SAVED IN: /content/drive/My Drive/Colab Notebooks/logs/CodeQwen1.5-7B-Chat_complete_code_prompt_basic_temperature1.0_topP1.0_completions_20250612_114814_3215.jsonl
2025-06-12 11:48:14,650  INFO  THIS LOG FILE: /content/drive/My Drive/Colab Notebooks/logs/logs/CodeQwen1.5-7B-Chat_complete_code_prompt_basic_temperature1.0_topP1.0_log_20250612_114814_3215.log
2025-06-12 11:48:14,651  INFO  PROMPT:
Complete the following Python code:
Starter Code

2025-06-12 11:48:14,651  INFO  *****     END OF JOB CONFIGURATION     *****


In [None]:
# Download the HumanEval benchmark and preview its first record, field by field.
dataset = load_dataset("openai/openai_humaneval")

for position, item in enumerate(dataset["test"]):
    if position >= 1:                       # only the first record is shown
        break
    for field_name, field_value in item.items():
        print(f'{field_name}:\n{field_value}\n')
    print('\n', '='*75, '\n', sep='')

README.md:   0%|          | 0.00/6.52k [00:00<?, ?B/s]

test-00000-of-00001.parquet:   0%|          | 0.00/83.9k [00:00<?, ?B/s]

Generating test split:   0%|          | 0/164 [00:00<?, ? examples/s]

task_id:
HumanEval/0

prompt:
from typing import List


def has_close_elements(numbers: List[float], threshold: float) -> bool:
    """ Check if in given list of numbers, are any two numbers closer to each other than
    given threshold.
    >>> has_close_elements([1.0, 2.0, 3.0], 0.5)
    False
    >>> has_close_elements([1.0, 2.8, 3.0, 4.0, 5.0, 2.0], 0.3)
    True
    """


canonical_solution:
    for idx, elem in enumerate(numbers):
        for idx2, elem2 in enumerate(numbers):
            if idx != idx2:
                distance = abs(elem - elem2)
                if distance < threshold:
                    return True

    return False


test:


METADATA = {
    'author': 'jt',
    'dataset': 'test'
}


def check(candidate):
    assert candidate([1.0, 2.0, 3.9, 4.0, 5.0, 2.2], 0.3) == True
    assert candidate([1.0, 2.0, 3.9, 4.0, 5.0, 2.2], 0.05) == False
    assert candidate([1.0, 2.0, 5.9, 4.0, 5.0], 0.95) == True
    assert candidate([1.0, 2.0, 5.9, 4.0, 5.0], 0.8) == False

In [None]:
# Load the tokenizer for the selected checkpoint.
tokenizer = get_tokenizer(model_name)

# Load the model weights in float32 (alternative: torch.bfloat16).
torch_dtype = torch.float32
model = get_model(model_name, torch_dtype=torch_dtype)

tokenizer_config.json:   0%|          | 0.00/972 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/4.46M [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/1.42M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/701 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/31.7k [00:00<?, ?B/s]

Fetching 4 files:   0%|          | 0/4 [00:00<?, ?it/s]

model-00003-of-00004.safetensors:   0%|          | 0.00/3.95G [00:00<?, ?B/s]

model-00002-of-00004.safetensors:   0%|          | 0.00/3.95G [00:00<?, ?B/s]

model-00001-of-00004.safetensors:   0%|          | 0.00/3.89G [00:00<?, ?B/s]

model-00004-of-00004.safetensors:   0%|          | 0.00/2.71G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/212 [00:00<?, ?B/s]

In [None]:
# Report the model's device, dtype and the sampling values stored in its config.
model_config = model.config
print(f'Model device: {model.device}')
print(
    f'Model temperature: {model_config.temperature}. '
    f'Model top_p: {model_config.top_p}. '
    f'Model top_k: {model_config.top_k}. '
    f'Model dtype: {model.dtype}'
)

# Smoke test: one simple question before the long benchmark run.
input_prompt = "What is the capital of California?"
print(input_prompt)
generate_response(input_prompt, tokenizer, model)

The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.


Model device: cuda:0
Model temperature: 1.0. Model top_p: 1.0. Model top_k: 50. Model dtype: torch.float32
What is the capital of California?


'The capital of California is Sacramento.'

In [None]:
# Main benchmark loop: for every HumanEval task, generate a solution, ask the model
# to reflect on and improve it, choose which of the two to keep, and save the result.
num_samples_per_task = 1
completions  = []

# The templates do not change between tasks, so strip their leading whitespace once.
# NOTE(review): only one value is substituted into my_prompt below, while the logging
# cell formats the task prompts with two values - confirm before selecting a task prompt.
generation_template = my_prompt.lstrip()
reflection_template = my_reflection_prompt.lstrip()

for item in dataset["test"]:
    logging.info(item['task_id'])
    for sample_idx in range(num_samples_per_task):
        start_time  = time.time()

        # generate completion
        # NOTE(review): TOP_P is selected in the configuration cell but is not forwarded
        # to generate_response - confirm whether the helper accepts a top_p argument.
        full_prompt = generation_template.format( item['prompt'] )
        try:
            proposed_solution = generate_response( full_prompt, tokenizer, model, temperature=TEMPERATURE )
        except Exception as e:
            # Keep the run going, but record the failure with its traceback in the log.
            logging.exception('Initial generation failed for %s', item['task_id'])
            proposed_solution = f"Error generating a completion:\n{e}"

        # improve solution
        new_full_prompt = reflection_template.format(full_prompt, proposed_solution)
        try:
            improved_solution = generate_response( new_full_prompt, tokenizer, model, temperature=TEMPERATURE )
        except Exception as e:
            logging.exception('Reflection generation failed for %s', item['task_id'])
            improved_solution = f"Error generating a completion:\n{e}"

        # Keep the improved solution only if its lightly cleaned form passes is_function;
        # otherwise fall back to the previously proposed solution.
        cleaned_improved_solution = clean_code_light(improved_solution).strip()
        if is_function(cleaned_improved_solution):
            final_solution = improved_solution
        else:
            logging.info('Improved solution did not pass is_function; keeping the proposed solution.')
            final_solution = proposed_solution

        # Save and log the results. The whole list is passed to write_jsonl after
        # every sample, so an interrupted session still leaves a results file.
        completions.append( {'task_id': item['task_id'], 'completion': final_solution} )
        write_jsonl(results_file, completions)
        logging.info('NEW REFLECTION PROMPT:\n' + new_full_prompt + '\n')
        logging.info('IMPROVED COMPLETION:\n' + final_solution + '\n')
        logging.info(f"Time elapsed: {(time.time() - start_time):.4f} seconds\n")

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
In this solution, we first initialize a min heap with the first element of the list. Then, we iterate through the remaining elements of the list and push each element onto the min heap. After that, we pop the smallest element from the min heap. Finally, we return the smallest element of the min heap, which is the second smallest element of the original list. This solution has a time complexity of O(n log n) due to the sorting operation and the use of a min heap.

2025-06-12 12:09:20,783  INFO  Time elapsed: 14.4353 seconds

2025-06-12 12:09:20,784  INFO  HumanEval/91
2025-06-12 12:09:37,859  INFO  NEW REFLECTION PROMPT:
1. Read and understand the REQUIREMENTS and the PROPOSED SOLUTION listed below.
2. Analyze the PROPOSED SOLUTION for any errors, inefficiencies or inconsistencies.
3. Generate ONE optimized version of the PROPOSED SOLUTION.
4. If the PROPOSED SOLUTION is already optimal, return the PROPOSED SOLUTION.

REQU

In [None]:
# Release the Colab runtime once the job has finished.
# Results and logs were written to Google Drive paths in earlier cells.
from google.colab import runtime
runtime.unassign()