In [1]:
!pip install peft
!pip install trl

Collecting peft
  Downloading peft-0.11.1-py3-none-any.whl.metadata (13 kB)
Downloading peft-0.11.1-py3-none-any.whl (251 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m251.6/251.6 kB[0m [31m5.6 MB/s[0m eta [36m0:00:00[0m:00:01[0m
[?25hInstalling collected packages: peft
Successfully installed peft-0.11.1
Collecting trl
  Downloading trl-0.8.6-py3-none-any.whl.metadata (11 kB)
Collecting tyro>=0.5.11 (from trl)
  Downloading tyro-0.8.4-py3-none-any.whl.metadata (7.9 kB)
Collecting shtab>=1.5.6 (from tyro>=0.5.11->trl)
  Downloading shtab-1.7.1-py3-none-any.whl.metadata (7.3 kB)
Downloading trl-0.8.6-py3-none-any.whl (245 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m245.2/245.2 kB[0m [31m8.4 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading tyro-0.8.4-py3-none-any.whl (102 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m102.4/102.4 kB[0m [31m5.5 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading shtab-1.7.1-py3-none-any.whl

In [2]:
def reward_model(code, unit_tests):
    '''
    This function should take in a code snippet and a list of unit tests.
    It should return a score that represents how well the code snippet.
    The score is yet to be defined.
    I assume the score will depend on 2 things:
    1. Are there any syntax errors in the code snippet ?
    2. Does the code snippet pass all the unit tests ?
    The score should take into account both of these factors.
    '''
    # If the code is empty, return a specific score
    if code == '':
        print("Code snippet is empty, parsing failed")
        return -2

    # Check for syntax errors
    try:
        compile(code, '<string>', 'exec')
    except SyntaxError as e:
        print(f"Syntax Error: {e}")
        return -1.0

    # Combine the code and unit tests into a single script
    script = code + '\n' + unit_tests
    namespace = {}

    print('### SCRIPT ###')
    print(script)
    print('### SCRIPT ###')

    # Check for NameError and other runtime errors
    try:
        exec(script, namespace)
    except NameError as e:
        print(f"Name Error: {e}")
        return -0.6
    except RuntimeError as e:
        print(f"Runtime Error: {e}")
        return -0.6
    except AssertionError as e:
        print(f"Assertion Error: {e}")
        return -0.3
    except Exception as e:
        print(f"Other Error: {e}")
        return -0.4

    # If no exceptions, return a positive score
    print("Code snippet passed all unit tests")
    return 1.0

In [4]:
import numpy as np
import pandas as pd
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, DataCollatorForLanguageModeling
from peft import get_peft_model, LoraConfig, TaskType
from trl import PPOConfig, PPOTrainer, AutoModelForCausalLMWithValueHead
from datasets import load_dataset
from tqdm import tqdm

In [5]:
def print_trainable_parameters(model):
    """
    Prints the number of trainable parameters in the model.
    """
    trainable_params = 0
    all_param = 0
    for _, param in model.named_parameters():
        all_param += param.numel()
        if param.requires_grad:
            trainable_params += param.numel()
    print(
        f"trainable params: {trainable_params} || all params: {all_param} || trainable%: {100 * trainable_params / all_param}"
    )

In [6]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# We setup the LoRA configuration

peft_config = LoraConfig(
    task_type=TaskType.CAUSAL_LM,
    inference_mode=False,
    r=8,
    lora_alpha=32,
    lora_dropout=0.1
)

# We import model and tokenizer

checkpoint = "microsoft/phi-2"
model = AutoModelForCausalLMWithValueHead.from_pretrained(
    checkpoint, device_map={"": device}, peft_config=peft_config
)
tokenizer = AutoTokenizer.from_pretrained(
    checkpoint, padding_side="left", pad_token='<pad>'
)

if getattr(tokenizer, "pad_token", None) is None:
    tokenizer.pad_token = tokenizer.eos_token

print_trainable_parameters(model)


The secret `HF_TOKEN` does not exist in your Colab secrets.

To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.

You will be able to reuse this secret in all of your notebooks.

Please note that authentication is recommended but still optional to access public models or datasets.



config.json:   0%|          | 0.00/735 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/35.7k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/564M [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]



tokenizer_config.json:   0%|          | 0.00/7.34k [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/798k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.11M [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/1.08k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/99.0 [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


trainable params: 9177601 || all params: 2788861441 || trainable%: 0.3290805654621979


In [7]:
data_path = "/input/subset/exercises_sub_dataset.json"

ppo_config = PPOConfig(
    model_name="microsoft/phi-2",
    learning_rate=1e-5,
    remove_unused_columns=False,
    batch_size=8,
    mini_batch_size=2
    # More hyperparameters can be added here
)

def preprocess_function(examples):
    input_ids = []
    attention_masks = []
    unit_tests = []
    prefix = '''Instruct: For the following function please only generate the solution and only the solution.
    After you finish generating the function asked you should stop the generation. The python code starts with a ```python
 and once you finish generating the function you should close the code with a ```.\nOutput:\n```python\n
    '''

    for docstring, unit_test in zip(examples['docstring'], examples['unit_tests']):
        prompt = prefix + '\n' + docstring
        tokenized = tokenizer(prompt, truncation=True)
        input_ids.append(torch.tensor(tokenized['input_ids']))
        unit_tests.append(unit_test)

    return {
        "input_ids": input_ids,
        "unit_tests": unit_tests
    }

def data_collator(data):
    return {key: [d[key] for d in data] for key in data[0]}

dataset = load_dataset('json', data_files=data_path, split='train')

ds = dataset.map(
    preprocess_function,
    batched=True
)

ds = ds.filter(lambda x: len(x["input_ids"]) < 512, batched=False)

ds.set_format(type="torch")

trainer = PPOTrainer(
    model=model,
    tokenizer=tokenizer,
    config=ppo_config,
    dataset=ds,
    data_collator=data_collator
)

generation_kwargs = {
    # "min_length": -1,
    "top_k": 0.0,
    "top_p": 1.0,
    "do_sample": True,
    "pad_token_id": tokenizer.pad_token_id,
    "eos_token_id": 100_000,
    "max_new_tokens": 200,
    "early_stopping": True,
}

Generating train split: 0 examples [00:00, ? examples/s]

Map:   0%|          | 0/23 [00:00<?, ? examples/s]

Filter:   0%|          | 0/23 [00:00<?, ? examples/s]

In [8]:
ds

Dataset({
    features: ['title', 'docstring', 'unit_tests', 'entry_point', 'description', 'input_ids'],
    num_rows: 23
})

In [9]:
import re

def format_response(response):
    lines = response.split('\n')
    lines = lines[:-1]
    modified_solution = '\n'.join(lines)
    print('####### [RESPONSE] ########')
    print(response)
    print('####### [RESPONSE\] ########')
    return modified_solution

def print_prompt(query_tensor):
    prompt = tokenizer.decode(query_tensor.squeeze())
    print('####### PROMPT ########')
    print(prompt)
    print('####### PROMPT\ ########')


def extract_code(text, occurrence=1):
    # Using regular expression to find all occurrences of text enclosed in triple backticks
    codes = re.findall(r'```python(.*?)```', text, re.DOTALL)
    if len(codes) > occurrence:
        return codes[occurrence].strip()
    else:
        print('We could not parse the response')
        return ''

In [10]:
epochs = 3
imports = '''from typing import Any, List, Dict, Tuple, Optional, Union\nimport math\nimport random\nimport collections\nimport datetime\nimport itertools\n'''
for epoch in tqdm(range(epochs), "epoch: "):
    for batch in tqdm(trainer.dataloader, "batch: "):
        query_tensors = batch['input_ids']

        response_tensors = trainer.generate(query_tensors, **generation_kwargs)
        batch["response"] = tokenizer.batch_decode(response_tensors, skip_special_tokens=True)
        batch["response"] = [imports + extract_code(r) if extract_code(r) else extract_code(r) for r in batch["response"]]

        texts = [r for r in batch['response']]
        unit_tests = [tests for tests in batch['unit_tests']]
        rewards = [torch.tensor(reward_model(code, tests)) for code, tests in zip(texts, unit_tests)]
        print(rewards)

        stats = trainer.step(query_tensors, response_tensors, rewards)
        trainer.log_stats(stats, batch, rewards)

epoch:   0%|          | 0/3 [00:00<?, ?it/s]

batch:   0%|          | 0/2 [00:00<?, ?it/s][AYou're using a CodeGenTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.




We could not parse the response

We could not parse the response

We could not parse the response

We could not parse the response

Syntax Error: expected an indented block after function definition on line 7 (<string>, line 8)

Code snippet is empty, parsing failed

Code snippet is empty, parsing failed

Syntax Error: 'return' outside function (<string>, line 25)

Syntax Error: expected an indented block after function definition on line 7 (<string>, line 8)

Syntax Error: expected an indented block after function definition on line 7 (<string>, line 8)

### SCRIPT ###

from typing import Any, List, Dict, Tuple, Optional, Union

import math

import random

import collections

import datetime

import itertools

def reverseList(inputList: List[Any]) -> List[Any] :

            """

            This function takes a list as an argument and returns a new list which is the reverse of the input list.



            Parameters:

                inputList (List[Any]): A list to be reversed.





batch:  50%|█████     | 1/2 [01:03<01:03, 63.05s/it][A

We could not parse the response

We could not parse the response

We could not parse the response

We could not parse the response

Code snippet is empty, parsing failed

### SCRIPT ###

from typing import Any, List, Dict, Tuple, Optional, Union

import math

import random

import collections

import datetime

import itertools

def bfs(graph: dict, start: str, end: str) -> List[str] :

            """Implement a breadth-first search algorithm to find the shortest path between two nodes in an undirected graph."""

            # TODO: Implement BFS algorithm here

            pass

# Test case 1: BFS should find the shortest path between two connected nodes

assert bfs({"A": ["B"], "B": ["A"]}, "A", "B") == ["A", "B"]

# Test case 2: BFS should find the shortest path between two nodes with multiple paths

assert bfs({"A": ["B", "C"], "B": ["A", "C"], "C": ["B"]}, "A", "C") == ["A", "B", "C"]

# Test case 3: BFS should return an empty list if the start and end nodes are not connected

ass



batch: 100%|██████████| 2/2 [02:07<00:00, 63.69s/it]

epoch:  33%|███▎      | 1/3 [02:07<04:14, 127.38s/it]

batch:   0%|          | 0/2 [00:00<?, ?it/s][A

We could not parse the response

We could not parse the response

We could not parse the response

We could not parse the response

We could not parse the response

We could not parse the response

We could not parse the response

We could not parse the response

Syntax Error: unexpected indent (<string>, line 19)

Code snippet is empty, parsing failed

Syntax Error: expected an indented block after function definition on line 7 (<string>, line 8)

Code snippet is empty, parsing failed

Code snippet is empty, parsing failed

Syntax Error: expected an indented block after function definition on line 7 (<string>, line 8)

Code snippet is empty, parsing failed

Syntax Error: expected an indented block after function definition on line 7 (<string>, line 8)

[tensor(-1.), tensor(-2), tensor(-1.), tensor(-2), tensor(-2), tensor(-1.), tensor(-2), tensor(-1.)]




batch:  50%|█████     | 1/2 [01:03<01:03, 63.44s/it][A

We could not parse the response

We could not parse the response

We could not parse the response

We could not parse the response

Syntax Error: expected an indented block after function definition on line 7 (<string>, line 8)

Syntax Error: expected an indented block after function definition on line 7 (<string>, line 8)

Syntax Error: expected an indented block after function definition on line 7 (<string>, line 8)

Syntax Error: expected an indented block after function definition on line 7 (<string>, line 8)

Code snippet is empty, parsing failed

Code snippet is empty, parsing failed

Syntax Error: expected an indented block after function definition on line 7 (<string>, line 8)

Syntax Error: expected an indented block after function definition on line 7 (<string>, line 8)

[tensor(-1.), tensor(-1.), tensor(-1.), tensor(-1.), tensor(-2), tensor(-2), tensor(-1.), tensor(-1.)]




batch: 100%|██████████| 2/2 [02:06<00:00, 63.27s/it]

epoch:  67%|██████▋   | 2/3 [04:13<02:06, 126.89s/it]

batch:   0%|          | 0/2 [00:00<?, ?it/s][A

We could not parse the response

We could not parse the response

We could not parse the response

We could not parse the response

We could not parse the response

We could not parse the response

Code snippet is empty, parsing failed

Syntax Error: expected an indented block after function definition on line 7 (<string>, line 8)

Syntax Error: unexpected indent (<string>, line 18)

Syntax Error: expected an indented block after function definition on line 7 (<string>, line 8)

Syntax Error: expected an indented block after function definition on line 7 (<string>, line 8)

Syntax Error: unexpected indent (<string>, line 19)

Code snippet is empty, parsing failed

Code snippet is empty, parsing failed

[tensor(-2), tensor(-1.), tensor(-1.), tensor(-1.), tensor(-1.), tensor(-1.), tensor(-2), tensor(-2)]




batch:  50%|█████     | 1/2 [01:05<01:05, 65.72s/it][A

We could not parse the response

We could not parse the response

Syntax Error: unindent does not match any outer indentation level (<string>, line 17)

Syntax Error: expected an indented block after function definition on line 7 (<string>, line 8)

Code snippet is empty, parsing failed

Syntax Error: expected an indented block after function definition on line 7 (<string>, line 8)

Syntax Error: expected an indented block after function definition on line 7 (<string>, line 8)

Syntax Error: expected an indented block after function definition on line 7 (<string>, line 8)

### SCRIPT ###

from typing import Any, List, Dict, Tuple, Optional, Union

import math

import random

import collections

import datetime

import itertools

def bfs(graph: dict, start: str, end: str) -> List[str] :

            """Implement a breadth-first search algorithm to find the shortest path between two nodes in an undirected graph."""

            # TODO: Implement BFS algorithm here

            pass

# Te



batch: 100%|██████████| 2/2 [02:06<00:00, 63.27s/it]

epoch: 100%|██████████| 3/3 [06:20<00:00, 126.83s/it]
