In [1]:
import os

os.environ["CUDA_VISIBLE_DEVICES"] = "1"

from dataclasses import dataclass, field
from typing import Optional
import contextlib

import torch
from datasets import load_dataset
from peft import LoraConfig
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    HfArgumentParser,
    AutoTokenizer,
    TrainingArguments,
)
from peft import prepare_model_for_kbit_training, LoraConfig, get_peft_model, PeftModel


In [2]:

model = "bigcode/starcoder2-7b"
tokenizer = AutoTokenizer.from_pretrained(model, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    model,
    quantization_config=None,
    device_map=None,
    trust_remote_code=True,
    torch_dtype=torch.bfloat16,
)

# model = model.merge_and_unload()
if not hasattr(model, "hf_device_map"):
    model.cuda()


Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

In [3]:

def get_code_completion(prefix, suffix):
    text = prompt = f"""<fim_prefix>{prefix}<fim_suffix>{suffix}<fim_middle>"""
    model.eval()
    outputs = model.generate(
        input_ids=tokenizer(text, return_tensors="pt").input_ids.cuda(),
        max_new_tokens=128,
        temperature=0.2,
        top_k=50,
        top_p=0.95,
        do_sample=True,
        repetition_penalty=1.0,
    )
    return tokenizer.batch_decode(outputs, skip_special_tokens=False)[0]


In [4]:
prefix = """from accelerate import Accelerator

accelerator = Accelerator()

model, optimizer, training_dataloader, scheduler = """

suffix = """"""
print(get_code_completion(prefix, suffix))

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:49152 for open-end generation.


<fim_prefix>from accelerate import Accelerator

accelerator = Accelerator()

model, optimizer, training_dataloader, scheduler = <fim_suffix><fim_middle>accelerator.prepare(model, optimizer, training_dataloader, scheduler)

# + [markdown] id="545b2b7c"
# ## Training

# + id="36859297"
from tqdm.auto import tqdm

for epoch in range(num_epochs):
    model.train()
    for batch in tqdm(training_dataloader):
        optimizer.zero_grad()
        outputs = model(batch["input_ids"], batch["attention_mask"], batch["labels"])
        loss = outputs.loss
        accelerator.backward(loss)
       


In [5]:
prefix = """\
from peft import LoraConfig, TaskType, get_peft_model
from transformers import AutoModelForCausalLM

peft_config = LoraConfig("""

suffix = ")"
print(get_code_completion(prefix, suffix))

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:49152 for open-end generation.


<fim_prefix>from peft import LoraConfig, TaskType, get_peft_model
from transformers import AutoModelForCausalLM

peft_config = LoraConfig(<fim_suffix>)<fim_middle>r=8, lora_alpha=32, lora_dropout=0.05, target_modules=["q_proj", "v_proj"])
model = AutoModelForCausalLM.from_pretrained(model_name)
model = get_peft_model(model, peft_config)
model.print_trainable_parameters()

# + id="5522040d"
from peft import PeftModel

model = PeftModel.from_pretrained(model, "microsoft/DialoGPT-small")



In [6]:
prefix = """
# Here is the correct implementation of the two sum code exercise
# time complexity: O(N)
# space complexity: O(N)
def two_sum(arr, target_sum):
"""

suffix = """"""
print(get_code_completion(prefix, suffix))

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:49152 for open-end generation.


<fim_prefix>
# Here is the correct implementation of the two sum code exercise
# time complexity: O(N)
# space complexity: O(N)
def two_sum(arr, target_sum):
<fim_suffix><fim_middle>    # create a set to store the numbers we've seen
    # so far
    numbers_seen = set()

    # loop through the array
    for num in arr:
        # calculate the target
        target = target_sum - num

        # check if the target is in the set
        if target in numbers_seen:
            return True

        # add the current number to the set
        numbers_seen.add(num)

    # return False if we didn't find a match
    return False
<file_sep><fim_prefix><fim_suffix>   # if the current node is the target, return True
    if current_node.value == target:
       


In [7]:
prefix = """import math
import re
import warnings
from dataclasses import asdict, dataclass, field, replace
from enum import Enum
from typing import List, Optional, Tuple, Union

import torch
import torch.nn as nn
import torch.nn.functional as F
from tqdm import tqdm
from transformers.pytorch_utils import Conv1D

from ..config import PeftConfig
from ..import_utils import is_bnb_4bit_available, is_bnb_available
from ..utils import (
    CLAMP_QUANTILE,
    COMMON_LAYERS_PATTERN,
    TRANSFORMERS_MODELS_TO_LORA_TARGET_MODULES_MAPPING,
    ModulesToSaveWrapper,
    PeftType,
    _freeze_adapter,
    _get_submodules,
    transpose,
)
from .tuners_utils import BaseTuner, BaseTunerLayer

@dataclass
class BottleneckAdapterConfig(PeftConfig):
    \"""
    """

suffix = """
    \""" \
"""

print(get_code_completion(prefix, suffix))

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:49152 for open-end generation.


<fim_prefix>import math
import re
from dataclasses import asdict, dataclass, field, replace
from enum import Enum
from typing import List, Optional, Tuple, Union

import torch
import torch.nn as nn
import torch.nn.functional as F
from tqdm import tqdm
from transformers.pytorch_utils import Conv1D

from..config import PeftConfig
from..import_utils import is_bnb_4bit_available, is_bnb_available
from..utils import (
    CLAMP_QUANTILE,
    COMMON_LAYERS_PATTERN,
    TRANSFORMERS_MODELS_TO_LORA_TARGET_MODULES_MAPPING,
    ModulesToSaveWrapper,
    PeftType,
    _freeze_adapter,
    _get_submodules,
    transpose,
)
from.tuners_utils import BaseTuner, BaseTunerLayer

@dataclass
class BottleneckAdapterConfig(PeftConfig):
    """
    <fim_suffix>
    """ <fim_middle>Configuration for the Bottleneck Adapter.
    """
    dim: int = field(
        default=4,
        metadata={
            "help": "The dimension of the bottleneck adapter."
        },
    )
    hidden_dim: int = field(
        def