In [1]:
import os

os.environ["CUDA_VISIBLE_DEVICES"] = "0"

from dataclasses import dataclass, field
from typing import Optional
import contextlib

import torch
from datasets import load_dataset
from peft import LoraConfig
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    HfArgumentParser,
    AutoTokenizer,
    TrainingArguments,
)
from peft import prepare_model_for_kbit_training, LoraConfig, get_peft_model, PeftModel


In [2]:

model = "bigcode/starcoder2-7b"
tokenizer = AutoTokenizer.from_pretrained(model, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    model,
    quantization_config=None,
    device_map=None,
    trust_remote_code=True,
    torch_dtype=torch.bfloat16,
)

# model = model.merge_and_unload()
if not hasattr(model, "hf_device_map"):
    model.cuda()

# model_id = "smangrul/peft-lora-starcoder15B-v2-personal-copilot-A100-40GB-colab"
model_id = "data/peft-lora-starcoder2-7b-personal-copilot-dual-3090-local"
model = PeftModel.from_pretrained(model, model_id, adapter_name="copilot")

# model_id = "smangrul/peft-lora-starcoder15B-v2-personal-copilot-A100-40GB-colab"
# model = PeftModel.from_pretrained(model, model_id, adapter_name="personal_copilot")
# model.add_weighted_adapter(["personal_copilot"], [0.8], "best_personal_copilot")
# model.set_adapter("best_personal_copilot")



Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

In [3]:

def get_code_completion(prefix, suffix):
    text = prompt = f"""<fim_prefix>{prefix}<fim_suffix>{suffix}<fim_middle>"""
    model.eval()
    outputs = model.generate(
        input_ids=tokenizer(text, return_tensors="pt").input_ids.cuda(),
        max_new_tokens=128,
        temperature=0.2,
        top_k=50,
        top_p=0.95,
        do_sample=True,
        repetition_penalty=1.0,
    )
    return tokenizer.batch_decode(outputs, skip_special_tokens=False)[0]


In [4]:
prefix = """from accelerate import Accelerator

accelerator = Accelerator()

model, optimizer, training_dataloader, scheduler = """

suffix = """"""
print(get_code_completion(prefix, suffix))

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:49152 for open-end generation.


<fim_prefix>from accelerate import Accelerator

accelerator = Accelerator()

model, optimizer, training_dataloader, scheduler = <fim_suffix><fim_middle>accelerator.prepare(model, optimizer, training_dataloader, scheduler)

for epoch in range(num_epochs):
    for batch in training_dataloader:
        outputs = model(**batch)
        loss = outputs.loss
        accelerator.backward(loss)
        optimizer.step()
        optimizer.zero_grad()
        scheduler.step()
        accelerator.sync_gradients(model)
```

## Inference

```python
import torch

model = torch.load("path/to/model.pt")

with torch.no_grad():
    outputs = model(**batch)



In [5]:
prefix = """\
from peft import LoraConfig, TaskType, get_peft_model
from transformers import AutoModelForCausalLM

peft_config = LoraConfig("""

suffix = ")"
print(get_code_completion(prefix, suffix))

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:49152 for open-end generation.


<fim_prefix>from peft import LoraConfig, TaskType, get_peft_model
from transformers import AutoModelForCausalLM

peft_config = LoraConfig(<fim_suffix>)<fim_middle>
    r=64,
    lora_alpha=32,
    lora_dropout=0.1,
    bias="none",
    task_type=TaskType.CAUSAL_LM,
)
model = AutoModelForCausalLM.from_pretrained("bigscience/bloom-560m")
model = get_peft_model(model, peft_config<|endoftext|># coding=utf-8
# Copyright 2023 The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.


In [6]:
prefix = """
# Here is the correct implementation of the two sum code exercise
# time complexity: O(N)
# space complexity: O(N)
def two_sum(arr, target_sum):
"""

suffix = """"""
print(get_code_completion(prefix, suffix))

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:49152 for open-end generation.


<fim_prefix>
# Here is the correct implementation of the two sum code exercise
# time complexity: O(N)
# space complexity: O(N)
def two_sum(arr, target_sum):
<fim_suffix><fim_middle>    # create a hash table to store the values and their indices
    hash_table = {}
    for i, num in enumerate(arr):
        # check if the complement is already in the hash table
        if target_sum - num in hash_table:
            return [hash_table[target_sum - num], i]
        # store the value and its index in the hash table
        hash_table[num] = i
    return None
<fim_middle>    # iterate over the array
    for i, num in enumerate(arr):
        # check if the complement is in the array
        if target_sum - num in


In [7]:
prefix = """import math
import re
import warnings
from dataclasses import asdict, dataclass, field, replace
from enum import Enum
from typing import List, Optional, Tuple, Union

import torch
import torch.nn as nn
import torch.nn.functional as F
from tqdm import tqdm
from transformers.pytorch_utils import Conv1D

from ..config import PeftConfig
from ..import_utils import is_bnb_4bit_available, is_bnb_available
from ..utils import (
    CLAMP_QUANTILE,
    COMMON_LAYERS_PATTERN,
    TRANSFORMERS_MODELS_TO_LORA_TARGET_MODULES_MAPPING,
    ModulesToSaveWrapper,
    PeftType,
    _freeze_adapter,
    _get_submodules,
    transpose,
)
from .tuners_utils import BaseTuner, BaseTunerLayer

@dataclass
class BottleneckAdapterConfig(PeftConfig):
    \"""
    """

suffix = """
    \""" \
"""

print(get_code_completion(prefix, suffix))

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:49152 for open-end generation.


<fim_prefix>import math
import re
from dataclasses import asdict, dataclass, field, replace
from enum import Enum
from typing import List, Optional, Tuple, Union

import torch
import torch.nn as nn
import torch.nn.functional as F
from tqdm import tqdm
from transformers.pytorch_utils import Conv1D

from..config import PeftConfig
from..import_utils import is_bnb_4bit_available, is_bnb_available
from..utils import (
    CLAMP_QUANTILE,
    COMMON_LAYERS_PATTERN,
    TRANSFORMERS_MODELS_TO_LORA_TARGET_MODULES_MAPPING,
    ModulesToSaveWrapper,
    PeftType,
    _freeze_adapter,
    _get_submodules,
    transpose,
)
from.tuners_utils import BaseTuner, BaseTunerLayer

@dataclass
class BottleneckAdapterConfig(PeftConfig):
    """
    <fim_suffix>
    """ <fim_middle>This is the configuration class to store the configuration of a [`BottleneckAdapter`]. It is used to instantiate a
    BottleneckAdapter according to the specified arguments, defining the model architecture. Instantiating a
    co