In [1]:
%%time
import os
os.environ["CUDA_VISIBLE_DEVICES"]="2"

from dataclasses import dataclass, field
from typing import Optional
import contextlib

import torch
from datasets import load_dataset
from peft import LoraConfig
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    HfArgumentParser,
    AutoTokenizer,
    TrainingArguments,
)


from trl import SFTTrainer

from peft import (
    prepare_model_for_kbit_training,
    LoraConfig,
    get_peft_model,
    PeftModel
)

model = "bigcode/starcoder"
tokenizer = AutoTokenizer.from_pretrained(model, trust_remote_code=True, padding_side="left")
model = AutoModelForCausalLM.from_pretrained(
    model, quantization_config=None, 
    device_map=None, 
    trust_remote_code=True, 
    torch_dtype=torch.bfloat16,
    low_cpu_mem_usage=True
)

model_id = "smangrul/peft-lora-starcoderplus-chat-asst-A100-40GB-colab"
model = PeftModel.from_pretrained(model, model_id, adapter_name="assistant")

model_id = "smangrul/peft-lora-starcoder15B-v2-personal-copilot-A100-40GB-colab"
_ = model.load_adapter(model_id, adapter_name="copilot")

if not hasattr(model, "hf_device_map"):
    model.cuda()

[2023-09-05 15:38:56,889] [INFO] [real_accelerator.py:133:get_accelerator] Setting ds_accelerator to cuda (auto detect)


Loading checkpoint shards:   0%|          | 0/7 [00:00<?, ?it/s]

CPU times: user 5min 6s, sys: 2min 54s, total: 8min 1s
Wall time: 3min 27s


In [2]:
system_prompt = """You are a helpful, respectful and honest assistant. Always answer as helpfully \
as possible, while being safe. Your answers should not include any harmful, \
unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that \
your responses are socially unbiased and positive in nature.

If a question does not make any sense, or is not factually coherent, explain why \
instead of answering something not correct. If you don’t know the answer to a \
question, please don’t share false information."""

def get_chat_prompt(query):
    return f"<|system|> {system_prompt} <|endoftext|> <|prompter|> {query} <|endoftext|> <|assistant|>"

def get_completion_prompt(prefix, suffix):
    return f"""<fim_prefix>{prefix}<fim_suffix>{suffix}<fim_middle>"""
    

In [3]:
chat_prompts = ["How to use scrapy? Explain with an example.", 
                "Write a function to compute the quantiles of a normal distribution given number of buckets `n`."]
                

chat_prompts  = [get_chat_prompt(prompt) for prompt in chat_prompts]

print(chat_prompts)

['<|system|> You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature.\n\nIf a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don’t know the answer to a question, please don’t share false information. <|endoftext|> <|prompter|> How to use scrapy? Explain with an example. <|endoftext|> <|assistant|>', '<|system|> You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature.\n\nIf a question does not make any sense, or is not factually co

In [4]:
completion_prompt_prefixes = [
    """
# Here is the correct implementation of the two sum code exercise
# time complexity: O(N)
# space complexity: O(N)
def two_sum(arr, target_sum):
""",
    """\
from peft import LoraConfig, TaskType, get_peft_model
from transformers import AutoModelForCausalLM

peft_config = LoraConfig(""",
    """from accelerate import Accelerator

accelerator = Accelerator()

model, optimizer, training_dataloader, scheduler = """   
]

completion_prompt_suffixes = [
    """""",
    """)""",
    """"""   
]

completion_prompts = [get_completion_prompt(prefix, suffix) for prefix, suffix in zip(completion_prompt_prefixes, completion_prompt_suffixes)]

print(completion_prompts)


['<fim_prefix>\n# Here is the correct implementation of the two sum code exercise\n# time complexity: O(N)\n# space complexity: O(N)\ndef two_sum(arr, target_sum):\n<fim_suffix><fim_middle>', '<fim_prefix>from peft import LoraConfig, TaskType, get_peft_model\nfrom transformers import AutoModelForCausalLM\n\npeft_config = LoraConfig(<fim_suffix>)<fim_middle>', '<fim_prefix>from accelerate import Accelerator\n\naccelerator = Accelerator()\n\nmodel, optimizer, training_dataloader, scheduler = <fim_suffix><fim_middle>']


In [14]:
adapter_names = []
prompts = []
for chat_prompt, completion_prompt in zip(chat_prompts, completion_prompts):
    adapter_names.append("base")
    prompts.append(chat_prompt)
    
    adapter_names.append("assistant")
    prompts.append(chat_prompt)
    
    adapter_names.append("base")
    prompts.append(completion_prompt)
    
    adapter_names.append("copilot")
    prompts.append(completion_prompt)
    
print(adapter_names)
print(prompts)

['base', 'assistant', 'base', 'copilot', 'base', 'assistant', 'base', 'copilot']
['<|system|> You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature.\n\nIf a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don’t know the answer to a question, please don’t share false information. <|endoftext|> <|prompter|> How to use scrapy? Explain with an example. <|endoftext|> <|assistant|>', '<|system|> You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and pos

In [15]:
tokenizer = AutoTokenizer.from_pretrained("bigcode/starcoder", trust_remote_code=True, padding_side="left")
tokenizer.pad_token = 0


In [16]:
def get_model_pred(prompts, adapter_names):
    model.eval()
    inputs = tokenizer(prompts, return_tensors="pt", padding=True)
    if "token_type_ids" in inputs:
        del inputs["token_type_ids"]
    inputs = {k:v.cuda() for k,v in inputs.items()}
    outputs = model.generate(**inputs,
                             adapter_names=adapter_names,
                                 max_new_tokens=128,
                                 temperature=0.2,
                                 top_k=50,
                                 top_p=0.95,
                                 do_sample=True,
                                 repetition_penalty=1.1,
                                 eos_token_id = tokenizer.eos_token_id)
    return tokenizer.batch_decode(outputs, skip_special_tokens=False)

In [35]:
%%time
outputs = get_model_pred(prompts, adapter_names)

for output in outputs:
    print("------------------------------------------------")
    print(output)
    print("------------------------------------------------")

Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.


------------------------------------------------
0000000<|system|> You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature.

If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don’t know the answer to a question, please don’t share false information. <|endoftext|> <|prompter|> How to use scrapy? Explain with an example. <|endoftext|> <|assistant|> What do you think about this article? Is it true? Why/why not?
 <|assistant|> I have never heard of this person before. Can you tell me more about them?
 <|assistant|> Who is this person?
 <|assistant|> What do you think about this movie?
 <|assistant|> Are you sure about this?
 <|assistant|> Do you agree with this statement

In [38]:
%%time
for prompt, adapter_name in zip(prompts, adapter_names):
    if adapter_name != "base":
        model.set_adapter(adapter_name)
        output = get_model_pred(prompt, None)
    else:
        with model.disable_adapter():
            output = get_model_pred(prompt, None)
    print("------------------------------------------------")
    print(output[0])
    print("------------------------------------------------")

Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.


------------------------------------------------
<|system|> You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature.

If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don’t know the answer to a question, please don’t share false information. <|endoftext|> <|prompter|> How to use scrapy? Explain with an example. <|endoftext|> <|assistant|> I am a bot made by <NAME>. I can help you search for things on Google. Just say "search" followed by what you want to search for. For example: "search how to use scrapy".
 <|assistant|> I am a bot made by <NAME>. I can help you find information about movies and TV shows. Just say "movie/tv show" followed by the name of the movie or

Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.


------------------------------------------------
<|system|> You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature.

If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don’t know the answer to a question, please don’t share false information. <|endoftext|> <|prompter|> How to use scrapy? Explain with an example. <|endoftext|> <|assistant|> Scrapy is a web crawling framework written in Python. It allows users to write their own programs to scrape data from websites.

An example: Suppose we want to scrape all the links on https://www.google.com/ using Scrapy. We can create a new project by running `scrapy startproject google`. This will create a directory called google

Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.


------------------------------------------------
<fim_prefix>
# Here is the correct implementation of the two sum code exercise
# time complexity: O(N)
# space complexity: O(N)
def two_sum(arr, target_sum):
<fim_suffix><fim_middle>    seen = {}
    for i in range(len(arr)):
        if arr[i] in seen:
            return [seen[arr[i]], i]
        else:
            seen[target_sum - arr[i]] = i

<|endoftext|>
------------------------------------------------


Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.


------------------------------------------------
<fim_prefix>
# Here is the correct implementation of the two sum code exercise
# time complexity: O(N)
# space complexity: O(N)
def two_sum(arr, target_sum):
<fim_suffix><fim_middle>    seen = {}
    for i in range(len(arr)):
        if arr[i] in seen:
            return [seen[arr[i]], i]
        else:
            seen[target_sum - arr[i]] = i

<|endoftext|>
------------------------------------------------


Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.


------------------------------------------------
<|system|> You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature.

If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don’t know the answer to a question, please don’t share false information. <|endoftext|> <|prompter|> Write a function to compute the quantiles of a normal distribution given number of buckets `n`. <|endoftext|> <|assistant|> I am an AI chatbot trained on Cornell Movie Dialogs dataset. I can answer questions about movies, TV shows, books, games etc. I was created by <NAME>.
 <|assistant|> I am an AI chatbot trained on Cornell Movie Dialogs dataset. I can answer questions about movies, TV shows, books, 

Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.


------------------------------------------------
<|system|> You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature.

If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don’t know the answer to a question, please don’t share false information. <|endoftext|> <|prompter|> Write a function to compute the quantiles of a normal distribution given number of buckets `n`. <|endoftext|> <|assistant|> Here's an example solution:
```python
def get_quantiles(data, n):
    """
    Compute the quantiles of a normal distribution given data points and 
    number of buckets (n).

    Parameters
    ----------
    data : list
        A list of numerical values for which we want to cal

Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.


------------------------------------------------
<fim_prefix>from peft import LoraConfig, TaskType, get_peft_model
from transformers import AutoModelForCausalLM

peft_config = LoraConfig(<fim_suffix>)<fim_middle>
    task=TaskType.CausalLM,
    model="gpt2",
    tokenizer="gpt2"
)


def test_get_peft_model():

    # Test with a string
    model = get_peft_model("gpt2")
    assert isinstance(model, AutoModelForCausalLM<|endoftext|>
------------------------------------------------
------------------------------------------------
<fim_prefix>from peft import LoraConfig, TaskType, get_peft_model
from transformers import AutoModelForCausalLM

peft_config = LoraConfig(<fim_suffix>)<fim_middle>task_type=TaskType.CAUSAL_LM, inference_mode=False, r=8, lora_alpha=32, lora_dropout=0.1<|endoftext|>
------------------------------------------------
CPU times: user 22.1 s, sys: 18 µs, total: 22.1 s
Wall time: 22 s
