## Model Downloader

In [None]:
%%capture output
# Install the notebook's dependencies; the cell's output is captured into `output`.
# bitsandbytes == 0.43.2 is the minimum required version
! pip install datasets
! pip install peft
! pip install bitsandbytes==0.43.2
! pip install accelerate
! pip install trl
! pip install transformers

## 1. model and drive preparation

In [8]:
import os
import pandas as pd
from google.colab import drive

# Mount Google Drive inside the Colab runtime so files can be written to it.
drive.mount("/content/drive")

# Root folder on Drive that later cells join their save locations onto.
path = "/content/drive/MyDrive/ModelDownloader/"

Mounted at /content/drive


In [9]:
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
import torch
from peft import LoraConfig

# QLoRA configuration: 4-bit NF4 quantization with float16 compute.
# NOTE(review): the model is later loaded with torch_dtype=torch.bfloat16 while
# the compute dtype here is float16 - confirm that the mix is intended.
compute_dtype = torch.float16

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=False,
    bnb_4bit_compute_dtype=compute_dtype,
)

# LoRA adapter settings. No later cell visible in this notebook references
# peft_config; it is kept so the name stays defined for any code that does.
peft_config = LoraConfig(
    task_type="CAUSAL_LM",
    r=8,
    lora_alpha=16,
    lora_dropout=0.1,
    bias="none",
)

In [10]:
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

# Hugging Face Hub repository ids. Only the CodeLlama id is used by the cells
# visible in this notebook; the DeepSeek id is defined but not loaded here.
model_name_llama_coder = "codellama/CodeLlama-7b-hf"
model_name_deepseek_coder = "deepseek-ai/deepseek-coder-6.7b-base"

# Forwarded to model.generate(max_length=...) by getAnswer.
max_length = 512

# Passed as torch_dtype when the model is loaded from the Hub.
data_type = torch.bfloat16

In [11]:
# Download the CodeLlama tokenizer from the Hub.
tokenizer_llama_coder = AutoTokenizer.from_pretrained(
    model_name_llama_coder,
    trust_remote_code=True,
)

# Download the model and load it with the 4-bit config defined earlier,
# mapping the whole model ("" key) to device 0.
model_llama_coder = AutoModelForCausalLM.from_pretrained(
    model_name_llama_coder,
    quantization_config=bnb_config,
    torch_dtype=data_type,
    device_map={"": 0},
    trust_remote_code=True,
)

# Config tweaks applied after loading.
# NOTE(review): these look like fine-tuning preparation - confirm they are
# wanted in the copy that gets saved to Drive.
model_llama_coder.config.pretraining_tp = 1
model_llama_coder.config.use_cache = False

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/749 [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.84M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/411 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/637 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/25.1k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/9.98G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/3.50G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

In [17]:
# Destination folders below the Drive root `path`; the model and the tokenizer
# each get their own sub-folder of CodeLlama/.
model_save_path = os.path.join(path, "CodeLlama/model")
tokenizer_save_path = os.path.join(path, "CodeLlama/tokenizer")

In [15]:
# Write the tokenizer files to tokenizer_save_path on the mounted Drive.
# NOTE(review): the recorded output of this cell lists files directly under
# .../CodeLlama/ rather than .../CodeLlama/tokenizer, so it appears to have run
# with an earlier value of tokenizer_save_path - re-run it after the path cell.
tokenizer_llama_coder.save_pretrained(tokenizer_save_path)

('/content/drive/MyDrive/ModelDownloader/CodeLlama/tokenizer_config.json',
 '/content/drive/MyDrive/ModelDownloader/CodeLlama/special_tokens_map.json',
 '/content/drive/MyDrive/ModelDownloader/CodeLlama/tokenizer.model',
 '/content/drive/MyDrive/ModelDownloader/CodeLlama/added_tokens.json',
 '/content/drive/MyDrive/ModelDownloader/CodeLlama/tokenizer.json')

In [18]:
# Persist the loaded model to model_save_path so a later cell can reload it
# from Drive instead of the Hub.
model_llama_coder.save_pretrained(model_save_path)

## 2. evaluation

In [22]:
# QA task
def getAnswer(model, tokenizer, question, skip_tokens=True, max_len=None):
    """Generate a completion for ``question`` and return it as decoded text.

    Args:
        model: Object exposing ``device`` and ``generate(**inputs, max_length=...)``.
        tokenizer: Callable that encodes ``question`` (it is called with
            ``return_tensors="pt"``) and provides ``decode``.
        question: Prompt text handed to the tokenizer unchanged.
        skip_tokens: Forwarded to ``decode`` as ``skip_special_tokens``.
        max_len: Optional value for ``generate``'s ``max_length``. When left as
            ``None`` the notebook-level ``max_length`` is used, which keeps
            existing four-argument calls behaving exactly as before.

    Returns:
        The decoded first sequence returned by ``generate``. In the recorded
        runs of this notebook that text starts with the prompt itself.
    """
    # Resolve the limit first so an explicit override never touches the global.
    limit = max_length if max_len is None else max_len
    inputs = tokenizer(question, return_tensors="pt").to(model.device)
    outputs = model.generate(**inputs, max_length=limit)
    return tokenizer.decode(outputs[0], skip_special_tokens=skip_tokens)

# FIM task: an Objective-C merge routine split into the code before the gap
# (prefix) and the code after it (suffix).
prefix = (
    "- (void)merge:(NSMutableArray *)left and:(NSMutableArray *)right into:(NSMutableArray *)array {\n"
    "    NSInteger leftIndex = 0;\n"
    "    NSInteger rightIndex = 0;\n"
    "    \n"
)
suffix = (
    "    while (leftIndex < left.count) {\n"
    "        [array addObject:left[leftIndex]];\n"
    "        leftIndex++;\n"
    "    }\n"
    "\n"
    "    while (rightIndex < right.count) {\n"
    "        [array addObject:right[rightIndex]];\n"
    "        rightIndex++;\n"
    "    }\n"
    "}\n"
)

# Preview of the task with a placeholder comment marking the gap.
task_desp = f"{prefix}    // code to be completed\n\n{suffix}"
print(task_desp)

- (void)merge:(NSMutableArray *)left and:(NSMutableArray *)right into:(NSMutableArray *)array {
    NSInteger leftIndex = 0;
    NSInteger rightIndex = 0;
    
    // code to be completed

    while (leftIndex < left.count) {
        [array addObject:left[leftIndex]];
        leftIndex++;
    }

    while (rightIndex < right.count) {
        [array addObject:right[rightIndex]];
        rightIndex++;
    }
}



*original model*

In [20]:
# Plain question-answer prompt against the model loaded from the Hub.
question = "write a merge sort in c++"
answer = getAnswer(
    model=model_llama_coder,
    tokenizer=tokenizer_llama_coder,
    question=question,
)
print(answer)

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


write a merge sort in c++

\begin{code}
#include <iostream>
#include <vector>
#include <algorithm>
using namespace std;

void merge(vector<int> &arr, int l, int m, int r)
{
    int n1 = m - l + 1;
    int n2 = r - m;

    vector<int> L(n1);
    vector<int> R(n2);

    for (int i = 0; i < n1; i++)
        L[i] = arr[l + i];
    for (int j = 0; j < n2; j++)
        R[j] = arr[m + 1 + j];

    int i = 0;
    int j = 0;
    int k = l;
    while (i < n1 && j < n2)
    {
        if (L[i] <= R[j])
        {
            arr[k] = L[i];
            i++;
        }
        else
        {
            arr[k] = R[j];
            j++;
        }
        k++;
    }

    while (i < n1)
    {
        arr[k] = L[i];
        i++;
        k++;
    }

    while (j < n2)
    {
        arr[k] = R[j];
        j++;
        k++;
    }
}

void mergeSort(vector<int> &arr, int l, int r)
{
    if (l < r)
    {
        int m = l + (r - l) / 2;

        mergeSort(arr, l, m);
        mergeSort(arr, m + 1, r);

        me

In [24]:
# Fill-in-the-middle prompt: prefix and suffix wrapped in the <PRE>/<SUF>/<MID>
# markers. Special tokens are kept in the decoded text so the markers can be
# located afterwards.
question = "<PRE> " + prefix + " <SUF>" + suffix + " <MID>"
answer = getAnswer(
    model_llama_coder,
    tokenizer_llama_coder,
    question,
    skip_tokens=False,
)
print(answer)

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


<s> <PRE> - (void)merge:(NSMutableArray *)left and:(NSMutableArray *)right into:(NSMutableArray *)array {
    NSInteger leftIndex = 0;
    NSInteger rightIndex = 0;
    
 <SUF>    while (leftIndex < left.count) {
        [array addObject:left[leftIndex]];
        leftIndex++;
    }

    while (rightIndex < right.count) {
        [array addObject:right[rightIndex]];
        rightIndex++;
    }
}
 <MID>    while (leftIndex < left.count && rightIndex < right.count) {
        if ([left[leftIndex] compare:right[rightIndex]] == NSOrderedAscending) {
            [array addObject:left[leftIndex]];
            leftIndex++;
        } else {
            [array addObject:right[rightIndex]];
            rightIndex++;
        }
    }

 <EOT></s>


In [25]:
# Cut the generated middle section out of the decoded FIM answer and print it
# in green between the original prefix and suffix.
start = answer.find('<MID>')
if start == -1:
    # Without the marker there is nothing meaningful to slice; fail loudly
    # instead of printing an arbitrary piece of the answer.
    raise ValueError("FIM answer contains no '<MID>' marker")
mid_begin = start + len('<MID>')

end = answer.find('<EOT>', mid_begin)
if end == -1:
    # No <EOT> marker (for example when generation stops at the length limit):
    # keep everything after <MID> rather than silently dropping trailing
    # characters through a negative slice bound.
    mid_code = answer[mid_begin:]
else:
    # end - 1 drops the single separator character that precedes <EOT> in the
    # decoded text.
    mid_code = answer[mid_begin:end - 1]

print(prefix + '\033[92m' + mid_code + '\033[0m' + suffix)

- (void)merge:(NSMutableArray *)left and:(NSMutableArray *)right into:(NSMutableArray *)array {
    NSInteger leftIndex = 0;
    NSInteger rightIndex = 0;
    
[92m    while (leftIndex < left.count && rightIndex < right.count) {
        if ([left[leftIndex] compare:right[rightIndex]] == NSOrderedAscending) {
            [array addObject:left[leftIndex]];
            leftIndex++;
        } else {
            [array addObject:right[rightIndex]];
            rightIndex++;
        }
    }

[0m    while (leftIndex < left.count) {
        [array addObject:left[leftIndex]];
        leftIndex++;
    }

    while (rightIndex < right.count) {
        [array addObject:right[rightIndex]];
        rightIndex++;
    }
}



*local model*

In [29]:
# Reload the tokenizer from the copy saved under tokenizer_save_path instead of
# fetching it from the Hub.
tokenizer_llama_coder_local = AutoTokenizer.from_pretrained(tokenizer_save_path)

In [30]:
# Reload the model from the copy saved under model_save_path. No
# quantization_config is passed here; the log printed by this cell still
# reports the model as quantized.
model_llama_coder_local = AutoModelForCausalLM.from_pretrained(model_save_path)

Unused kwargs: ['_load_in_4bit', '_load_in_8bit', 'quant_method']. These kwargs are not used in <class 'transformers.utils.quantization_config.BitsAndBytesConfig'>.
`low_cpu_mem_usage` was None, now default to True since model is quantized.


In [31]:
# Same question-answer prompt, this time against the model reloaded from Drive.
question = "write a merge sort in c++"
answer = getAnswer(
    model=model_llama_coder_local,
    tokenizer=tokenizer_llama_coder_local,
    question=question,
)
print(answer)

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


write a merge sort in c++

\begin{code}
#include <iostream>
#include <vector>
#include <algorithm>
using namespace std;

void merge(vector<int> &arr, vector<int> &temp, int left, int mid, int right)
{
    int i = left;
    int j = mid + 1;
    int k = left;
    while (i <= mid && j <= right)
    {
        if (arr[i] <= arr[j])
        {
            temp[k++] = arr[i++];
        }
        else
        {
            temp[k++] = arr[j++];
        }
    }
    while (i <= mid)
    {
        temp[k++] = arr[i++];
    }
    while (j <= right)
    {
        temp[k++] = arr[j++];
    }
    for (int i = left; i <= right; i++)
    {
        arr[i] = temp[i];
    }
}

void mergeSort(vector<int> &arr, vector<int> &temp, int left, int right)
{
    if (left < right)
    {
        int mid = (left + right) / 2;
        mergeSort(arr, temp, left, mid);
        mergeSort(arr, temp, mid + 1, right);
        merge(arr, temp, left, mid, right);
    }
}

int main()
{
    vector<int> arr = {1, 3, 5, 7, 9, 2, 

In [33]:
# Fill-in-the-middle prompt against the model reloaded from Drive. Special
# tokens are kept in the decoded text so the markers can be located afterwards.
question = "<PRE> " + prefix + " <SUF>" + suffix + " <MID>"
answer = getAnswer(
    model_llama_coder_local,
    tokenizer_llama_coder_local,
    question,
    skip_tokens=False,
)
print(answer)

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


<s> <PRE> - (void)merge:(NSMutableArray *)left and:(NSMutableArray *)right into:(NSMutableArray *)array {
    NSInteger leftIndex = 0;
    NSInteger rightIndex = 0;
    
 <SUF>    while (leftIndex < left.count) {
        [array addObject:left[leftIndex]];
        leftIndex++;
    }

    while (rightIndex < right.count) {
        [array addObject:right[rightIndex]];
        rightIndex++;
    }
}
 <MID>    while (leftIndex < left.count && rightIndex < right.count) {
        if ([left[leftIndex] compare:right[rightIndex]] == NSOrderedAscending) {
            [array addObject:left[leftIndex]];
            leftIndex++;
        } else {
            [array addObject:right[rightIndex]];
            rightIndex++;
        }
    }

 <EOT></s>


In [34]:
# Cut the generated middle section out of the decoded FIM answer and print it
# in green between the original prefix and suffix.
start = answer.find('<MID>')
if start == -1:
    # Without the marker there is nothing meaningful to slice; fail loudly
    # instead of printing an arbitrary piece of the answer.
    raise ValueError("FIM answer contains no '<MID>' marker")
mid_begin = start + len('<MID>')

end = answer.find('<EOT>', mid_begin)
if end == -1:
    # No <EOT> marker (for example when generation stops at the length limit):
    # keep everything after <MID> rather than silently dropping trailing
    # characters through a negative slice bound.
    mid_code = answer[mid_begin:]
else:
    # end - 1 drops the single separator character that precedes <EOT> in the
    # decoded text.
    mid_code = answer[mid_begin:end - 1]

print(prefix + '\033[92m' + mid_code + '\033[0m' + suffix)

- (void)merge:(NSMutableArray *)left and:(NSMutableArray *)right into:(NSMutableArray *)array {
    NSInteger leftIndex = 0;
    NSInteger rightIndex = 0;
    
[92m    while (leftIndex < left.count && rightIndex < right.count) {
        if ([left[leftIndex] compare:right[rightIndex]] == NSOrderedAscending) {
            [array addObject:left[leftIndex]];
            leftIndex++;
        } else {
            [array addObject:right[rightIndex]];
            rightIndex++;
        }
    }

[0m    while (leftIndex < left.count) {
        [array addObject:left[leftIndex]];
        leftIndex++;
    }

    while (rightIndex < right.count) {
        [array addObject:right[rightIndex]];
        rightIndex++;
    }
}

