In [1]:
! pip install accelerate peft bitsandbytes transformers trl



In [2]:
# Log in to the Hugging Face Hub up front: the training cell is configured with
# push_to_hub=True and later cells upload the merged model and tokenizer.
from huggingface_hub import notebook_login
notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

# Model Training

In [None]:
import torch
from datasets import load_dataset, Dataset
from peft import LoraConfig, AutoPeftModelForCausalLM
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, TrainingArguments
from trl import SFTTrainer
import os

def finetune_llama_v2(
    base_model="codellama/CodeLlama-7b-hf",
    dataset_name="code_x_glue_tc_nl_code_search_adv",
    dataset_split="validation",
    output_dir="codellama2-finetuned-codex",
    max_steps=100,
    max_seq_length=512,
):
    """Fine-tune a causal LM on docstring -> code pairs with 4-bit LoRA and push it to the Hub.

    Steps performed:
      1. Load ``dataset_split`` of ``dataset_name`` and build a ``text`` column of the form
         ``<s>[INST] Docstring: {docstring} [/INST] Code: {code}</s>``.
      2. Load the tokenizer and a 4-bit NF4-quantised copy of ``base_model``.
      3. Train LoRA adapters (r=8, alpha=16, dropout=0.05) with ``SFTTrainer``.
      4. Push the result to the Hugging Face Hub (requires a prior Hub login).

    Args:
        base_model: Hub id used for BOTH the tokenizer and the model weights.
        dataset_name: Hub dataset id; it must expose ``docstring`` and ``code`` columns.
        dataset_split: Which split of the dataset to train on.
        output_dir: Local checkpoint directory passed to ``TrainingArguments``.
        max_steps: Number of optimizer steps. A positive value overrides
            ``num_train_epochs``; pass ``-1`` to train for the configured single epoch.
        max_seq_length: Maximum sequence length handed to ``SFTTrainer``.

    Returns:
        None. Side effects: writes checkpoints under ``output_dir`` and uploads to the Hub.
    """
    # --- Data -----------------------------------------------------------------------------
    data = load_dataset(dataset_name, split=dataset_split)
    data_df = data.to_pandas()
    # Column-wise string concatenation instead of a row-by-row DataFrame.apply(axis=1).
    # NOTE(review): the template hard-codes "<s>"; if the tokenizer also prepends a BOS
    # token, every sequence starts with two of them — confirm against the tokenizer config.
    data_df["text"] = (
        "<s>[INST] Docstring: "
        + data_df["docstring"]
        + " [/INST] Code: "
        + data_df["code"]
        + "</s>"
    )
    data = Dataset.from_pandas(data_df)

    # --- Tokenizer and quantised base model -----------------------------------------------
    tokenizer = AutoTokenizer.from_pretrained(base_model)
    # NOTE(review): EOS doubles as the pad token. If the collator masks pad positions in the
    # labels, EOS may never be a training target — verify the model still learns to stop.
    tokenizer.pad_token = tokenizer.eos_token
    bnb_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype="float16",
        bnb_4bit_use_double_quant=True,
    )
    model = AutoModelForCausalLM.from_pretrained(
        base_model, quantization_config=bnb_config, device_map="auto"
    )
    model.config.use_cache = False
    model.config.pretraining_tp = 1

    # --- LoRA + trainer configuration -----------------------------------------------------
    peft_config = LoraConfig(
        r=8, lora_alpha=16, lora_dropout=0.05, bias="none", task_type="CAUSAL_LM"
    )
    training_arguments = TrainingArguments(
        output_dir=output_dir,
        per_device_train_batch_size=8,
        gradient_accumulation_steps=4,
        optim="paged_adamw_32bit",
        learning_rate=2e-4,
        lr_scheduler_type="cosine",
        save_strategy="epoch",
        logging_steps=10,
        # Only takes effect when max_steps <= 0; a positive max_steps overrides it.
        num_train_epochs=1,
        max_steps=max_steps,
        fp16=True,
        push_to_hub=True,
    )
    trainer = SFTTrainer(
        model=model,
        train_dataset=data,
        peft_config=peft_config,
        dataset_text_field="text",
        args=training_arguments,
        tokenizer=tokenizer,
        packing=False,
        max_seq_length=max_seq_length,
    )

    # --- Train and publish ----------------------------------------------------------------
    trainer.train()
    trainer.push_to_hub()

# Kick off fine-tuning when this cell is executed; the download and training logs
# that follow are the output of this call.
if __name__ == "__main__":
    finetune_llama_v2()

Downloading builder script:   0%|          | 0.00/8.38k [00:00<?, ?B/s]

Downloading readme:   0%|          | 0.00/11.4k [00:00<?, ?B/s]

Downloading extra modules:   0%|          | 0.00/2.35k [00:00<?, ?B/s]

Downloading extra modules:   0%|          | 0.00/633 [00:00<?, ?B/s]

Downloading data files:   0%|          | 0/2 [00:00<?, ?it/s]

Downloading data:   0%|          | 0.00/941M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/25.1M [00:00<?, ?B/s]

Extracting data files:   0%|          | 0/2 [00:00<?, ?it/s]

Downloading data files:   0%|          | 0/2 [00:00<?, ?it/s]

Extracting data files:   0%|          | 0/2 [00:00<?, ?it/s]

Downloading data files:   0%|          | 0/2 [00:00<?, ?it/s]

Extracting data files:   0%|          | 0/2 [00:00<?, ?it/s]

Generating train split:   0%|          | 0/251820 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/9604 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/19210 [00:00<?, ? examples/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/745 [00:00<?, ?B/s]

Downloading tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/1.84M [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/411 [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/636 [00:00<?, ?B/s]

Downloading (…)fetensors.index.json:   0%|          | 0.00/25.1k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

Downloading (…)of-00002.safetensors:   0%|          | 0.00/9.98G [00:00<?, ?B/s]

Downloading (…)of-00002.safetensors:   0%|          | 0.00/3.50G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Downloading (…)neration_config.json:   0%|          | 0.00/116 [00:00<?, ?B/s]



Map:   0%|          | 0/9604 [00:00<?, ? examples/s]

You're using a LlamaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


Step,Training Loss
10,1.4478
20,1.469
30,1.2248
40,1.019
50,0.9477


Step,Training Loss
10,1.4478
20,1.469
30,1.2248
40,1.019
50,0.9477
60,0.9238
70,0.9299
80,0.9213
90,0.9555
100,0.9012


# Model Saving

In [None]:
import shutil
from pathlib import Path

# Copy the local training output directory into Google Drive.
# Done in Python rather than `! cp` so that a failure raises instead of being printed
# and silently ignored by the rest of the notebook.
checkpoint_dir = Path("/content/codellama2-finetuned-codex")
drive_root = Path("/content/drive/MyDrive")

# No Drive mount call is visible in this notebook, so check the target before copying.
if not drive_root.is_dir():
    raise FileNotFoundError(
        f"{drive_root} does not exist - mount Google Drive before running this cell."
    )

# Same result as `cp -r <src> <dst>/`: the directory lands at <dst>/<src name>,
# merging into an existing copy if one is already there.
saved_to = shutil.copytree(
    checkpoint_dir, drive_root / checkpoint_dir.name, dirs_exist_ok=True
)

# Model Push to Hub

In [None]:
from peft import AutoPeftModelForCausalLM, PeftModel
from transformers import AutoModelForCausalLM
import torch
import os
# Load the base model in plain fp16 (no quantisation) so the LoRA adapter can be
# attached and merged into full-precision weights in the next cells.
# `trust_remote_code` is deliberately not set: this checkpoint loads as the built-in
# LlamaForCausalLM, so there is no reason to allow repository code to execute.
model = AutoModelForCausalLM.from_pretrained(
    "codellama/CodeLlama-7b-hf",
    torch_dtype=torch.float16,
    device_map="auto",
)

In [2]:
# Wrap the fp16 base model with the fine-tuned LoRA adapter downloaded from the Hub,
# mapping the whole model onto device 0.
# (`from_transformers` is not a PeftModel.from_pretrained parameter, so it is not passed.)
peft_model = PeftModel.from_pretrained(
    model,
    "Vasanth/codellama2-finetuned-codex",
    device_map={"": 0},
)

In [3]:
# Fold the LoRA weights into the base weights and drop the PEFT wrapper.
# NOTE: this rebinds `model`; re-running the adapter-loading cell afterwards would wrap the
# already-merged model, so restart from the base-model cell if this needs repeating.
model = peft_model.merge_and_unload()

In [7]:
# Upload the merged, standalone model to the Hub repo "codellama2-finetuned-codex-fin"
# (created under the logged-in account).
model.push_to_hub("codellama2-finetuned-codex-fin")

Upload 2 LFS files:   0%|          | 0/2 [00:00<?, ?it/s]

pytorch_model-00002-of-00002.bin:   0%|          | 0.00/3.50G [00:00<?, ?B/s]

pytorch_model-00001-of-00002.bin:   0%|          | 0.00/9.98G [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/Vasanth/codellama2-finetuned-codex-fin/commit/7871aed2a1228aaab9c46c896ee1b8c138c6a585', commit_message='Upload LlamaForCausalLM', commit_description='', oid='7871aed2a1228aaab9c46c896ee1b8c138c6a585', pr_url=None, pr_revision=None, pr_num=None)

In [8]:
# The merged-model repo also needs tokenizer files so it can be loaded on its own
# (the inference cell reads the tokenizer from that repo). The tokenizer pushed here
# is the unmodified base CodeLlama tokenizer.
from transformers import AutoTokenizer
tokenizer = AutoTokenizer.from_pretrained("codellama/CodeLlama-7b-hf")
tokenizer.push_to_hub("codellama2-finetuned-codex-fin")

Downloading (…)okenizer_config.json:   0%|          | 0.00/745 [00:00<?, ?B/s]

Downloading tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/1.84M [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/411 [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/Vasanth/codellama2-finetuned-codex-fin/commit/ad96c8326d023cff54220e8e5d04222df696f4cf', commit_message='Upload tokenizer', commit_description='', oid='ad96c8326d023cff54220e8e5d04222df696f4cf', pr_url=None, pr_revision=None, pr_num=None)

# Model Inference

In [3]:
from transformers import AutoTokenizer
from transformers import pipeline
import torch

# Single source of truth for the merged model repo (used for tokenizer and weights).
MODEL_ID = "Vasanth/codellama2-finetuned-codex-fin"

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
# Pass the tokenizer in explicitly so the pipeline reuses it instead of loading a second
# copy, and so `eos_token_id` below is guaranteed to come from the tokenizer in use.
pipe = pipeline(
    "text-generation",
    model=MODEL_ID,
    tokenizer=tokenizer,
    torch_dtype=torch.float16,
    device_map="auto",
)

# NOTE(review): the prompt is a bare code prefix, not the
# "[INST] Docstring: ... [/INST] Code: " template the model was fine-tuned on —
# confirm this is the intended way to query the model.
sequences = pipe(
    'def fibonacci(',
    do_sample=True,
    temperature=0.2,  # low temperature: near-greedy sampling
    top_p=0.9,
    num_return_sequences=1,
    eos_token_id=tokenizer.eos_token_id,
    max_length=100,  # budget covers prompt + generated tokens, so output may be cut off
)
for seq in sequences:
    print(f"Result: {seq['generated_text']}")


Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

Downloading (…)l-00001-of-00002.bin:   0%|          | 0.00/9.98G [00:00<?, ?B/s]

Downloading (…)l-00002-of-00002.bin:   0%|          | 0.00/3.50G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Downloading (…)neration_config.json:   0%|          | 0.00/111 [00:00<?, ?B/s]

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Result: def fibonacci(n):
    if n == 0:
        return 0
    elif n == 1:
        return 1
    else:
        return fibonacci(n-1) + fibonacci(n-2)

def main():
    n = int(input("Enter the number of terms: "))
    print("The Fibonacci series is:")
    for i in range(n):
