In [1]:
%%capture
%pip install -U transformers datasets accelerate peft trl bitsandbytes wandb

In [2]:
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    HfArgumentParser,
    TrainingArguments,
    pipeline,
    logging,
)
from peft import (
    LoraConfig,
    PeftModel,
    prepare_model_for_kbit_training,
    get_peft_model,
)
import os, torch, wandb
from datasets import load_dataset
from trl import SFTTrainer, setup_chat_format
from accelerate import Accelerator

In [3]:
from huggingface_hub import login
from kaggle_secrets import UserSecretsClient
user_secrets = UserSecretsClient()

hf_token = user_secrets.get_secret("HF_TOKEN")
login(token = hf_token)

The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.
Token is valid (permission: write).
Your token has been saved to /root/.cache/huggingface/token
Login successful


In [4]:
wb_token = user_secrets.get_secret("wandb")

wandb.login(key=wb_token)
run = wandb.init(
    project='Fine-tune CodeLlama 2 7B for JUnit Test case Generation', 
    job_type="training", 
    anonymous="allow"
)

[34m[1mwandb[0m: Using wandb-core as the SDK backend. Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: W&B API key is configured. Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mwalim[0m ([33miHateNLP[0m). Use [1m`wandb login --relogin`[0m to force relogin


In [5]:
base_model = "codellama/CodeLlama-7b-hf"
dataset_name = "CodexAI/dataset"
new_model = "CODEX-CodeLlama-7b-hf"

In [6]:
acc = Accelerator()

In [7]:
# Set torch dtype and attention implementation
if torch.cuda.get_device_capability()[0] >= 8:
    !pip install -qqq flash-attn
    torch_dtype = torch.bfloat16
    attn_implementation = "flash_attention_2"
else:
    torch_dtype = torch.float16
    attn_implementation = "eager"

In [8]:
if torch.cuda.is_available():
    print(f"CUDA device: {torch.cuda.get_device_name()}")
    device={"":0}
    torch_type=torch.bfloat16
else:
    device="cpu"
    torch_type=torch.bfloat16
    print("I am begging for mercy already!")

CUDA device: Tesla P100-PCIE-16GB


In [9]:
# QLoRA config
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch_type,
    bnb_4bit_use_double_quant=True,
)

# Load model
model = AutoModelForCausalLM.from_pretrained(
    base_model,
    quantization_config=bnb_config,
    device_map=device,
    attn_implementation=attn_implementation,
)

# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained(base_model, trust_remote_code=True)

config.json:   0%|          | 0.00/637 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/25.1k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/9.98G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/3.50G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/749 [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.84M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/411 [00:00<?, ?B/s]

In [10]:
model

LlamaForCausalLM(
  (model): LlamaModel(
    (embed_tokens): Embedding(32016, 4096)
    (layers): ModuleList(
      (0-31): 32 x LlamaDecoderLayer(
        (self_attn): LlamaAttention(
          (q_proj): Linear4bit(in_features=4096, out_features=4096, bias=False)
          (k_proj): Linear4bit(in_features=4096, out_features=4096, bias=False)
          (v_proj): Linear4bit(in_features=4096, out_features=4096, bias=False)
          (o_proj): Linear4bit(in_features=4096, out_features=4096, bias=False)
          (rotary_emb): LlamaRotaryEmbedding()
        )
        (mlp): LlamaMLP(
          (gate_proj): Linear4bit(in_features=4096, out_features=11008, bias=False)
          (up_proj): Linear4bit(in_features=4096, out_features=11008, bias=False)
          (down_proj): Linear4bit(in_features=11008, out_features=4096, bias=False)
          (act_fn): SiLU()
        )
        (input_layernorm): LlamaRMSNorm((4096,), eps=1e-05)
        (post_attention_layernorm): LlamaRMSNorm((4096,), eps=1e-0

In [11]:
# LoRA config
peft_config = LoraConfig(
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
    target_modules=['up_proj', 'down_proj', 'gate_proj', 'k_proj', 'q_proj', 'v_proj', 'o_proj']
)
peft_config

LoraConfig(peft_type=<PeftType.LORA: 'LORA'>, auto_mapping=None, base_model_name_or_path=None, revision=None, task_type='CAUSAL_LM', inference_mode=False, r=16, target_modules={'o_proj', 'up_proj', 'v_proj', 'q_proj', 'down_proj', 'gate_proj', 'k_proj'}, lora_alpha=32, lora_dropout=0.05, fan_in_fan_out=False, bias='none', use_rslora=False, modules_to_save=None, init_lora_weights=True, layers_to_transform=None, layers_pattern=None, rank_pattern={}, alpha_pattern={}, megatron_config=None, megatron_core='megatron.core', loftq_config={}, use_dora=False, layer_replication=None, runtime_config=LoraRuntimeConfig(ephemeral_gpu_offload=False))

In [12]:
model, tokenizer = setup_chat_format(model, tokenizer)
model = get_peft_model(model, peft_config)
model

PeftModelForCausalLM(
  (base_model): LoraModel(
    (model): LlamaForCausalLM(
      (model): LlamaModel(
        (embed_tokens): Embedding(32018, 4096)
        (layers): ModuleList(
          (0-31): 32 x LlamaDecoderLayer(
            (self_attn): LlamaAttention(
              (q_proj): lora.Linear4bit(
                (base_layer): Linear4bit(in_features=4096, out_features=4096, bias=False)
                (lora_dropout): ModuleDict(
                  (default): Dropout(p=0.05, inplace=False)
                )
                (lora_A): ModuleDict(
                  (default): Linear(in_features=4096, out_features=16, bias=False)
                )
                (lora_B): ModuleDict(
                  (default): Linear(in_features=16, out_features=4096, bias=False)
                )
                (lora_embedding_A): ParameterDict()
                (lora_embedding_B): ParameterDict()
                (lora_magnitude_vector): ModuleDict()
              )
              (k_proj): lora

In [13]:
model.print_trainable_parameters()

trainable params: 39,976,960 || all params: 6,778,540,032 || trainable%: 0.5898


In [14]:
dataset = load_dataset(dataset_name)
dataset

README.md:   0%|          | 0.00/426 [00:00<?, ?B/s]

train-00000-of-00001.parquet:   0%|          | 0.00/35.2M [00:00<?, ?B/s]

test-00000-of-00001.parquet:   0%|          | 0.00/1.74M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/100000 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/4687 [00:00<?, ? examples/s]

DatasetDict({
    train: Dataset({
        features: ['focal_method', 'test_case'],
        num_rows: 100000
    })
    test: Dataset({
        features: ['focal_method', 'test_case'],
        num_rows: 4687
    })
})

In [15]:
train=dataset['train']
train=train.shuffle(True).select(range(100))
train

Dataset({
    features: ['focal_method', 'test_case'],
    num_rows: 100
})

In [16]:
test=dataset['test']
test=test.shuffle(True).select(range(10))
test

Dataset({
    features: ['focal_method', 'test_case'],
    num_rows: 10
})

In [17]:
def format_chat_template(row):
    row_json = [
        {"role": "system", "content": "You are a coding assistant for generating JUnit test cases."},
        {"role": "user", "content": row["focal_method"]},
        {"role": "assistant", "content": row["test_case"]}
    ]
    row["text"] = tokenizer.apply_chat_template(row_json, tokenize=False)
    return row


In [18]:
train = train.map(
    format_chat_template,
    num_proc= 4,
)

train

  self.pid = os.fork()


Map (num_proc=4):   0%|          | 0/100 [00:00<?, ? examples/s]

  self.pid = os.fork()


Dataset({
    features: ['focal_method', 'test_case', 'text'],
    num_rows: 100
})

In [19]:
test = test.map(
    format_chat_template,
    num_proc= 4,
)

test

Map (num_proc=4):   0%|          | 0/10 [00:00<?, ? examples/s]

Dataset({
    features: ['focal_method', 'test_case', 'text'],
    num_rows: 10
})

In [20]:
train['text'][3]

'<|im_start|>system\nYou are a coding assistant for generating JUnit test cases.<|im_end|>\n<|im_start|>user\npublic OutputStream createRawOutputStream() throws IOException\n    {\n        checkClosed();\n        if (isWriting)\n        {\n            throw new IllegalStateException("Cannot have more than one open stream writer.");\n        }\n        if (randomAccess != null)\n            randomAccess.clear();\n        else\n            randomAccess = getStreamCache().createBuffer();\n        OutputStream out = new RandomAccessOutputStream(randomAccess);\n        isWriting = true;\n        return new FilterOutputStream(out)\n        {\n            @Override\n            public void write(byte[] b, int off, int len) throws IOException\n            {\n                this.out.write(b, off, len);\n            }\n            \n            @Override\n            public void close() throws IOException\n            {\n                super.close();\n                setInt(COSName.LENGTH, (in

In [21]:
#Hyperparamter
training_arguments = TrainingArguments(
    output_dir="./results",
    run_name ="./loggings",
    overwrite_output_dir=True,
    eval_strategy="steps",
    eval_steps=0.10,
    per_device_train_batch_size=1,
    per_device_eval_batch_size=1,
    gradient_accumulation_steps=4,
    optim="paged_adamw_32bit",
    num_train_epochs=1,
    warmup_steps=10,
    logging_strategy="steps",
    logging_steps=50,
    logging_first_step=True,
    learning_rate=2e-4,
    fp16=False,
    bf16=True,
    group_by_length=True,
    report_to="wandb",
)

In [22]:
# Setting sft parameters
trainer = SFTTrainer(
    model=model,
    train_dataset=train,
    eval_dataset=test,
    peft_config=peft_config,
    max_seq_length= 512,
    dataset_text_field="text",
    tokenizer=tokenizer,
    args=training_arguments,
    packing= False,
)


Deprecated positional argument(s) used in SFTTrainer, please use the SFTConfig to set these arguments instead.


Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/10 [00:00<?, ? examples/s]



In [23]:
trainer.train_dataset

Dataset({
    features: ['input_ids', 'attention_mask'],
    num_rows: 100
})

In [24]:
trainer.eval_dataset

Dataset({
    features: ['input_ids', 'attention_mask'],
    num_rows: 10
})

In [25]:
trainer.train()

Step,Training Loss,Validation Loss
3,1.255,1.760233
6,1.255,1.632642
9,1.255,1.403932
12,1.255,1.076716
15,1.255,0.975671
18,1.255,0.954687
21,1.255,0.934519
24,1.255,0.9227


We detected that you are passing `past_key_values` as a tuple and this is deprecated and will be removed in v4.43. Please use an appropriate `Cache` class (https://huggingface.co/docs/transformers/v4.41.3/en/internal/generation_utils#transformers.Cache)


TrainOutput(global_step=25, training_loss=1.1696454191207886, metrics={'train_runtime': 405.675, 'train_samples_per_second': 0.247, 'train_steps_per_second': 0.062, 'total_flos': 1243434988929024.0, 'train_loss': 1.1696454191207886, 'epoch': 1.0})

In [26]:
trainer.model

PeftModelForCausalLM(
  (base_model): LoraModel(
    (model): LlamaForCausalLM(
      (model): LlamaModel(
        (embed_tokens): Embedding(32018, 4096)
        (layers): ModuleList(
          (0-31): 32 x LlamaDecoderLayer(
            (self_attn): LlamaAttention(
              (q_proj): lora.Linear4bit(
                (base_layer): Linear4bit(in_features=4096, out_features=4096, bias=False)
                (lora_dropout): ModuleDict(
                  (default): Dropout(p=0.05, inplace=False)
                )
                (lora_A): ModuleDict(
                  (default): Linear(in_features=4096, out_features=16, bias=False)
                )
                (lora_B): ModuleDict(
                  (default): Linear(in_features=16, out_features=4096, bias=False)
                )
                (lora_embedding_A): ParameterDict()
                (lora_embedding_B): ParameterDict()
                (lora_magnitude_vector): ModuleDict()
              )
              (k_proj): lora

In [27]:
trainer.model.save_pretrained(new_model)
wandb.finish()
model.config.use_cache = True



VBox(children=(Label(value='0.324 MB of 0.324 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
eval/loss,█▇▅▂▁▁▁▁
eval/runtime,▆▆█▂▃▁▃▄
eval/samples_per_second,██▁█████
eval/steps_per_second,██▁█████
train/epoch,▁▂▂▃▄▅▆▇██
train/global_step,▁▂▂▃▄▅▆▇██
train/grad_norm,▁
train/learning_rate,▁
train/loss,▁

0,1
eval/loss,0.9227
eval/runtime,13.8935
eval/samples_per_second,0.72
eval/steps_per_second,0.72
total_flos,1243434988929024.0
train/epoch,1.0
train/global_step,25.0
train/grad_norm,0.29396
train/learning_rate,2e-05
train/loss,1.255


In [28]:
messages = [{"role": "user", "content": """
public class SimpleCalculator {
    // Method to add two numbers
    public int add(int a, int b) {
        return a + b;
    }

    // Method to subtract two numbers
    public int subtract(int a, int b) {
        return a - b;
    }

    // Method to multiply two numbers
    public int multiply(int a, int b) {
        return a * b;
    }

    // Method to divide two numbers
    // Throws ArithmeticException if divisor is zero
    public double divide(int a, int b) {
        if (b == 0) {
            throw new ArithmeticException("Cannot divide by zero");
        }
        return (double) a / b;
    }
}
"""}]

prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    
inputs = tokenizer(prompt, return_tensors='pt', padding=True, truncation=True).to("cuda")

outputs = model.generate(**inputs, max_new_tokens=512, num_return_sequences=1)

text = tokenizer.decode(outputs[0], skip_special_tokens=True)

print(text.split("assistant")[1])

Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.



@Test
    public void testAdd() {
        SimpleCalculator calculator = new SimpleCalculator();
        assertEquals(4, calculator.add(2, 2));
    }

    @Test
    public void testSubtract() {
        SimpleCalculator calculator = new SimpleCalculator();
        assertEquals(2, calculator.subtract(4, 2));
    }

    @Test
    public void testMultiply() {
        SimpleCalculator calculator = new SimpleCalculator();
        assertEquals(4, calculator.multiply(2, 2));
    }

    @Test
    public void testDivide() {
        SimpleCalculator calculator = new SimpleCalculator();
        assertEquals(2.0, calculator.divide(4, 2), 0.0001);
    }

    @Test(expected = ArithmeticException.class)
    public void testDivideByZero() {
        SimpleCalculator calculator = new SimpleCalculator();
        calculator.divide(4, 0);
    }

    @Test
    public void testDivideByZeroWithMessage() {
        SimpleCalculator calculator = new SimpleCalculator();
        try {
            calculator.divide(

In [29]:
from huggingface_hub import HfApi, create_repo

In [30]:
repo_name = new_model
organization_name = "CodexAI"
repo_url = f"{organization_name}/{repo_name}"

In [31]:
create_repo(repo_url, repo_type="model", private=True,exist_ok=True)

RepoUrl('https://huggingface.co/CodexAI/CODEX-CodeLlama-7b-hf', endpoint='https://huggingface.co', repo_type='model', repo_id='CodexAI/CODEX-CodeLlama-7b-hf')

In [32]:
api = HfApi()
api.upload_folder(folder_path=new_model,repo_id=repo_url)

adapter_model.safetensors:   0%|          | 0.00/685M [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/CodexAI/CODEX-CodeLlama-7b-hf/commit/5a67c77f554abb4485f53da21e77ba75902d69f8', commit_message='Upload folder using huggingface_hub', commit_description='', oid='5a67c77f554abb4485f53da21e77ba75902d69f8', pr_url=None, pr_revision=None, pr_num=None)

In [33]:
print('END')

END
