In [1]:
!pip install -r requirements.txt
!pip install --upgrade bitsandbytes

Collecting trl (from -r requirements.txt (line 2))
  Downloading trl-0.15.0-py3-none-any.whl.metadata (11 kB)
Collecting accelerate>=0.34.0 (from trl->-r requirements.txt (line 2))
  Downloading accelerate-1.4.0-py3-none-any.whl.metadata (19 kB)
Collecting datasets (from -r requirements.txt (line 1))
  Downloading datasets-3.3.1-py3-none-any.whl.metadata (19 kB)
Collecting rich (from trl->-r requirements.txt (line 2))
  Downloading rich-13.9.4-py3-none-any.whl.metadata (18 kB)
Collecting transformers>=4.46.0 (from trl->-r requirements.txt (line 2))
  Downloading transformers-4.49.0-py3-none-any.whl.metadata (44 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.0/44.0 kB[0m [31m5.2 MB/s[0m eta [36m0:00:00[0m
Collecting requests>=2.32.2 (from datasets->-r requirements.txt (line 1))
  Downloading requests-2.32.3-py3-none-any.whl.metadata (4.6 kB)
Collecting tqdm>=4.66.3 (from datasets->-r requirements.txt (line 1))
  Downloading tqdm-4.67.1-py3-none-any.whl.metad

In [2]:
from huggingface_hub import login
import json

with open("config.json", "r") as config_file:
    config = json.load(config_file)
    access_token = config["HF_ACCESS_TOKEN"]

login(token=access_token)

## Data

In [3]:
from datasets import load_dataset
from datasets.arrow_dataset import Dataset

def format_sample(sample):
    """ Helper function to format a single input sample"""
    instruction=sample['instruction']
    input_text=sample['input']
    output_text=sample['output']

    if input_text is None or input_text=="":
        formatted_prompt=(
            f"<|start_header_id|>user<|end_header_id|>\n\n"
            f"Below is an instruction that describes a task. Write a response that appropriately completes the request.\n\n"
            f"### Instruction:\n{instruction}\n\n"
            f"### Response:\n<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n"
            f"{output_text}<|eot_id|>"
        )
    else:
        formatted_prompt=(
            f"<|start_header_id|>user<|end_header_id|>\n\n"
            f"Below is an instruction that describes a task. Write a response that appropriately completes the request.\n\n"
            f"### Instruction:\n{instruction}\n\n### Input:\n{input_text}\n\n"
            f"### Response:\n<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n"
            f"{output_text}<|eot_id|>"
        )
    formatted_prompt="".join(formatted_prompt) # exclude trailing white spaces
    return formatted_prompt                    # stream text into the dataloader, one by one



def gen_train_input():
    """ Format all data input in alpaca style
        Return:
            A generator on train data "train_gen"
    """
    # load data
    ds=load_dataset("iamtarun/python_code_instructions_18k_alpaca",streaming=True, split="train")
    # datata set has 18.6k samples, we use 16.8k (90%) for training + 1.8k for validation
    num_samples=16800
    counter=0
    for sample in iter(ds):
        if counter>=num_samples:
            break
        formatted_prompt=format_sample(sample)
        yield {'text': formatted_prompt}
        counter+=1


def gen_val_input():
    """ Format all data input in alpaca style
        Return:
            A generator on val data "val_gen"
    """
    # load data
    ds=load_dataset("iamtarun/python_code_instructions_18k_alpaca",streaming=True, split="train")
    # datata set has 18.6k samples, we use 16.8k (90%) for training + 1.8k for validation
    num_samples=16800
    counter=0
    for sample in iter(ds):
        if counter<num_samples:
            counter+=1
            continue

        formatted_prompt=format_sample(sample)
        yield {'text': formatted_prompt}
        counter+=1

dataset_train = Dataset.from_generator(gen_train_input)
dataset_val=Dataset.from_generator(gen_val_input)

Generating train split: 0 examples [00:00, ? examples/s]

README.md:   0%|          | 0.00/905 [00:00<?, ?B/s]

Generating train split: 0 examples [00:00, ? examples/s]

In [6]:
print(f"Train dataset size: {len(dataset_train)}")
print(f"Validation dataset size: {len(dataset_val)}")

print(f"Sample train:\n{dataset_train[0]}")


Train dataset size: 16800
Validation dataset size: 1812
Sample train:
{'text': '<|start_header_id|>user<|end_header_id|>\n\nBelow is an instruction that describes a task. Write a response that appropriately completes the request.\n\n### Instruction:\nCreate a function to calculate the sum of a sequence of integers.\n\n### Input:\n[1, 2, 3, 4, 5]\n\n### Response:\n<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n# Python code\ndef sum_sequence(sequence):\n  sum = 0\n  for num in sequence:\n    sum += num\n  return sum<|eot_id|>'}


## Model and tokenizer

In [10]:
import torch
from peft import LoraConfig, AutoPeftModelForCausalLM
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments, BitsAndBytesConfig
from trl import SFTTrainer

model_name="meta-llama/Llama-3.2-1B-Instruct"
def create_and_prepare_model():
    bnb_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.bfloat16,
        bnb_4bit_use_double_quant=True,
    )

    model=AutoModelForCausalLM.from_pretrained(
        model_name,
        quantization_config=bnb_config,
        device_map="auto",
        token=access_token
    )

    peft_config=LoraConfig(
        lora_alpha=16,
        r=8,
        lora_dropout=0.05,
        bias="none",
        task_type="CAUSAL_LM",
        target_modules=['q_proj', 'k_proj', 'v_proj'],
    )
    
    tokenizer=AutoTokenizer.from_pretrained(model_name,token=access_token)
    tokenizer.pad_token = "<|finetune_right_pad_id|>"
    tokenizer.padding_side = "right"

    return model,peft_config,tokenizer

model,peft_config,tokenizer=create_and_prepare_model()

## Train

In [11]:
from trl import SFTConfig, SFTTrainer

args=SFTConfig(
    output_dir="./llama32-python",
    num_train_epochs=3,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    gradient_accumulation_steps=8,
    gradient_checkpointing=True, # to save memmory
    optim="adamw_torch_fused",
    logging_steps=50,            # log train loss
    # Add evaluation strategy to compute validation loss during training
    evaluation_strategy="steps",  # Evaluate at the end of each epoch
    eval_steps=50,  # Evaluate every 50 steps    
    save_strategy="epoch",
    learning_rate=2e-4,
    bf16=True, 
    tf32=True, # enable true for faster speed (supported in higher-end gpu)
    max_grad_norm=0.3,
    warmup_ratio=0.03, # follow QLoRA paper
    lr_scheduler_type="cosine",
    report_to="tensorboard",
    gradient_checkpointing_kwargs={"use_reentrant": False},
    dataset_text_field="text",
)

trainer=SFTTrainer(
    model=model,
    args=args,
    train_dataset=dataset_train,
    eval_dataset=dataset_val,
    peft_config=peft_config,
    tokenizer=tokenizer
)
trainer.train()

  trainer=SFTTrainer(


Applying chat template to train dataset:   0%|          | 0/16800 [00:00<?, ? examples/s]

Tokenizing train dataset:   0%|          | 0/16800 [00:00<?, ? examples/s]

Tokenizing train dataset:   0%|          | 0/16800 [00:00<?, ? examples/s]

Applying chat template to eval dataset:   0%|          | 0/1812 [00:00<?, ? examples/s]

Tokenizing eval dataset:   0%|          | 0/1812 [00:00<?, ? examples/s]

Tokenizing eval dataset:   0%|          | 0/1812 [00:00<?, ? examples/s]

No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`.


Step,Training Loss,Validation Loss
50,1.2909,0.875727
100,0.8701,0.839814
150,0.8453,0.829108
200,0.8373,0.823679
250,0.8346,0.819919
300,0.8481,0.818405
350,0.8237,0.817808


TrainOutput(global_step=393, training_loss=0.8984256191108063, metrics={'train_runtime': 4617.4541, 'train_samples_per_second': 10.915, 'train_steps_per_second': 0.085, 'total_flos': 1.5350980937613312e+17, 'train_loss': 0.8984256191108063})

In [17]:
# save model state_dict
model_file_name="LLAMA32_ft_python_code.pth"
torch.save(model.state_dict(), model_file_name)
print(f"Model saved as {model_file_name}")

# free the memory 
del model
del trainer
torch.cuda.empty_cache()

NameError: name 'model' is not defined

## Load finetune model and run inference

In [18]:
from peft import PeftModel, LoraConfig
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

def load_fine_tune_model(base_model_id, saved_weights):
    # Load tokenizer and base model
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    tokenizer = AutoTokenizer.from_pretrained(base_model_id)
    base_model = AutoModelForCausalLM.from_pretrained(base_model_id)
    base_model.to(device)
    
    # Create LoRA config - make sure these parameters match your training configuration
    peft_config = LoraConfig(
        lora_alpha=16,
        r=8,
        lora_dropout=0.05,
        bias="none",
        task_type="CAUSAL_LM",
        target_modules=['q_proj','k_proj','v_proj'],
    )
    
    # Initialize PeftModel
    lora_model = PeftModel(base_model, peft_config)
    
    # Load the saved weights
    state_dict = torch.load(saved_weights,map_location=device)
        
    # Create new state dict with correct prefixes and structure
    new_state_dict = {}
    for key, value in state_dict.items():
        # key start with "model" -> add "base_" to the new key for base_model
        new_key = f"base_{key}"        
        new_state_dict[new_key] = value
    
    # Load the weights with strict=False to allow partial loading
    lora_model.load_state_dict(new_state_dict, strict=False)
    
    # Set to evaluation mode
    lora_model = lora_model.eval()
    
    return lora_model, tokenizer

# Original model and saved weight
base_model_id = "meta-llama/Llama-3.2-1B-Instruct"
lora_weights = "LLAMA32_ft_python_code.pth"

# Load model
print("Loading fine-tuned model ...")
model_ft, tokenizer = load_fine_tune_model(base_model_id, lora_weights)
total_params=sum(p.numel() for p in model_ft.parameters())
trainable_params=sum(p.numel() for p in model_ft.parameters() if p.requires_grad)

print(f"Total parameters: {total_params:,}")
print(f"Trainable parameters: {trainable_params:,}")

Loading fine-tuned model ...
Total parameters: 1,236,994,048
Trainable parameters: 1,179,648


In [19]:
def generate(model, prompt, tokenizer, max_new_tokens, context_size=512, temperature=0.0, top_k=1, eos_id=[128001, 128009]):
    """
    Generate text using a language model with proper dtype handling and improved sampling.
    """
    # Get model's expected dtype
    model_dtype = next(model.parameters()).dtype
    model_device = next(model.parameters()).device
    
    formatted_prompt = (
        f"<|start_header_id|>user<|end_header_id|>\n\n"
        f"Below is an instruction that describes a task. Write a response that appropriately completes the request.\n\n"
        f"### Instruction:\n{prompt}"
        f"### Response:\n<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n"
    )
    
    # Encode and prepare input
    idx = tokenizer.encode(formatted_prompt)
    idx = torch.tensor(idx, dtype=torch.long, device=model_device).unsqueeze(0)
    num_tokens = idx.shape[1]
    
    # Generation loop
    for _ in range(max_new_tokens):
        # Get conditioning context
        idx_cond = idx[:, -context_size:]
        
        # Generate logits
        with torch.no_grad():            
            # Forward pass - get output directly from model
            outputs = model(
                input_ids=idx_cond,
                use_cache=False
            )
            
            # Get logits directly from the output
            logits = outputs.logits
        
        # Focus on last time step
        logits = logits[:, -1, :]
        
        # Apply top-k filtering
        if top_k is not None and top_k > 0:
            top_logits, _ = torch.topk(logits, top_k)
            min_val = top_logits[:, [-1]]
            logits = torch.where(
                logits < min_val,
                torch.tensor(float('-inf'), device=model_device, dtype=model_dtype),
                logits
            )
        
        # Apply temperature and sample
        if temperature > 0.0:
            logits = logits / temperature
            probs = torch.softmax(logits, dim=-1)
            idx_next = torch.multinomial(probs, num_samples=1)
        else:
            idx_next = torch.argmax(logits, dim=-1, keepdim=True)
        
        # Check for EOS
        if idx_next.item() in eos_id:
            break
            
        # Append new token
        idx = torch.cat((idx, idx_next), dim=1)
    
    # Decode generated text
    generated_ids = idx.squeeze(0)[num_tokens:]
    generated_text = tokenizer.decode(generated_ids)
    
    return generated_text



In [20]:
prompt = ("Write a function that computes fibonacci numbers.")
print(generate(model_ft, prompt, tokenizer, max_new_tokens=512))

```python
def fibonacci(n):
    """
    This function computes the nth Fibonacci number.

    Args:
        n (int): The position of the Fibonacci number to be computed.

    Returns:
        int: The nth Fibonacci number.
    """

    # Base cases for the Fibonacci sequence
    if n <= 0:
        return "Input should be a positive integer."
    elif n == 1:
        return 0
    elif n == 2:
        return 1

    # Initialize the first two Fibonacci numbers
    a, b = 0, 1

    # Compute the nth Fibonacci number
    for _ in range(2, n):
        # Update a and b to be the sum of the previous two
        a, b = b, a + b

    # Return the nth Fibonacci number
    return b
```


In [21]:
prompt = "Given an array nums of integers, return how many of them contain an even number of digits."
print(generate(model_ft, prompt, tokenizer, max_new_tokens=512))

### Solution

```python
def findNumbers(nums):
    """
    This function takes an array of integers as input and returns the count of numbers 
    that contain an even number of digits.

    Args:
        nums (list): A list of integers.

    Returns:
        int: The count of numbers with an even number of digits.
    """
    count = 0  # Initialize a counter variable to store the count of numbers with even digits
    for num in nums:  # Iterate over each number in the input list
        digits = len(str(abs(num)))  # Convert the number to a string and count the number of digits
        if digits % 2 == 0:  # Check if the number of digits is even
            count += 1  # If the number of digits is even, increment the counter
    return count  # Return the total count of numbers with an even number of digits
```

### Example Use Cases

```python
# Test the function with an array of numbers
print(findNumbers([1, 2, 3, 4, 5]))  # Output: 2

# Test the function with an array of numbers w