# LoRA Fine-tuning on DeepSeek-LLM-7B-Base

### Training

In [None]:
import os
import torch
from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer, TrainingArguments, BitsAndBytesConfig
from peft import get_peft_model, LoraConfig, TaskType


# Process-wide switches, set before any model or CUDA work happens:
# Weights & Biases reporting off, NCCL peer-to-peer and InfiniBand
# transports disabled.
os.environ.update(
    {
        "WANDB_DISABLED": "true",
        "NCCL_P2P_DISABLE": "1",
        "NCCL_IB_DISABLE": "1",
    }
)

# Pick CUDA when it is available; in this cell `device` is only printed.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f"Using device: {device}")


# Emotion-classification dataset from the Hugging Face Hub.
dataset = load_dataset("dair-ai/emotion")


# Tokenizer of the base checkpoint. The EOS token is reused as the padding
# token (presumably because the checkpoint defines none -- TODO confirm).
model_name = "deepseek-ai/deepseek-llm-7b-base"
tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer.pad_token = tokenizer.eos_token

def tokenize_function(examples):
    """Tokenize a batch of texts and attach their integer class labels.

    Written as the callback for ``dataset.map(..., batched=True)``.

    Args:
        examples: Batch mapping with a ``"text"`` list of strings and a
            ``"label"`` list holding one class id per example, either as
            a bare integer or as a sequence whose first element is the id.

    Returns:
        dict: The tokenizer's output fields, truncated/padded to 128
        tokens, plus a ``"labels"`` list with one int per example.
    """

    def class_id(label):
        # Sequence-valued labels keep the previous rule: first element,
        # or 0 when the sequence is empty.
        if isinstance(label, (list, tuple)):
            return int(label[0]) if label else 0
        # A scalar label is the class id itself. The previous expression
        # sent every non-list label to 0, which collapsed all examples
        # into a single class.
        return int(label)

    # Plain Python lists are returned: no tensor conversion or device move
    # is needed for values that `Dataset.map` stores as dataset columns.
    encoded = tokenizer(
        examples["text"],
        truncation=True,
        padding="max_length",
        max_length=128,
    )

    features = dict(encoded)
    features["labels"] = [class_id(label) for label in examples["label"]]
    return features


# Tokenize every split once, in batches.
tokenized_datasets = dataset.map(tokenize_function, batched=True)


train_dataset = tokenized_datasets["train"]
eval_dataset = tokenized_datasets["test"]

# Number of classes comes from the dataset's label feature rather than a
# hard-coded constant (six for this dataset).
num_labels = len(dataset["train"].features["label"].names)


# 4-bit quantization of the base weights, bfloat16 compute, double quantization.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_use_double_quant=True,
)


# Base checkpoint with a sequence-classification head. The head
# (`score.weight`) is not part of the checkpoint and starts untrained.
base_model = AutoModelForSequenceClassification.from_pretrained(
    model_name,
    num_labels=num_labels,
    quantization_config=bnb_config,
    device_map="auto",
    pad_token_id=tokenizer.pad_token_id,
)


# LoRA adapters on the attention query/value projections.
lora_config = LoraConfig(
    task_type=TaskType.SEQ_CLS,
    r=32,
    lora_alpha=32,
    target_modules=["q_proj", "v_proj"],
    lora_dropout=0.05,
    bias="none",
)

model = get_peft_model(base_model, lora_config)

# Training arguments
training_args = TrainingArguments(
    output_dir="./results",
    evaluation_strategy="epoch",
    learning_rate=3e-4,
    per_device_train_batch_size=32,
    gradient_accumulation_steps=2,
    num_train_epochs=10,
    weight_decay=0.01,
    logging_dir="./logs",
    fp16=False,
    # The PEFT wrapper hides the base model's forward signature, so Trainer
    # cannot discover the label column on its own; without this it warns
    # "No label_names provided" and logs no validation loss.
    label_names=["labels"],
    # Supported way to turn off logging integrations; the WANDB_DISABLED
    # environment variable is reported as deprecated.
    report_to="none",
)


trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
)

# Train model
trainer.train()


# Persist the trained PEFT adapter and the tokenizer side by side so the
# inference cell can reload both from "models/".
model.save_pretrained("models/")
tokenizer.save_pretrained("models/")

Using device: cuda


Loading checkpoint shards: 100%|██████████| 2/2 [00:03<00:00,  1.65s/it]
Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at deepseek-ai/deepseek-llm-7b-base and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
We've detected an older driver with an RTX 4000 series GPU. These drivers have issues with P2P. This can affect the multi-gpu inference when using accelerate device_map.Please make sure to update your driver to the latest version which resolves this.
Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).
No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automat

Epoch,Training Loss,Validation Loss
1,No log,
2,211.966000,
3,211.966000,




('models/tokenizer_config.json',
 'models/special_tokens_map.json',
 'models/tokenizer.json')

### Testing

In [None]:
# Tokenizer of the base checkpoint; EOS doubles as the padding token.
model_name = "deepseek-ai/deepseek-llm-7b-base"
tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer.pad_token = tokenizer.eos_token


# Emotion-classification dataset from the Hugging Face Hub.
dataset = load_dataset("dair-ai/emotion")

def tokenize_function(examples):
    """Tokenize a batch of texts and attach their integer class labels.

    Written as the callback for ``dataset.map(..., batched=True)``.

    Args:
        examples: Batch mapping with a ``"text"`` list of strings and a
            ``"label"`` list holding one class id per example, either as
            a bare integer or as a sequence whose first element is the id.

    Returns:
        dict: The tokenizer's output fields, truncated/padded to 128
        tokens, plus a ``"labels"`` list with one int per example.
    """

    def class_id(label):
        # Sequence-valued labels keep the previous rule: first element,
        # or 0 when the sequence is empty.
        if isinstance(label, (list, tuple)):
            return int(label[0]) if label else 0
        # A scalar label is the class id itself. The previous expression
        # sent every non-list label to 0, which collapsed all examples
        # into a single class.
        return int(label)

    # Plain Python lists are returned: no tensor conversion or device move
    # is needed for values that `Dataset.map` stores as dataset columns.
    encoded = tokenizer(
        examples["text"],
        truncation=True,
        padding="max_length",
        max_length=128,
    )

    features = dict(encoded)
    features["labels"] = [class_id(label) for label in examples["label"]]
    return features

# Run the tokenizer over every split of the dataset, in batches.
tokenized_datasets = dataset.map(tokenize_function, batched=True)


# Bind the train and test splits to the names used for training/evaluation.
train_dataset, eval_dataset = (
    tokenized_datasets["train"],
    tokenized_datasets["test"],
)


In [None]:
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from peft import PeftModel, LoraConfig, TaskType

import os
import torch
from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer, TrainingArguments, BitsAndBytesConfig
from peft import get_peft_model, LoraConfig, TaskType


os.environ["WANDB_DISABLED"] = "true"
os.environ["NCCL_P2P_DISABLE"] = "1"
os.environ["NCCL_IB_DISABLE"] = "1"

# Check if GPU is available (informational; placement is left to device_map)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")


# Directory written by the training cell: adapter weights plus tokenizer files.
model_path = "models/"


tokenizer = AutoTokenizer.from_pretrained(model_path)

# Configure 4-bit quantization
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.bfloat16,  # Use bfloat16 for optimized speed & memory
    bnb_4bit_use_double_quant=True,  # Enable double quantization for efficiency
)

# Map each label name to its class index; its size is the number of classes.
label_map = {label: i for i, label in enumerate(dataset["train"].features["label"].names)}
num_labels = len(label_map)


# Load the quantized base checkpoint first. Its classification head is not
# part of the checkpoint, so it is untrained until the adapter is applied.
base_model = AutoModelForSequenceClassification.from_pretrained(
    model_name,
    num_labels=num_labels,  # 6 emotion classes
    quantization_config=bnb_config,
    device_map="auto",  # Automatically split the model across available GPUs
    pad_token_id=tokenizer.pad_token_id,
)

# Attach the adapter that training saved under `model_path`. Constructing
# `PeftModel(model, LoraConfig(...))` would instead create a brand-new,
# untrained adapter and leave the fine-tuned weights unused; the adapter's
# configuration is read from the saved files, so no LoraConfig is needed.
model = PeftModel.from_pretrained(base_model, model_path)

# Set model to evaluation mode
model.eval()

def predict_emotion(texts):
    """Return the predicted class index for each input text.

    Args:
        texts: Text input accepted by the module-level ``tokenizer``
            (the caller in this notebook passes a list of sentences).

    Returns:
        numpy.ndarray: Argmax over the model's logits along the last
        dimension, moved to the CPU.
    """
    # Same tokenization settings as the dataset preprocessing: fixed length
    # of 128 with truncation and max-length padding.
    encoded = tokenizer(
        texts,
        max_length=128,
        padding="max_length",
        truncation=True,
        return_tensors="pt",
    )

    # Forward pass without gradient tracking.
    with torch.no_grad():
        logits = model(**encoded).logits

    return torch.argmax(logits, dim=-1).cpu().numpy()


# Hand-written sample sentences used as a quick smoke test of the model.
test_sentences = [
    "I feel completely exhausted and unmotivated to work.",
    "Ilove love you and This is the best day of my life!",
    "I'm worried about what might happen next.",
    "I love you"
]

# Classify the samples; the printed values are the class indices returned
# by predict_emotion, one per sentence, not label names.
predicted_labels = predict_emotion(test_sentences)
print("Predicted Labels:", predicted_labels)


Using device: cuda


Loading checkpoint shards: 100%|██████████| 2/2 [00:04<00:00,  2.12s/it]
Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at deepseek-ai/deepseek-llm-7b-base and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
We've detected an older driver with an RTX 4000 series GPU. These drivers have issues with P2P. This can affect the multi-gpu inference when using accelerate device_map.Please make sure to update your driver to the latest version which resolves this.


Predicted Labels: [0 0 0 0]


: 