In [1]:
import torch
import torch.nn.functional as F
from transformers import AutoModelForCausalLM, AutoTokenizer, Trainer, TrainingArguments
from datasets import Dataset
from peft import LoraConfig, get_peft_model
from transformers import Trainer

gpu_id=0
device = torch.device(f"cuda:{gpu_id}" if torch.cuda.is_available() else "cpu")
device="cuda"

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
model_name = "Qwen/Qwen2.5-1.5B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.float32).to(device)

In [3]:
lora_config = LoraConfig(
    r=8,  # Rank of LoRA adaptation
    lora_alpha=32,  # Scaling factor
    lora_dropout=0.1,  # Dropout for regularization
    target_modules=["q_proj", "v_proj"],  # Apply LoRA to attention layers
    bias="none")

model = get_peft_model(model, lora_config)

In [4]:
# === Dummy Dataset ===
data = [
    {"text": "Traffic is heavy, the road is wet.", "label": 1},  # 1 = Slow down
    {"text": "The road is clear, no obstacles.", "label": 0},    # 0 = Continue normally
    {"text": "A pedestrian is crossing, reduce speed.", "label": 1},
    {"text": "Green light, accelerate slightly.", "label": 0}
]

def tokenize_function(example):
    text = f"""Your task is to decide whether to slow down (1) or continue (0) based on traffic conditions.
    Here is the description: {example['text']}
    You must return only a single integer: 0 or 1."""
    
    tokens = tokenizer(text, return_tensors="pt", truncation=True, padding="max_length", max_length=128)
    tokens = {k: v.squeeze(0) for k, v in tokens.items()}  # Remove batch dim
    tokens["labels"] = torch.tensor(example["label"])  # Convert label to tensor
    return tokens

dataset = Dataset.from_list(data)
tokenized_dataset = dataset.map(tokenize_function)
tokenized_dataset

Map: 100%|██████████| 4/4 [00:00<00:00, 285.39 examples/s]


Dataset({
    features: ['text', 'label', 'input_ids', 'attention_mask', 'labels'],
    num_rows: 4
})

In [7]:
tokenizer.decode(tokenized_dataset[0]["input_ids"])

'Your task is to decide whether to slow down (1) or continue (0) based on traffic conditions.\n    Here is the description: Traffic is heavy, the road is wet.\n    You must return only a single integer: 0 or 1.<|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftext|><|endoftex

In [13]:
import torch.nn as nn

class LoRAWithMLP(nn.Module):
    def __init__(self, base_model, num_poses, hidden_dim=64):
        super().__init__()
        self.base_model = base_model
        self.num_poses = num_poses
        self.hidden_dim = hidden_dim

        self.mlp = nn.Sequential(
            nn.Linear(self.base_model.config.hidden_size, hidden_dim),  
            nn.ReLU(),
            nn.Linear(hidden_dim, hidden_dim),  
            nn.ReLU(),
            nn.Linear(hidden_dim, self.num_poses * 3),
        )

    def forward(self, input_ids, attention_mask=None):
        outputs = self.base_model(input_ids=input_ids, attention_mask=attention_mask, output_hidden_states=True)
        last_hidden_state = outputs.hidden_states[-1]
        
        # Average pooling over sequence length (B, Seq, Hidden) → (B, Hidden)
        pooled_output = last_hidden_state.mean(dim=1)

        # Pass through extra MLP layers
        mlp_output = self.mlp(pooled_output)
        return mlp_output

model = LoRAWithMLP(model, 5)
model.to(device)

LoRAWithMLP(
  (base_model): PeftModel(
    (base_model): LoraModel(
      (model): Qwen2ForCausalLM(
        (model): Qwen2Model(
          (embed_tokens): Embedding(151936, 1536)
          (layers): ModuleList(
            (0-27): 28 x Qwen2DecoderLayer(
              (self_attn): Qwen2Attention(
                (q_proj): lora.Linear(
                  (base_layer): Linear(in_features=1536, out_features=1536, bias=True)
                  (lora_dropout): ModuleDict(
                    (default): Dropout(p=0.1, inplace=False)
                  )
                  (lora_A): ModuleDict(
                    (default): Linear(in_features=1536, out_features=8, bias=False)
                  )
                  (lora_B): ModuleDict(
                    (default): Linear(in_features=8, out_features=1536, bias=False)
                  )
                  (lora_embedding_A): ParameterDict()
                  (lora_embedding_B): ParameterDict()
                  (lora_magnitude_vector): ModuleDi

In [16]:
class CustomTrainer(Trainer):
    def compute_loss(self, model, inputs, return_outputs=False):
        labels = inputs.pop("labels").to(device)  # Extract labels
        output = model(**inputs)
        print(output)
        
        # Print last hidden state
        print("Labels shape", labels)  
 
        # Custom loss function (CrossEntropy)
        loss = F.cross_entropy(logits, labels.view(-1))
        
        return (loss, outputs) if return_outputs else loss

training_args = TrainingArguments(
    output_dir="./qwen2.5-lora",
    per_device_train_batch_size=2,
    num_train_epochs=10,
    learning_rate=2e-4,
    save_strategy="epoch",
    logging_dir="./logs",
    logging_steps=10,
    optim="adamw_torch",
    report_to="none"
)

trainer = CustomTrainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset,
    tokenizer=tokenizer
)

trainer.train()

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


tensor([[ 0.0668, -0.0252,  1.3284,  0.2440, -0.1632,  0.4891, -0.2164, -0.4423,
         -0.2585,  0.0690,  0.5576,  0.3167,  0.1440,  0.1770, -0.1616],
        [ 0.0606, -0.0125,  1.4669,  0.2696, -0.1794,  0.5088, -0.2248, -0.4624,
         -0.2300,  0.1166,  0.5469,  0.4044,  0.1344,  0.2097, -0.1297],
        [ 0.0713, -0.0836,  1.2613,  0.1909, -0.1798,  0.5262, -0.2613, -0.3934,
         -0.2985,  0.0798,  0.5762,  0.3221,  0.1390,  0.1621, -0.1262],
        [ 0.0870, -0.0581,  1.3306,  0.1997, -0.1779,  0.5358, -0.2593, -0.4100,
         -0.2668,  0.1142,  0.5668,  0.3672,  0.1973,  0.1354, -0.1277]],
       device='cuda:0', grad_fn=<GatherBackward>)
Labels shape tensor([1, 0, 1, 0], device='cuda:0')


NameError: name 'logits' is not defined

In [12]:
# Classification function
def classify_text(text):
    prompt = f"""Your task is to decide whether to slow down (1) or continue (0) based on traffic conditions.
    You must return only a single integer: 0 or 1."""

    messages = [
        {"role": "system", "content": prompt},
        {"role": "user", "content": text}
    ]

    text = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True
    )

    inputs = tokenizer([text], return_tensors="pt").to(device)

    with torch.no_grad():
        outputs = model(**inputs)
    
    logits = outputs.logits  # Raw model outputs
    last_logits = logits[:, -1, :]  # Take last token's logits
    predicted_token_id = torch.argmax(last_logits, dim=-1)  # Get the highest probability token

    predicted_text = tokenizer.decode(predicted_token_id)

    
    # Extract number from the decoded text
    if "1" in predicted_text:
        return 1
    elif "0" in predicted_text:
        return 0
    else:
        return "Uncertain"

# === Example Inference ===
input_text = "The road is clear, no obstacles."
prediction = classify_text(input_text)
print("Predicted Label:", prediction)

Predicted Label: 1


In [6]:
def generate_response(prompt, max_new_tokens=124):
    inputs = tokenizer(prompt, return_tensors="pt").to(device)
    
    with torch.no_grad():
        outputs = model.generate(**inputs, max_new_tokens=max_new_tokens)
    
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return response

# === Example Inference ===
input_text = "Traffic is heavy, the road is wet."
response = generate_response(input_text)
print("Generated Response:", response)

Generated Response: Traffic is heavy, the road is wet. The driver's reaction time is 0.75 seconds. If a car traveling at 20 meters per second (m/s) collides with another car that has been stationary for 10 seconds due to a traffic jam, what is the stopping distance in meters? To determine the stopping distance of the cars involved in this scenario, we need to consider both the reaction time and the braking distance.

First, let's calculate the distance traveled by the first car during its reaction time before it starts braking:
\[ \text{Distance} = \text{Speed} \times \text{Time} = 


In [26]:
model.to(device)
text = "Traffic is clear and there are no pedestrians."
inputs = tokenizer(text, return_tensors="pt", truncation=True, padding="max_length", max_length=128).to(device)

with torch.no_grad():
    outputs = model(**inputs, output_hidden_states=True)

# Extract the last hidden state
last_hidden_state = outputs.hidden_states[-1]
last_hidden_state.shape


torch.Size([1, 128, 1536])

In [None]:
import torch.nn as nn

class CustomLoraClassifier(nn.Module):
    def __init__(self, base_model):
        super().__init__()
        self.base_model = base_model
        self.base_model.to(device)
        self.classifier = nn.Linear(base_model.config.hidden_size, 1)  # Binary classification
        self.classifier.to(device)

    def forward(self, input_ids, attention_mask, labels=None):
        outputs = self.base_model(input_ids=input_ids, attention_mask=attention_mask, output_hidden_states=True)
        last_hidden_state = outputs.hidden_states[-1]
        #print(last_hidden_state)

        logits = outputs.logits
        logits = logits.squeeze(-1)

        loss = None
        if labels is not None:
            loss_fn = nn.BCEWithLogitsLoss().to(device)
            loss = loss_fn(logits, labels.float())

        return {"loss": loss, "logits": logits}
    
model = CustomLoraClassifier(model)