In [15]:
!pip install transformers datasets accelerate evaluate peft bitsandbytes sentencepiece scipy -q

from huggingface_hub import login
#login(Token)

In [16]:
import os
import sys
import random
import platform
import torch
import numpy as np
from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig

MODEL_ID = "meta-llama/Llama-3.2-1B-Instruct"
SEED = 42

def pick_device():
    if torch.cuda.is_available():
        return torch.device("cuda")
    if torch.backends.mps.is_available():
        return torch.device("mps")
    return torch.device("cpu")

device = pick_device()
print(f"Python: {sys.version}")
print(f"Device: {device}")

# Seeding
def set_seed(seed, device):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if device.type == "cuda":
        torch.cuda.manual_seed_all(seed)

set_seed(SEED, device)
DTYPE = torch.float16 if device.type in ("cuda", "mps") else torch.float32

Python: 3.12.12 (main, Oct 10 2025, 08:52:57) [GCC 11.4.0]
Device: cuda


In [17]:
from datasets import ClassLabel, Features, Value, load_dataset

def load_cardiff_english():
    try:
        return load_dataset(
            "cardiffnlp/tweet_sentiment_multilingual",
            name="english",
            revision="main",
            download_mode="reuse_cache_if_exists",
        )
    except Exception:
        print("[INFO] Fallback to direct JSON URLs.", flush=True)
        base = "https://huggingface.co/datasets/cardiffnlp/tweet_sentiment_multilingual/resolve/main/data/english"
        data_files = {
            "train": f"{base}/train.jsonl",
            "validation": f"{base}/validation.jsonl",
            "test": f"{base}/test.jsonl",
        }
        ds = load_dataset("json", data_files=data_files)
        feats = Features({
            "text": Value("string"),
            "label": ClassLabel(names=["negative","neutral","positive"]),
        })
        return ds.cast(feats)

ds = load_cardiff_english()
label_names = ds["train"].features["label"].names
print("Labels:", label_names)

[INFO] Fallback to direct JSON URLs.
Labels: ['negative', 'neutral', 'positive']


In [18]:
print(f"Loading model: {MODEL_ID}...")
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
if tokenizer.pad_token_id is None:
    tokenizer.pad_token_id = tokenizer.eos_token_id

model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    torch_dtype=DTYPE,
    device_map="auto" if device.type == "cuda" else None
)

if device.type != "cuda":
    model.to(device)

print("Model loaded.")

Loading model: meta-llama/Llama-3.2-1B-Instruct...
Model loaded.


In [19]:
tokenizer.padding_side = "left"

if tokenizer.pad_token_id is None:
    tokenizer.pad_token_id = tokenizer.eos_token_id

In [20]:
from sklearn.metrics import classification_report
from tqdm import tqdm

def evaluate_model(dataset, prompt_fn, batch_size=8):
    prompts = [prompt_fn(row["text"]) for row in dataset]
    labels = [label_names[row["label"]] for row in dataset]
    preds = []

    model.eval()

    debug_printed = False

    with torch.inference_mode():
        for i in tqdm(range(0, len(prompts), batch_size), desc="Evaluating"):
            batch_prompts = prompts[i:i+batch_size]

            inputs = tokenizer(batch_prompts, return_tensors="pt", padding=True, truncation=True).to(device)
            input_len = inputs.input_ids.shape[1]

            outputs = model.generate(
                **inputs,
                max_new_tokens=10,
                pad_token_id=tokenizer.pad_token_id,
                eos_token_id=tokenizer.eos_token_id,
                do_sample=False
            )

            generated_tokens = outputs[:, input_len:]
            decoded = tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)

            if not debug_printed:
                print("\n\n[DEBUG] checking model outputs...")
                for j in range(min(3, len(decoded))):
                    print(f"  Prompt end: '...{batch_prompts[j][-30:]}'")
                    print(f"  Raw Model Output: '{decoded[j]}'")
                    print("-" * 30)
                debug_printed = True

            for response in decoded:
                response = response.lower().strip()
                pred = "neutral" # default

                for label in label_names:
                    if label in response:
                        pred = label
                        break
                preds.append(pred)

    print(classification_report(labels, preds, labels=label_names))
    return preds

print("Retesting with Left-Padding and Debug Prints...")
evaluate_model(ds["test"], zero_shot_prompt)

Retesting with Left-Padding and Debug Prints...


Evaluating:   2%|▏         | 2/109 [00:00<00:19,  5.47it/s]



[DEBUG] checking model outputs...
  Prompt end: '...<|end_header_id|>

Sentiment: '
  Raw Model Output: ' Negative'
------------------------------
  Prompt end: '...<|end_header_id|>

Sentiment: '
  Raw Model Output: ' Negative'
------------------------------
  Prompt end: '...<|end_header_id|>

Sentiment: '
  Raw Model Output: ' Positive'
------------------------------


Evaluating: 100%|██████████| 109/109 [00:19<00:00,  5.60it/s]

              precision    recall  f1-score   support

    negative       0.54      0.90      0.68       290
     neutral       0.46      0.25      0.33       290
    positive       0.77      0.61      0.68       290

    accuracy                           0.59       870
   macro avg       0.59      0.59      0.56       870
weighted avg       0.59      0.59      0.56       870






['negative',
 'negative',
 'positive',
 'negative',
 'negative',
 'positive',
 'negative',
 'negative',
 'neutral',
 'negative',
 'negative',
 'positive',
 'negative',
 'positive',
 'positive',
 'negative',
 'neutral',
 'positive',
 'negative',
 'negative',
 'positive',
 'negative',
 'negative',
 'positive',
 'negative',
 'negative',
 'positive',
 'neutral',
 'neutral',
 'positive',
 'negative',
 'negative',
 'negative',
 'negative',
 'neutral',
 'neutral',
 'negative',
 'negative',
 'negative',
 'negative',
 'neutral',
 'positive',
 'negative',
 'negative',
 'positive',
 'negative',
 'neutral',
 'positive',
 'negative',
 'positive',
 'negative',
 'negative',
 'neutral',
 'positive',
 'negative',
 'neutral',
 'positive',
 'negative',
 'positive',
 'negative',
 'negative',
 'negative',
 'positive',
 'negative',
 'positive',
 'neutral',
 'negative',
 'neutral',
 'positive',
 'negative',
 'negative',
 'positive',
 'negative',
 'negative',
 'positive',
 'negative',
 'neutral',
 'positive',

In [21]:
from peft import LoraConfig, get_peft_model, TaskType
from transformers import TrainingArguments, Trainer, DataCollatorForLanguageModeling

print("Reloading base model for LoRA experiment...")
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    torch_dtype=DTYPE,
    device_map="auto" if device.type == "cuda" else None
)
if device.type != "cuda":
    model.to(device)

lora_config = LoraConfig(
    task_type=TaskType.CAUSAL_LM,
    inference_mode=False,
    r=8,              # Rank
    lora_alpha=32,    # Alpha
    lora_dropout=0.1,
    target_modules=["q_proj", "v_proj"] # Standard for Llama attention
)

model = get_peft_model(model, lora_config)
print("\nLoRA Trainable Parameters:")
model.print_trainable_parameters()

tokenizer.padding_side = "right"

def tokenize_function_train(examples):
    prompts = [
        zero_shot_prompt(text) + label_names[label] + tokenizer.eos_token
        for text, label in zip(examples["text"], examples["label"])
    ]
    return tokenizer(prompts, padding="max_length", truncation=True, max_length=128)

tokenized_datasets_lora = ds.map(tokenize_function_train, batched=True)

lora_training_args = TrainingArguments(
    output_dir="./results_lora",
    per_device_train_batch_size=8,
    num_train_epochs=1,
    learning_rate=2e-4,  # Standard LoRA LR
    logging_steps=10,
    fp16=(device.type == "cuda"),
    report_to="none"
)

trainer = Trainer(
    model=model,
    args=lora_training_args,
    train_dataset=tokenized_datasets_lora["train"],
    data_collator=DataCollatorForLanguageModeling(tokenizer, mlm=False),
)

print("Starting LoRA Fine-Tuning...")
trainer.train()

tokenizer.padding_side = "left"
print("\n=== LoRA Evaluation ===")
evaluate_model(ds["test"], zero_shot_prompt)

Reloading base model for LoRA experiment...

LoRA Trainable Parameters:
trainable params: 851,968 || all params: 1,236,666,368 || trainable%: 0.0689


Map:   0%|          | 0/1839 [00:00<?, ? examples/s]

Map:   0%|          | 0/324 [00:00<?, ? examples/s]

Map:   0%|          | 0/870 [00:00<?, ? examples/s]

The model is already on multiple devices. Skipping the move to device specified in `args`.


Starting LoRA Fine-Tuning...


Step,Training Loss
10,3.8118
20,2.9201
30,2.4908
40,2.537
50,2.4799
60,2.4129
70,2.4668
80,2.3339
90,2.3244
100,2.4123



=== LoRA Evaluation ===


Evaluating:   1%|          | 1/109 [00:00<00:41,  2.63it/s]



[DEBUG] checking model outputs...
  Prompt end: '...<|end_header_id|>

Sentiment: '
  Raw Model Output: ' negative  #caveman'
------------------------------
  Prompt end: '...<|end_header_id|>

Sentiment: '
  Raw Model Output: ' neutral  #newmexico #politics'
------------------------------
  Prompt end: '...<|end_header_id|>

Sentiment: '
  Raw Model Output: ' positive  #user  #vicepresident '
------------------------------


Evaluating: 100%|██████████| 109/109 [00:41<00:00,  2.62it/s]

              precision    recall  f1-score   support

    negative       0.74      0.73      0.73       290
     neutral       0.52      0.69      0.59       290
    positive       0.86      0.58      0.69       290

    accuracy                           0.67       870
   macro avg       0.70      0.67      0.67       870
weighted avg       0.70      0.67      0.67       870






['negative',
 'neutral',
 'positive',
 'neutral',
 'negative',
 'positive',
 'negative',
 'negative',
 'neutral',
 'negative',
 'positive',
 'positive',
 'neutral',
 'neutral',
 'positive',
 'negative',
 'neutral',
 'neutral',
 'negative',
 'neutral',
 'positive',
 'negative',
 'negative',
 'positive',
 'negative',
 'neutral',
 'positive',
 'neutral',
 'neutral',
 'positive',
 'negative',
 'neutral',
 'negative',
 'negative',
 'neutral',
 'neutral',
 'negative',
 'neutral',
 'negative',
 'negative',
 'neutral',
 'positive',
 'negative',
 'neutral',
 'positive',
 'negative',
 'negative',
 'positive',
 'negative',
 'positive',
 'neutral',
 'negative',
 'neutral',
 'positive',
 'neutral',
 'neutral',
 'positive',
 'negative',
 'positive',
 'neutral',
 'negative',
 'neutral',
 'positive',
 'negative',
 'positive',
 'neutral',
 'negative',
 'neutral',
 'neutral',
 'negative',
 'negative',
 'positive',
 'neutral',
 'neutral',
 'neutral',
 'negative',
 'neutral',
 'positive',
 'negative',
 'n

In [22]:
from peft import IA3Config, get_peft_model, TaskType
from transformers import TrainingArguments, Trainer, DataCollatorForLanguageModeling

print("Reloading base model for IA3 experiment...")
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    torch_dtype=DTYPE,
    device_map="auto" if device.type == "cuda" else None
)
if device.type != "cuda":
    model.to(device)

ia3_config = IA3Config(
    task_type=TaskType.CAUSAL_LM,
    target_modules=["k_proj", "v_proj", "down_proj"],
    feedforward_modules=["down_proj"]
)

model = get_peft_model(model, ia3_config)
print("\nIA3 Trainable Parameters:")
model.print_trainable_parameters()

tokenizer.padding_side = "right"

def tokenize_function_train(examples):
    prompts = [
        zero_shot_prompt(text) + label_names[label] + tokenizer.eos_token
        for text, label in zip(examples["text"], examples["label"])
    ]
    return tokenizer(prompts, padding="max_length", truncation=True, max_length=128)

print("Tokenizing training data...")
tokenized_datasets_ia3 = ds.map(tokenize_function_train, batched=True)

ia3_training_args = TrainingArguments(
    output_dir="./results_ia3",
    per_device_train_batch_size=8,
    num_train_epochs=1,
    learning_rate=5e-3,
    logging_steps=10,
    fp16=(device.type == "cuda"),
    report_to="none"
)

trainer = Trainer(
    model=model,
    args=ia3_training_args,
    train_dataset=tokenized_datasets_ia3["train"],
    data_collator=DataCollatorForLanguageModeling(tokenizer, mlm=False),
)

print("Starting IA3 Fine-Tuning...")
trainer.train()

tokenizer.padding_side = "left"

print("\n=== IA3 Evaluation (Fixed) ===")
evaluate_model(ds["test"], zero_shot_prompt)

Reloading base model for IA3 experiment...

IA3 Trainable Parameters:
trainable params: 147,456 || all params: 1,235,961,856 || trainable%: 0.0119
Tokenizing training data...


Map:   0%|          | 0/1839 [00:00<?, ? examples/s]

Map:   0%|          | 0/324 [00:00<?, ? examples/s]

Map:   0%|          | 0/870 [00:00<?, ? examples/s]

The model is already on multiple devices. Skipping the move to device specified in `args`.


Starting IA3 Fine-Tuning...


Step,Training Loss
10,3.4159
20,2.7073
30,2.4795
40,2.5186
50,2.4633
60,2.3975
70,2.4564
80,2.3198
90,2.3136
100,2.3957



=== IA3 Evaluation (Fixed) ===


Evaluating:   1%|          | 1/109 [00:00<00:12,  8.71it/s]



[DEBUG] checking model outputs...
  Prompt end: '...<|end_header_id|>

Sentiment: '
  Raw Model Output: ' neutral'
------------------------------
  Prompt end: '...<|end_header_id|>

Sentiment: '
  Raw Model Output: ' neutral'
------------------------------
  Prompt end: '...<|end_header_id|>

Sentiment: '
  Raw Model Output: ' positive'
------------------------------


Evaluating: 100%|██████████| 109/109 [00:13<00:00,  8.24it/s]

              precision    recall  f1-score   support

    negative       0.86      0.38      0.53       290
     neutral       0.45      0.90      0.60       290
    positive       0.88      0.49      0.63       290

    accuracy                           0.59       870
   macro avg       0.73      0.59      0.59       870
weighted avg       0.73      0.59      0.59       870






['neutral',
 'neutral',
 'positive',
 'neutral',
 'neutral',
 'positive',
 'neutral',
 'neutral',
 'neutral',
 'negative',
 'neutral',
 'positive',
 'neutral',
 'neutral',
 'positive',
 'neutral',
 'neutral',
 'neutral',
 'negative',
 'neutral',
 'positive',
 'neutral',
 'negative',
 'positive',
 'neutral',
 'neutral',
 'positive',
 'neutral',
 'neutral',
 'positive',
 'negative',
 'neutral',
 'negative',
 'neutral',
 'neutral',
 'neutral',
 'negative',
 'neutral',
 'neutral',
 'negative',
 'neutral',
 'positive',
 'neutral',
 'neutral',
 'positive',
 'neutral',
 'neutral',
 'positive',
 'negative',
 'neutral',
 'neutral',
 'neutral',
 'neutral',
 'positive',
 'neutral',
 'neutral',
 'positive',
 'negative',
 'neutral',
 'positive',
 'negative',
 'neutral',
 'neutral',
 'neutral',
 'neutral',
 'neutral',
 'negative',
 'neutral',
 'positive',
 'neutral',
 'neutral',
 'positive',
 'neutral',
 'neutral',
 'positive',
 'neutral',
 'neutral',
 'positive',
 'negative',
 'neutral',
 'neutral'