In [1]:
pip install torch torchvision transformers datasets peft




In [2]:
import torch
from torch.utils.data import DataLoader
from transformers import AutoTokenizer, AutoModelForSequenceClassification, AdamW, DataCollatorWithPadding
from datasets import load_dataset
from tqdm import tqdm
from peft import LoraConfig, get_peft_model

# Load the MRPC dataset
dataset = load_dataset('glue', 'mrpc')

# Load the tokenizer for google/gemma-2b
!huggingface-cli login # ask for token for gemma
tokenizer = AutoTokenizer.from_pretrained('google/gemma-2b')

# Tokenize the dataset
def tokenize_function(examples):
    return tokenizer(examples['sentence1'], examples['sentence2'], padding='max_length', truncation=True, max_length=128)

tokenized_datasets = dataset.map(tokenize_function, batched=True)

# Ensure the labels are present and correctly formatted
tokenized_datasets = tokenized_datasets.rename_column("label", "labels")
tokenized_datasets.set_format(type='torch', columns=['input_ids', 'attention_mask', 'labels'])

# Data collator that will dynamically pad the inputs
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

# Create DataLoaders
train_dataloader = DataLoader(tokenized_datasets['train'], batch_size=2, shuffle=True, collate_fn=data_collator)
val_dataloader = DataLoader(tokenized_datasets['validation'], batch_size=2, collate_fn=data_collator)

# Device configuration
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# Load a pretrained google/gemma-2b model for sequence classification
model = AutoModelForSequenceClassification.from_pretrained('google/gemma-2b', num_labels=2)

# Define LoRA configuration
best_r = 4  # Example value, should be tuned
lora_config = LoraConfig(
     r=16,
    lora_alpha=32, #should be about r*2.
    lora_dropout=0.05,
    task_type="classification",
    target_modules=['q_proj', 'v_proj'], # other layer names in this model
    modules_to_save=['pooler', 'classifier','score.weight']
)

# Integrate LoRA with the model
model = get_peft_model(model, lora_config)
model.to(device)

def train(model, train_dataloader, optimizer, device, criterion=torch.nn.CrossEntropyLoss()):
    model.train()
    total_loss = 0

    for batch in tqdm(train_dataloader, desc="Training"):
        torch.cuda.empty_cache()
        inputs = {key: val.to(device) for key, val in batch.items() if key != 'labels'}
        labels = batch['labels'].to(device)

        optimizer.zero_grad()

        outputs = model(**inputs, labels=labels)
        loss = outputs.loss
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    average_loss = total_loss / len(train_dataloader)
    return average_loss

# Optimizer
optimizer = AdamW(model.parameters(), lr=2e-5)

def evaluate(model, val_dataloader, device):
    model.eval()
    total_correct = 0
    total_samples = 0

    with torch.no_grad():
        for batch in tqdm(val_dataloader, desc="Evaluating"):
            torch.cuda.empty_cache()
            inputs = {key: val.to(device) for key, val in batch.items() if key != 'labels'}
            labels = batch['labels'].to(device)

            outputs = model(**inputs)
            predictions = torch.argmax(outputs.logits, dim=-1)

            total_correct += (predictions == labels).sum().item()
            total_samples += labels.size(0)

    accuracy = total_correct / total_samples
    return accuracy

# Training loop
num_epochs = 5
for epoch in range(num_epochs):
    train_loss = train(model, train_dataloader, optimizer, device)
    print(f"Epoch {epoch+1}/{num_epochs}, Train Loss: {train_loss:.4f}")
    val_accuracy = evaluate(model, val_dataloader, device)
    print(f"Validation Accuracy: {val_accuracy:.4f}")


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Downloading readme:   0%|          | 0.00/35.3k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/649k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/75.7k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/308k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/3668 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/408 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/1725 [00:00<?, ? examples/s]


    _|    _|  _|    _|    _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|_|_|_|    _|_|      _|_|_|  _|_|_|_|
    _|    _|  _|    _|  _|        _|          _|    _|_|    _|  _|            _|        _|    _|  _|        _|
    _|_|_|_|  _|    _|  _|  _|_|  _|  _|_|    _|    _|  _|  _|  _|  _|_|      _|_|_|    _|_|_|_|  _|        _|_|_|
    _|    _|  _|    _|  _|    _|  _|    _|    _|    _|    _|_|  _|    _|      _|        _|    _|  _|        _|
    _|    _|    _|_|      _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|        _|    _|    _|_|_|  _|_|_|_|

    To login, `huggingface_hub` requires a token generated from https://huggingface.co/settings/tokens .
Enter your token (input will not be visible): 
Add token as git credential? (Y/n) n
Token is valid (permission: read).
Your token has been saved to /root/.cache/huggingface/token
Login successful


tokenizer_config.json:   0%|          | 0.00/33.6k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/4.24M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/17.5M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/636 [00:00<?, ?B/s]

Map:   0%|          | 0/3668 [00:00<?, ? examples/s]

Map:   0%|          | 0/408 [00:00<?, ? examples/s]

Map:   0%|          | 0/1725 [00:00<?, ? examples/s]

config.json:   0%|          | 0.00/627 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/13.5k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/4.95G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/67.1M [00:00<?, ?B/s]

`config.hidden_act` is ignored, you should use `config.hidden_activation` instead.
Gemma's activation function will be set to `gelu_pytorch_tanh`. Please, use
`config.hidden_activation` if you want to override this behaviour.
See https://github.com/huggingface/transformers/pull/29402 for more details.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Some weights of GemmaForSequenceClassification were not initialized from the model checkpoint at google/gemma-2b and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Training: 100%|██████████| 1834/1834 [19:04<00:00,  1.60it/s]


Epoch 1/5, Train Loss: 0.6066


Evaluating: 100%|██████████| 204/204 [01:06<00:00,  3.07it/s]


Validation Accuracy: 0.7672


Training: 100%|██████████| 1834/1834 [19:09<00:00,  1.60it/s]


Epoch 2/5, Train Loss: 0.4310


Evaluating: 100%|██████████| 204/204 [01:06<00:00,  3.06it/s]


Validation Accuracy: 0.8456


Training: 100%|██████████| 1834/1834 [19:09<00:00,  1.60it/s]


Epoch 3/5, Train Loss: 0.3157


Evaluating: 100%|██████████| 204/204 [01:06<00:00,  3.05it/s]


Validation Accuracy: 0.8333


Training: 100%|██████████| 1834/1834 [18:58<00:00,  1.61it/s]


Epoch 4/5, Train Loss: 0.2151


Evaluating: 100%|██████████| 204/204 [01:04<00:00,  3.17it/s]


Validation Accuracy: 0.8333


Training: 100%|██████████| 1834/1834 [18:40<00:00,  1.64it/s]


Epoch 5/5, Train Loss: 0.1279


Evaluating: 100%|██████████| 204/204 [01:04<00:00,  3.19it/s]

Validation Accuracy: 0.8431



