In [1]:
!pip install transformers datasets accelerate bitsandbytes
!pip install peft
!pip install -U "huggingface_hub[cli]"
!huggingface-cli login --token 


The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.
Token is valid (permission: fineGrained).
The token `my token` has been saved to /root/.cache/huggingface/stored_tokens
Your token has been saved to /root/.cache/huggingface/token
Login successful.
The current active token is: `my token`


In [2]:
import os
import torch
import time
import numpy as np
from datasets import load_dataset
from transformers import (
    AutoModelForSequenceClassification,
    AutoTokenizer,
    TrainingArguments,
    Trainer,
    BitsAndBytesConfig
)
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
from sklearn.metrics import accuracy_score

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")


Using device: cuda


In [3]:
dataset = load_dataset("stanfordnlp/snli")

train_indices = list(range(0, 550000, 550))[:1000]
val_indices = list(range(0, 10000, 100))[:100]
test_indices = list(range(0, 10000, 100))[:100]

train_dataset = dataset['train'].select(train_indices)
val_dataset = dataset['validation'].select(val_indices)
test_dataset = dataset['test'].select(test_indices)

train_dataset = train_dataset.filter(lambda x: x['label'] != -1)
val_dataset = val_dataset.filter(lambda x: x['label'] != -1)
test_dataset = test_dataset.filter(lambda x: x['label'] != -1)

print(f"Train Dataset: {len(train_dataset)} samples")
print(f"Validation Dataset: {len(val_dataset)} samples")
print(f"Test Dataset: {len(test_dataset)} samples")


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Train Dataset: 1000 samples
Validation Dataset: 99 samples
Test Dataset: 100 samples


In [4]:

MODEL_NAME = "microsoft/phi-2"
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)

if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

def tokenize_function(examples):
    custom_prompts = [
        f"Premise: '{premise}' | Hypothesis: '{hypothesis}' | Choose: entailment (0), neutral (1), or contradiction (2)."
        for premise, hypothesis in zip(examples['premise'], examples['hypothesis'])
    ]
    return tokenizer(
        custom_prompts,
        padding='max_length',
        truncation=True,
        max_length=128,
        return_tensors="pt"
    )

def prepare_dataset(dataset):
    return dataset.map(tokenize_function, batched=True, remove_columns=['premise', 'hypothesis'])

train_tokenized = prepare_dataset(train_dataset)
val_tokenized = prepare_dataset(val_dataset)
test_tokenized = prepare_dataset(test_dataset)


Map:   0%|          | 0/99 [00:00<?, ? examples/s]

In [5]:

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True
)


model = AutoModelForSequenceClassification.from_pretrained(
    MODEL_NAME,
    num_labels=3,
    quantization_config=bnb_config,
    trust_remote_code=True
)


model = prepare_model_for_kbit_training(model)


lora_config = LoraConfig(
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none"
)


model = get_peft_model(model, lora_config)


model.config.pad_token_id = tokenizer.pad_token_id
model.print_trainable_parameters()

`low_cpu_mem_usage` was None, now set to True since model is quantized.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Some weights of PhiForSequenceClassification were not initialized from the model checkpoint at microsoft/phi-2 and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [12]:
def collate_fn(batch):
    input_ids = torch.stack([item['input_ids'] for item in batch])
    attention_mask = torch.stack([item['attention_mask'] for item in batch])
    labels = torch.tensor([item['labels'] for item in batch])

    return {
        'input_ids': input_ids,
        'attention_mask': attention_mask,
        'labels': labels
    }

def evaluate_model(model, test_dataloader):
    model.eval()
    all_preds = []
    all_labels = []

    with torch.no_grad():
        for batch in test_dataloader:
            input_ids = batch["input_ids"].to(device)
            attention_mask = batch["attention_mask"].to(device)
            labels = batch["labels"].to(device)

            outputs = model(input_ids=input_ids, attention_mask=attention_mask)
            logits = outputs.logits
            predictions = torch.argmax(logits, dim=-1)

            all_preds.extend(predictions.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

    accuracy = accuracy_score(all_labels, all_preds)
    return accuracy

test_dataloader = torch.utils.data.DataLoader(
    test_tokenized,
    batch_size=8,
    shuffle=False,
    collate_fn=collate_fn
)

try:
    print("Starting evaluation...")
    pretrained_accuracy = evaluate_model(model, test_dataloader)
    print(f"Accuracy of the pretrained model on the test set: {pretrained_accuracy:.4f}")
except Exception as e:
    print(f"Error during evaluation: {str(e)}")

    print("\nDebug information:")
    sample_batch = next(iter(test_dataloader))
    print("Sample batch keys:", sample_batch.keys())
    print("Input IDs shape:", sample_batch["input_ids"].shape)
    print("Attention mask shape:", sample_batch["attention_mask"].shape)
    print("Labels shape:", sample_batch["labels"].shape)


Starting evaluation...
Error during evaluation: expected Tensor as element 0 in argument 0, but got list

Debug information:


TypeError: expected Tensor as element 0 in argument 0, but got list

In [7]:

training_args = TrainingArguments(
    output_dir="./phi2_nli_lora",
    num_train_epochs=5,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    gradient_checkpointing=True,
    max_grad_norm=0.3,
    warmup_steps=50,
    weight_decay=0.001,
    learning_rate=2e-4,
    evaluation_strategy="epoch",
    save_strategy="epoch",
    logging_steps=10,
    logging_dir="./logs",
    save_total_limit=5,
    report_to=[],
)


os.environ["CUDA_LAUNCH_BLOCKING"] = "1"
os.environ["WANDB_DISABLED"] = "true"




In [8]:

def compute_metrics(eval_pred):
    predictions, labels = eval_pred
    predictions = np.argmax(predictions, axis=1)
    return {'accuracy': accuracy_score(labels, predictions)}


trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_tokenized,
    eval_dataset=val_tokenized,
    compute_metrics=compute_metrics
)


In [9]:

start_time = time.time()
print("Starting fine-tuning...")

trainer.train()

end_time = time.time()
print(f"Time taken for fine-tuning: {end_time - start_time:.2f} seconds")


trainer.model.save_pretrained("./final_phi2_model")


Starting fine-tuning...


`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
  return fn(*args, **kwargs)


Epoch,Training Loss,Validation Loss,Accuracy
1,1.0128,0.93474,0.464646
2,0.3854,0.435418,0.848485
3,0.2847,0.551514,0.848485
4,0.5534,0.643052,0.868687
5,0.1688,0.664466,0.868687


We detected that you are passing `past_key_values` as a tuple and this is deprecated and will be removed in v4.43. Please use an appropriate `Cache` class (https://huggingface.co/docs/transformers/v4.41.3/en/internal/generation_utils#transformers.Cache)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)


Time taken for fine-tuning: 3191.58 seconds


In [10]:
print("\nResource Usage:")
print(f"Training time: {end_time - start_time:.2f} seconds")
print(f"GPU memory used: {torch.cuda.max_memory_allocated() / 1e9:.2f} GB")


Resource Usage:
Training time: 3191.58 seconds
GPU memory used: 3.65 GB


In [11]:
test_dataloader = torch.utils.data.DataLoader(
    test_tokenized,
    batch_size=8,
    shuffle=False,
    collate_fn=collate_fn
)

try:
    print("Starting evaluation on the fine-tuned model...")
    fine_tuned_accuracy = evaluate_model(model, test_dataloader)
    print(f"Accuracy of the fine-tuned model on the test set: {fine_tuned_accuracy:.4f}")
except Exception as e:
    print(f"Error during evaluation: {str(e)}")

    print("\nDebug information:")
    sample_batch = next(iter(test_dataloader))
    print("Sample batch keys:", sample_batch.keys())
    print("Input IDs shape:", sample_batch["input_ids"].shape)
    print("Attention mask shape:", sample_batch["attention_mask"].shape)
    print("Labels shape:", sample_batch["labels"].shape)


Starting evaluation on the fine-tuned model...
Error during evaluation: 'labels'

Debug information:


KeyError: 'labels'

In [None]:
from transformers import AutoModelForSequenceClassification, AutoTokenizer
import torch

model_dir = "/kaggle/working/final_phi2_model"
tokenizer = AutoTokenizer.from_pretrained(model_dir, trust_remote_code=True)
model = AutoModelForSequenceClassification.from_pretrained(model_dir, trust_remote_code=True)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)


model.eval()


def predict(model, premise, hypothesis):
    custom_prompt = f"Premise: '{premise}' | Hypothesis: '{hypothesis}' | Choose: entailment (0), neutral (1), or contradiction (2)."

    inputs = tokenizer(
        custom_prompt,
        return_tensors="pt",
        padding=True,
        truncation=True,
        max_length=128
    ).to(device)

    with torch.no_grad():
        outputs = model(**inputs)
        logits = outputs.logits
        prediction = torch.argmax(logits, dim=-1).item()

    return prediction


premise = "A man inspects the uniform of a figure in some East Asian country."
hypothesis = "The man is sleeping."
predicted_label = predict(model, premise, hypothesis)

label_map = {0: "entailment", 1: "neutral", 2: "contradiction"}
print(f"Prediction: {label_map[predicted_label]}")
