In [6]:
import torch
from transformers import DistilBertTokenizer, DistilBertForSequenceClassification, Trainer, TrainingArguments
from datasets import load_dataset
import evaluate
from peft import BOFTConfig, get_peft_model

In [7]:
torch.cuda.is_available()
torch.cuda.current_device()

AssertionError: Torch not compiled with CUDA enabled

In [8]:
# Load the model and tokenizer
model_name = "distilbert-base-uncased"
tokenizer = DistilBertTokenizer.from_pretrained(model_name)
model1 = DistilBertForSequenceClassification.from_pretrained(model_name, num_labels=2)

# Check if CUDA is available and set the device accordingly
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Move the model to the GPU (if available)
model1 = model1.to(device)

# Define BOFT configuration
boft_config = BOFTConfig(
    boft_block_size=4,  # Define block size
    boft_n_butterfly_factor=2,  # Butterfly factor for BOFT
    target_modules=["q_lin", "k_lin", "v_lin", "out_lin"],  # Target modules for BOFT
    boft_dropout=0.1,  # Dropout for BOFT
    bias="boft_only",  # Only train bias parameters in BOFT
    modules_to_save=["classifier"],  # Save the classification head
)

# Apply the PEFT configuration to the model
model = get_peft_model(model1, boft_config)

# Load the dataset
dataset = load_dataset("glue", "mrpc")

# Tokenize the dataset
def tokenize_function(examples):
    return tokenizer(examples["sentence1"], examples["sentence2"], truncation=True, padding=True)

tokenized_datasets = dataset.map(tokenize_function, batched=True)

# Define the training arguments
training_args = TrainingArguments(
    output_dir="./results",
    evaluation_strategy="epoch",
    per_device_train_batch_size=16,  # batch size (16 if more memory, 8 otherwise)
    per_device_eval_batch_size=16,
    num_train_epochs=3,
    weight_decay=0.01,
    logging_dir='./logs',
    logging_steps=10,
    fp16=False,
    report_to="none",  # Disable reporting (optional)
)

# Define a compute metrics function for evaluation
metric = evaluate.load("glue", "mrpc")

def compute_metrics(eval_pred):
    logits, labels = eval_pred
    predictions = torch.argmax(logits, dim=-1)
    return metric.compute(predictions=predictions, references=labels)

# Initialize the Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_datasets["train"],
    eval_dataset=tokenized_datasets["validation"],
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
)

# Fine-tune the model
trainer.train()

# Save the model, tokenizer, and training arguments after training
trainer.save_model("./results/model_checkpoint")
tokenizer.save_pretrained("./results/model_checkpoint")




Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Using /Users/valentinpiquerez/Library/Caches/torch_extensions/py311_cpu as PyTorch extensions root...
Using /Users/valentinpiquerez/Library/Caches/torch_extensions/py311_cpu as PyTorch extensions root...
No modifications detected for re-loaded extension module fbd_cuda, skipping build step...
Loading extension module fbd_cuda...
Using /Users/valentinpiquerez/Library/Caches/torch_extensions/py311_cpu as PyTorch extensions root...
No modifications detected for re-loaded extension module fbd_cuda, skipping build step...
Loading extension module fbd_cuda...
Using /Users/valentinpiquerez/Library/Caches/torch_extensions/py311_cpu as PyTorch exte

{'loss': 0.6668, 'grad_norm': 0.9638813138008118, 'learning_rate': 4.9275362318840584e-05, 'epoch': 0.04}


  3%|▎         | 18/690 [01:55<1:13:28,  6.56s/it]

KeyboardInterrupt: 

In [4]:
# Load the tokenizer and model from the saved checkpoint
trainer = "./results/model_checkpoint"

# Load the tokenizer
tokenizer = DistilBertTokenizer.from_pretrained(trainer)

# Load the trained model
model = DistilBertForSequenceClassification.from_pretrained(trainer)

# Move the model to the appropriate device (GPU/CPU)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)

Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Using C:\Users\Vali\AppData\Local\torch_extensions\torch_extensions\Cache\py312_cu124 as PyTorch extensions root...
Using C:\Users\Vali\AppData\Local\torch_extensions\torch_extensions\Cache\py312_cu124 as PyTorch extensions root...
No modifications detected for re-loaded extension module fbd_cuda, skipping build step...
Loading extension module fbd_cuda...
Using C:\Users\Vali\AppData\Local\torch_extensions\torch_extensions\Cache\py312_cu124 as PyTorch extensions root...
No modifications detected for re-loaded extension module fbd_cuda, skipping build step...
Loading extension module fbd_cuda...
Using C:\Users\Vali\AppData\Local\torch_exten

In [1]:
import numpy as np

# Evaluate the model and retrieve logits and labels from eval_results
eval_results = trainer.predict(tokenized_datasets["validation"])

# Extract the logits (model's raw predictions) and labels
logits = eval_results.predictions
labels = eval_results.label_ids

# Convert logits (if they are NumPy arrays) to a consolidated NumPy array and then to a PyTorch tensor
logits = torch.tensor(np.array(logits))

# Compute the evaluation metrics
metrics = compute_metrics((logits, labels))
print(f"Evaluation Metrics: {metrics}")


NameError: name 'trainer' is not defined