In [1]:
import torch
import os
import wandb  # Import the wandb library
from datasets import load_dataset
from transformers import AutoTokenizer, TrainingArguments
from adapters import AutoAdapterModel, AdapterConfig, AdapterTrainer
from adapters.composition import Stack
from sklearn.metrics import accuracy_score, precision_recall_fscore_support



In [None]:
# 1. Setup and Imports
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Device running on -----------------------> {device}")

# Initialize Weights & Biases
#wandb.init(project="sentiment-analysis-adapter", entity="adapterrun", settings=wandb.Settings(init_timeout=600, _service_wait=600))
wandb.init(project="sentiment-analysis-adapter", entity="adapterrun",mode="disabled")


# 2. Load Model and Tokenizer
model_name = "bert-base-uncased"
model = AutoAdapterModel.from_pretrained(model_name).to(device)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# 3. Load Dataset
dataset = load_dataset("glue", "sst2")

# 4. Tokenize Dataset
def tokenize_function(examples):
    return tokenizer(examples["sentence"], truncation=True, padding="max_length", max_length=128)

tokenized_datasets = dataset.map(tokenize_function, batched=True)

# 5. Add Adapters
adapter_config = AdapterConfig.load("pfeiffer", reduction_factor=16)
model.add_adapter("adapter1", config=adapter_config)
model.add_adapter("adapter2", config=adapter_config)
model.add_classification_head("sst2", num_labels=2)

# 6. Set Active Adapters and Train Adapter
model.set_active_adapters(Stack("adapter1", "adapter2"))
model.train_adapter(["adapter1", "adapter2"])

# 7. Prepare for Training
training_args = TrainingArguments(
    output_dir="./results",
    learning_rate=1e-4,
    per_device_train_batch_size=32,
    per_device_eval_batch_size=32,
    num_train_epochs=2,
    weight_decay=0.01,
    evaluation_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,
)

# Log hyperparameters to W&B
wandb.config.update({
    "learning_rate": training_args.learning_rate,
    "num_train_epochs": training_args.num_train_epochs,
    "train_batch_size": training_args.per_device_train_batch_size,
})

# 8. Define Metrics Function
def compute_metrics(pred):
    labels = pred.label_ids
    preds = pred.predictions.argmax(-1)
    precision, recall, f1, _ = precision_recall_fscore_support(labels, preds, average='binary')
    acc = accuracy_score(labels, preds)
    
    # Log metrics to W&B
    wandb.log({
        'accuracy': acc,
        'f1': f1,
        'precision': precision,
        'recall': recall
    })
    
    return {
        'accuracy': acc,
        'f1': f1,
        'precision': precision,
        'recall': recall
    }

# 9. Initialize Trainer
trainer = AdapterTrainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_datasets["train"],
    eval_dataset=tokenized_datasets["validation"],
    compute_metrics=compute_metrics,
)

# 10. Train the Model and log loss to W&B
trainer.train()

# 11. Evaluate the Model and log evaluation metrics to W&B
eval_results = trainer.evaluate()
print(f"Evaluation Results: {eval_results}")
wandb.log(eval_results)

# 12. Inference
text = "This movie was fantastic!"
inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True, max_length=128).to(device)
with torch.no_grad():
    outputs = model(**inputs)
    predictions = torch.softmax(outputs.logits, dim=-1)
    label = torch.argmax(predictions, dim=-1)

print(f"Input: {text}")
print(f"Prediction: {'Positive' if label.item() == 1 else 'Negative'}")
print(f"Confidence: {predictions[0][label.item()].item():.4f}")



Device running on -----------------------> cuda


config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

BertAdapterModel has generative capabilities, as `prepare_inputs_for_generation` is explicitly overwritten. However, it doesn't directly inherit from `GenerationMixin`. From 👉v4.50👈 onwards, `PreTrainedModel` will NOT inherit from `GenerationMixin`, and this model will lose the ability to call `generate` and other related functions.
  - If you are the owner of the model architecture code, please modify your model class such that it inherits from `GenerationMixin` (after `PreTrainedModel`, otherwise you'll get an exception).
  - If you are not the owner of the model architecture class, please contact the model code owner to update it.
Some weights of BertAdapterModel were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['heads.default.3.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

README.md:   0%|          | 0.00/35.3k [00:00<?, ?B/s]

train-00000-of-00001.parquet:   0%|          | 0.00/3.11M [00:00<?, ?B/s]

validation-00000-of-00001.parquet:   0%|          | 0.00/72.8k [00:00<?, ?B/s]

test-00000-of-00001.parquet:   0%|          | 0.00/148k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/67349 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/872 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/1821 [00:00<?, ? examples/s]

Map:   0%|          | 0/67349 [00:00<?, ? examples/s]

Map:   0%|          | 0/872 [00:00<?, ? examples/s]

Map:   0%|          | 0/1821 [00:00<?, ? examples/s]



Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.2169,0.246904,0.91055,0.913907,0.896104,0.932432
2,0.1726,0.242401,0.911697,0.914729,0.899782,0.93018


Overwriting existing adapter 'adapter1'.
  state_dict = torch.load(weights_file, map_location="cpu")
Overwriting existing adapter 'adapter2'.


Evaluation Results: {'eval_loss': 0.24240106344223022, 'eval_accuracy': 0.911697247706422, 'eval_f1': 0.9147286821705426, 'eval_precision': 0.8997821350762527, 'eval_recall': 0.9301801801801802, 'eval_runtime': 4.3438, 'eval_samples_per_second': 200.747, 'eval_steps_per_second': 6.446, 'epoch': 2.0}
Input: This movie was fantastic!
Prediction: Positive
Confidence: 0.9995


In [13]:
# Save adapters and head for future use
save_directory = "./adapters_save_directory"
os.makedirs(save_directory, exist_ok=True)
model.save_all_adapters(save_directory)
model.save_head(os.path.join(save_directory, "sst2_head"))

# 14. Load the Model and Adapters (for future use)
loaded_model = AutoAdapterModel.from_pretrained(model_name).to(device)

try:
    loaded_model.load_adapter(os.path.join(save_directory, "adapter1"))
    loaded_model.load_adapter(os.path.join(save_directory, "adapter2"))
    
    # Ensure that the head is loaded correctly with a check for its existence.
    head_path = os.path.join(save_directory, "sst2_head")
    
    if os.path.exists(head_path):
        loaded_model.load_head(head_path)
        print("Head loaded successfully.")
        
        loaded_model.set_active_adapters(Stack("adapter1", "adapter2"))
        
    else:
        print(f"Warning: Head not found at {head_path}. Please check if it was saved correctly.")
    
except Exception as e:
    print(f"Error loading model components: {e}")

# Finish the W&B run
wandb.finish()


Some weights of BertAdapterModel were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['heads.default.3.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Error loading model components: 'NoneType' object has no attribute 'get'
