In [1]:
!pip install datasets

Collecting datasets
  Downloading datasets-3.1.0-py3-none-any.whl.metadata (20 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting xxhash (from datasets)
  Downloading xxhash-3.5.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multiprocess<0.70.17 (from datasets)
  Downloading multiprocess-0.70.16-py310-none-any.whl.metadata (7.2 kB)
Collecting fsspec<=2024.9.0,>=2023.1.0 (from fsspec[http]<=2024.9.0,>=2023.1.0->datasets)
  Downloading fsspec-2024.9.0-py3-none-any.whl.metadata (11 kB)
Downloading datasets-3.1.0-py3-none-any.whl (480 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m480.6/480.6 kB[0m [31m16.5 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading dill-0.3.8-py3-none-any.whl (116 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m9.6 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading fsspec-2024.9.0-py3-none-any.whl (

In [6]:
from datasets import load_dataset

# Load the CoLA dataset
cola_dataset = load_dataset("glue", "cola")

# Accessing the train, validation, and test sets
train_data = cola_dataset['train']
test_data = cola_dataset['validation']
# test_data = cola_dataset['test']

# Split the train data into new train and development sets
train_dev_split = cola_dataset['train'].train_test_split(test_size=0.1, seed=42)

# New training and development sets
train_data = train_dev_split['train']
dev_data = train_dev_split['test']

In [7]:
from transformers import AutoTokenizer, AutoModel
from torch.utils.data import DataLoader, Dataset
import torch
import torch.nn as nn
from sklearn.metrics import accuracy_score
from tqdm import tqdm

# Define the dataset class
class CoLADataset(Dataset):
    def __init__(self, data, tokenizer, max_length=128):
        self.data = data
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        sentence = self.data[idx]["sentence"]
        label = self.data[idx]["label"]
        encoding = self.tokenizer(
            sentence,
            max_length=self.max_length,
            padding="max_length",
            truncation=True,
            return_tensors="pt",
        )
        return {
            "input_ids": encoding["input_ids"].squeeze(0),
            "attention_mask": encoding["attention_mask"].squeeze(0),
            "label": torch.tensor(label, dtype=torch.long),
        }

# Initialize tokenizer
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")

# Prepare the dataset for batch processing
train_dataset = CoLADataset(train_data, tokenizer)
dev_dataset = CoLADataset(dev_data, tokenizer)
test_dataset = CoLADataset(test_data, tokenizer)

train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)
dev_loader = DataLoader(dev_dataset, batch_size=16, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=16, shuffle=False)

# Define the BERT-based classification model
class BertClassifier(nn.Module):
    def __init__(self, num_classes=2):
        super(BertClassifier, self).__init__()
        self.bert = AutoModel.from_pretrained("bert-base-uncased")
        self.fc = nn.Linear(self.bert.config.hidden_size, num_classes)

    def forward(self, input_ids, attention_mask):
        outputs = self.bert(input_ids=input_ids, attention_mask=attention_mask)
        cls_output = outputs.last_hidden_state[:, 0]
        logits = self.fc(cls_output)
        return logits

# Initialize model
model = BertClassifier(num_classes=2)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# Training configuration
optimizer = torch.optim.AdamW(model.parameters(), lr=2e-5)
criterion = nn.CrossEntropyLoss()

# Training function
def train(model, data_loader, optimizer, criterion, device):
    model.train()
    total_loss = 0
    for batch in tqdm(data_loader, desc="Training", total=len(data_loader)):
        input_ids = batch["input_ids"].to(device)
        attention_mask = batch["attention_mask"].to(device)
        labels = batch["label"].to(device)

        optimizer.zero_grad()
        outputs = model(input_ids=input_ids, attention_mask=attention_mask)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()

    avg_loss = total_loss / len(data_loader)
    return avg_loss

# Evaluation function
def evaluate(model, data_loader, device, desc):
    model.eval()
    all_preds = []
    all_labels = []
    with torch.no_grad():
        for batch in tqdm(data_loader, desc=desc, total=len(data_loader)):
            input_ids = batch["input_ids"].to(device)
            attention_mask = batch["attention_mask"].to(device)
            labels = batch["label"].to(device)

            outputs = model(input_ids=input_ids, attention_mask=attention_mask)
            preds = torch.argmax(outputs, dim=1)
            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

    accuracy = accuracy_score(all_labels, all_preds)
    return accuracy

# Run training and evaluation
epochs = 3
for epoch in range(epochs):
    train_loss = train(model, train_loader, optimizer, criterion, device)
    print(f"{epoch + 1} epoch Training Loss: {train_loss:.4f}")

    dev_accuracy = evaluate(model, dev_loader, device, desc="Evaluating")
    print(f"{epoch + 1} epoch, Dev Accuracy: {dev_accuracy * 100:.2f}%")

# Evaluate on the test dataset
print("\n****************************************")
test_accuracy = evaluate(model, test_loader, device, desc="Test")
print(f"\nTest Accuracy: {test_accuracy * 100:.2f}%")

Training: 100%|██████████| 481/481 [02:46<00:00,  2.88it/s]


1 epoch Training Loss: 0.4790


Evaluating: 100%|██████████| 54/54 [00:06<00:00,  8.81it/s]


1 epoch, Dev Accuracy: 82.94%


Training: 100%|██████████| 481/481 [02:45<00:00,  2.90it/s]


2 epoch Training Loss: 0.2679


Evaluating: 100%|██████████| 54/54 [00:06<00:00,  8.81it/s]


2 epoch, Dev Accuracy: 85.28%


Training: 100%|██████████| 481/481 [02:45<00:00,  2.91it/s]


3 epoch Training Loss: 0.1550


Evaluating: 100%|██████████| 54/54 [00:06<00:00,  8.76it/s]


3 epoch, Dev Accuracy: 84.46%

****************************************


Test: 100%|██████████| 66/66 [00:07<00:00,  8.75it/s]


Test Accuracy: 80.63%



