In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from transformers import AutoTokenizer
from transformers import MobileBertModel


In [2]:
from datasets import load_dataset

dataset = load_dataset("tweet_eval", "stance_abortion")

Found cached dataset tweet_eval (C:/Users/User/.cache/huggingface/datasets/tweet_eval/stance_abortion/1.1.0/12aee5282b8784f3e95459466db4cdf45c6bf49719c25cdb0743d71ed0410343)


  0%|          | 0/3 [00:00<?, ?it/s]

In [3]:
dataset["train"]

Dataset({
    features: ['text', 'label'],
    num_rows: 587
})

In [4]:
dataset["validation"]

Dataset({
    features: ['text', 'label'],
    num_rows: 66
})

In [5]:
dataset["validation"]

Dataset({
    features: ['text', 'label'],
    num_rows: 66
})

In [6]:
# Load a pre-trained tokenizer
tokenizer = AutoTokenizer.from_pretrained("google/mobilebert-uncased")



In [7]:
def tokenize_function(example):
    return tokenizer(example["text"], padding="max_length", truncation=True, max_length=128)

In [8]:
# Tokenize the dataset
tokenized_dataset = dataset.map(tokenize_function, batched=True)

# Get the input_ids from the tokenized dataset for train, validation, and test sets
train_input_ids = [example['input_ids'] for example in tokenized_dataset['train']]
val_input_ids = [example['input_ids'] for example in tokenized_dataset['validation']]
test_input_ids = [example['input_ids'] for example in tokenized_dataset['test']]

# Convert input_ids to PyTorch tensors
train_input_ids = torch.tensor(train_input_ids, dtype=torch.long)
val_input_ids = torch.tensor(val_input_ids, dtype=torch.long)
test_input_ids = torch.tensor(test_input_ids, dtype=torch.long)

# Concatenate the input_ids tensors along the first dimension
input_ids = torch.cat((train_input_ids, val_input_ids, test_input_ids), dim=0)


Loading cached processed dataset at C:\Users\User\.cache\huggingface\datasets\tweet_eval\stance_abortion\1.1.0\12aee5282b8784f3e95459466db4cdf45c6bf49719c25cdb0743d71ed0410343\cache-2499fd4575881af1.arrow
Loading cached processed dataset at C:\Users\User\.cache\huggingface\datasets\tweet_eval\stance_abortion\1.1.0\12aee5282b8784f3e95459466db4cdf45c6bf49719c25cdb0743d71ed0410343\cache-6e1bcf84be3be599.arrow


Map:   0%|          | 0/66 [00:00<?, ? examples/s]

In [9]:
# Tokenize the dataset
tokenized_dataset = dataset.map(tokenize_function, batched=True)

# Get the attention_masks from the tokenized dataset for train, validation, and test sets
train_attention_mask = [example['attention_mask'] for example in tokenized_dataset['train']]
val_attention_mask = [example['attention_mask'] for example in tokenized_dataset['validation']]
test_attention_mask = [example['attention_mask'] for example in tokenized_dataset['test']]

# Convert attention_mask to PyTorch tensors
train_attention_mask = torch.tensor(train_attention_mask, dtype=torch.long)
val_attention_mask = torch.tensor(val_attention_mask, dtype=torch.long)
test_attention_mask = torch.tensor(test_attention_mask, dtype=torch.long)

# Concatenate the input_ids tensors along the first dimension
attention_masks = torch.cat((train_attention_mask, val_attention_mask, test_attention_mask), dim=0)


Loading cached processed dataset at C:\Users\User\.cache\huggingface\datasets\tweet_eval\stance_abortion\1.1.0\12aee5282b8784f3e95459466db4cdf45c6bf49719c25cdb0743d71ed0410343\cache-d171cf49e0cf1c7c.arrow
Loading cached processed dataset at C:\Users\User\.cache\huggingface\datasets\tweet_eval\stance_abortion\1.1.0\12aee5282b8784f3e95459466db4cdf45c6bf49719c25cdb0743d71ed0410343\cache-4e36ed9751115a98.arrow
Loading cached processed dataset at C:\Users\User\.cache\huggingface\datasets\tweet_eval\stance_abortion\1.1.0\12aee5282b8784f3e95459466db4cdf45c6bf49719c25cdb0743d71ed0410343\cache-8989cb1beecbb0d4.arrow


In [10]:
print(input_ids)

tensor([[  101,  2057, 10825,  ...,     0,     0,     0],
        [  101,  1030,  5310,  ...,     0,     0,     0],
        [  101,  2166,  2003,  ...,     0,     0,     0],
        ...,
        [  101,  2166,  2003,  ...,     0,     0,     0],
        [  101,  1030,  5310,  ...,     0,     0,     0],
        [  101,  2632,  9923,  ...,     0,     0,     0]])


In [11]:
print(attention_masks)

tensor([[1, 1, 1,  ..., 0, 0, 0],
        [1, 1, 1,  ..., 0, 0, 0],
        [1, 1, 1,  ..., 0, 0, 0],
        ...,
        [1, 1, 1,  ..., 0, 0, 0],
        [1, 1, 1,  ..., 0, 0, 0],
        [1, 1, 1,  ..., 0, 0, 0]])


In [12]:
# Split the dataset into train, validation, and test sets
train_dataset = tokenized_dataset["train"]
val_dataset = tokenized_dataset["validation"]
test_dataset = tokenized_dataset["test"]

In [13]:
# Convert the datasets to PyTorch tensors
train_dataset = train_dataset.remove_columns(["text"])
val_dataset = val_dataset.remove_columns(["text"])
test_dataset = test_dataset.remove_columns(["text"])

In [14]:
train_dataset.set_format("torch", columns=["input_ids", "attention_mask", "label"])
val_dataset.set_format("torch", columns=["input_ids", "attention_mask", "label"])
test_dataset.set_format("torch", columns=["input_ids", "attention_mask", "label"])

In [15]:


class StanceAbortionModel(nn.Module):
    def __init__(self, num_classes=3):
        super(StanceAbortionModel, self).__init__()
        self.mobilebert = MobileBertModel.from_pretrained('google/mobilebert-uncased')
        self.dropout = nn.Dropout(0.1)
        self.fc = nn.Linear(self.mobilebert.config.hidden_size, num_classes)

    def forward(self, input_ids, attention_mask):
        outputs = self.mobilebert(input_ids=input_ids, attention_mask=attention_mask)
        pooled_output = outputs.pooler_output
        pooled_output = self.dropout(pooled_output)
        logits = self.fc(pooled_output)
        return logits


In [16]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


In [17]:
# instantiate your model
stance_abortion_model = StanceAbortionModel(num_classes=3).to(device)

Some weights of the model checkpoint at google/mobilebert-uncased were not used when initializing MobileBertModel: ['cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.dense.weight', 'cls.predictions.decoder.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing MobileBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing MobileBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [22]:
# define your loss function
criterion = nn.CrossEntropyLoss()

# define your optimizer
optimizer = torch.optim.Adam(stance_abortion_model.parameters(), lr=0.001)

In [23]:
# set up your training, validation, and test dataloaders
train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=16)
test_loader = DataLoader(test_dataset, batch_size=16)

In [24]:
for batch in train_loader:
    print(batch)


{'label': tensor([1, 1, 2, 1, 1, 1, 1, 2, 0, 2, 0, 0, 2, 2, 1, 2]), 'input_ids': tensor([[  101,  1037, 10032,  ...,     0,     0,     0],
        [  101,  2017,  2097,  ...,     0,     0,     0],
        [  101, 19387,  1030,  ...,     0,     0,     0],
        ...,
        [  101,  8840,  2140,  ...,     0,     0,     0],
        [  101,  1999,  2277,  ...,     0,     0,     0],
        [  101,  2204,  2851,  ...,     0,     0,     0]]), 'attention_mask': tensor([[1, 1, 1,  ..., 0, 0, 0],
        [1, 1, 1,  ..., 0, 0, 0],
        [1, 1, 1,  ..., 0, 0, 0],
        ...,
        [1, 1, 1,  ..., 0, 0, 0],
        [1, 1, 1,  ..., 0, 0, 0],
        [1, 1, 1,  ..., 0, 0, 0]])}
{'label': tensor([2, 0, 0, 1, 2, 1, 1, 0, 0, 1, 1, 1, 2, 2, 1, 2]), 'input_ids': tensor([[ 101, 1045, 1005,  ...,    0,    0,    0],
        [ 101, 1030, 5310,  ...,    0,    0,    0],
        [ 101, 1030, 5310,  ...,    0,    0,    0],
        ...,
        [ 101, 1012, 1030,  ...,    0,    0,    0],
        [ 101, 10

In [25]:
from tqdm import tqdm
num_epochs = 1
for epoch in range(num_epochs):
    running_loss = 0.0
    correct_predictions = 0
    total_predictions = 0
    for batch in tqdm(train_loader, desc=f'Epoch {epoch+1}/{num_epochs}'):
        # get the inputs
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['label'].to(device)

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = stance_abortion_model(input_ids=input_ids, attention_mask=attention_mask)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # calculate accuracy
        _, predicted = torch.max(outputs.data, 1)
        total_predictions += labels.size(0)
        correct_predictions += (predicted == labels).sum().item()

        # print running loss for each batch
        running_loss += loss.item()
        avg_loss = running_loss / len(train_loader)
        avg_acc = correct_predictions / total_predictions
        tqdm.write(f'Train Loss: {avg_loss:.3f}, Train Acc: {avg_acc:.3f}', end='\r')
    tqdm.write(f'Epoch {epoch+1}, Train Loss: {avg_loss:.3f}, Train Acc: {avg_acc:.3f}')

    print(f"Epoch {epoch+1} finished")


Epoch 1/1:   3%|▎         | 1/37 [00:18<11:12, 18.67s/it]

Train Loss: nan, Train Acc: 0.500

Epoch 1/1:   5%|▌         | 2/37 [00:34<09:47, 16.80s/it]

Train Loss: nan, Train Acc: 0.438

Epoch 1/1:   8%|▊         | 3/37 [00:45<08:08, 14.37s/it]

Train Loss: nan, Train Acc: 0.312

Epoch 1/1:  11%|█         | 4/37 [00:57<07:19, 13.30s/it]

Train Loss: nan, Train Acc: 0.297

Epoch 1/1:  14%|█▎        | 5/37 [01:09<06:49, 12.78s/it]

Train Loss: nan, Train Acc: 0.300

Epoch 1/1:  16%|█▌        | 6/37 [01:20<06:23, 12.38s/it]

Train Loss: nan, Train Acc: 0.281

Epoch 1/1:  19%|█▉        | 7/37 [01:44<08:05, 16.17s/it]

Train Loss: nan, Train Acc: 0.277

Epoch 1/1:  22%|██▏       | 8/37 [01:58<07:25, 15.36s/it]

Train Loss: nan, Train Acc: 0.281

Epoch 1/1:  24%|██▍       | 9/37 [02:09<06:35, 14.12s/it]

Train Loss: nan, Train Acc: 0.285

Epoch 1/1:  27%|██▋       | 10/37 [02:21<06:05, 13.54s/it]

Train Loss: nan, Train Acc: 0.269

Epoch 1/1:  30%|██▉       | 11/37 [02:34<05:42, 13.16s/it]

Train Loss: nan, Train Acc: 0.273

Epoch 1/1:  32%|███▏      | 12/37 [02:45<05:17, 12.71s/it]

Train Loss: nan, Train Acc: 0.276

Epoch 1/1:  35%|███▌      | 13/37 [02:57<04:56, 12.36s/it]

Train Loss: nan, Train Acc: 0.274

Epoch 1/1:  38%|███▊      | 14/37 [03:09<04:38, 12.10s/it]

Train Loss: nan, Train Acc: 0.277

Epoch 1/1:  41%|████      | 15/37 [03:19<04:14, 11.55s/it]

Train Loss: nan, Train Acc: 0.275

Epoch 1/1:  43%|████▎     | 16/37 [03:29<03:55, 11.19s/it]

Train Loss: nan, Train Acc: 0.285

Epoch 1/1:  46%|████▌     | 17/37 [03:39<03:36, 10.84s/it]

Train Loss: nan, Train Acc: 0.279

Epoch 1/1:  49%|████▊     | 18/37 [03:50<03:24, 10.78s/it]

Train Loss: nan, Train Acc: 0.278

Epoch 1/1:  51%|█████▏    | 19/37 [04:00<03:09, 10.54s/it]

Train Loss: nan, Train Acc: 0.283

Epoch 1/1:  54%|█████▍    | 20/37 [04:09<02:54, 10.27s/it]

Train Loss: nan, Train Acc: 0.278

Epoch 1/1:  57%|█████▋    | 21/37 [04:20<02:44, 10.26s/it]

Train Loss: nan, Train Acc: 0.271

Epoch 1/1:  59%|█████▉    | 22/37 [04:30<02:35, 10.37s/it]

Train Loss: nan, Train Acc: 0.270

Epoch 1/1:  62%|██████▏   | 23/37 [04:42<02:32, 10.89s/it]

Train Loss: nan, Train Acc: 0.272

Epoch 1/1:  65%|██████▍   | 24/37 [04:54<02:25, 11.20s/it]

Train Loss: nan, Train Acc: 0.273

Epoch 1/1:  68%|██████▊   | 25/37 [05:08<02:22, 11.89s/it]

Train Loss: nan, Train Acc: 0.275

Epoch 1/1:  70%|███████   | 26/37 [05:20<02:11, 11.95s/it]

Train Loss: nan, Train Acc: 0.272

Epoch 1/1:  73%|███████▎  | 27/37 [05:30<01:53, 11.34s/it]

Train Loss: nan, Train Acc: 0.264

Epoch 1/1:  76%|███████▌  | 28/37 [05:40<01:40, 11.12s/it]

Train Loss: nan, Train Acc: 0.263

Epoch 1/1:  78%|███████▊  | 29/37 [05:51<01:28, 11.09s/it]

Train Loss: nan, Train Acc: 0.261

Epoch 1/1:  81%|████████  | 30/37 [06:01<01:15, 10.77s/it]

Train Loss: nan, Train Acc: 0.269

Epoch 1/1:  84%|████████▍ | 31/37 [06:11<01:02, 10.42s/it]

Train Loss: nan, Train Acc: 0.268

Epoch 1/1:  86%|████████▋ | 32/37 [06:21<00:51, 10.25s/it]

Train Loss: nan, Train Acc: 0.268

Epoch 1/1:  89%|████████▉ | 33/37 [06:31<00:40, 10.17s/it]

Train Loss: nan, Train Acc: 0.273

Epoch 1/1:  92%|█████████▏| 34/37 [06:41<00:30, 10.10s/it]

Train Loss: nan, Train Acc: 0.274

Epoch 1/1:  95%|█████████▍| 35/37 [06:51<00:20, 10.08s/it]

Train Loss: nan, Train Acc: 0.280

Epoch 1/1:  97%|█████████▋| 36/37 [07:02<00:10, 10.47s/it]

Train Loss: nan, Train Acc: 0.273

Epoch 1/1: 100%|██████████| 37/37 [07:10<00:00, 11.63s/it]

Epoch 1, Train Loss: nan, Train Acc: 0.271
Epoch 1 finished





In [26]:
# Validation loop
with torch.no_grad():
    stance_abortion_model.eval()  # Set the model to evaluation mode
    valid_loss = 0.0
    correct_predictions = 0
    total_predictions = 0
    for batch in val_loader:
        # get the inputs
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['label'].to(device)

        # forward
        outputs = stance_abortion_model(input_ids=input_ids, attention_mask=attention_mask)
        loss = criterion(outputs, labels)

        # calculate accuracy
        _, predicted = torch.max(outputs.data, 1)
        total_predictions += labels.size(0)
        correct_predictions += (predicted == labels).sum().item()

        # calculate running loss
        valid_loss += loss.item()

    avg_loss = valid_loss / len(val_loader)
    avg_acc = correct_predictions / total_predictions
    print(f'Validation Loss: {avg_loss:.3f}, Validation Acc: {avg_acc:.3f}')

Validation Loss: nan, Validation Acc: 0.273


In [27]:

# Test loop
with torch.no_grad():
    stance_abortion_model.eval()  # Set the model to evaluation mode
    test_loss = 0.0
    correct_predictions = 0
    total_predictions = 0
    for batch in test_loader:
        # get the inputs
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['label'].to(device)

        # forward
        outputs = stance_abortion_model(input_ids=input_ids, attention_mask=attention_mask)
        loss = criterion(outputs, labels)

        # calculate accuracy
        _, predicted = torch.max(outputs.data, 1)
        total_predictions += labels.size(0)
        correct_predictions += (predicted == labels).sum().item()

        # calculate running loss
        test_loss += loss.item()

    avg_loss = test_loss / len(test_loader)
    avg_acc = correct_predictions / total_predictions
    print(f'Test Loss: {avg_loss:.3f}, Test Acc: {avg_acc:.3f}')


Test Loss: nan, Test Acc: 0.161


In [28]:
# save the model
torch.save(stance_abortion_model.state_dict(), 'Models/stance_abortion_model.pth')
