In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from transformers import AutoTokenizer
from transformers import AlbertModel

In [2]:
from datasets import load_dataset

dataset = load_dataset("tweet_eval", "stance_feminist")

Downloading and preparing dataset tweet_eval/stance_feminist to C:/Users/User/.cache/huggingface/datasets/tweet_eval/stance_feminist/1.1.0/12aee5282b8784f3e95459466db4cdf45c6bf49719c25cdb0743d71ed0410343...


Downloading data files:   0%|          | 0/6 [00:00<?, ?it/s]

Downloading data:   0%|          | 0.00/26.8k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/232 [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/13.4k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/116 [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/3.59k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/57.0 [00:00<?, ?B/s]

Extracting data files:   0%|          | 0/6 [00:00<?, ?it/s]

Generating train split:   0%|          | 0/597 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/285 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/67 [00:00<?, ? examples/s]

Dataset tweet_eval downloaded and prepared to C:/Users/User/.cache/huggingface/datasets/tweet_eval/stance_feminist/1.1.0/12aee5282b8784f3e95459466db4cdf45c6bf49719c25cdb0743d71ed0410343. Subsequent calls will reuse this data.


  0%|          | 0/3 [00:00<?, ?it/s]

In [3]:
dataset["train"]

Dataset({
    features: ['text', 'label'],
    num_rows: 597
})

In [4]:
dataset["validation"]

Dataset({
    features: ['text', 'label'],
    num_rows: 67
})

In [5]:
dataset["validation"]

Dataset({
    features: ['text', 'label'],
    num_rows: 67
})

In [6]:
# Load a pre-trained tokenizer
tokenizer = AutoTokenizer.from_pretrained("albert-base-v2")


In [7]:
def tokenize_function(example):
    return tokenizer(example["text"], padding="max_length", truncation=True, max_length=128)

In [8]:
# Tokenize the dataset
tokenized_dataset = dataset.map(tokenize_function, batched=True)

# Get the input_ids from the tokenized dataset for train, validation, and test sets
train_input_ids = [example['input_ids'] for example in tokenized_dataset['train']]
val_input_ids = [example['input_ids'] for example in tokenized_dataset['validation']]
test_input_ids = [example['input_ids'] for example in tokenized_dataset['test']]

# Convert input_ids to PyTorch tensors
train_input_ids = torch.tensor(train_input_ids, dtype=torch.long)
val_input_ids = torch.tensor(val_input_ids, dtype=torch.long)
test_input_ids = torch.tensor(test_input_ids, dtype=torch.long)

# Concatenate the input_ids tensors along the first dimension
input_ids = torch.cat((train_input_ids, val_input_ids, test_input_ids), dim=0)


Map:   0%|          | 0/597 [00:00<?, ? examples/s]

Map:   0%|          | 0/285 [00:00<?, ? examples/s]

Map:   0%|          | 0/67 [00:00<?, ? examples/s]

In [9]:
# Tokenize the dataset
tokenized_dataset = dataset.map(tokenize_function, batched=True)

# Get the attention_masks from the tokenized dataset for train, validation, and test sets
train_attention_mask = [example['attention_mask'] for example in tokenized_dataset['train']]
val_attention_mask = [example['attention_mask'] for example in tokenized_dataset['validation']]
test_attention_mask = [example['attention_mask'] for example in tokenized_dataset['test']]

# Convert attention_mask to PyTorch tensors
train_attention_mask = torch.tensor(train_attention_mask, dtype=torch.long)
val_attention_mask = torch.tensor(val_attention_mask, dtype=torch.long)
test_attention_mask = torch.tensor(test_attention_mask, dtype=torch.long)

# Concatenate the input_ids tensors along the first dimension
attention_masks = torch.cat((train_attention_mask, val_attention_mask, test_attention_mask), dim=0)


Map:   0%|          | 0/597 [00:00<?, ? examples/s]

Loading cached processed dataset at C:\Users\User\.cache\huggingface\datasets\tweet_eval\stance_feminist\1.1.0\12aee5282b8784f3e95459466db4cdf45c6bf49719c25cdb0743d71ed0410343\cache-d6fc5813b617ce88.arrow
Loading cached processed dataset at C:\Users\User\.cache\huggingface\datasets\tweet_eval\stance_feminist\1.1.0\12aee5282b8784f3e95459466db4cdf45c6bf49719c25cdb0743d71ed0410343\cache-4a19f2a3383c5d78.arrow


In [10]:
print(input_ids)

tensor([[    2,    13,     1,  ...,     0,     0,     0],
        [    2,    13,     1,  ...,     0,     0,     0],
        [    2,    13,  5256,  ...,     0,     0,     0],
        ...,
        [    2,    31,   765,  ...,     0,     0,     0],
        [    2,    13,     1,  ...,     0,     0,     0],
        [    2,  6926, 10817,  ...,     0,     0,     0]])


In [11]:
print(attention_masks)

tensor([[1, 1, 1,  ..., 0, 0, 0],
        [1, 1, 1,  ..., 0, 0, 0],
        [1, 1, 1,  ..., 0, 0, 0],
        ...,
        [1, 1, 1,  ..., 0, 0, 0],
        [1, 1, 1,  ..., 0, 0, 0],
        [1, 1, 1,  ..., 0, 0, 0]])


In [12]:
# Split the dataset into train, validation, and test sets
train_dataset = tokenized_dataset["train"]
val_dataset = tokenized_dataset["validation"]
test_dataset = tokenized_dataset["test"]

In [13]:
# Convert the datasets to PyTorch tensors
train_dataset = train_dataset.remove_columns(["text"])
val_dataset = val_dataset.remove_columns(["text"])
test_dataset = test_dataset.remove_columns(["text"])

In [14]:
train_dataset.set_format("torch", columns=["input_ids", "attention_mask", "label"])
val_dataset.set_format("torch", columns=["input_ids", "attention_mask", "label"])
test_dataset.set_format("torch", columns=["input_ids", "attention_mask", "label"])

In [15]:
class StanceFeministModel(nn.Module):
    def __init__(self, num_classes=3):
        super(StanceFeministModel, self).__init__()
        self.albert = AlbertModel.from_pretrained('albert-base-v2')
        self.dropout = nn.Dropout(0.1)
        self.fc = nn.Linear(self.albert.config.hidden_size, num_classes)

    def forward(self, input_ids, attention_mask):
        outputs = self.albert(input_ids=input_ids, attention_mask=attention_mask)
        pooled_output = outputs.pooler_output
        pooled_output = self.dropout(pooled_output)
        logits = self.fc(pooled_output)
        return logits

In [16]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


In [17]:
# instantiate your model
stance_feminist_model = StanceFeministModel(num_classes=3).to(device)

Some weights of the model checkpoint at albert-base-v2 were not used when initializing AlbertModel: ['predictions.LayerNorm.weight', 'predictions.bias', 'predictions.LayerNorm.bias', 'predictions.dense.bias', 'predictions.decoder.bias', 'predictions.dense.weight', 'predictions.decoder.weight']
- This IS expected if you are initializing AlbertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing AlbertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [18]:
# define your loss function
criterion = nn.CrossEntropyLoss()

# define your optimizer
optimizer = torch.optim.Adam(stance_feminist_model.parameters(), lr=0.01)

In [19]:
# set up your training, validation, and test dataloaders
train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=16)
test_loader = DataLoader(test_dataset, batch_size=16)

In [20]:
for batch in train_loader:
    print(batch)


{'label': tensor([1, 0, 1, 0, 0, 1, 1, 1, 2, 1, 1, 1, 2, 1, 1, 1]), 'input_ids': tensor([[   2, 9321, 7617,  ...,    0,    0,    0],
        [   2,  148,   72,  ...,    0,    0,    0],
        [   2, 9704,   18,  ...,    0,    0,    0],
        ...,
        [   2,   48, 6926,  ...,    0,    0,    0],
        [   2,  221,   22,  ...,    0,    0,    0],
        [   2,   13,    1,  ...,    0,    0,    0]]), 'attention_mask': tensor([[1, 1, 1,  ..., 0, 0, 0],
        [1, 1, 1,  ..., 0, 0, 0],
        [1, 1, 1,  ..., 0, 0, 0],
        ...,
        [1, 1, 1,  ..., 0, 0, 0],
        [1, 1, 1,  ..., 0, 0, 0],
        [1, 1, 1,  ..., 0, 0, 0]])}
{'label': tensor([1, 2, 0, 1, 1, 0, 0, 1, 2, 1, 2, 1, 2, 1, 1, 0]), 'input_ids': tensor([[    2, 22890,    25,  ...,     0,     0,     0],
        [    2,    31,  1376,  ...,     0,     0,     0],
        [    2,    13,     1,  ...,     0,     0,     0],
        ...,
        [    2,    76,   144,  ...,     0,     0,     0],
        [    2,    31,  2321,

In [21]:
from tqdm import tqdm
num_epochs = 1
for epoch in range(num_epochs):
    running_loss = 0.0
    correct_predictions = 0
    total_predictions = 0
    for batch in tqdm(train_loader, desc=f'Epoch {epoch+1}/{num_epochs}'):
        # get the inputs
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['label'].to(device)

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = stance_feminist_model(input_ids=input_ids, attention_mask=attention_mask)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # calculate accuracy
        _, predicted = torch.max(outputs.data, 1)
        total_predictions += labels.size(0)
        correct_predictions += (predicted == labels).sum().item()

        # print running loss for each batch
        running_loss += loss.item()
        avg_loss = running_loss / len(train_loader)
        avg_acc = correct_predictions / total_predictions
        tqdm.write(f'Train Loss: {avg_loss:.3f}, Train Acc: {avg_acc:.3f}', end='\r')
    tqdm.write(f'Epoch {epoch+1}, Train Loss: {avg_loss:.3f}, Train Acc: {avg_acc:.3f}')

    print(f"Epoch {epoch+1} finished")


Epoch 1/1:   3%|▎         | 1/38 [00:43<26:58, 43.74s/it]

Train Loss: 0.030, Train Acc: 0.375

Epoch 1/1:   5%|▌         | 2/38 [01:08<19:38, 32.72s/it]

Train Loss: 0.147, Train Acc: 0.406

Epoch 1/1:   8%|▊         | 3/38 [02:00<24:04, 41.26s/it]

Train Loss: 0.259, Train Acc: 0.417

Epoch 1/1:  11%|█         | 4/38 [02:57<26:54, 47.48s/it]

Train Loss: 0.523, Train Acc: 0.375

Epoch 1/1:  13%|█▎        | 5/38 [03:41<25:32, 46.44s/it]

Train Loss: 0.634, Train Acc: 0.350

Epoch 1/1:  16%|█▌        | 6/38 [04:24<24:05, 45.16s/it]

Train Loss: 0.687, Train Acc: 0.365

Epoch 1/1:  18%|█▊        | 7/38 [04:52<20:27, 39.58s/it]

Train Loss: 0.810, Train Acc: 0.375

Epoch 1/1:  21%|██        | 8/38 [05:18<17:40, 35.35s/it]

Train Loss: 0.935, Train Acc: 0.375

Epoch 1/1:  24%|██▎       | 9/38 [05:45<15:45, 32.60s/it]

Train Loss: 1.009, Train Acc: 0.403

Epoch 1/1:  26%|██▋       | 10/38 [06:09<13:57, 29.92s/it]

Train Loss: 1.125, Train Acc: 0.394

Epoch 1/1:  29%|██▉       | 11/38 [06:42<13:51, 30.79s/it]

Train Loss: 1.203, Train Acc: 0.386

Epoch 1/1:  32%|███▏      | 12/38 [07:11<13:07, 30.30s/it]

Train Loss: 1.263, Train Acc: 0.406

Epoch 1/1:  34%|███▍      | 13/38 [07:34<11:47, 28.29s/it]

Train Loss: 1.344, Train Acc: 0.404

Epoch 1/1:  37%|███▋      | 14/38 [07:58<10:43, 26.80s/it]

Train Loss: 1.382, Train Acc: 0.411

Epoch 1/1:  39%|███▉      | 15/38 [08:21<09:53, 25.79s/it]

Train Loss: 1.414, Train Acc: 0.408

Epoch 1/1:  42%|████▏     | 16/38 [08:45<09:14, 25.20s/it]

Train Loss: 1.470, Train Acc: 0.395

Epoch 1/1:  45%|████▍     | 17/38 [09:09<08:41, 24.83s/it]

Train Loss: 1.541, Train Acc: 0.386

Epoch 1/1:  47%|████▋     | 18/38 [09:33<08:10, 24.55s/it]

Train Loss: 1.579, Train Acc: 0.389

Epoch 1/1:  50%|█████     | 19/38 [09:59<07:54, 24.98s/it]

Train Loss: 1.607, Train Acc: 0.398

Epoch 1/1:  53%|█████▎    | 20/38 [10:25<07:34, 25.23s/it]

Train Loss: 1.653, Train Acc: 0.406

Epoch 1/1:  55%|█████▌    | 21/38 [10:49<07:05, 25.03s/it]

Train Loss: 1.699, Train Acc: 0.414

Epoch 1/1:  58%|█████▊    | 22/38 [11:13<06:34, 24.69s/it]

Train Loss: 1.745, Train Acc: 0.415

Epoch 1/1:  61%|██████    | 23/38 [11:37<06:06, 24.44s/it]

Train Loss: 1.793, Train Acc: 0.410

Epoch 1/1:  63%|██████▎   | 24/38 [12:02<05:43, 24.52s/it]

Train Loss: 1.854, Train Acc: 0.406

Epoch 1/1:  66%|██████▌   | 25/38 [12:27<05:21, 24.72s/it]

Train Loss: 1.913, Train Acc: 0.398

Epoch 1/1:  68%|██████▊   | 26/38 [12:52<04:58, 24.89s/it]

Train Loss: 1.935, Train Acc: 0.409

Epoch 1/1:  71%|███████   | 27/38 [13:19<04:38, 25.33s/it]

Train Loss: 1.975, Train Acc: 0.410

Epoch 1/1:  74%|███████▎  | 28/38 [13:47<04:22, 26.29s/it]

Train Loss: 2.085, Train Acc: 0.404

Epoch 1/1:  76%|███████▋  | 29/38 [14:07<03:38, 24.31s/it]

Train Loss: 2.204, Train Acc: 0.399

Epoch 1/1:  79%|███████▉  | 30/38 [14:23<02:54, 21.86s/it]

Train Loss: 2.229, Train Acc: 0.408

Epoch 1/1:  82%|████████▏ | 31/38 [14:41<02:25, 20.75s/it]

Train Loss: 2.268, Train Acc: 0.411

Epoch 1/1:  84%|████████▍ | 32/38 [15:06<02:11, 21.99s/it]

Train Loss: 2.297, Train Acc: 0.414

Epoch 1/1:  87%|████████▋ | 33/38 [15:29<01:51, 22.40s/it]

Train Loss: 2.327, Train Acc: 0.417

Epoch 1/1:  89%|████████▉ | 34/38 [16:00<01:38, 24.73s/it]

Train Loss: 2.360, Train Acc: 0.414

Epoch 1/1:  92%|█████████▏| 35/38 [16:28<01:17, 25.89s/it]

Train Loss: 2.402, Train Acc: 0.407

Epoch 1/1:  95%|█████████▍| 36/38 [17:10<01:01, 30.75s/it]

Train Loss: 2.449, Train Acc: 0.410

Epoch 1/1:  97%|█████████▋| 37/38 [17:49<00:33, 33.17s/it]

Train Loss: 2.492, Train Acc: 0.412

Epoch 1/1: 100%|██████████| 38/38 [18:04<00:00, 28.54s/it]

Epoch 1, Train Loss: 2.512, Train Acc: 0.414
Epoch 1 finished





In [22]:
# Validation loop
with torch.no_grad():
    stance_feminist_model.eval()  # Set the model to evaluation mode
    valid_loss = 0.0
    correct_predictions = 0
    total_predictions = 0
    for batch in val_loader:
        # get the inputs
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['label'].to(device)

        # forward
        outputs = stance_feminist_model(input_ids=input_ids, attention_mask=attention_mask)
        loss = criterion(outputs, labels)

        # calculate accuracy
        _, predicted = torch.max(outputs.data, 1)
        total_predictions += labels.size(0)
        correct_predictions += (predicted == labels).sum().item()

        # calculate running loss
        valid_loss += loss.item()

    avg_loss = valid_loss / len(val_loader)
    avg_acc = correct_predictions / total_predictions
    print(f'Validation Loss: {avg_loss:.3f}, Validation Acc: {avg_acc:.3f}')

Validation Loss: 2.522, Validation Acc: 0.194


In [23]:

# Test loop
with torch.no_grad():
    stance_feminist_model.eval()  # Set the model to evaluation mode
    test_loss = 0.0
    correct_predictions = 0
    total_predictions = 0
    for batch in test_loader:
        # get the inputs
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['label'].to(device)

        # forward
        outputs = stance_feminist_model(input_ids=input_ids, attention_mask=attention_mask)
        loss = criterion(outputs, labels)

        # calculate accuracy
        _, predicted = torch.max(outputs.data, 1)
        total_predictions += labels.size(0)
        correct_predictions += (predicted == labels).sum().item()

        # calculate running loss
        test_loss += loss.item()

    avg_loss = test_loss / len(test_loader)
    avg_acc = correct_predictions / total_predictions
    print(f'Test Loss: {avg_loss:.3f}, Test Acc: {avg_acc:.3f}')


Test Loss: 2.192, Test Acc: 0.154


In [24]:
# save the model
torch.save(stance_feminist_model.state_dict(), 'Models/stance_feminist_model.pth')
