In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from transformers import AutoTokenizer
from transformers import AlbertModel

In [2]:
from datasets import load_dataset

dataset = load_dataset("tweet_eval", "stance_climate")

Found cached dataset tweet_eval (C:/Users/User/.cache/huggingface/datasets/tweet_eval/stance_climate/1.1.0/12aee5282b8784f3e95459466db4cdf45c6bf49719c25cdb0743d71ed0410343)


  0%|          | 0/3 [00:00<?, ?it/s]

In [3]:
dataset["train"]

Dataset({
    features: ['text', 'label'],
    num_rows: 355
})

In [4]:
dataset["validation"]

Dataset({
    features: ['text', 'label'],
    num_rows: 40
})

In [5]:
dataset["validation"]

Dataset({
    features: ['text', 'label'],
    num_rows: 40
})

In [6]:
# Load a pre-trained tokenizer
tokenizer = AutoTokenizer.from_pretrained("albert-base-v2")


In [7]:
def tokenize_function(example):
    return tokenizer(example["text"], padding="max_length", truncation=True, max_length=128)

In [8]:
# Tokenize the dataset
tokenized_dataset = dataset.map(tokenize_function, batched=True)

# Get the input_ids from the tokenized dataset for train, validation, and test sets
train_input_ids = [example['input_ids'] for example in tokenized_dataset['train']]
val_input_ids = [example['input_ids'] for example in tokenized_dataset['validation']]
test_input_ids = [example['input_ids'] for example in tokenized_dataset['test']]

# Convert input_ids to PyTorch tensors
train_input_ids = torch.tensor(train_input_ids, dtype=torch.long)
val_input_ids = torch.tensor(val_input_ids, dtype=torch.long)
test_input_ids = torch.tensor(test_input_ids, dtype=torch.long)

# Concatenate the input_ids tensors along the first dimension
input_ids = torch.cat((train_input_ids, val_input_ids, test_input_ids), dim=0)


Loading cached processed dataset at C:\Users\User\.cache\huggingface\datasets\tweet_eval\stance_climate\1.1.0\12aee5282b8784f3e95459466db4cdf45c6bf49719c25cdb0743d71ed0410343\cache-22c2d61fa626c275.arrow


Map:   0%|          | 0/169 [00:00<?, ? examples/s]

Loading cached processed dataset at C:\Users\User\.cache\huggingface\datasets\tweet_eval\stance_climate\1.1.0\12aee5282b8784f3e95459466db4cdf45c6bf49719c25cdb0743d71ed0410343\cache-c468eff055c85d56.arrow


In [9]:
# Tokenize the dataset
tokenized_dataset = dataset.map(tokenize_function, batched=True)

# Get the attention_masks from the tokenized dataset for train, validation, and test sets
train_attention_mask = [example['attention_mask'] for example in tokenized_dataset['train']]
val_attention_mask = [example['attention_mask'] for example in tokenized_dataset['validation']]
test_attention_mask = [example['attention_mask'] for example in tokenized_dataset['test']]

# Convert attention_mask to PyTorch tensors
train_attention_mask = torch.tensor(train_attention_mask, dtype=torch.long)
val_attention_mask = torch.tensor(val_attention_mask, dtype=torch.long)
test_attention_mask = torch.tensor(test_attention_mask, dtype=torch.long)

# Concatenate the input_ids tensors along the first dimension
attention_masks = torch.cat((train_attention_mask, val_attention_mask, test_attention_mask), dim=0)


Loading cached processed dataset at C:\Users\User\.cache\huggingface\datasets\tweet_eval\stance_climate\1.1.0\12aee5282b8784f3e95459466db4cdf45c6bf49719c25cdb0743d71ed0410343\cache-b86cb2279c9c7cb0.arrow
Loading cached processed dataset at C:\Users\User\.cache\huggingface\datasets\tweet_eval\stance_climate\1.1.0\12aee5282b8784f3e95459466db4cdf45c6bf49719c25cdb0743d71ed0410343\cache-87fcbc7a7e61f00f.arrow
Loading cached processed dataset at C:\Users\User\.cache\huggingface\datasets\tweet_eval\stance_climate\1.1.0\12aee5282b8784f3e95459466db4cdf45c6bf49719c25cdb0743d71ed0410343\cache-c468eff055c85d56.arrow


In [10]:
print(input_ids)

tensor([[   2,  483,   25,  ...,    0,    0,    0],
        [   2,   95,  555,  ...,    0,    0,    0],
        [   2,   32,   22,  ...,    0,    0,    0],
        ...,
        [   2,  100,   95,  ...,    0,    0,    0],
        [   2, 1511,  414,  ...,    0,    0,    0],
        [   2,   32,   22,  ...,    0,    0,    0]])


In [11]:
print(attention_masks)

tensor([[1, 1, 1,  ..., 0, 0, 0],
        [1, 1, 1,  ..., 0, 0, 0],
        [1, 1, 1,  ..., 0, 0, 0],
        ...,
        [1, 1, 1,  ..., 0, 0, 0],
        [1, 1, 1,  ..., 0, 0, 0],
        [1, 1, 1,  ..., 0, 0, 0]])


In [12]:
# Split the dataset into train, validation, and test sets
train_dataset = tokenized_dataset["train"]
val_dataset = tokenized_dataset["validation"]
test_dataset = tokenized_dataset["test"]

In [13]:
# Convert the datasets to PyTorch tensors
train_dataset = train_dataset.remove_columns(["text"])
val_dataset = val_dataset.remove_columns(["text"])
test_dataset = test_dataset.remove_columns(["text"])

In [14]:
train_dataset.set_format("torch", columns=["input_ids", "attention_mask", "label"])
val_dataset.set_format("torch", columns=["input_ids", "attention_mask", "label"])
test_dataset.set_format("torch", columns=["input_ids", "attention_mask", "label"])

In [15]:


class StanceClimateModel(nn.Module):
    def __init__(self, num_classes=3):
        super(StanceClimateModel, self).__init__()
        self.albert = AlbertModel.from_pretrained('albert-base-v2')
        self.dropout = nn.Dropout(0.1)
        self.fc = nn.Linear(self.albert.config.hidden_size, num_classes)

    def forward(self, input_ids, attention_mask):
        outputs = self.albert(input_ids=input_ids, attention_mask=attention_mask)
        pooled_output = outputs.pooler_output
        pooled_output = self.dropout(pooled_output)
        logits = self.fc(pooled_output)
        return logits


In [16]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


In [17]:
# instantiate your model
stance_climate_model = StanceClimateModel(num_classes=3).to(device)

Some weights of the model checkpoint at albert-base-v2 were not used when initializing AlbertModel: ['predictions.bias', 'predictions.LayerNorm.bias', 'predictions.dense.bias', 'predictions.decoder.bias', 'predictions.dense.weight', 'predictions.decoder.weight', 'predictions.LayerNorm.weight']
- This IS expected if you are initializing AlbertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing AlbertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [18]:
# define your loss function
criterion = nn.CrossEntropyLoss()

# define your optimizer
optimizer = torch.optim.Adam(stance_climate_model.parameters(), lr=0.01)

In [19]:
# set up your training, validation, and test dataloaders
train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=16)
test_loader = DataLoader(test_dataset, batch_size=16)

In [20]:
for batch in train_loader:
    print(batch)


{'label': tensor([0, 1, 0, 2, 2, 0, 2, 2, 2, 2, 0, 0, 0, 0, 2, 2]), 'input_ids': tensor([[   2,  750,   25,  ...,    0,    0,    0],
        [   2,  636,  177,  ...,    0,    0,    0],
        [   2,   13,    1,  ...,    0,    0,    0],
        ...,
        [   2,   95,  376,  ...,    0,    0,    0],
        [   2, 9227, 1953,  ...,    0,    0,    0],
        [   2, 7677,  279,  ...,    0,    0,    0]]), 'attention_mask': tensor([[1, 1, 1,  ..., 0, 0, 0],
        [1, 1, 1,  ..., 0, 0, 0],
        [1, 1, 1,  ..., 0, 0, 0],
        ...,
        [1, 1, 1,  ..., 0, 0, 0],
        [1, 1, 1,  ..., 0, 0, 0],
        [1, 1, 1,  ..., 0, 0, 0]])}
{'label': tensor([2, 2, 2, 0, 2, 2, 0, 2, 0, 2, 2, 2, 0, 2, 2, 2]), 'input_ids': tensor([[   2,   86, 6926,  ...,    0,    0,    0],
        [   2, 3414,   28,  ...,    0,    0,    0],
        [   2, 9644,   30,  ...,    0,    0,    0],
        ...,
        [   2,   13,    1,  ...,    0,    0,    0],
        [   2, 1679,   17,  ...,    0,    0,    0],
 

In [21]:
from tqdm import tqdm
num_epochs = 1
for epoch in range(num_epochs):
    running_loss = 0.0
    correct_predictions = 0
    total_predictions = 0
    for batch in tqdm(train_loader, desc=f'Epoch {epoch+1}/{num_epochs}'):
        # get the inputs
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['label'].to(device)

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = stance_climate_model(input_ids=input_ids, attention_mask=attention_mask)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # calculate accuracy
        _, predicted = torch.max(outputs.data, 1)
        total_predictions += labels.size(0)
        correct_predictions += (predicted == labels).sum().item()

        # print running loss for each batch
        running_loss += loss.item()
        avg_loss = running_loss / len(train_loader)
        avg_acc = correct_predictions / total_predictions
        tqdm.write(f'Train Loss: {avg_loss:.3f}, Train Acc: {avg_acc:.3f}', end='\r')
    tqdm.write(f'Epoch {epoch+1}, Train Loss: {avg_loss:.3f}, Train Acc: {avg_acc:.3f}')

    print(f"Epoch {epoch+1} finished")


Epoch 1/1:   4%|▍         | 1/23 [00:32<11:55, 32.54s/it]

Train Loss: 0.056, Train Acc: 0.000

Epoch 1/1:   9%|▊         | 2/23 [01:03<11:02, 31.57s/it]

Train Loss: 0.092, Train Acc: 0.250

Epoch 1/1:  13%|█▎        | 3/23 [01:46<12:12, 36.64s/it]

Train Loss: 0.190, Train Acc: 0.250

Epoch 1/1:  17%|█▋        | 4/23 [02:21<11:29, 36.27s/it]

Train Loss: 0.450, Train Acc: 0.312

Epoch 1/1:  22%|██▏       | 5/23 [02:57<10:48, 36.03s/it]

Train Loss: 0.559, Train Acc: 0.338

Epoch 1/1:  26%|██▌       | 6/23 [03:32<10:07, 35.74s/it]

Train Loss: 0.661, Train Acc: 0.333

Epoch 1/1:  30%|███       | 7/23 [04:08<09:33, 35.83s/it]

Train Loss: 0.729, Train Acc: 0.357

Epoch 1/1:  35%|███▍      | 8/23 [04:44<08:57, 35.81s/it]

Train Loss: 0.803, Train Acc: 0.391

Epoch 1/1:  39%|███▉      | 9/23 [05:21<08:25, 36.13s/it]

Train Loss: 1.052, Train Acc: 0.347

Epoch 1/1:  43%|████▎     | 10/23 [05:56<07:47, 35.98s/it]

Train Loss: 1.101, Train Acc: 0.381

Epoch 1/1:  48%|████▊     | 11/23 [06:32<07:10, 35.86s/it]

Train Loss: 1.187, Train Acc: 0.386

Epoch 1/1:  52%|█████▏    | 12/23 [07:07<06:33, 35.74s/it]

Train Loss: 1.256, Train Acc: 0.391

Epoch 1/1:  57%|█████▋    | 13/23 [07:42<05:54, 35.41s/it]

Train Loss: 1.320, Train Acc: 0.413

Epoch 1/1:  61%|██████    | 14/23 [08:07<04:50, 32.28s/it]

Train Loss: 1.356, Train Acc: 0.433

Epoch 1/1:  65%|██████▌   | 15/23 [08:32<04:00, 30.05s/it]

Train Loss: 1.414, Train Acc: 0.417

Epoch 1/1:  70%|██████▉   | 16/23 [08:57<03:19, 28.55s/it]

Train Loss: 1.449, Train Acc: 0.430

Epoch 1/1:  74%|███████▍  | 17/23 [09:22<02:45, 27.58s/it]

Train Loss: 1.496, Train Acc: 0.434

Epoch 1/1:  78%|███████▊  | 18/23 [09:47<02:14, 26.81s/it]

Train Loss: 1.527, Train Acc: 0.438

Epoch 1/1:  83%|████████▎ | 19/23 [10:13<01:45, 26.39s/it]

Train Loss: 1.556, Train Acc: 0.447

Epoch 1/1:  87%|████████▋ | 20/23 [10:38<01:17, 25.98s/it]

Train Loss: 1.598, Train Acc: 0.447

Epoch 1/1:  91%|█████████▏| 21/23 [11:03<00:51, 25.66s/it]

Train Loss: 1.655, Train Acc: 0.446

Epoch 1/1:  96%|█████████▌| 22/23 [11:28<00:25, 25.46s/it]

Train Loss: 1.698, Train Acc: 0.440

Epoch 1/1: 100%|██████████| 23/23 [11:33<00:00, 30.16s/it]

Epoch 1, Train Loss: 1.728, Train Acc: 0.442
Epoch 1 finished





In [22]:
# Validation loop
with torch.no_grad():
    stance_climate_model.eval()  # Set the model to evaluation mode
    valid_loss = 0.0
    correct_predictions = 0
    total_predictions = 0
    for batch in val_loader:
        # get the inputs
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['label'].to(device)

        # forward
        outputs = stance_climate_model(input_ids=input_ids, attention_mask=attention_mask)
        loss = criterion(outputs, labels)

        # calculate accuracy
        _, predicted = torch.max(outputs.data, 1)
        total_predictions += labels.size(0)
        correct_predictions += (predicted == labels).sum().item()

        # calculate running loss
        valid_loss += loss.item()

    avg_loss = valid_loss / len(val_loader)
    avg_acc = correct_predictions / total_predictions
    print(f'Validation Loss: {avg_loss:.3f}, Validation Acc: {avg_acc:.3f}')

Validation Loss: 1.287, Validation Acc: 0.525


In [23]:

# Test loop
with torch.no_grad():
    stance_climate_model.eval()  # Set the model to evaluation mode
    test_loss = 0.0
    correct_predictions = 0
    total_predictions = 0
    for batch in test_loader:
        # get the inputs
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['label'].to(device)

        # forward
        outputs = stance_climate_model(input_ids=input_ids, attention_mask=attention_mask)
        loss = criterion(outputs, labels)

        # calculate accuracy
        _, predicted = torch.max(outputs.data, 1)
        total_predictions += labels.size(0)
        correct_predictions += (predicted == labels).sum().item()

        # calculate running loss
        test_loss += loss.item()

    avg_loss = test_loss / len(test_loader)
    avg_acc = correct_predictions / total_predictions
    print(f'Test Loss: {avg_loss:.3f}, Test Acc: {avg_acc:.3f}')


Test Loss: 1.219, Test Acc: 0.728


In [24]:
# save the model
torch.save(stance_climate_model.state_dict(), 'Models/stance_climate_model.pth')
