In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from transformers import AutoTokenizer
from transformers import AlbertModel

In [2]:
from datasets import load_dataset

dataset = load_dataset("tweet_eval", "stance_hillary")

Found cached dataset tweet_eval (C:/Users/User/.cache/huggingface/datasets/tweet_eval/stance_hillary/1.1.0/12aee5282b8784f3e95459466db4cdf45c6bf49719c25cdb0743d71ed0410343)


  0%|          | 0/3 [00:00<?, ?it/s]

In [3]:
dataset["train"]

Dataset({
    features: ['text', 'label'],
    num_rows: 620
})

In [4]:
dataset["validation"]

Dataset({
    features: ['text', 'label'],
    num_rows: 69
})

In [5]:
dataset["validation"]

Dataset({
    features: ['text', 'label'],
    num_rows: 69
})

In [6]:
# Load a pre-trained tokenizer
tokenizer = AutoTokenizer.from_pretrained("albert-base-v2")


In [7]:
def tokenize_function(example):
    return tokenizer(example["text"], padding="max_length", truncation=True, max_length=128)

In [8]:
# Tokenize the dataset
tokenized_dataset = dataset.map(tokenize_function, batched=True)

# Get the input_ids from the tokenized dataset for train, validation, and test sets
train_input_ids = [example['input_ids'] for example in tokenized_dataset['train']]
val_input_ids = [example['input_ids'] for example in tokenized_dataset['validation']]
test_input_ids = [example['input_ids'] for example in tokenized_dataset['test']]

# Convert input_ids to PyTorch tensors
train_input_ids = torch.tensor(train_input_ids, dtype=torch.long)
val_input_ids = torch.tensor(val_input_ids, dtype=torch.long)
test_input_ids = torch.tensor(test_input_ids, dtype=torch.long)

# Concatenate the input_ids tensors along the first dimension
input_ids = torch.cat((train_input_ids, val_input_ids, test_input_ids), dim=0)


Loading cached processed dataset at C:\Users\User\.cache\huggingface\datasets\tweet_eval\stance_hillary\1.1.0\12aee5282b8784f3e95459466db4cdf45c6bf49719c25cdb0743d71ed0410343\cache-c5bcd81f5d767195.arrow


Map:   0%|          | 0/295 [00:00<?, ? examples/s]

Loading cached processed dataset at C:\Users\User\.cache\huggingface\datasets\tweet_eval\stance_hillary\1.1.0\12aee5282b8784f3e95459466db4cdf45c6bf49719c25cdb0743d71ed0410343\cache-9ca6dda2aac0766a.arrow


In [9]:
# Tokenize the dataset
tokenized_dataset = dataset.map(tokenize_function, batched=True)

# Get the attention_masks from the tokenized dataset for train, validation, and test sets
train_attention_mask = [example['attention_mask'] for example in tokenized_dataset['train']]
val_attention_mask = [example['attention_mask'] for example in tokenized_dataset['validation']]
test_attention_mask = [example['attention_mask'] for example in tokenized_dataset['test']]

# Convert attention_mask to PyTorch tensors
train_attention_mask = torch.tensor(train_attention_mask, dtype=torch.long)
val_attention_mask = torch.tensor(val_attention_mask, dtype=torch.long)
test_attention_mask = torch.tensor(test_attention_mask, dtype=torch.long)

# Concatenate the input_ids tensors along the first dimension
attention_masks = torch.cat((train_attention_mask, val_attention_mask, test_attention_mask), dim=0)


Loading cached processed dataset at C:\Users\User\.cache\huggingface\datasets\tweet_eval\stance_hillary\1.1.0\12aee5282b8784f3e95459466db4cdf45c6bf49719c25cdb0743d71ed0410343\cache-80043f9f65fa7acc.arrow
Loading cached processed dataset at C:\Users\User\.cache\huggingface\datasets\tweet_eval\stance_hillary\1.1.0\12aee5282b8784f3e95459466db4cdf45c6bf49719c25cdb0743d71ed0410343\cache-21b69fc9da4849ea.arrow
Loading cached processed dataset at C:\Users\User\.cache\huggingface\datasets\tweet_eval\stance_hillary\1.1.0\12aee5282b8784f3e95459466db4cdf45c6bf49719c25cdb0743d71ed0410343\cache-9ca6dda2aac0766a.arrow


In [10]:
print(input_ids)

tensor([[    2,   100,    21,  ...,     0,     0,     0],
        [    2,    95,    22,  ...,     0,     0,     0],
        [    2,   100,    42,  ...,     0,     0,     0],
        ...,
        [    2,    13,     1,  ...,     0,     0,     0],
        [    2, 20733,    92,  ...,     0,     0,     0],
        [    2,    32,    22,  ...,     0,     0,     0]])


In [11]:
print(attention_masks)

tensor([[1, 1, 1,  ..., 0, 0, 0],
        [1, 1, 1,  ..., 0, 0, 0],
        [1, 1, 1,  ..., 0, 0, 0],
        ...,
        [1, 1, 1,  ..., 0, 0, 0],
        [1, 1, 1,  ..., 0, 0, 0],
        [1, 1, 1,  ..., 0, 0, 0]])


In [12]:
# Split the dataset into train, validation, and test sets
train_dataset = tokenized_dataset["train"]
val_dataset = tokenized_dataset["validation"]
test_dataset = tokenized_dataset["test"]

In [13]:
# Convert the datasets to PyTorch tensors
train_dataset = train_dataset.remove_columns(["text"])
val_dataset = val_dataset.remove_columns(["text"])
test_dataset = test_dataset.remove_columns(["text"])

In [14]:
train_dataset.set_format("torch", columns=["input_ids", "attention_mask", "label"])
val_dataset.set_format("torch", columns=["input_ids", "attention_mask", "label"])
test_dataset.set_format("torch", columns=["input_ids", "attention_mask", "label"])

In [15]:
class StanceHiliaryModel(nn.Module):
    def __init__(self, num_classes=3):
        super(StanceHiliaryModel, self).__init__()
        self.albert = AlbertModel.from_pretrained('albert-base-v2')
        self.dropout = nn.Dropout(0.1)
        self.fc = nn.Linear(self.albert.config.hidden_size, num_classes)

    def forward(self, input_ids, attention_mask):
        outputs = self.albert(input_ids=input_ids, attention_mask=attention_mask)
        pooled_output = outputs.pooler_output
        pooled_output = self.dropout(pooled_output)
        logits = self.fc(pooled_output)
        return logits

In [16]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


In [17]:
# instantiate your model
stance_hillary_model = StanceHiliaryModel(num_classes=3).to(device)

Some weights of the model checkpoint at albert-base-v2 were not used when initializing AlbertModel: ['predictions.decoder.bias', 'predictions.LayerNorm.weight', 'predictions.dense.bias', 'predictions.dense.weight', 'predictions.LayerNorm.bias', 'predictions.bias', 'predictions.decoder.weight']
- This IS expected if you are initializing AlbertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing AlbertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [18]:
# define your loss function
criterion = nn.CrossEntropyLoss()

# define your optimizer
optimizer = torch.optim.Adam(stance_hillary_model.parameters(), lr=0.01)

In [19]:
# set up your training, validation, and test dataloaders
train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=16)
test_loader = DataLoader(test_dataset, batch_size=16)

In [20]:
for batch in train_loader:
    print(batch)


{'label': tensor([1, 0, 1, 0, 0, 1, 2, 0, 1, 1, 2, 1, 2, 2, 1, 0]), 'input_ids': tensor([[   2,  419,   31,  ...,    0,    0,    0],
        [   2,   31,  221,  ...,    0,    0,    0],
        [   2,   13,    1,  ...,    0,    0,    0],
        ...,
        [   2,   13,    1,  ...,    0,    0,    0],
        [   2, 6926, 4205,  ...,    0,    0,    0],
        [   2,   13, 5256,  ...,    0,    0,    0]]), 'attention_mask': tensor([[1, 1, 1,  ..., 0, 0, 0],
        [1, 1, 1,  ..., 0, 0, 0],
        [1, 1, 1,  ..., 0, 0, 0],
        ...,
        [1, 1, 1,  ..., 0, 0, 0],
        [1, 1, 1,  ..., 0, 0, 0],
        [1, 1, 1,  ..., 0, 0, 0]])}
{'label': tensor([1, 2, 2, 2, 1, 0, 1, 1, 2, 2, 2, 0, 1, 0, 1, 0]), 'input_ids': tensor([[   2,   32,   22,  ...,    0,    0,    0],
        [   2,  441,   21,  ...,    0,    0,    0],
        [   2,  104,   19,  ...,    0,    0,    0],
        ...,
        [   2,   13,    1,  ...,    0,    0,    0],
        [   2, 8083,   26,  ...,    0,    0,    0],
 

In [21]:
from tqdm import tqdm
num_epochs = 1
for epoch in range(num_epochs):
    running_loss = 0.0
    correct_predictions = 0
    total_predictions = 0
    for batch in tqdm(train_loader, desc=f'Epoch {epoch+1}/{num_epochs}'):
        # get the inputs
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['label'].to(device)

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = stance_hillary_model(input_ids=input_ids, attention_mask=attention_mask)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # calculate accuracy
        _, predicted = torch.max(outputs.data, 1)
        total_predictions += labels.size(0)
        correct_predictions += (predicted == labels).sum().item()

        # print running loss for each batch
        running_loss += loss.item()
        avg_loss = running_loss / len(train_loader)
        avg_acc = correct_predictions / total_predictions
        tqdm.write(f'Train Loss: {avg_loss:.3f}, Train Acc: {avg_acc:.3f}', end='\r')
    tqdm.write(f'Epoch {epoch+1}, Train Loss: {avg_loss:.3f}, Train Acc: {avg_acc:.3f}')

    print(f"Epoch {epoch+1} finished")


Epoch 1/1:   3%|▎         | 1/39 [00:41<26:08, 41.29s/it]

Train Loss: 0.032, Train Acc: 0.188

Epoch 1/1:   5%|▌         | 2/39 [01:20<24:38, 39.97s/it]

Train Loss: 0.158, Train Acc: 0.250

Epoch 1/1:   8%|▊         | 3/39 [01:54<22:27, 37.44s/it]

Train Loss: 0.204, Train Acc: 0.396

Epoch 1/1:  10%|█         | 4/39 [02:30<21:20, 36.58s/it]

Train Loss: 0.502, Train Acc: 0.375

Epoch 1/1:  13%|█▎        | 5/39 [03:04<20:21, 35.92s/it]

Train Loss: 0.569, Train Acc: 0.325

Epoch 1/1:  15%|█▌        | 6/39 [03:40<19:45, 35.94s/it]

Train Loss: 0.680, Train Acc: 0.375

Epoch 1/1:  18%|█▊        | 7/39 [04:15<18:58, 35.59s/it]

Train Loss: 0.789, Train Acc: 0.339

Epoch 1/1:  21%|██        | 8/39 [04:51<18:23, 35.60s/it]

Train Loss: 0.837, Train Acc: 0.359

Epoch 1/1:  23%|██▎       | 9/39 [05:26<17:43, 35.46s/it]

Train Loss: 0.884, Train Acc: 0.326

Epoch 1/1:  26%|██▌       | 10/39 [06:01<17:05, 35.35s/it]

Train Loss: 0.918, Train Acc: 0.312

Epoch 1/1:  28%|██▊       | 11/39 [06:36<16:26, 35.22s/it]

Train Loss: 0.984, Train Acc: 0.324

Epoch 1/1:  31%|███       | 12/39 [07:04<14:56, 33.19s/it]

Train Loss: 1.023, Train Acc: 0.344

Epoch 1/1:  33%|███▎      | 13/39 [07:29<13:16, 30.63s/it]

Train Loss: 1.050, Train Acc: 0.341

Epoch 1/1:  36%|███▌      | 14/39 [07:54<12:00, 28.82s/it]

Train Loss: 1.080, Train Acc: 0.339

Epoch 1/1:  38%|███▊      | 15/39 [08:19<11:02, 27.59s/it]

Train Loss: 1.112, Train Acc: 0.325

Epoch 1/1:  41%|████      | 16/39 [08:44<10:16, 26.80s/it]

Train Loss: 1.142, Train Acc: 0.340

Epoch 1/1:  44%|████▎     | 17/39 [09:08<09:36, 26.20s/it]

Train Loss: 1.162, Train Acc: 0.364

Epoch 1/1:  46%|████▌     | 18/39 [09:33<09:03, 25.86s/it]

Train Loss: 1.188, Train Acc: 0.382

Epoch 1/1:  49%|████▊     | 19/39 [09:58<08:29, 25.48s/it]

Train Loss: 1.231, Train Acc: 0.388

Epoch 1/1:  51%|█████▏    | 20/39 [10:23<08:00, 25.31s/it]

Train Loss: 1.264, Train Acc: 0.378

Epoch 1/1:  54%|█████▍    | 21/39 [10:48<07:33, 25.20s/it]

Train Loss: 1.290, Train Acc: 0.387

Epoch 1/1:  56%|█████▋    | 22/39 [11:15<07:20, 25.89s/it]

Train Loss: 1.321, Train Acc: 0.389

Epoch 1/1:  59%|█████▉    | 23/39 [11:45<07:14, 27.15s/it]

Train Loss: 1.355, Train Acc: 0.399

Epoch 1/1:  62%|██████▏   | 24/39 [12:12<06:45, 27.05s/it]

Train Loss: 1.399, Train Acc: 0.404

Epoch 1/1:  64%|██████▍   | 25/39 [12:42<06:31, 27.94s/it]

Train Loss: 1.424, Train Acc: 0.412

Epoch 1/1:  67%|██████▋   | 26/39 [13:12<06:10, 28.50s/it]

Train Loss: 1.457, Train Acc: 0.406

Epoch 1/1:  69%|██████▉   | 27/39 [13:30<05:02, 25.18s/it]

Train Loss: 1.490, Train Acc: 0.403

Epoch 1/1:  72%|███████▏  | 28/39 [13:45<04:04, 22.20s/it]

Train Loss: 1.517, Train Acc: 0.400

Epoch 1/1:  74%|███████▍  | 29/39 [14:00<03:21, 20.19s/it]

Train Loss: 1.553, Train Acc: 0.403

Epoch 1/1:  77%|███████▋  | 30/39 [14:22<03:05, 20.56s/it]

Train Loss: 1.594, Train Acc: 0.402

Epoch 1/1:  79%|███████▉  | 31/39 [14:41<02:40, 20.09s/it]

Train Loss: 1.621, Train Acc: 0.405

Epoch 1/1:  82%|████████▏ | 32/39 [15:00<02:18, 19.83s/it]

Train Loss: 1.651, Train Acc: 0.406

Epoch 1/1:  85%|████████▍ | 33/39 [15:20<01:58, 19.81s/it]

Train Loss: 1.689, Train Acc: 0.396

Epoch 1/1:  87%|████████▋ | 34/39 [15:40<01:40, 20.02s/it]

Train Loss: 1.712, Train Acc: 0.404

Epoch 1/1:  90%|████████▉ | 35/39 [16:03<01:23, 20.81s/it]

Train Loss: 1.769, Train Acc: 0.405

Epoch 1/1:  92%|█████████▏| 36/39 [16:25<01:03, 21.12s/it]

Train Loss: 1.831, Train Acc: 0.408

Epoch 1/1:  95%|█████████▍| 37/39 [16:45<00:41, 20.91s/it]

Train Loss: 1.863, Train Acc: 0.412

Epoch 1/1:  97%|█████████▋| 38/39 [17:09<00:21, 21.67s/it]

Train Loss: 1.894, Train Acc: 0.408

Epoch 1/1: 100%|██████████| 39/39 [17:22<00:00, 26.72s/it]

Epoch 1, Train Loss: 1.947, Train Acc: 0.402
Epoch 1 finished





In [22]:
# Validation loop
with torch.no_grad():
    stance_hillary_model.eval()  # Set the model to evaluation mode
    valid_loss = 0.0
    correct_predictions = 0
    total_predictions = 0
    for batch in val_loader:
        # get the inputs
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['label'].to(device)

        # forward
        outputs = stance_hillary_model(input_ids=input_ids, attention_mask=attention_mask)
        loss = criterion(outputs, labels)

        # calculate accuracy
        _, predicted = torch.max(outputs.data, 1)
        total_predictions += labels.size(0)
        correct_predictions += (predicted == labels).sum().item()

        # calculate running loss
        valid_loss += loss.item()

    avg_loss = valid_loss / len(val_loader)
    avg_acc = correct_predictions / total_predictions
    print(f'Validation Loss: {avg_loss:.3f}, Validation Acc: {avg_acc:.3f}')

Validation Loss: 1.180, Validation Acc: 0.565


In [23]:

# Test loop
with torch.no_grad():
    stance_hillary_model.eval()  # Set the model to evaluation mode
    test_loss = 0.0
    correct_predictions = 0
    total_predictions = 0
    for batch in test_loader:
        # get the inputs
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['label'].to(device)

        # forward
        outputs = stance_hillary_model(input_ids=input_ids, attention_mask=attention_mask)
        loss = criterion(outputs, labels)

        # calculate accuracy
        _, predicted = torch.max(outputs.data, 1)
        total_predictions += labels.size(0)
        correct_predictions += (predicted == labels).sum().item()

        # calculate running loss
        test_loss += loss.item()

    avg_loss = test_loss / len(test_loader)
    avg_acc = correct_predictions / total_predictions
    print(f'Test Loss: {avg_loss:.3f}, Test Acc: {avg_acc:.3f}')


Test Loss: 1.126, Test Acc: 0.583


In [24]:
# save the model
torch.save(stance_hillary_model.state_dict(), 'Models/stance_hillary_model.pth')
