In [1]:
import json
import torch
import torch.nn as nn 
from transformers import BertTokenizer, BertForSequenceClassification, RobertaTokenizer, RobertaForSequenceClassification
from torch.utils.data import DataLoader, Dataset
from tqdm import tqdm
import csv

def load_json(filename):
    with open(filename, 'r', encoding='utf-8') as file:
        data = json.load(file)
    return data

file_path = "/Users/hsinhochien/Documents/college/大三下/NLP/fp_2.2/Task 2.2/train.json"
train_data = load_json(file_path)

file_path = "/Users/hsinhochien/Documents/college/大三下/NLP/fp_2.2/Task 2.2/dev.json"
val_data = load_json(file_path)

## BERT

In [5]:
class CustomDataset(Dataset):
    def __init__(self, data, max_seq_length=128):
        self.data = data
        self.tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
        self.max_seq_length = max_seq_length

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        premise, hypothesis, label = self.data[idx]
        encoding = self.tokenizer(premise, hypothesis, return_tensors='pt', padding='max_length', truncation ='only_first')
        input_ids = encoding['input_ids'].flatten()
        attention_mask = encoding['attention_mask'].flatten()

        return {
            'input_ids': input_ids,
            'attention_mask': attention_mask,
            'labels': torch.tensor(label, dtype=torch.long)
        }

train_dataset = CustomDataset(train_data)
val_dataset = CustomDataset(val_data)

In [7]:
# Define the function to evaluate the model
def evaluate_model(model, dataloader):
    model.eval()
    correct_predictions = 0
    total_predictions = 0
    
    with torch.no_grad():
        for batch in tqdm(dataloader):
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['labels'].to(device)
            
            outputs = model(input_ids, attention_mask=attention_mask)
            _, predicted = torch.max(outputs.logits, dim=1)
            
            total_predictions += labels.size(0)
            correct_predictions += (predicted == labels).sum().item()
    
    accuracy = correct_predictions / total_predictions
    return accuracy

# Assuming train_data and val_data are your datasets and device is your current device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Create dataset and dataloader for validation
val_dataset = CustomDataset(val_data)
val_dataloader = DataLoader(val_dataset, batch_size=32, shuffle=False)

# Load pre-trained model
model = BertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=3)
model.to(device)

# Evaluate the model
accuracy = evaluate_model(model, val_dataloader)

print("Validation Accuracy:", accuracy)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
100%|██████████| 22/22 [01:49<00:00,  4.96s/it]

Validation Accuracy: 0.36231884057971014





### fine-tune

In [25]:
# Model
model = BertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=3)
optimizer = torch.optim.Adam(model.parameters(), lr=2e-5)
loss_fn = nn.CrossEntropyLoss()

# Training
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)
num_epoch = 10

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

best_val_acc = 0.0
best_model_state_dict = None

for epoch in range(num_epoch):
    model.train()
    train_loss = 0
    train_samples = 0
    for batch in tqdm(train_loader):
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['labels'].to(device)

        optimizer.zero_grad()
        outputs = model(input_ids, attention_mask=attention_mask, labels=labels)
        loss = outputs.loss
        loss.backward()
        optimizer.step()

        train_loss += loss.item() * input_ids.size(0)
        train_samples += input_ids.size(0)

    train_loss /= train_samples

    # Validation
    model.eval()
    val_acc = 0
    with torch.no_grad():
        for batch in tqdm(val_loader):
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['labels'].to(device)

            outputs = model(input_ids, attention_mask=attention_mask)
            logits = outputs.logits
            preds = torch.argmax(logits, dim=1)
            val_acc += (preds == labels).sum().item()

    val_acc /= len(val_dataset)
    print(f'Epoch {epoch+1}, Training Loss: {train_loss}, Validation Accuracy: {val_acc}')

    # Save the best model
    if val_acc > best_val_acc:
        best_val_acc = val_acc
        best_model_state_dict = model.state_dict()

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
100%|██████████| 173/173 [1:03:05<00:00, 21.88s/it]
100%|██████████| 22/22 [01:46<00:00,  4.83s/it]


Epoch 1, Training Loss: 0.6767629601669968, Validation Accuracy: 0.6985507246376812


100%|██████████| 173/173 [1:02:24<00:00, 21.65s/it]
100%|██████████| 22/22 [01:47<00:00,  4.87s/it]


Epoch 2, Training Loss: 0.585999337697543, Validation Accuracy: 0.755072463768116


100%|██████████| 173/173 [1:02:21<00:00, 21.63s/it]
100%|██████████| 22/22 [01:47<00:00,  4.88s/it]


Epoch 3, Training Loss: 0.37109132243593285, Validation Accuracy: 0.7811594202898551


100%|██████████| 173/173 [1:02:16<00:00, 21.60s/it]
100%|██████████| 22/22 [01:47<00:00,  4.88s/it]


Epoch 4, Training Loss: 0.19589034476167502, Validation Accuracy: 0.7782608695652173


100%|██████████| 173/173 [1:02:58<00:00, 21.84s/it]
100%|██████████| 22/22 [01:45<00:00,  4.78s/it]


Epoch 5, Training Loss: 0.10121153384478136, Validation Accuracy: 0.7855072463768116


100%|██████████| 173/173 [1:03:12<00:00, 21.92s/it]
100%|██████████| 22/22 [01:44<00:00,  4.75s/it]


Epoch 6, Training Loss: 0.07622860009321332, Validation Accuracy: 0.7855072463768116


100%|██████████| 173/173 [1:03:13<00:00, 21.93s/it]
100%|██████████| 22/22 [01:44<00:00,  4.75s/it]


Epoch 7, Training Loss: 0.04605794754476462, Validation Accuracy: 0.7913043478260869


100%|██████████| 173/173 [1:03:11<00:00, 21.91s/it]
100%|██████████| 22/22 [01:44<00:00,  4.76s/it]


Epoch 8, Training Loss: 0.029522528273391347, Validation Accuracy: 0.7710144927536232


100%|██████████| 173/173 [1:03:05<00:00, 21.88s/it]
100%|██████████| 22/22 [01:43<00:00,  4.71s/it]


Epoch 9, Training Loss: 0.021574606653049878, Validation Accuracy: 0.7855072463768116


100%|██████████| 173/173 [1:03:03<00:00, 21.87s/it]
100%|██████████| 22/22 [01:44<00:00,  4.74s/it]

Epoch 10, Training Loss: 0.021334778287732566, Validation Accuracy: 0.7840579710144927





In [26]:
# Print the best model
print("Best model:")
best_model = BertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=3)
best_model.load_state_dict(best_model_state_dict)
print(best_model)

Best model:


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-11): 12 x BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12,

In [27]:
# Define the path to save the CSV file
output_csv_file = "bert_predictions.csv"

with open(output_csv_file, mode='w', newline='') as file:
    writer = csv.writer(file)

    # Write the header row
    writer.writerow(['Prediction', 'True Label'])

    # Using the best model to make predictions on validation data
    best_model.eval()
    with torch.no_grad():
        for batch in val_loader:
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['labels'].to(device)

            outputs = best_model(input_ids, attention_mask=attention_mask)
            logits = outputs.logits
            preds = torch.argmax(logits, dim=1)
            for i in range(len(preds)):
                writer.writerow([preds[i].item(), labels[i].item()])

print(f"Predictions saved to {output_csv_file}")

Predictions saved to bert_predictions.csv


## RoBERTa

In [8]:
class CustomDataset(Dataset):
    def __init__(self, data, max_seq_length=128):
        self.data = data
        self.tokenizer = RobertaTokenizer.from_pretrained('roberta-base')
        self.max_seq_length = max_seq_length

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        premise, hypothesis, label = self.data[idx]
        encoding = self.tokenizer(premise, hypothesis, return_tensors='pt', padding='max_length', truncation ='only_first')
        input_ids = encoding['input_ids'].flatten()
        attention_mask = encoding['attention_mask'].flatten()

        return {
            'input_ids': input_ids,
            'attention_mask': attention_mask,
            'labels': torch.tensor(label, dtype=torch.long)
        }

train_dataset = CustomDataset(train_data)
val_dataset = CustomDataset(val_data)

In [9]:
model = RobertaForSequenceClassification.from_pretrained('roberta-base', num_labels=3)
model.to(device)

# Evaluate the model
accuracy = evaluate_model(model, val_dataloader)

print("Validation Accuracy:", accuracy)

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
100%|██████████| 22/22 [01:49<00:00,  4.97s/it]

Validation Accuracy: 0.6985507246376812





### fine-tune

In [29]:
# Model
model = RobertaForSequenceClassification.from_pretrained('roberta-base', num_labels=3)
optimizer = torch.optim.Adam(model.parameters(), lr=2e-5)
loss_fn = nn.CrossEntropyLoss()

# Training
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)
num_epoch = 10

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

best_val_acc = 0.0
best_model_state_dict = None

for epoch in range(num_epoch):
    model.train()
    train_loss = 0
    train_samples = 0
    for batch in tqdm(train_loader):
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['labels'].to(device)

        optimizer.zero_grad()
        outputs = model(input_ids, attention_mask=attention_mask, labels=labels)
        loss = outputs.loss
        loss.backward()
        optimizer.step()

        train_loss += loss.item() * input_ids.size(0)
        train_samples += input_ids.size(0)

    train_loss /= train_samples

    # Validation
    model.eval()
    val_acc = 0
    with torch.no_grad():
        for batch in tqdm(val_loader):
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['labels'].to(device)

            outputs = model(input_ids, attention_mask=attention_mask)
            logits = outputs.logits
            preds = torch.argmax(logits, dim=1)
            val_acc += (preds == labels).sum().item()

    val_acc /= len(val_dataset)
    print(f'Epoch {epoch+1}, Training Loss: {train_loss}, Validation Accuracy: {val_acc}')

    # Save the best model
    if val_acc > best_val_acc:
        best_val_acc = val_acc
        best_model_state_dict = model.state_dict()

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
100%|██████████| 173/173 [1:03:23<00:00, 21.98s/it]
100%|██████████| 22/22 [01:46<00:00,  4.82s/it]


Epoch 1, Training Loss: 0.6854413969860931, Validation Accuracy: 0.6985507246376812


100%|██████████| 173/173 [1:03:30<00:00, 22.03s/it]
100%|██████████| 22/22 [01:47<00:00,  4.90s/it]


Epoch 2, Training Loss: 0.5932296378373711, Validation Accuracy: 0.7884057971014493


100%|██████████| 173/173 [1:03:24<00:00, 21.99s/it]
100%|██████████| 22/22 [01:46<00:00,  4.85s/it]


Epoch 3, Training Loss: 0.44201898957007046, Validation Accuracy: 0.7927536231884058


100%|██████████| 173/173 [1:03:19<00:00, 21.96s/it]
100%|██████████| 22/22 [01:43<00:00,  4.72s/it]


Epoch 4, Training Loss: 0.2808865818332188, Validation Accuracy: 0.7884057971014493


100%|██████████| 173/173 [1:04:22<00:00, 22.33s/it]
100%|██████████| 22/22 [01:46<00:00,  4.85s/it]


Epoch 5, Training Loss: 0.18438220419849288, Validation Accuracy: 0.8144927536231884


100%|██████████| 173/173 [1:02:31<00:00, 21.68s/it]
100%|██████████| 22/22 [01:48<00:00,  4.94s/it]


Epoch 6, Training Loss: 0.12625700909576215, Validation Accuracy: 0.7782608695652173


100%|██████████| 173/173 [1:02:02<00:00, 21.52s/it]
100%|██████████| 22/22 [01:46<00:00,  4.85s/it]


Epoch 7, Training Loss: 0.0945013350928754, Validation Accuracy: 0.8057971014492754


100%|██████████| 173/173 [1:02:16<00:00, 21.60s/it]
100%|██████████| 22/22 [01:46<00:00,  4.85s/it]


Epoch 8, Training Loss: 0.061436555127187646, Validation Accuracy: 0.8028985507246377


100%|██████████| 173/173 [1:02:18<00:00, 21.61s/it]
100%|██████████| 22/22 [01:47<00:00,  4.89s/it]


Epoch 9, Training Loss: 0.046587630352007046, Validation Accuracy: 0.8043478260869565


100%|██████████| 173/173 [1:02:40<00:00, 21.74s/it]
100%|██████████| 22/22 [01:46<00:00,  4.84s/it]

Epoch 10, Training Loss: 0.03801810173909882, Validation Accuracy: 0.7927536231884058





In [30]:
# Print the best model
print("Best model:")
best_model = RobertaForSequenceClassification.from_pretrained('roberta-base', num_labels=3)
best_model.load_state_dict(best_model_state_dict)
print(best_model)

Best model:


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


RobertaForSequenceClassification(
  (roberta): RobertaModel(
    (embeddings): RobertaEmbeddings(
      (word_embeddings): Embedding(50265, 768, padding_idx=1)
      (position_embeddings): Embedding(514, 768, padding_idx=1)
      (token_type_embeddings): Embedding(1, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): RobertaEncoder(
      (layer): ModuleList(
        (0-11): 12 x RobertaLayer(
          (attention): RobertaAttention(
            (self): RobertaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): RobertaSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
             

In [31]:
# Define the path to save the CSV file
output_csv_file = "roberta_predictions.csv"

with open(output_csv_file, mode='w', newline='') as file:
    writer = csv.writer(file)

    # Write the header row
    writer.writerow(['Prediction', 'True Label'])

    # Using the best model to make predictions on validation data
    best_model.eval()
    with torch.no_grad():
        for batch in val_loader:
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['labels'].to(device)

            outputs = best_model(input_ids, attention_mask=attention_mask)
            logits = outputs.logits
            preds = torch.argmax(logits, dim=1)
            for i in range(len(preds)):
                writer.writerow([preds[i].item(), labels[i].item()])

print(f"Predictions saved to {output_csv_file}")

Predictions saved to roberta_predictions.csv
