In [1]:
!pip install datasets==2.21.0
!pip install torchmetrics

Collecting datasets==2.21.0
  Downloading datasets-2.21.0-py3-none-any.whl.metadata (21 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets==2.21.0)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting xxhash (from datasets==2.21.0)
  Downloading xxhash-3.5.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multiprocess (from datasets==2.21.0)
  Downloading multiprocess-0.70.17-py310-none-any.whl.metadata (7.2 kB)
Collecting fsspec<=2024.6.1,>=2023.1.0 (from fsspec[http]<=2024.6.1,>=2023.1.0->datasets==2.21.0)
  Downloading fsspec-2024.6.1-py3-none-any.whl.metadata (11 kB)
INFO: pip is looking at multiple versions of multiprocess to determine which version is compatible with other requirements. This could take a while.
Collecting multiprocess (from datasets==2.21.0)
  Downloading multiprocess-0.70.16-py310-none-any.whl.metadata (7.2 kB)
Downloading datasets-2.21.0-py3-none-any.whl (527 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [2]:
import warnings
warnings.filterwarnings("ignore")

In [3]:
import transformers as T
from datasets import load_dataset
import torch
from torch.utils.data import Dataset, DataLoader
from torch.optim import AdamW
from tqdm import tqdm
from torchmetrics import SpearmanCorrCoef, Accuracy, F1Score
device = "cuda" if torch.cuda.is_available() else "cpu"

In [None]:
import random
import numpy as np
def set_seed(seed=42, loader=None):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True
    try:
        loader.sampler.generator.manual_seed(seed)
    except AttributeError:
        pass

set_seed()

def seed_worker(worker_id):
    worker_seed = torch.initial_seed() % 2**32
    np.random.seed(worker_seed)
    random.seed(worker_seed)

g = torch.Generator()
g.manual_seed(0)

<torch._C.Generator at 0x7a88486cf610>

In [5]:
# 有些中文的標點符號在tokenizer編碼以後會變成[UNK]，所以將其換成英文標點
token_replacement = [
    ["：" , ":"],
    ["，" , ","],
    ["“" , "\""],
    ["”" , "\""],
    ["？" , "?"],
    ["……" , "..."],
    ["！" , "!"]
]

In [6]:
tokenizer = T.BertTokenizer.from_pretrained("google-bert/bert-base-uncased", cache_dir="./cache/")

tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

In [7]:
class SemevalDataset(Dataset):
    def __init__(self, split="train") -> None:
        super().__init__()
        assert split in ["train", "validation", "test"]
        self.data = load_dataset(
            "sem_eval_2014_task_1", split=split, cache_dir="./cache/"
        ).to_list()

    def __getitem__(self, index):
        d = self.data[index]
        # 把中文標點替換掉
        for k in ["premise", "hypothesis"]:
            for tok in token_replacement:
                d[k] = d[k].replace(tok[0], tok[1])
        return d

    def __len__(self):
        return len(self.data)

data_sample = SemevalDataset(split="train").data[:3]
print(f"Dataset example: \n{data_sample[0]} \n{data_sample[1]} \n{data_sample[2]}")

Downloading builder script:   0%|          | 0.00/5.20k [00:00<?, ?B/s]

Downloading readme:   0%|          | 0.00/3.56k [00:00<?, ?B/s]

The repository for sem_eval_2014_task_1 contains custom code which must be executed to correctly load the dataset. You can inspect the repository content at https://hf.co/datasets/sem_eval_2014_task_1.
You can avoid this prompt in future by passing the argument `trust_remote_code=True`.

Do you wish to run the custom code? [y/N] y


Downloading data:   0%|          | 0.00/87.3k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/93.4k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/16.4k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/4500 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/4927 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/500 [00:00<?, ? examples/s]

Dataset example: 
{'sentence_pair_id': 1, 'premise': 'A group of kids is playing in a yard and an old man is standing in the background', 'hypothesis': 'A group of boys in a yard is playing and a man is standing in the background', 'relatedness_score': 4.5, 'entailment_judgment': 0} 
{'sentence_pair_id': 2, 'premise': 'A group of children is playing in the house and there is no man standing in the background', 'hypothesis': 'A group of kids is playing in a yard and an old man is standing in the background', 'relatedness_score': 3.200000047683716, 'entailment_judgment': 0} 
{'sentence_pair_id': 3, 'premise': 'The young boys are playing outdoors and the man is smiling nearby', 'hypothesis': 'The kids are playing outdoors near a man with a smile', 'relatedness_score': 4.699999809265137, 'entailment_judgment': 1}


In [8]:
# Define the hyperparameters
lr = 3e-5
epochs = 10
train_batch_size = 16
validation_batch_size = 16
test_batch_size = 16

In [9]:
# TODO1: Create batched data for DataLoader
# `collate_fn` is a function that defines how the data batch should be packed.
# This function will be called in the DataLoader to pack the data batch.

def collate_fn(batch):
    # TODO1-1: Implement the collate_fn function
    # Write your code here
    # The input parameter is a data batch (tuple), and this function packs it into tensors.
    # Use tokenizer to pack tokenize and pack the data and its corresponding labels.
    # Return the data batch and labels for each sub-task.
    premises = [data['premise'] for data in batch]
    hypotheses = [data['hypothesis'] for data in batch]
    input_ret = tokenizer(premises, hypotheses, return_tensors="pt", padding=True, truncation=True)['input_ids']
    relatedness_score_ret = torch.tensor([])
    entailment_judgment_ret = torch.tensor([])
    for data in batch:
      relatedness_score_ret = torch.cat((relatedness_score_ret,torch.tensor([data['relatedness_score']])),dim=0)
      entailment_judgment_ret = torch.cat((entailment_judgment_ret,torch.tensor([data['entailment_judgment']])),dim=0)
    return input_ret , relatedness_score_ret , entailment_judgment_ret

# TODO1-2: Define your DataLoader
dl_train = DataLoader(SemevalDataset(split="train"),collate_fn=collate_fn,batch_size=train_batch_size,shuffle=True,worker_init_fn=seed_worker,generator=g)
dl_validation = DataLoader(SemevalDataset(split="validation"),collate_fn=collate_fn,batch_size=validation_batch_size,worker_init_fn=seed_worker,generator=g)
dl_test = DataLoader(SemevalDataset(split="test"),collate_fn=collate_fn,batch_size=test_batch_size,worker_init_fn=seed_worker,generator=g)

In [None]:
# TODO2: Construct your model
class MultiLabelModel(torch.nn.Module):
    def __init__(self, mode, freeze=False, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Write your code here
        # Define what modules you will use in the model
        self.mode = mode
        if self.mode == 'separate':
          self.model0_1 = T.AutoModel.from_pretrained("bert-base-uncased")
          self.model0_2 = T.AutoModel.from_pretrained("bert-base-uncased")
          if freeze:
            for param in self.model0_1.parameters():
              param.requires_grad = False
            for param in self.model0_2.parameters():
              param.requires_grad = False
          self.model1 = torch.nn.Sequential(torch.nn.Linear(self.model0_1.config.hidden_size, 500),
                                            torch.nn.ReLU(),
                                            torch.nn.Linear(500, 1))
          self.model2 = torch.nn.Linear(self.model0_2.config.hidden_size, 3)
        else:
          self.model = T.AutoModel.from_pretrained("bert-base-uncased")
          if freeze:
            for param in self.model.parameters():
              param.requires_grad = False
          self.model1 = torch.nn.Sequential(torch.nn.Linear(self.model.config.hidden_size, 500),
                                            torch.nn.ReLU(),
                                            torch.nn.Linear(500, 1))
          self.model2 = torch.nn.Linear(self.model.config.hidden_size, 3)

        #self.init_weight()

    def init_weight(self):
        for layer in self.model1:
          if isinstance(layer, torch.nn.Linear):
            torch.nn.init.kaiming_normal_(layer.weight)
        torch.nn.init.kaiming_normal_(self.model2.weight)

    def forward(self, input):
        # Write your code here
        # Forward pass
        if self.mode == 'separate':
          X1 = self.model0_1(input)
          X2 = self.model0_2(input)
          return self.model1(X1.last_hidden_state[:,0,:]),self.model2(X2.pooler_output)
        else:
          X = self.model(input)
          return self.model1(X.last_hidden_state[:,0,:]),self.model2(X.pooler_output)

In [None]:
# TODO3: Define your optimizer and loss function
model = MultiLabelModel(mode = 'multi').to(device)
# TODO3-1: Define your Optimizer
optimizer = AdamW(params=model.parameters(),lr=lr)

# TODO3-2: Define your loss functions (you should have two)
# Write your code here

def pairwise_rank_loss(pred, target):
    #reference:chatGPT
    pred = torch.reshape(pred,(-1,))
    target = torch.reshape(target,(-1,))
    pairwise_diff_pred = pred.unsqueeze(1) - pred.unsqueeze(0)
    pairwise_diff_target = (target.unsqueeze(1) - target.unsqueeze(0)).sign()
    loss = torch.nn.functional.binary_cross_entropy_with_logits(pairwise_diff_pred, (pairwise_diff_target > 0).float())
    return loss.mean()

loss_fun_rank = pairwise_rank_loss
loss_fun_cat = torch.nn.CrossEntropyLoss()
loss_fun_reg = torch.nn.MSELoss()

# scoring functions
spc = SpearmanCorrCoef()
acc = Accuracy(task="multiclass", num_classes=3)
f1 = F1Score(task="multiclass", num_classes=3, average='macro')

reg_weight = 1
cat_weight = 1

config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

In [12]:
import os
def make_dir(path):
    """ Create a directory if there isn't one already. """
    try:
        os.mkdir(path)
    except OSError:
        pass
make_dir('saved_models')

In [13]:
init_epoch = 0
for ep in range(init_epoch,epochs):
    pbar = tqdm(dl_train)
    pbar.set_description(f"Training epoch [{ep+1}/{epochs}]")
    model.train()
    # TODO4: Write the training loop
    # Write your code here
    # train your model
    # clear gradient
    # forward pass
    # compute loss
    # back-propagation
    # model optimization
    for input , relatedness , entailment in pbar:
      input , relatedness , entailment = input.to(device) , relatedness.to(device) , entailment.to(device)
      input = input.long()
      relatedness_pred,entailment_pred = model(input = input)
      loss1 = loss_fun_reg(relatedness_pred,relatedness)
      loss2 = loss_fun_cat(entailment_pred,entailment.long())
      loss3 = loss_fun_rank(relatedness_pred,relatedness)
      optimizer.zero_grad()
      loss = loss1 + loss2 + loss3
      loss.backward()
      torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1)
      optimizer.step()

    pbar = tqdm(dl_validation)
    pbar.set_description(f"Validation epoch [{ep+1}/{epochs}]")
    model.eval()
    # TODO5: Write the evaluation loop
    # Write your code here
    # Evaluate your model
    # Output all the evaluation scores (SpearmanCorrCoef, Accuracy, F1Score)

    relatedness_pred_all,entailment_pred_all = torch.tensor([]),torch.tensor([])
    relatedness_all,entailment_all = torch.tensor([]),torch.tensor([])
    for input , relatedness , entailment in pbar:
      input = input.to(device)
      input = input.long()
      relatedness_pred,entailment_pred = model(input = input)
      relatedness_pred,entailment_pred = relatedness_pred.detach().cpu(),entailment_pred.detach().cpu()
      relatedness_pred = relatedness_pred.squeeze(-1)
      relatedness_pred_all = torch.cat((relatedness_pred_all,relatedness_pred),dim=0)
      entailment_pred_all = torch.cat((entailment_pred_all,entailment_pred),dim=0)
      relatedness_all = torch.cat((relatedness_all,relatedness),dim=0)
      entailment_all = torch.cat((entailment_all,entailment),dim=0)

    spc_all = spc(relatedness_pred_all,relatedness_all)
    acc_all = acc(entailment_pred_all,entailment_all)
    f1_all = f1(entailment_pred_all,entailment_all)

    print(f"epoch{ep+1}:SpearmanCorrCoef={spc_all}, Accuracy={acc_all}, F1Score={f1_all}")

    torch.save(model, f'./saved_models/ep{ep+1}.ckpt')

Training epoch [1/10]:   0%|          | 0/282 [00:00<?, ?it/s]We strongly recommend passing in an `attention_mask` since your input_ids may be padded. See https://huggingface.co/docs/transformers/troubleshooting#incorrect-output-when-padding-tokens-arent-masked.
Training epoch [1/10]: 100%|██████████| 282/282 [00:38<00:00,  7.34it/s]
Validation epoch [1/10]: 100%|██████████| 32/32 [00:01<00:00, 26.61it/s]


epoch1:SpearmanCorrCoef=0.7529826164245605, Accuracy=0.8159999847412109, F1Score=0.8060834407806396


Training epoch [2/10]: 100%|██████████| 282/282 [00:38<00:00,  7.38it/s]
Validation epoch [2/10]: 100%|██████████| 32/32 [00:01<00:00, 22.70it/s]


epoch2:SpearmanCorrCoef=0.801530122756958, Accuracy=0.8579999804496765, F1Score=0.8632544279098511


Training epoch [3/10]: 100%|██████████| 282/282 [00:40<00:00,  6.96it/s]
Validation epoch [3/10]: 100%|██████████| 32/32 [00:01<00:00, 24.06it/s]


epoch3:SpearmanCorrCoef=0.7879534363746643, Accuracy=0.8579999804496765, F1Score=0.8528547286987305


Training epoch [4/10]: 100%|██████████| 282/282 [00:39<00:00,  7.13it/s]
Validation epoch [4/10]: 100%|██████████| 32/32 [00:01<00:00, 24.51it/s]


epoch4:SpearmanCorrCoef=0.7884798049926758, Accuracy=0.8659999966621399, F1Score=0.8664815425872803


Training epoch [5/10]: 100%|██████████| 282/282 [00:40<00:00,  7.01it/s]
Validation epoch [5/10]: 100%|██████████| 32/32 [00:01<00:00, 22.41it/s]


epoch5:SpearmanCorrCoef=0.787960946559906, Accuracy=0.8659999966621399, F1Score=0.8697340488433838


Training epoch [6/10]: 100%|██████████| 282/282 [00:39<00:00,  7.08it/s]
Validation epoch [6/10]: 100%|██████████| 32/32 [00:01<00:00, 23.73it/s]


epoch6:SpearmanCorrCoef=0.8096140623092651, Accuracy=0.8679999709129333, F1Score=0.8638327121734619


Training epoch [7/10]: 100%|██████████| 282/282 [00:39<00:00,  7.08it/s]
Validation epoch [7/10]: 100%|██████████| 32/32 [00:01<00:00, 24.62it/s]


epoch7:SpearmanCorrCoef=0.8191474080085754, Accuracy=0.8640000224113464, F1Score=0.8567255735397339


Training epoch [8/10]: 100%|██████████| 282/282 [00:40<00:00,  6.99it/s]
Validation epoch [8/10]: 100%|██████████| 32/32 [00:01<00:00, 21.98it/s]


epoch8:SpearmanCorrCoef=0.7980791926383972, Accuracy=0.8560000061988831, F1Score=0.8573348522186279


Training epoch [9/10]: 100%|██████████| 282/282 [00:39<00:00,  7.10it/s]
Validation epoch [9/10]: 100%|██████████| 32/32 [00:01<00:00, 24.69it/s]


epoch9:SpearmanCorrCoef=0.777121365070343, Accuracy=0.8759999871253967, F1Score=0.872044563293457


Training epoch [10/10]: 100%|██████████| 282/282 [00:40<00:00,  7.04it/s]
Validation epoch [10/10]: 100%|██████████| 32/32 [00:01<00:00, 23.85it/s]


epoch10:SpearmanCorrCoef=0.8201460242271423, Accuracy=0.8799999952316284, F1Score=0.8757357597351074


For test set predictions, you can write perform evaluation simlar to #TODO5.

In [14]:
model = torch.load('saved_models/ep10.ckpt')
pbar = tqdm(dl_test)
model.eval()
relatedness_pred_all,entailment_pred_all = torch.tensor([]),torch.tensor([])
relatedness_all,entailment_all = torch.tensor([]),torch.tensor([])
for input , relatedness , entailment in pbar:
  input = input.to(device)
  input = input.long()
  relatedness_pred,entailment_pred = model(input = input)
  relatedness_pred,entailment_pred = relatedness_pred.detach().cpu(),entailment_pred.detach().cpu()
  relatedness_pred = relatedness_pred.squeeze(-1)
  relatedness_pred_all = torch.cat((relatedness_pred_all,relatedness_pred),dim=0)
  entailment_pred_all = torch.cat((entailment_pred_all,entailment_pred),dim=0)
  relatedness_all = torch.cat((relatedness_all,relatedness),dim=0)
  entailment_all = torch.cat((entailment_all,entailment),dim=0)

spc_all = spc(relatedness_pred_all,relatedness_all)
acc_all = acc(entailment_pred_all,entailment_all)
f1_all = f1(entailment_pred_all,entailment_all)

print(f"test:SpearmanCorrCoef={spc_all}, Accuracy={acc_all}, F1Score={f1_all}")

100%|██████████| 308/308 [00:12<00:00, 24.65it/s]

test:SpearmanCorrCoef=0.8141725063323975, Accuracy=0.8688857555389404, F1Score=0.8611698150634766



