In [1]:
!export CUDA_VISIBLE_DEVICES=2
%set_env CUDA_VISIBLE_DEVICES=2

env: CUDA_VISIBLE_DEVICES=2


In [8]:
import os
import torch
import pickle
import random
import numpy as np
from torch.utils.data import DataLoader, TensorDataset
from transformers import BertModel, BertConfig
from torch.optim import Adam
from sklearn.preprocessing import StandardScaler
from torch.nn.utils import clip_grad_value_
from tqdm import tqdm
from data.dataloader import CustomDataset
from model.multi_bert import multiBert
from data.scale import get_scaled_down_scores, separate_and_rescale_attributes_for_scoring
from utils.evaluate import evaluation



In [12]:
train_dataset = CustomDataset(f'/home/tsaibw/Multi_scale/dataset/new_train/encode_prompt_1.pkl')
eval_dataset = CustomDataset(f'/home/tsaibw/Multi_scale/dataset/new_train/encode_prompt_1.pkl')

train_loader = DataLoader(train_dataset, batch_size=6, shuffle=False, num_workers=4)
eval_loader = DataLoader(eval_dataset, batch_size=6, shuffle=False, num_workers=4)


In [19]:

print(len(train_dataset[0]['chunked_documents'][0]))
print(len(train_dataset[1]))
# print(train_loader[0])

17
7


In [24]:
for batch in train_loader:
    for key in batch.keys():
        value = batch[key]
        if isinstance(value, torch.Tensor):
            print(f"{key}: {value.shape}")  # 打印張量的形狀
        else:
            print(f"{key}: {type(value)}")  # 如果不是張量，打印類型

    print(batch['chunked_documents'][0].shape)
    print(batch['chunked_documents'][1].shape)
    print(batch['lengths'][0].shape)
    break  # 測試一個批次即可prompt_id: torch.Size([10]) 

prompt_id: torch.Size([6])
document_single: torch.Size([6, 3, 3, 512])
chunked_documents: <class 'list'>
lengths: <class 'list'>
hand_craft: torch.Size([6, 52])
readability: torch.Size([6, 34])
scaled_score: torch.Size([6, 9])
torch.Size([6, 17, 3, 90])
torch.Size([6, 53, 3, 30])
torch.Size([6])


In [3]:

torch.manual_seed(11)

class NerConfig:
    def __init__(self):
        self.lr = 1e-3
        self.epoch = 10
        self.batch_size = 1
        self.device = "cuda"
        # self.chunk_sizes = [90]
        self.chunk_sizes = [90, 30, 130, 10]
        self.data_file = "/home/tsaibw/Multi_scale/ckps/only_score"
args = NerConfig()

In [7]:
# train normalize

def save_checkpoint(state, filename="checkpoint.pth.tar"):
    torch.save(state, filename)


def print_gradients(model):
    for name, parameter in model.named_parameters():
        if parameter.grad is not None:
            print(f"{name} - Gradient Norm: {parameter.grad.norm().item()}")
        else:
            print(f"{name} - No gradient")


for i in range(1,9):
    multi_bert_model = multiBert(args.chunk_sizes)  
    multi_bert_model.to(args.device)  
    optimizer = Adam(multi_bert_model.parameters(), lr = args.lr) 
    
    train_dataset = CustomDataset(f'/home/tsaibw/data/nas07/PersonalData/tsaibw/ASAP/dataset/train/encode_prompt_{i}.pkl')
    eval_dataset = CustomDataset(f'/home/tsaibw/data/nas07/PersonalData/tsaibw/ASAP/dataset/test/encode_prompt_{i}.pkl')
    
    train_loader = DataLoader(train_dataset, batch_size=args.batch_size, shuffle=False, num_workers=4)
    eval_loader = DataLoader(eval_dataset, batch_size=args.batch_size, shuffle=False, num_workers=4)

    train_loss_list , eval_loss_list = [] ,[] 
    os.makedirs(f"{args.data_file}/prompt{i}", exist_ok=True)
    
    for epoch in range(args.epoch):
        multi_bert_model.train()
        total_loss = 0

        for document_single, chunked_documents, label, id_, lengths in tqdm(train_loader, desc=f"Epoch {epoch + 1}/{args.epoch}"):
            document_single = document_single.to(args.device)
            optimizer.zero_grad()
            
            predictions = multi_bert_model(
                    document_single=document_single,
                    chunked_documents=chunked_documents,
                    device=args.device,
                    lengths=lengths,

            )
            
            loss, inverse_predictions, inverse_labels = multi_bert_model.compute_loss(predictions, label, id_, args.device)
            total_loss += loss.item()

            loss.backward()
            optimizer.step()
        
        eval_loss, qwk_score, pearson_score = multi_bert_model.evaluate(eval_loader, device = args.device)
        
        print(f"Epoch {epoch}, Train Loss: {total_loss / len(train_loader)}")
        print(f"Test Loss: {eval_loss}")
        train_loss_list.append(total_loss / len(train_loader))
        eval_loss_list.append(eval_loss)

        qwk_path = f"{args.data_file}/prompt{i}/result.txt"
        with open(qwk_path, "a") as f:
            f.write(f"Epoch {epoch + 1}/{args.epoch}, QWK: {qwk_score}, Pearson: {pearson_score}, train_loss: {train_loss_list[-1]}, eval_loss: {eval_loss_list[-1]}\n")
  
        checkpoint_path = f"{args.data_file}/prompt{i}/epoch_{epoch+1}_checkpoint.pth.tar"
        save_checkpoint({
          'epoch': epoch + 1,
          'state_dict': multi_bert_model.state_dict(),
          'optimizer': optimizer.state_dict(),
          'train_loss': total_loss / len(train_loader),
          'eval_loss': eval_loss
        }, filename = checkpoint_path)


Epoch 1/10:   0% 0/8953 [00:00<?, ?it/s]


TypeError: forward() missing 2 required positional arguments: 'readability' and 'hand_craft'

In [24]:
print(train_dataset[3][4])

[1, 3, 1, 10]


In [23]:
a = train_dataset[3][1]
for i in range(4):
    print(len(a[i]))


17
53
12
184


In [25]:
print(a[1].shape)
print(a[1])
print(a[1][:3,0])
print(a[1][:3,0].shape)

torch.Size([53, 3, 30])
tensor([[[ 101, 1000, 2108,  ..., 2487, 1012,  102],
         [   0,    0,    0,  ...,    0,    0,    0],
         [   1,    1,    1,  ...,    1,    1,    1]],

        [[ 101, 2033, 1998,  ..., 2165, 1037,  102],
         [   0,    0,    0,  ...,    0,    0,    0],
         [   1,    1,    1,  ...,    1,    1,    1]],

        [[ 101, 2096, 2077,  ...,    0,    0,    0],
         [   0,    0,    0,  ...,    0,    0,    0],
         [   1,    1,    1,  ...,    0,    0,    0]],

        ...,

        [[ 101,  102,    0,  ...,    0,    0,    0],
         [   0,    0,    0,  ...,    0,    0,    0],
         [   1,    1,    0,  ...,    0,    0,    0]],

        [[ 101,  102,    0,  ...,    0,    0,    0],
         [   0,    0,    0,  ...,    0,    0,    0],
         [   1,    1,    0,  ...,    0,    0,    0]],

        [[ 101,  102,    0,  ...,    0,    0,    0],
         [   0,    0,    0,  ...,    0,    0,    0],
         [   1,    1,    0,  ...,    0,    0,    0]

In [36]:
import torch

# 模擬輸入特徵 (batch_size=2, seq_len=3, hidden_dim=4)
h = torch.tensor([
    [[1.0, 2.0, 3.0, 4.0],  # 第一個樣本的第1時間步
     [5.0, 6.0, 7.0, 8.0],  # 第一個樣本的第2時間步
     [9.0, 10.0, 11.0, 12.0]],  # 第一個樣本的第3時間步

    [[2.0, 3.0, 4.0, 5.0],  # 第二個樣本的第1時間步
     [6.0, 7.0, 8.0, 9.0],  # 第二個樣本的第2時間步
     [10.0, 11.0, 12.0, 13.0]]  # 第二個樣本的第3時間步
])  # shape: [2, 3, 4]

# 模擬注意力權重 (batch_size=2, seq_len=3, 1)
weight = torch.tensor([
    [[0.1],  # 第一個樣本的第1時間步的權重
     [0.5],  # 第一個樣本的第2時間步的權重
     [0.4]],  # 第一個樣本的第3時間步的權重

    [[0.2],  # 第二個樣本的第1時間步的權重
     [0.3],  # 第二個樣本的第2時間步的權重
     [0.5]]  # 第二個樣本的第3時間步的權重
])  # shape: [2, 3, 1]
expanded_weight = weight.repeat(1, 2, h.size(2))  # 在最後一個維度重複 hidden_dim 次


In [40]:
print(h.shape)
print(h[0].shape)


torch.Size([2, 3, 4])
torch.Size([3, 4])


In [37]:
print(weight.shape)
print(expanded_weight.shape)
print(expanded_weight)
print(h.shape)

torch.Size([2, 3, 1])
torch.Size([2, 6, 4])
tensor([[[0.1000, 0.1000, 0.1000, 0.1000],
         [0.5000, 0.5000, 0.5000, 0.5000],
         [0.4000, 0.4000, 0.4000, 0.4000],
         [0.1000, 0.1000, 0.1000, 0.1000],
         [0.5000, 0.5000, 0.5000, 0.5000],
         [0.4000, 0.4000, 0.4000, 0.4000]],

        [[0.2000, 0.2000, 0.2000, 0.2000],
         [0.3000, 0.3000, 0.3000, 0.3000],
         [0.5000, 0.5000, 0.5000, 0.5000],
         [0.2000, 0.2000, 0.2000, 0.2000],
         [0.3000, 0.3000, 0.3000, 0.3000],
         [0.5000, 0.5000, 0.5000, 0.5000]]])
torch.Size([2, 3, 4])


In [31]:
out = torch.mul(h, expanded_weight)
print(out.shape)
print(out)

torch.Size([2, 3, 4])
tensor([[[0.1000, 0.2000, 0.3000, 0.4000],
         [2.5000, 3.0000, 3.5000, 4.0000],
         [3.6000, 4.0000, 4.4000, 4.8000]],

        [[0.4000, 0.6000, 0.8000, 1.0000],
         [1.8000, 2.1000, 2.4000, 2.7000],
         [5.0000, 5.5000, 6.0000, 6.5000]]])


In [33]:
out = h * weight
print(out)

tensor([[[0.1000, 0.2000, 0.3000, 0.4000],
         [2.5000, 3.0000, 3.5000, 4.0000],
         [3.6000, 4.0000, 4.4000, 4.8000]],

        [[0.4000, 0.6000, 0.8000, 1.0000],
         [1.8000, 2.1000, 2.4000, 2.7000],
         [5.0000, 5.5000, 6.0000, 6.5000]]])
