In [1]:
import os
import json
from functools import partial
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
from torch.optim import AdamW
from transformers import BertTokenizer
from transformers import BertModel
from sklearn.metrics import classification_report

In [2]:
from dataset import Dataset_custom, custom_collate_fn

In [3]:
PRETAINED_MODEL_NAME = 'bert-base-multilingual-cased'
tokenizer = BertTokenizer.from_pretrained(PRETAINED_MODEL_NAME)

In [4]:
PATH_dir = '../preprodata'

PATH_train = os.path.join(PATH_dir, 'prepro_ratings_train.json')
PATH_test = os.path.join(PATH_dir, 'prepro_ratings_test.json')

In [5]:
dataset_train = Dataset_custom(PATH_train)
dataset_test = Dataset_custom(PATH_test)

In [6]:
print(
		len(dataset_train),
		len(dataset_test)
)

150000 50000


In [7]:
batch_size = 64
partial_collate_fn = partial(custom_collate_fn, tokenizer)

In [8]:
dataloader_train = DataLoader(
    dataset_train,
    batch_size=batch_size,
    shuffle=True,
    collate_fn=partial_collate_fn
)
dataloader_test = DataLoader(
    dataset_test,
    batch_size=batch_size,
    shuffle=False,
    collate_fn=partial_collate_fn
)

In [9]:
class prediction_Model(nn.Module):

    def __init__(self):
        super().__init__()
        self.bert = BertModel.from_pretrained(PRETAINED_MODEL_NAME)
        self.dropout = nn.Dropout(p = 0.1)
        self.linear = nn.Linear(768, 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self,**kargs):
        output = self.bert(**kargs)
        output = self.dropout(output['pooler_output'])
        output = self.linear(output)
        output = self.sigmoid(output)
        return output

In [10]:
model = prediction_Model()
CELoss = nn.BCELoss()
optimizer = AdamW(model.parameters(), lr=1.0e-5)

Some weights of the model checkpoint at bert-base-multilingual-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [11]:
model.cuda()
device = model.bert.device
device

device(type='cuda', index=0)

In [12]:
train_epoch = 5


In [18]:
model.train()

for epoch in range(train_epoch):

    for iteration, batch in enumerate(dataloader_train):
        batch_inputs = {k: v.cuda(device) for k, v in list(batch[0].items())}
        batch_labels = batch[1].cuda(device)

        output = model(**batch_inputs)

        loss = CELoss(output.view(-1).to(torch.float32), batch_labels.view(-1).to(torch.float32))
        # loss = CELoss(output.view(-1, output.size(-1)), batch_labels.view(-1))

        optimizer.zero_grad()
        loss.backward()

        optimizer.step()

        if (iteration + 1) % 10 == 0:
            print(f'epoch - {epoch}: {iteration + 1} - loss: {loss.item()}')

    print(f'epoch {epoch} END')
  

epoch - 0: 10 - loss: 0.6645258665084839
epoch - 0: 20 - loss: 0.6787600517272949
epoch - 0: 30 - loss: 0.6616678833961487
epoch - 0: 40 - loss: 0.5345892906188965
epoch - 0: 50 - loss: 0.583198070526123
epoch - 0: 60 - loss: 0.5818599462509155
epoch - 0: 70 - loss: 0.518320620059967
epoch - 0: 80 - loss: 0.5080231428146362
epoch - 0: 90 - loss: 0.5195685029029846
epoch - 0: 100 - loss: 0.4851904511451721
epoch - 0: 110 - loss: 0.5854507684707642
epoch - 0: 120 - loss: 0.4781477451324463
epoch - 0: 130 - loss: 0.47347649931907654
epoch - 0: 140 - loss: 0.564857006072998
epoch - 0: 150 - loss: 0.4814494252204895
epoch - 0: 160 - loss: 0.637811541557312
epoch - 0: 170 - loss: 0.44735080003738403
epoch - 0: 180 - loss: 0.47155559062957764
epoch - 0: 190 - loss: 0.540716290473938
epoch - 0: 200 - loss: 0.4722813069820404
epoch - 0: 210 - loss: 0.5578353404998779
epoch - 0: 220 - loss: 0.4866643249988556
epoch - 0: 230 - loss: 0.5162850618362427
epoch - 0: 240 - loss: 0.351921021938324
epoc

In [19]:
# Save model_state_dict
torch.save(model.state_dict(), '../model/model_state_dict.pt')

In [13]:
# # Load model_state_dict
# model.load_state_dict(torch.load('../model/model_state_dict.pt'))

<All keys matched successfully>

In [15]:
model.eval()

gold_list = []
pred_list = []

with torch.no_grad():
    for iteration, batch in enumerate(dataloader_test):
        batch_inputs = {k: v.cuda(device) for k, v in list(batch[0].items())}
        batch_labels = batch[1].cuda(device)
        
        output = model(**batch_inputs)
        loss = CELoss(output.view(-1).to(torch.float32), batch_labels.view(-1).to(torch.float32))
        
        print('loss:', loss.item())
        
        for gold, pred in zip(batch_labels, output):
            pred = torch.round(pred)
            # pred = pred.to(torch.int32)

            gold_list.append(gold)
            pred_list.append(pred)
            
            print(gold)
            print(pred)

loss: 0.3538481295108795
tensor([1], device='cuda:0')
tensor([1.], device='cuda:0')
tensor([0], device='cuda:0')
tensor([1.], device='cuda:0')
tensor([0], device='cuda:0')
tensor([1.], device='cuda:0')
tensor([0], device='cuda:0')
tensor([0.], device='cuda:0')
tensor([0], device='cuda:0')
tensor([1.], device='cuda:0')
tensor([1], device='cuda:0')
tensor([1.], device='cuda:0')
tensor([0], device='cuda:0')
tensor([0.], device='cuda:0')
tensor([0], device='cuda:0')
tensor([0.], device='cuda:0')
tensor([0], device='cuda:0')
tensor([0.], device='cuda:0')
tensor([1], device='cuda:0')
tensor([1.], device='cuda:0')
tensor([1], device='cuda:0')
tensor([1.], device='cuda:0')
tensor([0], device='cuda:0')
tensor([1.], device='cuda:0')
tensor([1], device='cuda:0')
tensor([1.], device='cuda:0')
tensor([1], device='cuda:0')
tensor([0.], device='cuda:0')
tensor([0], device='cuda:0')
tensor([0.], device='cuda:0')
tensor([1], device='cuda:0')
tensor([1.], device='cuda:0')
tensor([1], device='cuda:0')
te

In [16]:
gold_list_flat = []
pred_list_flat = []
for g, p in zip(gold_list, pred_list):
    gold_list_flat.append(g.item())
    pred_list_flat.append(p.item())

In [17]:
print(classification_report(gold_list_flat, pred_list_flat))


              precision    recall  f1-score   support

           0       0.89      0.83      0.86     24827
           1       0.84      0.90      0.87     25173

    accuracy                           0.87     50000
   macro avg       0.87      0.87      0.87     50000
weighted avg       0.87      0.87      0.87     50000

