## 모듈 설정

In [1]:
import torch
from torch.utils.data import DataLoader, RandomSampler, SequentialSampler, TensorDataset
from transformers import BertTokenizer, BertModel, AdamW
from torch.nn import BCELoss, Sigmoid
import torch.nn.functional as F

from torch import cuda
device = 'cuda' if cuda.is_available() else 'cpu'
print(device)



cuda


## tokenizer, model, optimizer, loss

In [2]:
tokenizer = BertTokenizer.from_pretrained('bert-base-multilingual-cased')
model = BertModel.from_pretrained('bert-base-multilingual-cased')
optimizer = AdamW(model.parameters(), lr=1e-5)
bceloss = torch.nn.BCELoss()
sigmoid = torch.nn.Sigmoid()
bceloss.to(device)
sigmoid.to(device)

Sigmoid()

## batch_sentences & Tokenizing

In [3]:
batch_setences = ["나는 기분이 안좋다", "나는 기분이 별로다", "난 기분이 좋다", "난 기쁘다", "나는 느낌이 좋지 않아"]
encoded_inputs = tokenizer(batch_setences, return_tensors='pt', padding=True, truncation=True)
print(encoded_inputs)

{'input_ids': tensor([[   101, 100585,   8932,  37712,  10739,   9521, 119214,  11903,    102,
              0],
        [   101, 100585,   8932,  37712,  10739,   9353,  11261,  11903,    102,
              0],
        [   101,   8984,   8932,  37712,  10739,   9685,  11903,    102,      0,
              0],
        [   101,   8984,   8932, 119022,  11903,    102,      0,      0,      0,
              0],
        [   101, 100585,   9041, 118713,  10739,   9685,  12508,   9523,  16985,
            102]]), 'token_type_ids': tensor([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0]]), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 0],
        [1, 1, 1, 1, 1, 1, 1, 1, 1, 0],
        [1, 1, 1, 1, 1, 1, 1, 1, 0, 0],
        [1, 1, 1, 1, 1, 1, 0, 0, 0, 0],
        [1, 1, 1, 1, 1, 1, 1, 1, 1, 1]])}


In [4]:
for ids in encoded_inputs["input_ids"]:
    print(tokenizer.decode(ids))

[CLS] 나는 기분이 안좋다 [SEP] [PAD]
[CLS] 나는 기분이 별로다 [SEP] [PAD]
[CLS] 난 기분이 좋다 [SEP] [PAD] [PAD]
[CLS] 난 기쁘다 [SEP] [PAD] [PAD] [PAD] [PAD]
[CLS] 나는 느낌이 좋지 않아 [SEP]


## model to cuda & train

In [5]:
model.to(device)

BertModel(
  (embeddings): BertEmbeddings(
    (word_embeddings): Embedding(119547, 768, padding_idx=0)
    (position_embeddings): Embedding(512, 768)
    (token_type_embeddings): Embedding(2, 768)
    (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
    (dropout): Dropout(p=0.1, inplace=False)
  )
  (encoder): BertEncoder(
    (layer): ModuleList(
      (0): BertLayer(
        (attention): BertAttention(
          (self): BertSelfAttention(
            (query): Linear(in_features=768, out_features=768, bias=True)
            (key): Linear(in_features=768, out_features=768, bias=True)
            (value): Linear(in_features=768, out_features=768, bias=True)
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (output): BertSelfOutput(
            (dense): Linear(in_features=768, out_features=768, bias=True)
            (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
            (dropout): Dropout(p=0.1, inplace=False)
         

In [6]:
model.train()

BertModel(
  (embeddings): BertEmbeddings(
    (word_embeddings): Embedding(119547, 768, padding_idx=0)
    (position_embeddings): Embedding(512, 768)
    (token_type_embeddings): Embedding(2, 768)
    (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
    (dropout): Dropout(p=0.1, inplace=False)
  )
  (encoder): BertEncoder(
    (layer): ModuleList(
      (0): BertLayer(
        (attention): BertAttention(
          (self): BertSelfAttention(
            (query): Linear(in_features=768, out_features=768, bias=True)
            (key): Linear(in_features=768, out_features=768, bias=True)
            (value): Linear(in_features=768, out_features=768, bias=True)
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (output): BertSelfOutput(
            (dense): Linear(in_features=768, out_features=768, bias=True)
            (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
            (dropout): Dropout(p=0.1, inplace=False)
         

## Binary Classification Linear Layer 생성

In [7]:
test = torch.nn.Linear(768, 1)
test.to(device)

Linear(in_features=768, out_features=1, bias=True)

## input_ids, attention_mask, labels

In [8]:
input_ids = encoded_inputs['input_ids']
attention_mask = encoded_inputs['attention_mask']

In [9]:
labels_generated = [[0], [0], [1], [1], [0]]
labels = torch.tensor(labels_generated)
print(labels.shape)

torch.Size([5, 1])


In [10]:
print(type(labels))
print(type(input_ids))
print(type(attention_mask))

print(labels.shape)
print(input_ids.shape)
print(attention_mask.shape)

<class 'torch.Tensor'>
<class 'torch.Tensor'>
<class 'torch.Tensor'>
torch.Size([5, 1])
torch.Size([5, 10])
torch.Size([5, 10])


## train_loader, Dataloader

In [11]:
batch_data = []
batch_data.append([torch.tensor(encoded_inputs['input_ids']), torch.tensor(encoded_inputs['attention_mask']), torch.tensor(labels)])
print(batch_data)

[[tensor([[   101, 100585,   8932,  37712,  10739,   9521, 119214,  11903,    102,
              0],
        [   101, 100585,   8932,  37712,  10739,   9353,  11261,  11903,    102,
              0],
        [   101,   8984,   8932,  37712,  10739,   9685,  11903,    102,      0,
              0],
        [   101,   8984,   8932, 119022,  11903,    102,      0,      0,      0,
              0],
        [   101, 100585,   9041, 118713,  10739,   9685,  12508,   9523,  16985,
            102]]), tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 0],
        [1, 1, 1, 1, 1, 1, 1, 1, 1, 0],
        [1, 1, 1, 1, 1, 1, 1, 1, 0, 0],
        [1, 1, 1, 1, 1, 1, 0, 0, 0, 0],
        [1, 1, 1, 1, 1, 1, 1, 1, 1, 1]]), tensor([[0],
        [0],
        [1],
        [1],
        [0]])]]


  


In [12]:
prediction_data = TensorDataset(input_ids, attention_mask, labels)

In [13]:
train_loader = DataLoader(prediction_data, batch_size=1, shuffle=True)

In [14]:
for batch in train_loader:
    print(batch)

[tensor([[   101,   8984,   8932, 119022,  11903,    102,      0,      0,      0,
              0]]), tensor([[1, 1, 1, 1, 1, 1, 0, 0, 0, 0]]), tensor([[1]])]
[tensor([[   101, 100585,   8932,  37712,  10739,   9353,  11261,  11903,    102,
              0]]), tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 0]]), tensor([[0]])]
[tensor([[   101, 100585,   9041, 118713,  10739,   9685,  12508,   9523,  16985,
            102]]), tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1]]), tensor([[0]])]
[tensor([[  101,  8984,  8932, 37712, 10739,  9685, 11903,   102,     0,     0]]), tensor([[1, 1, 1, 1, 1, 1, 1, 1, 0, 0]]), tensor([[1]])]
[tensor([[   101, 100585,   8932,  37712,  10739,   9521, 119214,  11903,    102,
              0]]), tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 0]]), tensor([[0]])]


## model output (train mode)

In [15]:
print(input_ids.shape)
print(attention_mask.shape)
print(labels.shape)

torch.Size([5, 10])
torch.Size([5, 10])
torch.Size([5, 1])


In [16]:
for epoch_i in range(0, 10):
    for batch in train_loader:

        input_ids = batch[0].to(device, dtype = torch.long)
        attention_mask = batch[1].to(device, dtype = torch.long)
        labels = batch[2].to(device, dtype = torch.float)
        
        outputs = model(input_ids.view(1,-1), attention_mask)
        sequential, pooled = outputs
        output_after_linear = test(outputs[1])
        output_sig = sigmoid(output_after_linear)
        
        loss = bceloss(output_sig, labels)
        print(loss)
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

tensor(0.6427, device='cuda:0', grad_fn=<BinaryCrossEntropyBackward>)
tensor(0.5789, device='cuda:0', grad_fn=<BinaryCrossEntropyBackward>)
tensor(0.8806, device='cuda:0', grad_fn=<BinaryCrossEntropyBackward>)
tensor(0.8625, device='cuda:0', grad_fn=<BinaryCrossEntropyBackward>)
tensor(0.8467, device='cuda:0', grad_fn=<BinaryCrossEntropyBackward>)
tensor(0.8049, device='cuda:0', grad_fn=<BinaryCrossEntropyBackward>)
tensor(0.6484, device='cuda:0', grad_fn=<BinaryCrossEntropyBackward>)
tensor(0.7338, device='cuda:0', grad_fn=<BinaryCrossEntropyBackward>)
tensor(0.5896, device='cuda:0', grad_fn=<BinaryCrossEntropyBackward>)
tensor(0.7107, device='cuda:0', grad_fn=<BinaryCrossEntropyBackward>)
tensor(0.6100, device='cuda:0', grad_fn=<BinaryCrossEntropyBackward>)
tensor(0.6067, device='cuda:0', grad_fn=<BinaryCrossEntropyBackward>)
tensor(0.6669, device='cuda:0', grad_fn=<BinaryCrossEntropyBackward>)
tensor(0.6306, device='cuda:0', grad_fn=<BinaryCrossEntropyBackward>)
tensor(0.6475, devic

In [21]:
print(outputs[0].shape)
print(outputs[1].shape)
print(output_sig)

torch.Size([1, 10, 768])
torch.Size([1, 768])
tensor([[0.0727]], device='cuda:0', grad_fn=<SigmoidBackward>)


## eval mode

In [18]:
model.eval()

BertModel(
  (embeddings): BertEmbeddings(
    (word_embeddings): Embedding(119547, 768, padding_idx=0)
    (position_embeddings): Embedding(512, 768)
    (token_type_embeddings): Embedding(2, 768)
    (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
    (dropout): Dropout(p=0.1, inplace=False)
  )
  (encoder): BertEncoder(
    (layer): ModuleList(
      (0): BertLayer(
        (attention): BertAttention(
          (self): BertSelfAttention(
            (query): Linear(in_features=768, out_features=768, bias=True)
            (key): Linear(in_features=768, out_features=768, bias=True)
            (value): Linear(in_features=768, out_features=768, bias=True)
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (output): BertSelfOutput(
            (dense): Linear(in_features=768, out_features=768, bias=True)
            (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
            (dropout): Dropout(p=0.1, inplace=False)
         

In [19]:
eval_loader = DataLoader(prediction_data, batch_size=1, shuffle=True)

In [20]:
for batch in train_loader:
    input_ids = batch[0].to(device, dtype = torch.long)
    attention_mask = batch[1].to(device, dtype = torch.long)
    labels = batch[2].to(device, dtype = torch.float)

    with torch.no_grad():
        outputs = model(input_ids.view(1,-1), attention_mask)
        loss, logits = outputs
        output_after_linear = test(outputs[1])
        output_sig = sigmoid(output_after_linear)

        loss = bceloss(output_sig, labels)
        print(loss)

tensor(0.1063, device='cuda:0', grad_fn=<BinaryCrossEntropyBackward>)
tensor(0.0767, device='cuda:0', grad_fn=<BinaryCrossEntropyBackward>)
tensor(0.0806, device='cuda:0', grad_fn=<BinaryCrossEntropyBackward>)
tensor(0.1072, device='cuda:0', grad_fn=<BinaryCrossEntropyBackward>)
tensor(0.0755, device='cuda:0', grad_fn=<BinaryCrossEntropyBackward>)
