In [1]:
class Configs:
    """Central collection of the notebook's data and model settings.

    Every setting is a plain instance attribute. ``Configs()`` yields the
    defaults below; any of them can be overridden by keyword at
    construction time, e.g. ``Configs(batch_size=32)``.

    Raises:
        TypeError: if an override names a setting that does not exist
            (guards against silently ignored typos).
    """

    def __init__(self, **overrides):
        # --- Data pipeline settings (this object is handed to DataModule) ---
        self.manifest_file = "total_am.txt"
        self.labels_path = "aihub_labels.csv"  # passed to Tokenizer as label_file
        self.train_ratio = 0.8  # presumably the train share of the train/valid split -- TODO confirm
        self.num_workers = 4
        self.batch_size = 64
        self.sample_mode = 'random'  # alternative noted by the author: 'smart'
        self.teacher_forcing_ratio = 0.0

        # --- Model hyperparameters (forwarded to Transformer_LM) ---
        self.num_classes = 2001
        self.d_model = 512
        self.d_ff = 2048
        self.num_heads = 4
        self.num_layers = 3
        self.model_name = "BERT"

        # Apply caller overrides only for settings that actually exist.
        unknown = sorted(set(overrides) - set(vars(self)))
        if unknown:
            raise TypeError("Unknown config option(s): " + ", ".join(unknown))
        vars(self).update(overrides)


configs = Configs()

In [2]:
from Tokenizer import Tokenizer
from data_module import DataModule


# The tokenizer is built from the label file named in the config; the data
# module then wraps both the config and the tokenizer.
tokenizer = Tokenizer(label_file=configs.labels_path)
data_module = DataModule(configs, tokenizer)

# Request the "train" loader first, then the "valid" loader.
train_dataloader, valid_dataloader = (
    data_module.get_dl(split) for split in ("train", "valid")
)

In [5]:
from Model import Transformer_LM

# Forward the architecture hyperparameters from the config to the model.
# Most keyword names match the config attribute names one-to-one; only the
# `model` keyword is fed from a differently named attribute (`model_name`).
model = Transformer_LM(
    **{
        field: getattr(configs, field)
        for field in ("num_classes", "d_model", "d_ff", "num_heads", "num_layers")
    },
    model=configs.model_name,
)

In [6]:
# Move the model onto the GPU (presumably a torch.nn.Module -- its class is
# defined in the project's Model module). This call needs a CUDA device; the
# later cells also call .cuda() on their input tensors.
model = model.cuda()

In [7]:
from criterion import CrossEntropyLoss, Perplexity
from torch.optim import Adam

# Adam over all model parameters with a fixed learning rate of 1e-4.
optimizer = Adam(params=model.parameters(), lr=1e-4)

# The project's loss is constructed with the tokenizer as its only argument.
Loss = CrossEntropyLoss(tokenizer)

In [8]:
from torch.utils.tensorboard import SummaryWriter
import torch


# Training loop: one pass over the training dataloader, logging the loss and
# perplexity of every step to TensorBoard, with a periodic validation pass.
VALIDATE_EVERY = 1000     # run validation every N training iterations (not at 0)
MAX_VALID_BATCHES = 101   # batches per validation pass (the loop used to stop once i > 100)

writer = SummaryWriter('runs/bert')

try:
    # Make sure training-only layers (e.g. dropout) are active, even if an
    # earlier run of this cell was interrupted while validating.
    model.train()

    for iteration, (inputs, seq_lengths, targets) in enumerate(train_dataloader):
        inputs = inputs.cuda()
        targets = targets.cuda()
        optimizer.zero_grad()
        logits, preds = model(inputs, seq_lengths)
        loss = Loss(logits, targets)

        # Log from a detached copy so the logging path never touches autograd.
        logged_loss = loss.detach()
        writer.add_scalar("train_loss", logged_loss, iteration)
        writer.add_scalar("train_perplexity", torch.exp(logged_loss), iteration)

        loss.backward()
        optimizer.step()

        if iteration % VALIDATE_EVERY == 0 and iteration != 0:
            # Evaluate in eval mode and without autograd so the validation
            # numbers are not perturbed by training-only behavior.
            model.eval()
            val_loss_sum = 0
            val_iter = 0
            with torch.no_grad():
                for i, (val_inputs, val_lengths, val_targets) in enumerate(valid_dataloader):
                    if i >= MAX_VALID_BATCHES:
                        break
                    val_inputs = val_inputs.cuda()
                    val_targets = val_targets.cuda()
                    val_logits, _ = model(val_inputs, val_lengths)
                    val_loss_sum += Loss(val_logits, val_targets)
                    val_iter += 1
            model.train()

            # An empty validation loader yields no batches; skip logging
            # instead of dividing by zero.
            if val_iter > 0:
                validation_loss = val_loss_sum / val_iter
                validation_perplexity = torch.exp(validation_loss)
                writer.add_scalar("validation_loss", validation_loss, iteration)
                writer.add_scalar("validation_perplexity", validation_perplexity, iteration)
finally:
    # Flush and close the event file even if training raises or is interrupted.
    writer.close()

In [9]:
# Persist only the model's state_dict (not the whole model object) to
# "bert.pt"; the path is relative, so it lands in the current working directory.
torch.save(model.state_dict(), "bert.pt")

In [16]:
# Test Bert Model
# Mask a few tokens of a sample sentence, let the model predict them, and
# print the masked input followed by the sentence with predictions filled in.
import random

sample = "이 좋은 날씨에 난 사무실에 앉아서 뭘 하는거람"

MASK_TOKEN_ID = 2000   # id used for the mask token in this notebook
NUM_MASKS = 3          # how many positions to mask

# Register a printable symbol for the mask id so decode() can render it.
tokenizer.idx2char[MASK_TOKEN_ID] = "<mask>"

inputs = [tokenizer.sos_token] + tokenizer.encode(sample) + [tokenizer.eos_token]

# Pick mask positions among the real tokens only: index 0 holds <sos> and the
# last index holds <eos>, so neither may be masked. Sampling over the encoded
# list (rather than the raw string) keeps the indices aligned with `inputs`.
maskable_positions = range(1, len(inputs) - 1)
mask_indexes = random.sample(maskable_positions, min(NUM_MASKS, len(maskable_positions)))
for idx in mask_indexes:
    inputs[idx] = MASK_TOKEN_ID

print(tokenizer.decode(inputs))

input_length = [len(inputs)]

# Build the integer tensor directly (no float round-trip) with a batch dim of 1.
inputs = torch.tensor(inputs, dtype=torch.int).unsqueeze(0)
inputs = inputs.cuda()

# Run inference in eval mode without autograd, then restore whatever mode the
# model was in so a later training run is unaffected.
was_training = model.training
model.eval()
try:
    with torch.no_grad():
        logits, preds = model(inputs, input_length)
finally:
    model.train(was_training)

# Replace only the masked positions with the model's predictions; <sos>/<eos>
# and the unmasked tokens are left as they were.
for idx in mask_indexes:
    inputs[0][idx] = preds[0][idx]

result = tokenizer.decode(inputs[0])
print(result)

<sos>이 좋은 날씨<mask> 난 사무<mask>에 앉아서 <mask> 하는거람<eos>
<sos>이 좋은 날씨에 난 사무진에 앉아서 안 하는거람<eos>
