In [1]:
from utils import configuration
from utils.utils import set_seeds
import trainer
from load_data import load_data

import torch.optim as optim
import torch.nn as nn
import torch

from transformers import (
    AutoConfig,
    AutoModelForSequenceClassification,
)



In [2]:
cfg = 'config/uda.json'
model_cfg = 'config/bert_base.json'
cfg = configuration.params.from_json(cfg)
model_cfg = configuration.model.from_json(model_cfg)
set_seeds(cfg.seed)


In [3]:
# Load Data & Create Criterion
data = load_data(cfg)
if cfg.uda_mode:
    unsup_criterion = nn.KLDivLoss(reduction='none')
    data_iter = [data.sup_data_iter(), data.unsup_data_iter()] if cfg.mode == 'train' \
        else [data.sup_data_iter(), data.unsup_data_iter(), data.eval_data_iter()]  # train_eval
else:
    raise NotImplemented


In [4]:
# Load model
config = AutoConfig.from_pretrained(
    model_cfg.model_name_or_path,
    num_labels=model_cfg.num_labels
)

model = AutoModelForSequenceClassification.from_config(config=config)


In [13]:
# train
optimizer = optim.Adam(model.parameters(), lr=cfg.lr)
criterion = nn.CrossEntropyLoss()
device = torch.device('cuda')

# model을 gpu에 올린다.
#model = model.to(device)
model = AutoModelForSequenceClassification.from_config(config=config)


In [14]:
## 학습
# loss 기록
losses = []

# 학습 시작
for step, batch in enumerate(data_iter[0]):
    # 텐서로 바꿔준다. 데이터 종류에 따른  dtype을 다르게 한다
    #input_ids, input_mask, input_type_ids, labels = [t.to(device) for t in batch]
    input_ids, input_mask, input_type_ids, labels = batch
    outputs = model(input_ids, input_mask, input_type_ids)    
    loss = criterion(outputs.logits, labels)
    #print(loss.dtype)
    losses.append(loss)

    # backpropagation
    loss.backward()
    optimizer.step()

    # logging



In [17]:
# 학습 시작
losses = []
criterion = nn.KLDivLoss(reduction='batchmean')
for step, batch in enumerate(data_iter[1]):
    # 텐서로 바꿔준다. 데이터 종류에 따른  dtype을 다르게 한다
    if step == 2:
        break
    #ori_input_ids, ori_input_mask, ori_input_type_ids, \
    #aug_input_ids, aug_input_mask, aug_input_type_ids = [t.to(device) for t in batch]
    ori_input_ids, ori_input_mask, ori_input_type_ids, \
    aug_input_ids, aug_input_mask, aug_input_type_ids = batch
    ori_outputs = model(ori_input_ids, ori_input_mask, ori_input_type_ids)
    aug_outputs = model(aug_input_ids, aug_input_mask, aug_input_type_ids)   
    print(ori_outputs.logits)
    print(torch.argmax(ori_outputs.logits, dim=1))
    print(torch.argmax(aug_outputs.logits, dim=1))
    loss = criterion(ori_outputs.logits, aug_outputs.logits)
    print(loss)
    losses.append(loss)

    # backpropagation
    loss.backward()
    optimizer.step()

    # logging


tensor([[ 0.1113, -0.2415],
        [-0.1140,  0.0909],
        [-0.2046, -0.1474],
        [-0.5358, -0.2203],
        [-0.2453, -0.3304],
        [-0.0010, -0.3264]], grad_fn=<AddmmBackward>)
tensor([0, 1, 1, 1, 0, 0])
tensor([1, 1, 1, 1, 1, 1])
tensor(0., grad_fn=<DivBackward0>)
tensor([[-0.1298,  0.0715],
        [-0.5477, -0.1378],
        [-0.2407,  0.0166],
        [-0.5359, -0.1954],
        [-0.3624, -0.1927],
        [-0.3526, -0.1333]], grad_fn=<AddmmBackward>)
tensor([1, 1, 1, 1, 1, 1])
tensor([0, 1, 1, 1, 1, 1])
tensor(-0.0559, grad_fn=<DivBackward0>)


In [None]:
# model save

# model load