In [35]:
from utils import configuration
from utils.utils import *
from load_data import load_data
from trainer import trainer

import torch.optim as optim
import torch.nn as nn
import torch

from transformers import (
    AutoConfig,
    AutoModelForSequenceClassification,
)



In [36]:
cfg = 'config/uda_re.json'
model_cfg = 'config/bert_base.json'
cfg = configuration.params.from_json(cfg)
model_cfg = configuration.model.from_json(model_cfg)
set_seeds(cfg.seed)


In [37]:
data = load_data(cfg)
#sup_train_iter = data.sup_data_iter()
# unzsup_train_iter = data.unsup_data_iter()
sup_test_iter = data.test_data_iter()
# print([len(loader)  for loader in (sup_train_iter, unzsup_train_iter, sup_test_iter)])


In [38]:
# load model
config = AutoConfig.from_pretrained(model_cfg.model_name_or_path,num_labels=model_cfg.num_labels)
model = AutoModelForSequenceClassification.from_config(config=config)
optimizer = optim.Adam(model.parameters(), lr=1e-5)
criterion = nn.CrossEntropyLoss()
device = torch.device('cuda')
model = model.to(device)

## for unsup
# criterion = nn.KLDivLoss(reduction='batchmean', log_target=True)
# LSM = nn.LogSoftmax(dim=1)
#optimizer = optim.Adam(model.parameters(), lr=2e-3)
#model = AutoModelForSequenceClassification.from_config(config=config)

In [39]:
import logging


epochs = 1
model.train()
logging.basicConfig(level=logging.INFO)

for n in range(epochs):
    for step, batch in enumerate(sup_test_iter):
        # end 조건
        if step > cfg.ratio * len(sup_test_iter):
            break

        #  sup data를 device에 담는다.
        sup_input_ids, sup_input_mask, sup_input_type_ids, label_ids = (t.to(device) for t in batch)

        # inputs에 따른 outputs을 낸다
        sup_outputs = model(sup_input_ids, sup_input_mask, sup_input_type_ids)

        sup_loss = criterion(sup_outputs.logits, label_ids)

        # backpropagation
        sup_loss.backward()
        optimizer.step()
        optimizer.zero_grad()

        # logging
        if step % 10 == 0:
            logging.info(f'Currnent train step: {step}/{int(len(sup_test_iter) * cfg.ratio)}')
            logging.info(f'Currnent sup loss : {sup_loss}')

logging.info('Train end')

INFO:root:Currnent train step: 0/78
INFO:root:Currnent sup loss : 0.6672449707984924
INFO:root:Currnent train step: 10/78
INFO:root:Currnent sup loss : 0.7843418121337891
INFO:root:Currnent train step: 20/78
INFO:root:Currnent sup loss : 0.6474961638450623
INFO:root:Currnent train step: 30/78
INFO:root:Currnent sup loss : 0.6686989068984985
INFO:root:Currnent train step: 40/78
INFO:root:Currnent sup loss : 0.7316548824310303
INFO:root:Currnent train step: 50/78
INFO:root:Currnent sup loss : 0.7148677110671997
INFO:root:Currnent train step: 60/78
INFO:root:Currnent sup loss : 0.7111068367958069
INFO:root:Currnent train step: 70/78
INFO:root:Currnent sup loss : 0.6714945435523987
INFO:root:Train end


In [None]:
#model_load(model, cfg, path='model/')


In [40]:
UDA_trainer = trainer(model, cfg)
data_iter = {'sup_test' : sup_test_iter}
accuracy = UDA_trainer.test(data_iter)

INFO:root:Test start
INFO:root:pred : tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0], device='cuda:0') labels : tensor([0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0,
        1, 1, 0, 1, 1, 1, 1, 1], device='cuda:0')
INFO:root:Currnent test step: 0/78
INFO:root:Currnent accuracy : 10/32:  0.31
INFO:root:Total accuracy : 10/32:  0.31
INFO:root:pred : tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0], device='cuda:0') labels : tensor([0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 1, 0,
        1, 0, 1, 1, 0, 1, 0, 0], device='cuda:0')
INFO:root:Currnent test step: 10/78
INFO:root:Currnent accuracy : 16/32:  0.50
INFO:root:Total accuracy : 168/352:  0.48
INFO:root:pred : tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0], device='cuda:0') labels : tensor([0, 0

In [None]:
a = torch.tensor([1, 2, 3], dtype=float, requires_grad=True)
model.no_grad()
for i, v in enumerate(model.parameters()):
    print(v)
    if i == 1:
        break


In [None]:
# sup train

# sup_data_iter = itertools.cycle(sup_test_iter)
model.train()
epochs = 100
losses = []
for n in range(epochs):
    for steps, batch in enumerate(sup_train_iter):
        inputs = (t.to(device) for t in batch[:3])
        labels = batch[-1].to(device)
        outputs = model(*inputs)
        loss = criterion(outputs.logits, labels)
        print(f'steps : {n * len(sup_train_iter) + steps + 1} / {epochs * len(sup_train_iter)} loss : {loss}')

        pred = torch.argmax(outputs.logits, dim = 1)
        print(f'pred {pred} label {labels}')
        print(f'result : {torch.sum(pred == labels, dim = 0)}/ {len(labels)}')
        loss.backward()
        losses.append(loss)
        optimizer.step()
        optimizer.zero_grad()

In [None]:
PATH = "model/train.pt"
torch.save(model.state_dict(), PATH)

In [None]:
PATH = "model/train.pt"
model.load_state_dict(torch.load(PATH))

In [None]:
import matplotlib.pyplot as plt

plt.figure(dpi=150)
plt.plot(losses)
plt.title('supervised')
plt.xlabel('steps')
plt.ylabel('loss')
plt.show()
plt.savefig('fig/superviese_losses.png', dpi=3000)



In [None]:
# del labels
# del inputs
# del outputs
# del loss
# del pred
# torch.cuda.empty_cache()

'''
unsup_data_iter = itertools.cycle(sup_train_iter)
for steps, batch in enumerate(unsup_data_iter):
    ori_outputs = model(*batch[:3])
    aug_outputs = model(*batch[3:])
    loss = criterion(LSM(aug_outputs.logits), LSM(ori_outputs.logits))
    print(f'steps : {steps} loss : {loss}')
    ori_pred = torch.argmax(ori_outputs.logits, dim = 1)
    aug_pred = torch.argmax(aug_outputs.logits, dim = 1)
    print(f'ori_pred {ori_pred} aug_pred {aug_pred}')
    loss.backward()
    optimizer.step()
'''

In [None]:
import torch

device1 = torch.device('cuda')
print(device1)

In [None]:
t = torch.cuda.get_device_properties(0).total_memory
r = torch.cuda.memory_reserved(0)
a = torch.cuda.memory_allocated(0)
f = r-a  # free inside reserved
print(t,r,a,f)

In [None]:
def sharpening_prediction(i : torch.Tensor, temperature:float, log : bool = True):
    i = i / temperature
    if log:
        f = torch.nn.LogSoftmax(dim=1)
    else:
        f = torch.nn.Softmax(dim=1)
    return f(i)

def confidence_based_masking(x: torch.Tensor, beta:float):
    f = torch.nn.LogSoftmax(dim=1)
    y = f(x)
    maxPs, _ = torch.max(y, dim = 1)
    
    pass

In [None]:
import torch

a = torch.rand((10, 2)) * 10
b = sharpening_prediction(a, 0.5)
print(a, b, end = '')
max_b, _ = torch.max(b, dim=1)
uses_batch_id = max_b > 0.5
print(uses_batch_id)

In [None]:
import torch
import math

a = torch.tensor([1, 2, 3, 4, 5])
print(a.data)
print(a)
print(math.log(0.5))
print(math.log(0.8))