In [None]:
# import pickle as pickle
# import os
# import pandas as pd
# import torch
# from sklearn.metrics import accuracy_score
# from transformers import *
# from load_data import *
# import time
# import numpy as np
# import random

In [None]:
# Shell escape: print the kernel's current working directory.
!pwd

In [None]:
import random
import os, sys
from importlib import import_module

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Subset
from torch.optim import SGD, Adam, AdamW
from torch.optim.lr_scheduler import StepLR, ReduceLROnPlateau, CosineAnnealingLR
import time
from tqdm import tqdm

import pickle as pickle
import os
import pandas as pd
import torch

from ipywidgets import FloatProgress
from load_data import *
from transformers import AutoTokenizer, BertForSequenceClassification, AutoConfig, AutoModelForSequenceClassification
from torch.utils.data import DataLoader
from sklearn.model_selection import train_test_split
from tqdm import tqdm, trange
from sklearn.metrics import classification_report
from catalyst.data.sampler import BalanceClassSampler
import wandb

# import amp

sys.path.append(os.path.abspath('..'))

def seed_everything(seed):
    """
    Fix every random number generator used here so that repeated runs with
    the same settings produce the same results.

    Args:
        seed: integer seed value
    """
    # Python, NumPy and PyTorch (CPU, current GPU, all GPUs) generators.
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed,
        torch.cuda.manual_seed_all,  # covers the multi-GPU case
    )
    for apply_seed in seeders:
        apply_seed(seed)

    # Ask cuDNN for deterministic kernels and disable its auto-tuner.
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True


seed = 73
seed_everything(seed)

In [None]:
# ---- Run configuration -------------------------------------------------
# label_path = ''
train_data_path = "/opt/ml/input/data/train/train_c.tsv"
# Earlier checkpoint kept for reference:
# Pstage/0422/nzz_results_bert-base-multilingual-cased08-03/002_accuracy_81.33%.ckpt

# Backbone and classification head size.
model_name = 'xlm-roberta-base'
num_classes = 42

# Optimisation settings. Batch-size / learning-rate pairs noted so far:
#   32 / 1e-5
#   64 / 5e-5
batch_size = 32
lr = 1e-5
lr_decay_step = 4
num_epochs = 30
num_workers = 8

# train_log_interval = 20
# name = model_name

# Use the GPU when one is available, otherwise fall back to the CPU.
use_cuda = torch.cuda.is_available()
device = torch.device("cuda") if use_cuda else torch.device("cpu")

In [None]:
  
import math
import torch
from torch.optim.lr_scheduler import _LRScheduler

class CosineAnnealingWarmupRestarts(_LRScheduler):
    """Cosine-annealing learning-rate schedule with linear warmup and restarts.

    Within a cycle the learning rate rises linearly from ``min_lr`` to the
    cycle's ``max_lr`` during the first ``warmup_steps`` steps, then follows a
    half cosine back down to ``min_lr``. When a cycle ends a new one starts,
    with its length scaled by ``cycle_mult`` and its peak scaled by ``gamma``.

    Args:
        optimizer (Optimizer): Wrapped optimizer.
        first_cycle_steps (int): First cycle step size.
        cycle_mult (float): Cycle steps magnification. Default: 1.
        max_lr (float): First cycle's max learning rate. Default: 0.1.
        min_lr (float): Min learning rate. Default: 0.001.
        warmup_steps (int): Linear warmup step size. Default: 0.
        gamma (float): Decrease rate of max learning rate by cycle. Default: 1.
        last_epoch (int): The index of last epoch. Default: -1.
    """

    def __init__(self,
                 optimizer : torch.optim.Optimizer,
                 first_cycle_steps : int,
                 cycle_mult : float = 1.,
                 max_lr : float = 0.1,
                 min_lr : float = 0.001,
                 warmup_steps : int = 0,
                 gamma : float = 1.,
                 last_epoch : int = -1
        ):
        # Warmup must be strictly shorter than a cycle; this also keeps the
        # cosine denominator (cur_cycle_steps - warmup_steps) non-zero.
        assert warmup_steps < first_cycle_steps

        self.first_cycle_steps = first_cycle_steps # first cycle step size
        self.cycle_mult = cycle_mult # cycle steps magnification
        self.base_max_lr = max_lr # first max learning rate
        self.max_lr = max_lr # max learning rate in the current cycle
        self.min_lr = min_lr # min learning rate
        self.warmup_steps = warmup_steps # warmup step size
        self.gamma = gamma # decrease rate of max learning rate by cycle

        self.cur_cycle_steps = first_cycle_steps # first cycle step size
        self.cycle = 0 # cycle count
        self.step_in_cycle = last_epoch # step size of the current cycle

        super(CosineAnnealingWarmupRestarts, self).__init__(optimizer, last_epoch)

        # set learning rate min_lr
        self.init_lr()

    def init_lr(self):
        """Reset every parameter group (and ``base_lrs``) to ``min_lr``."""
        self.base_lrs = []
        for param_group in self.optimizer.param_groups:
            param_group['lr'] = self.min_lr
            self.base_lrs.append(self.min_lr)
        # Keep get_last_lr() in sync with what was just written to the optimizer.
        self._last_lr = list(self.base_lrs)

    def get_lr(self):
        """Return the learning rate of each parameter group for the current step."""
        if self.step_in_cycle == -1:
            return self.base_lrs
        elif self.step_in_cycle < self.warmup_steps:
            # Linear ramp from base_lr towards max_lr.
            return [(self.max_lr - base_lr)*self.step_in_cycle / self.warmup_steps + base_lr for base_lr in self.base_lrs]
        else:
            # Half-cosine decay from max_lr back to base_lr over the rest of the cycle.
            return [base_lr + (self.max_lr - base_lr) \
                    * (1 + math.cos(math.pi * (self.step_in_cycle-self.warmup_steps) \
                                    / (self.cur_cycle_steps - self.warmup_steps))) / 2
                    for base_lr in self.base_lrs]

    def step(self, epoch=None):
        """Advance the schedule by one step, or jump to ``epoch`` when given.

        Updates the cycle bookkeeping, writes the new learning rates into the
        optimizer's parameter groups and records them for ``get_last_lr()``.
        """
        if epoch is None:
            epoch = self.last_epoch + 1
            self.step_in_cycle = self.step_in_cycle + 1
            if self.step_in_cycle >= self.cur_cycle_steps:
                # Cycle finished: start the next one and rescale its length.
                self.cycle += 1
                self.step_in_cycle = self.step_in_cycle - self.cur_cycle_steps
                self.cur_cycle_steps = int((self.cur_cycle_steps - self.warmup_steps) * self.cycle_mult) + self.warmup_steps
        else:
            if epoch >= self.first_cycle_steps:
                if self.cycle_mult == 1.:
                    self.step_in_cycle = epoch % self.first_cycle_steps
                    self.cycle = epoch // self.first_cycle_steps
                else:
                    # Closed-form cycle index for geometrically growing cycle lengths.
                    n = int(math.log((epoch / self.first_cycle_steps * (self.cycle_mult - 1) + 1), self.cycle_mult))
                    self.cycle = n
                    self.step_in_cycle = epoch - int(self.first_cycle_steps * (self.cycle_mult ** n - 1) / (self.cycle_mult - 1))
                    self.cur_cycle_steps = self.first_cycle_steps * self.cycle_mult ** (n)
            else:
                self.cur_cycle_steps = self.first_cycle_steps
                self.step_in_cycle = epoch

        self.max_lr = self.base_max_lr * (self.gamma**self.cycle)
        self.last_epoch = math.floor(epoch)
        new_lrs = self.get_lr()
        for param_group, lr in zip(self.optimizer.param_groups, new_lrs):
            param_group['lr'] = lr
        # This override replaces the base-class step(), so it has to record the
        # applied rates itself; otherwise get_last_lr() has nothing to return.
        self._last_lr = [param_group['lr'] for param_group in self.optimizer.param_groups]

In [None]:
class LabelSmoothingLoss(nn.Module):
    """Cross-entropy against a label-smoothed target distribution.

    The true class receives probability ``1 - smoothing`` and the remaining
    ``smoothing`` mass is spread evenly over the other ``classes - 1`` classes.

    Args:
        classes: number of classes (size of the class dimension of ``pred``).
        smoothing: probability mass moved away from the true class.
        dim: dimension of ``pred`` that indexes the classes.
    """

    def __init__(self, classes=42, smoothing=0.50, dim=-1):
        super(LabelSmoothingLoss, self).__init__()
        self.confidence = 1.0 - smoothing
        self.smoothing = smoothing
        self.cls = classes
        self.dim = dim

    def forward(self, pred, target):
        """Return the mean smoothed cross-entropy.

        Args:
            pred: raw logits with the class scores along ``self.dim``.
            target: integer class indices, shaped like ``pred`` without the
                class dimension.
        """
        pred = pred.log_softmax(dim=self.dim)
        with torch.no_grad():
            true_dist = torch.zeros_like(pred)
            true_dist.fill_(self.smoothing / (self.cls - 1))
            # Scatter along the same class dimension used by log_softmax/sum
            # (previously hard-coded to 1, which only worked for 2-D logits).
            true_dist.scatter_(self.dim, target.unsqueeze(self.dim), self.confidence)
        return torch.mean(torch.sum(-true_dist * pred, dim=self.dim))

In [None]:
# Training setup that excludes label 0 from the data.

# loss
# criterion = nn.CrossEntropyLoss()
criterion = LabelSmoothingLoss()

# load model tokenizer and register the marker characters as special tokens
tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer.add_special_tokens({"additional_special_tokens": ["#", "@", '₩', '^']})

# load dataset
train_dataset = load_data(train_data_path)

# Drop label 0, then keep every label-40 row out of the stratified split and
# put all of them in the training set.
# NOTE(review): presumably label 40 has too few rows to stratify - confirm.
train_dataset = train_dataset[train_dataset['label']!= 0]
t2 = train_dataset[train_dataset.label != 40]
t3 = train_dataset[train_dataset.label == 40]
train_df, val_df = train_test_split(t2, test_size = 0.1, stratify = t2.label) # stratify option

train_df = pd.concat([train_df,t3])

train_label = train_df['label'].values
val_label = val_df['label'].values

# tokenizing dataset
tokenized_train = tokenized_dataset(train_df, tokenizer)
tokenized_val = tokenized_dataset(val_df, tokenizer)

# make dataset for pytorch
RE_train_dataset = RE_Dataset(tokenized_train, train_label)
RE_val_dataset = RE_Dataset(tokenized_val, val_label)


# native training using torch
config = AutoConfig.from_pretrained(model_name, num_labels = num_classes)
model = AutoModelForSequenceClassification.from_pretrained(model_name, config= config)
# add_special_tokens may have appended ids beyond the pretrained vocabulary;
# grow the embedding matrix so those ids cannot index out of range.
if len(tokenizer) > model.get_input_embeddings().num_embeddings:
    model.resize_token_embeddings(len(tokenizer))
model.to(device)

# upsampling option
# sampler = BalanceClassSampler(RE_train_dataset.get_classes(), 'upsampling')
# train_loader = DataLoader(RE_train_dataset, batch_size = batch_size, sampler = sampler)#, shuffle = True)

train_loader = DataLoader(RE_train_dataset, batch_size = batch_size, shuffle = True)

val_loader = DataLoader(RE_val_dataset, batch_size=batch_size, shuffle = False)

# optimizer
optim = AdamW(model.parameters(), lr = lr)



In [None]:
# Learning-rate scheduler.
# Alternatives tried earlier (kept for reference):
#   ReduceLROnPlateau(optim, factor = 0.1, patience = 10)
#   StepLR(optim, lr_decay_step, gamma=0.5)
#   CosineAnnealingWarmupRestarts(optim, first_cycle_steps=200, cycle_mult=1.0,
#                                 max_lr=0.1, min_lr=0.001, warmup_steps=50, gamma=1.0)
# NOTE(review): the training loop calls scheduler.step() once per batch, so
# T_max=2 makes the rate swing between lr and eta_min every few batches -
# confirm this is intended.
scheduler = CosineAnnealingLR(optimizer=optim, eta_min=1e-9, T_max=2)

In [None]:
# Open the Weights & Biases run and record this run's hyperparameters.
run = wandb.init(
    project='Relation Extraction',
    entity='hwan17',
    name=model_name,
    reinit=False,
)
wandb.config.update(dict(
    model_name=model_name,
    num_epochs=num_epochs,
    batch_size=batch_size,
    learning_rate=lr,
    lr_decay_step=lr_decay_step,
    scheduler=scheduler,
    time=time.strftime('%H-%M', time.localtime(time.time())),  # run start, HH-MM
    seed=seed,
    optimizer=optim,
    loss_function=criterion,
))

In [None]:
# Checkpoint directory, tagged with the model name and the start time (HH-MM).
now = time.strftime('%H-%M', time.localtime(time.time()))
out_dir = os.path.join(os.getcwd(), 'results_{}{}'.format(model_name,now))
os.makedirs(out_dir, exist_ok = True)

best_val_acc = 0
best_val_loss = np.inf

# This run trains for twice the configured epoch count; keep the number in
# one place so the progress bar reports the real total.
total_epochs = num_epochs * 2

for epoch in range(total_epochs):
    # ---- training ----
    model.train()
    with tqdm(train_loader, total = len(train_loader), unit = 'batch') as train_bar:
        for batch in train_bar:
            inputs = {key : value.to(device) for key, value in batch.items() if key != 'labels'}
            labels = batch['labels'].to(device)

            outputs = model(**inputs)
            loss = criterion(outputs.logits, labels)

            optim.zero_grad()
            loss.backward()
            optim.step()

            preds = torch.argmax(outputs.logits, dim = 1)
            correct = (preds == labels).sum().item()
            # Divide by the real batch size: the last batch of an epoch can be
            # smaller than the configured batch_size.
            acc = correct / labels.size(0)

            # The scheduler is stepped once per batch.
            scheduler.step()
            # Report the rate actually in use, not the initial constant.
            cur_lr = optim.param_groups[0]['lr']

            train_bar.set_description(f'Training Epoch [{epoch+1}/{total_epochs}]')
            train_bar.set_postfix(loss=loss.item(), acc = acc*100, lr = cur_lr)

            wandb.log({
                "train_acc" : 100.*acc,
                "train_loss" : loss.item()
            })

    # ---- validation ----
    print('Calculating validation results')
    model.eval()
    val_loss_items = []
    val_acc_items = []
    label_list = []
    pred_list = []
    with torch.no_grad():
        for val_batch in val_loader:
            val_inputs = {key : value.to(device) for key, value in val_batch.items() if key != 'labels'}
            labels = val_batch['labels'].to(device)

            outputs = model(**val_inputs)
            preds = torch.argmax(outputs.logits, dim = 1)

            loss_item = criterion(outputs.logits, labels).item()
            acc_item = (labels == preds).sum().item()

            pred_cpu = preds.to('cpu')
            # Collect plain Python ints so the report below receives ordinary
            # label sequences rather than lists of 0-d tensors.
            label_list.extend(val_batch['labels'].tolist())
            pred_list.extend(pred_cpu.tolist())

            wandb.log({
                "val_preds" : pred_cpu,
                "val_labels" : val_batch['labels']
            })

            val_loss_items.append(loss_item)
            val_acc_items.append(acc_item)

    # Loss is averaged over batches, accuracy over individual samples.
    val_loss = np.sum(val_loss_items) / len(val_loader)
    val_acc = np.sum(val_acc_items) / len(RE_val_dataset)
    cr2 = classification_report(label_list,pred_list)
    wandb.log({
        "val_acc" : 100.*val_acc,
        "val_loss" : val_loss,
        "classification_report" : cr2
    })

    if val_acc > best_val_acc:
        print('New best model')
        best_val_acc = val_acc
        # Only checkpoints above 80% validation accuracy are written to disk.
        if best_val_acc > 0.8:
            torch.save(model.state_dict(),out_dir+f'/{epoch:03}_accuracy_{val_acc:4.2%}.ckpt')
    print(f'val_acc : {val_acc}')

#         print('labels')
#         torch.cat(label_list).unique(return_counts = True)
#         print('preds')
#         torch.cat(pred_list).unique(return_counts = True)

In [None]:
# Second training setup: label 0 is kept this time and the model starts from a
# previously saved checkpoint.

# loss
# criterion = nn.CrossEntropyLoss()
criterion = LabelSmoothingLoss()

# load model tokenizer and register the marker characters as special tokens
tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer.add_special_tokens({"additional_special_tokens": ["#", "@", '₩', '^']})

# load dataset
train_dataset = load_data(train_data_path)

# Every label-40 row is kept out of the stratified split and added to the
# training set afterwards.
t2 = train_dataset[train_dataset.label != 40]
t3 = train_dataset[train_dataset.label == 40]
train_df, val_df = train_test_split(t2, test_size = 0.1, stratify = t2.label) # stratify option

train_df = pd.concat([train_df,t3])

train_label = train_df['label'].values
val_label = val_df['label'].values

# tokenizing dataset
tokenized_train = tokenized_dataset(train_df, tokenizer)
tokenized_val = tokenized_dataset(val_df, tokenizer)

# make dataset for pytorch
RE_train_dataset = RE_Dataset(tokenized_train, train_label)
RE_val_dataset = RE_Dataset(tokenized_val, val_label)

# upsampling option
# sampler = BalanceClassSampler(RE_train_dataset.get_classes(), 'upsampling')
# train_loader = DataLoader(RE_train_dataset, batch_size = batch_size, sampler = sampler)#, shuffle = True)

train_loader = DataLoader(RE_train_dataset, batch_size = batch_size, shuffle = True)

val_loader = DataLoader(RE_val_dataset, batch_size=batch_size, shuffle = False)

config = AutoConfig.from_pretrained(model_name, num_labels = num_classes)
model = AutoModelForSequenceClassification.from_pretrained(model_name, config= config)
# NOTE(review): the checkpoint path names bert-base-multilingual-cased; make
# sure model_name matches the backbone the checkpoint was trained with.
# map_location='cpu' lets the weights load on machines without the GPU they
# were saved from; the model is moved to `device` right after.
model.load_state_dict(torch.load('/opt/ml/Pstage/0422/nzz_results_bert-base-multilingual-cased08-03/002_accuracy_81.33%.ckpt', map_location='cpu'))
# add_special_tokens may have appended ids beyond the loaded vocabulary; grow
# the embedding matrix so those ids cannot index out of range.
if len(tokenizer) > model.get_input_embeddings().num_embeddings:
    model.resize_token_embeddings(len(tokenizer))
model.to(device)

# optimizer
optim = AdamW(model.parameters(), lr = lr)

# scheduler
# A new optimizer was just created, so the scheduler has to be rebuilt around
# it; a scheduler made for an earlier optimizer would keep adjusting that one
# and leave this optimizer's learning rate untouched.
scheduler = CosineAnnealingLR(optim, T_max=2, eta_min=0.000000001)

In [None]:
# run = wandb.init(project='Relation Extraction', entity = 'hwan17', name = model_name, reinit = False)
# wandb.config.update({
#     "model_name" : model_name,
#     "num_epochs" : num_epochs,
#     "batch_size" : batch_size,
#     "learning_rate" : lr,
#     "lr_decay_step" : lr_decay_step,
#     "scheduler" : scheduler,
#     "time" : time.strftime('%H-%M', time.localtime(time.time())),
#     "seed" : seed,
#     'optimizer' : optim,
#     'loss_function' : criterion
# })

# now = time.strftime('%H-%M', time.localtime(time.time()))
# out_dir = os.path.join(os.getcwd(), 'results_{}{}'.format(model_name,now))
# os.makedirs(out_dir, exist_ok = True)

best_val_acc = 0
best_val_loss = np.inf

# NOTE: checkpoints are written to `out_dir`, which must already exist from an
# earlier cell.
for epoch in range(num_epochs):
    # ---- training ----
    model.train()
    with tqdm(train_loader, total = len(train_loader), unit = 'batch') as train_bar:
        for batch in train_bar:
            inputs = {key : value.to(device) for key, value in batch.items() if key != 'labels'}
            labels = batch['labels'].to(device)

            outputs = model(**inputs)
            loss = criterion(outputs.logits, labels)

            optim.zero_grad()
            loss.backward()
            optim.step()

            preds = torch.argmax(outputs.logits, dim = 1)
            correct = (preds == labels).sum().item()
            # Divide by the real batch size: the last batch of an epoch can be
            # smaller than the configured batch_size.
            acc = correct / labels.size(0)

            # The scheduler is stepped once per batch.
            scheduler.step()
            # Report the rate actually in use, not the initial constant.
            cur_lr = optim.param_groups[0]['lr']

            train_bar.set_description(f'Training Epoch [{epoch+1}/{num_epochs}]')
            train_bar.set_postfix(loss=loss.item(), acc = acc*100, lr = cur_lr)

            wandb.log({
                "train_acc" : 100.*acc,
                "train_loss" : loss.item()
            })

    # ---- validation ----
    print('Calculating validation results')
    model.eval()
    val_loss_items = []
    val_acc_items = []
    label_list = []
    pred_list = []
    with torch.no_grad():
        for val_batch in val_loader:
            val_inputs = {key : value.to(device) for key, value in val_batch.items() if key != 'labels'}
            labels = val_batch['labels'].to(device)

            outputs = model(**val_inputs)
            preds = torch.argmax(outputs.logits, dim = 1)

            loss_item = criterion(outputs.logits, labels).item()
            acc_item = (labels == preds).sum().item()

            pred_cpu = preds.to('cpu')
            # Collect plain Python ints so the report below receives ordinary
            # label sequences rather than lists of 0-d tensors.
            label_list.extend(val_batch['labels'].tolist())
            pred_list.extend(pred_cpu.tolist())

            wandb.log({
                "val_preds" : pred_cpu,
                "val_labels" : val_batch['labels']
            })

            val_loss_items.append(loss_item)
            val_acc_items.append(acc_item)

    # Loss is averaged over batches, accuracy over individual samples.
    val_loss = np.sum(val_loss_items) / len(val_loader)
    val_acc = np.sum(val_acc_items) / len(RE_val_dataset)
    cr2 = classification_report(label_list,pred_list)
    wandb.log({
        "val_acc" : 100.*val_acc,
        "val_loss" : val_loss,
        "classification_report" : cr2
    })

    if val_acc > best_val_acc:
        print('New best model')
        best_val_acc = val_acc
        # Only checkpoints above 80% validation accuracy are written to disk.
        if best_val_acc > 0.8:
            torch.save(model.state_dict(),out_dir+f'/{epoch:03}_accuracy_{val_acc:4.2%}.ckpt')
    print(f'val_acc : {val_acc}')

#         print('labels')
#         torch.cat(label_list).unique(return_counts = True)
#         print('preds')
#         torch.cat(pred_list).unique(return_counts = True)

In [1]:


from IPython.display import Audio
# Build an autoplaying audio object for a short beep clip.
# NOTE(review): this expression is not the last statement of the cell, so the
# object is presumably never displayed (and the beep never plays) - confirm.
Audio("https://upload.wikimedia.org/wikipedia/commons/0/05/Beep-09.ogg", autoplay=True)

# Per-class metrics over the labels and predictions collected by the most
# recent validation pass.
print(classification_report(np.array(label_list),np.array(pred_list)))

# NOTE(review): `wkonow` is not defined anywhere in the visible notebook, so
# unless `from load_data import *` provides it, this line raises NameError and
# stops the cell here. Remove it if the rest of the cell is meant to run.
wkonow

# validation test: score a previously saved checkpoint on the validation split

state_dict_dir = os.path.join('/opt/ml','Pstage/0421/results_xlm-roberta-base03-47/006_accuracy_72.44%.ckpt')

config2 = AutoConfig.from_pretrained(model_name, num_labels = num_classes)
# Let the Auto class pick the architecture that matches model_name; forcing
# BertForSequenceClassification cannot load a checkpoint saved from another
# backbone such as XLM-RoBERTa.
test_model = AutoModelForSequenceClassification.from_pretrained(model_name, config = config2)

# map_location='cpu' lets the weights load on machines without the GPU they
# were saved from; the model is moved to `device` right after.
test_model.load_state_dict(torch.load(state_dict_dir, map_location='cpu'))
test_model.to(device)

print('Calculating validation results')
test_model.eval()
val_loss_items = []
val_acc_items = []
label_list = []
pred_list = []
with torch.no_grad():
    for val_batch in val_loader:
        val_inputs = {key : value.to(device) for key, value in val_batch.items() if key != 'labels'}
        labels = val_batch['labels'].to(device)

        outputs = test_model(**val_inputs)
        preds = torch.argmax(outputs.logits, dim = 1)

        loss_item = criterion(outputs.logits, labels).item()
        acc_item = (labels == preds).sum().item()

        pred_cpu = preds.to('cpu')
        # Collect plain Python ints for the aggregate report below.
        label_list.extend(val_batch['labels'].tolist())
        pred_list.extend(pred_cpu.tolist())

        # classification report for this batch; sklearn expects (y_true, y_pred)
        cr = classification_report(val_batch['labels'], pred_cpu)
        print(cr)
        wandb.log({
            "val_preds" : pred_cpu,
            "val_labels" : val_batch['labels'],
            "classification_report" : cr
        })

        val_loss_items.append(loss_item)
        val_acc_items.append(acc_item)

# Loss is averaged over batches, accuracy over individual samples.
val_loss = np.sum(val_loss_items) / len(val_loader)
val_acc = np.sum(val_acc_items) / len(RE_val_dataset)
cr2 = classification_report(label_list,pred_list)
wandb.log({
    "val_acc" : 100.*val_acc,
    "val_loss" : val_loss,
    "classification_report" : cr2
})

# This pass only evaluates an existing checkpoint, so nothing is saved and the
# training loop's best_val_acc is left alone. The previous version wrote the
# unrelated training `model` to disk under a stale `epoch` number.
print(f'val_acc : {val_acc}')

# print('labels')
# print(torch.cat(label_list).unique(return_counts = True))
# print('preds')
# print(torch.cat(pred_list).unique(return_counts = True)        )
# output = pd.DataFrame(np.array(output_pred).flatten(), columns=['pred'])
# output.to_csv('./submission_{}.csv'.format(time.strftime('%H-%M', time.localtime(time.time()))), index = False)

from sklearn.metrics import classification_report

# Metrics for the most recent validation batch only.
# sklearn's signature is (y_true, y_pred); passing them the other way round
# swaps precision and recall in the report.
# cf = confusion_matrix(val_batch['labels'], preds.to('cpu'))
cr = classification_report(val_batch['labels'], preds.to('cpu'))
print(cr)
# cf









# inference
# Backbones tried here: hyunwoongko/kobart, bert-base-multilingual-cased
model_name = 'bert-base-multilingual-cased'
state_dict_dir = os.path.join('/opt/ml','Pstage/0422/results_bert-base-multilingual-cased09-35/000_accuracy_87.67%.ckpt')

# The tokenizer has to match the backbone being loaded. The notebook-level
# `tokenizer` was built for a different model_name, so build a dedicated one
# here with the same special tokens that were registered for training.
test_tokenizer = AutoTokenizer.from_pretrained(model_name)
test_tokenizer.add_special_tokens({"additional_special_tokens": ["#", "@", '₩', '^']})

test_data = load_data("/opt/ml/input/data/test/test.tsv")
test_label = test_data['label'].values

tokenized_test = tokenized_dataset(test_data, test_tokenizer)

config2 = AutoConfig.from_pretrained(model_name, num_labels = num_classes)
test_model = AutoModelForSequenceClassification.from_pretrained(model_name, config = config2)

# map_location='cpu' lets the weights load on machines without the GPU they
# were saved from; the model is moved to `device` right after.
test_model.load_state_dict(torch.load(state_dict_dir, map_location='cpu'))
test_model.to(device)

test_dataset = RE_Dataset(tokenized_test, test_label)

test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

test_model.eval()
output_pred = []
with torch.no_grad():
    for data in tqdm(test_loader):
        inputs = {key : value.to(device) for key, value in data.items() if key != 'labels'}
        pred = test_model(**inputs)
        # Predicted class index per sample in this batch.
        result = np.argmax(pred.logits.detach().cpu().numpy(), axis = -1)
        output_pred.extend(result)

# Write the predictions to a submission file tagged with the current HH-MM time.
output = pd.DataFrame(np.array(output_pred).flatten(), columns=['pred'])
output.to_csv('./submission_{}.csv'.format(time.strftime('%H-%M', time.localtime(time.time()))), index = False)

np.array(output_pred).flatten()

In [1]:
# import pickle as pickle
# import os
# import pandas as pd
# import torch
# from sklearn.metrics import accuracy_score
# from transformers import *
# from load_data import *
# import time
# import numpy as np
# import random 

# !apt-get install g++ 
# !pip install pororo


import random
import os, sys
from importlib import import_module

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Subset
from torch.optim import SGD, Adam, AdamW
from torch.optim.lr_scheduler import StepLR, ReduceLROnPlateau, CosineAnnealingLR
import time
from tqdm import tqdm

import pickle as pickle
import os
import pandas as pd
import torch

from ipywidgets import FloatProgress
from load_data import *
from transformers import AutoTokenizer, BertForSequenceClassification, AutoConfig, AutoModelForSequenceClassification
from torch.utils.data import DataLoader
from sklearn.model_selection import train_test_split
from tqdm import tqdm, trange
from sklearn.metrics import classification_report

# import amp

sys.path.append(os.path.abspath('..'))

def seed_everything(seed):
    """
    Fix every random number generator used here so that repeated runs with
    the same settings produce the same results.

    Args:
        seed: integer seed value
    """
    # Python, NumPy and PyTorch (CPU, current GPU, all GPUs) generators.
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed,
        torch.cuda.manual_seed_all,  # covers the multi-GPU case
    )
    for apply_seed in seeders:
        apply_seed(seed)

    # Ask cuDNN for deterministic kernels and disable its auto-tuner.
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True


seed = 73
seed_everything(seed)

# ---- Run configuration -------------------------------------------------
# label_path = ''
train_data_path = "/opt/ml/input/data/train/train_c.tsv"

# Backbone and classification head size.
model_name = 'xlm-roberta-large'
num_classes = 42

# Optimisation settings. Batch-size / learning-rate pairs noted so far:
#   32 / 1e-5
#   64 / 5e-5
batch_size = 32
lr = 5e-6
lr_decay_step = 4
num_epochs = 10
num_workers = 8

# train_log_interval = 20
# name = model_name

# Use the GPU when one is available, otherwise fall back to the CPU.
use_cuda = torch.cuda.is_available()
device = torch.device("cuda") if use_cuda else torch.device("cpu")


from catalyst.data.sampler import BalanceClassSampler

# class RE_Dataset(torch.utils.data.Dataset):
#   def __init__(self, tokenized_dataset, labels):
#     self.tokenized_dataset = tokenized_dataset
#     self.labels = labels

#   def __getitem__(self, idx):
#     item = {key: torch.tensor(val[idx]) for key, val in self.tokenized_dataset.items()}
#     item['labels'] = torch.tensor(self.labels[idx])
#     return item

#   def __len__(self):
#     return len(self.labels)

#   def get_classes(self):
#     return self.labels

# def train():
#   ...
#   RE_train_dataset = RE_Dataset(tokenized_train, train_label)
#   # BalanceClassSampler를 정의합니다. 여기선 upsampling 옵션을 주었습니다.
#   sampler = BalanceClassSampler(RE_train_dataset.get_classes(), 'upsampling')
#   RE_train_loader = DataLoader(RE_train_dataset, batch_size=16, sampler=sampler)
  
#   # 한 epoch에 모델에 들어가는 label의 분포를 살펴봅시다.
#   label_list = []
#   for batch in RE_train_loader:
#       label_list.append(batch['labels'])
#   torch.cat(label_list).unique(return_counts=True)



# # Dataset 구성.
# class RE_Dataset(torch.utils.data.Dataset):
#   def __init__(self, tokenized_dataset, labels):
#     self.tokenized_dataset = tokenized_dataset
#     self.labels = labels
    
#   def __getitem__(self, idx):
#     item = {key: torch.tensor(val[idx]) for key, val in self.tokenized_dataset.items()}
#     item['labels'] = torch.tensor(self.labels[idx])
#     return item

#   def __len__(self):
#     return len(self.labels)

#   def get_classes(self):
#     return self.labels

# # 처음 불러온 tsv 파일을 원하는 형태의 DataFrame으로 변경 시켜줍니다.
# # 변경한 DataFrame 형태는 baseline code description 이미지를 참고해주세요.
# def preprocessing_dataset(dataset, label_type):
#   label = []
#   for i in dataset[8]:
#     if i == 'blind':
#       label.append(100)
#     else:
#       label.append(label_type[i])
#   out_dataset = pd.DataFrame({'sentence':dataset[1],'entity_01':dataset[2],'entity_02':dataset[5],'label':label,})
#   return out_dataset

# # tsv 파일을 불러옵니다.
# def load_data(dataset_dir):
#   # load label_type, classes
#   with open('/opt/ml/input/data/label_type.pkl', 'rb') as f:
#     label_type = pickle.load(f)
#   # load dataset
#   dataset = pd.read_csv(dataset_dir, delimiter='\t', header=None)
#   # preprecessing dataset
#   dataset = preprocessing_dataset(dataset, label_type)
  
#   return dataset

# # bert input을 위한 tokenizing.
# # tip! 다양한 종류의 tokenizer와 special token들을 활용하는 것으로도 새로운 시도를 해볼 수 있습니다.
# # baseline code에서는 2가지 부분을 활용했습니다.
# def tokenized_dataset(dataset, tokenizer):
#   concat_entity = []
#   for e01, e02 in zip(dataset['entity_01'], dataset['entity_02']):
#     temp = ''
#     temp = e01 + '[SEP]' + e02
#     concat_entity.append(temp)
#   tokenized_sentences = tokenizer(
#       concat_entity,
#       list(dataset['sentence']),
#       return_tensors="pt",
#       padding=True,
#       truncation=True,
#       max_length=100,
#       add_special_tokens=True,
#       )
#   return tokenized_sentences

class LabelSmoothingLoss(nn.Module):
    """Cross-entropy against a label-smoothed target distribution.

    The true class receives probability ``1 - smoothing``; the remaining
    ``smoothing`` mass is spread evenly over the other ``classes - 1`` classes.

    Args:
        classes: Number of classes; should equal the size of the logits along ``dim``.
        smoothing: Probability mass taken away from the true class.
        dim: Axis of the logits that indexes classes.
    """

    def __init__(self, classes=42, smoothing=0.50, dim=-1):
        super(LabelSmoothingLoss, self).__init__()
        self.confidence = 1.0 - smoothing
        self.smoothing = smoothing
        self.cls = classes
        self.dim = dim

    def forward(self, pred, target):
        """Return the mean smoothed cross-entropy.

        Args:
            pred: Raw logits with the class axis at ``self.dim``.
            target: Integer class indices; same shape as ``pred`` without the class axis.

        Returns:
            Scalar tensor: per-element loss averaged over all non-class axes.
        """
        pred = pred.log_softmax(dim=self.dim)
        with torch.no_grad():
            # Smoothed target: uniform floor everywhere, confidence at the true class.
            true_dist = torch.full_like(pred, self.smoothing / (self.cls - 1))
            # Scatter along the configured class axis (it used to be hard-coded to 1,
            # which only worked for 2-D logits).
            true_dist.scatter_(self.dim, target.unsqueeze(self.dim), self.confidence)
        return torch.mean(torch.sum(-true_dist * pred, dim=self.dim))
    
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable

class FocalLoss(nn.Module):
    """Focal loss: log-likelihood scaled by ``(1 - p_t) ** gamma``.

    Args:
        gamma: Focusing exponent; ``0`` gives (optionally alpha-weighted) cross-entropy.
        alpha: Optional class weights. A scalar ``a`` becomes ``[a, 1 - a]``,
            a list becomes one weight per class, a tensor is used as given.
        size_average: If true return the mean over elements, otherwise the sum.
    """

    def __init__(self, gamma=0, alpha=None, size_average=True):
        super(FocalLoss, self).__init__()
        self.gamma = gamma
        self.alpha = alpha
        # Python 3 has no `long`; naming it in the isinstance tuple raised
        # NameError on every construction, even with alpha=None.
        if isinstance(alpha, (float, int)):
            self.alpha = torch.Tensor([alpha, 1 - alpha])
        if isinstance(alpha, list):
            self.alpha = torch.Tensor(alpha)
        self.size_average = size_average

    def forward(self, input, target):
        """Compute the focal loss.

        Args:
            input: Logits of shape ``(N, C)`` or ``(N, C, ...)``; extra axes are
                flattened into the sample axis.
            target: Integer class indices with one entry per flattened sample.

        Returns:
            Scalar tensor (mean or sum, per ``size_average``).
        """
        if input.dim() > 2:
            input = input.view(input.size(0), input.size(1), -1)  # N,C,H,W => N,C,H*W
            input = input.transpose(1, 2)                         # N,C,H*W => N,H*W,C
            input = input.contiguous().view(-1, input.size(2))    # N,H*W,C => N*H*W,C
        target = target.view(-1, 1)

        # Explicit class axis; the implicit-dim form is deprecated.
        logpt = F.log_softmax(input, dim=1)
        logpt = logpt.gather(1, target).view(-1)
        # The modulating factor is detached, as in the original: gradients flow
        # only through the log-probability term.
        pt = logpt.detach().exp()

        if self.alpha is not None:
            if self.alpha.type() != input.type():
                self.alpha = self.alpha.type_as(input)
            at = self.alpha.gather(0, target.view(-1))
            logpt = logpt * at

        loss = -1 * (1 - pt) ** self.gamma * logpt
        if self.size_average:
            return loss.mean()
        return loss.sum()



# Training setup: loss, tokenizer, data split, model, data loaders, optimizer,
# LR scheduler, experiment tracking and the checkpoint output directory.

# scheduler = StepLR(optim, factor = 0.1, patience = 10)

# loss
# criterion = nn.CrossEntropyLoss()
# Pass the configured class count explicitly so the smoothing denominator
# cannot drift away from `num_classes`.
criterion = LabelSmoothingLoss(classes = num_classes)

# load model tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer.add_special_tokens({"additional_special_tokens": ["#", "@", '₩', '^']})

# load dataset
train_dataset = load_data(train_data_path)

# train_df, val_df = train_test_split(train_dataset, test_size = 0.2)

# Rows with label 40 are kept out of the stratified split and appended to the
# training set only (presumably too few rows to stratify — TODO confirm).
t2 = train_dataset[train_dataset.label != 40]
t3 = train_dataset[train_dataset.label == 40]
# random_state pins the partition so re-running this cell reproduces the same
# train/validation split regardless of earlier NumPy RNG consumption.
train_df, val_df = train_test_split(t2, test_size = 0.2, stratify = t2.label, random_state = seed) # stratify option

train_df = pd.concat([train_df,t3])

train_label = train_df['label'].values
val_label = val_df['label'].values

# tokenizing dataset
tokenized_train = tokenized_dataset(train_df, tokenizer)
tokenized_val = tokenized_dataset(val_df, tokenizer)

# make dataset for pytorch
RE_train_dataset = RE_Dataset(tokenized_train, train_label)
RE_val_dataset = RE_Dataset(tokenized_val, val_label)


# native training using torch
config = AutoConfig.from_pretrained(model_name, num_labels = num_classes)
model = AutoModelForSequenceClassification.from_pretrained(model_name, config= config)
# The tokenizer gained special tokens above, so the embedding matrix must grow to match.
model.resize_token_embeddings(len(tokenizer))
model.to(device)

# upsampling option
# sampler = BalanceClassSampler(RE_train_dataset.get_classes(), 'upsampling')
# train_loader = DataLoader(RE_train_dataset, batch_size = batch_size, num_workers = num_workers, sampler = sampler)#, shuffle = True)

# num_workers = 8
# batch_size = 100
train_loader = DataLoader(RE_train_dataset, batch_size = batch_size, num_workers = num_workers, shuffle = True)

val_loader = DataLoader(RE_val_dataset, batch_size=batch_size, num_workers = num_workers,shuffle = False)

# optimizer
optim = AdamW(model.parameters(), lr = lr)

# scheduler
# scheduler = ReduceLROnPlateau(optim, factor = 0.1, patience = 10)
# scheduler = StepLR(optim, lr_decay_step, gamma=0.5)
# NOTE(review): the training loop calls scheduler.step() once per batch, so
# T_max=2 makes the learning rate swing between `lr` and `eta_min` every four
# batches. Confirm whether a per-epoch or whole-run cosine was intended.
scheduler = CosineAnnealingLR(optim, T_max=2, eta_min=0.0000000001)

import wandb

# A single timestamp shared by the wandb config and the output directory; taking
# it twice could straddle a minute boundary and make the two disagree.
now = time.strftime('%H-%M', time.localtime(time.time()))

run = wandb.init(project='Relation Extraction', entity = 'hwan17', name = model_name, reinit = False)
wandb.config.update({
    "model_name" : model_name,
    "num_epochs" : num_epochs,
    "batch_size" : batch_size,
    "learning_rate" : lr,
    "lr_decay_step" : lr_decay_step,
    # Record class names instead of live objects: the config should hold plain,
    # comparable values rather than object reprs containing memory addresses.
    "scheduler" : type(scheduler).__name__,
    "time" : now,
    "seed" : seed,
    'optimizer' : type(optim).__name__,
    'loss_function' : type(criterion).__name__
})

out_dir = os.path.join(os.getcwd(), 'results_{}{}'.format(model_name,now))
os.makedirs(out_dir, exist_ok = True)

best_val_acc = 0
best_val_loss = np.inf

# Main loop: one optimisation pass over train_loader, then one evaluation pass
# over val_loader; the best-accuracy weights are checkpointed into out_dir.
for epoch in range(num_epochs):
    # ---- training ----------------------------------------------------------
    model.train()
    with tqdm(train_loader, total = len(train_loader), unit = 'batch') as train_bar:
        for batch in train_bar:
            inputs = {key : value.to(device) for key, value in batch.items() if key != 'labels'}
            labels = batch['labels'].to(device)

            outputs = model(**inputs)
            loss = criterion(outputs.logits, labels)

            optim.zero_grad()
            loss.backward()
            optim.step()

            preds = torch.argmax(outputs.logits, dim = 1)
            correct = (preds == labels).sum().item()
            # Divide by the actual batch length: the final batch is usually
            # shorter than batch_size, which under-reported its accuracy.
            acc = correct / labels.size(0)

            # NOTE(review): the scheduler advances once per batch, not per epoch;
            # check that its period (T_max) was chosen with that in mind.
            scheduler.step()
            # get_last_lr() returns one value per param group; there is a single
            # group here, so log it as a scalar rather than a one-element list.
            cur_lr = scheduler.get_last_lr()[0]

            train_bar.set_description(f'Training Epoch [{epoch+1}/{num_epochs}]')
            train_bar.set_postfix(loss=loss.item(), acc = acc*100, lr = cur_lr)

            wandb.log({
                "train_acc" : 100.*acc,
                "train_loss" : loss.item(),
                "learning_rate" : cur_lr
            })

    # ---- validation --------------------------------------------------------
    print('Calculating validation results')
    model.eval()
    val_loss_items = []
    val_acc_items = []
    label_list = []
    pred_list = []
    with torch.no_grad():
        for val_batch in val_loader:
            val_inputs = {key : value.to(device) for key, value in val_batch.items() if key != 'labels'}
            labels = val_batch['labels'].to(device)

            outputs = model(**val_inputs)
            preds = torch.argmax(outputs.logits, dim = 1)

            loss_item = criterion(outputs.logits, labels).item()
            acc_item = (labels == preds).sum().item()

            pred_cpu = preds.to('cpu')
            # Accumulate plain ints so sklearn receives ordinary label lists
            # rather than lists of 0-d tensors.
            label_list.extend(val_batch['labels'].tolist())
            pred_list.extend(pred_cpu.tolist())

            wandb.log({
                "val_preds" : pred_cpu,
                "val_labels" : val_batch['labels']
            })

            val_loss_items.append(loss_item)
            val_acc_items.append(acc_item)

    # Loss is averaged per batch, accuracy per sample.
    val_loss = np.sum(val_loss_items) / len(val_loader)
    val_acc = np.sum(val_acc_items) / len(RE_val_dataset)
    cr2 = classification_report(label_list,pred_list)

    wandb.log({
        "val_acc" : 100.*val_acc,
        "val_loss" : val_loss,
        # Both entries now come from the same (last) validation batch; previously
        # the labels were taken from the last *training* batch.
        "labels" : val_batch['labels'],
        "preds" : preds.to('cpu')
    })

    if val_acc > best_val_acc:
        print('New best model')
        best_val_acc = val_acc
        torch.save(model.state_dict(), os.path.join(out_dir, f'{epoch:03}_accuracy_{val_acc:4.2%}.ckpt'))
    print(f'val_acc : {val_acc}')

#         print('labels')
#         torch.cat(label_list).unique(return_counts = True)
#         print('preds')
#         torch.cat(pred_list).unique(return_counts = True)

# Ad-hoc inspection of whatever `outputs` / `labels` were left behind by the
# most recently executed batch loop: predicted classes, true labels and the
# accuracy on that single batch.
print(torch.argmax(outputs.logits, dim = 1) )
print(labels)

correct = (torch.argmax(outputs.logits, dim = 1) == labels).sum().item()
# Bare expression: only displayed when it is the last statement of a notebook cell.
correct/len(labels)

# # model(**RE_train_dataset[0])
# input_ids = RE_train_dataset[0]['input_ids'].to(device)
# attention_mask = RE_train_dataset[0]['attention_mask'].to(device)
# labels = RE_train_dataset[0]['labels'].to(device)

# model(input_ids, attention_mask, labels)

# Plays a beep in the notebook front-end (autoplay) — presumably a
# "training finished" signal.
from IPython.display import Audio
Audio("https://upload.wikimedia.org/wikipedia/commons/0/05/Beep-09.ogg", autoplay=True)

# Per-class precision/recall/F1 over the labels and predictions accumulated in
# label_list / pred_list by the preceding validation pass.
print(classification_report(np.array(label_list),np.array(pred_list)))

# NOTE(review): `wkonow` is not defined anywhere in the visible code, so this
# line raises NameError. Presumably a deliberate tripwire to stop "Run All"
# before the experimental cells below — confirm, then replace with an explicit
# guard or delete.
wkonow

# labels = val_batch['labels'].to(device)

#         outputs = test_model(**val_inputs)
#         preds = torch.argmax(outputs.logits, dim = 1)

#         loss_item = criterion(outputs.logits, labels).item()
#         acc_item = (labels == preds).sum().item()
        
        
#         pred_cpu = preds.to('cpu')
#         label_list.extend(val_batch['labels'])
#         pred_list.extend(pred_cpu)
        
# Max logit of every sample in the most recent batch, zipped with its predicted
# label and its true label.
pred_max = outputs.logits.max(dim = 1)
pred_maxx = np.array(pred_max.values.to('cpu'))
predss = np.array(preds.to('cpu'))
# labels = np.array(labels.to('cpu'))
check = list(zip(pred_maxx, predss, labels))

# Only samples predicted as class 0 are kept; their max logit goes into one of
# two buckets depending on whether the prediction matched the label.
correct_val = []
incorrect_val = []
for _max_logit, _pred_label, _true_label in check:
    if _pred_label == 0:
        if _pred_label == _true_label:
            correct_val.append(_max_logit)
        else:
            incorrect_val.append(_max_logit)

corrects, incorrects = [], []
print(correct_val)
print(incorrect_val)

# validataion test

# inference
# Evaluate two saved checkpoints on the validation loader, individually
# (max-logit statistics) and as a summed-logit ensemble (loss / accuracy).
state_dict_dir = os.path.join('/opt/ml','Pstage/0421/results_xlm-roberta-base04-12/006_accuracy_72.44%.ckpt')
state_dict_dir2 = os.path.join('/opt/ml','Pstage/0421/results_xlm-roberta-base06-45/004_accuracy_78.99%.ckpt')


def _load_checkpoint_model(checkpoint_path):
    """Build a `model_name` classifier, load weights from `checkpoint_path`, return it in eval mode."""
    cfg = AutoConfig.from_pretrained(model_name, num_labels = num_classes)
    clf = AutoModelForSequenceClassification.from_pretrained(model_name, config = cfg)
    # NOTE(review): the training model is resized to len(tokenizer) after special
    # tokens are added; this one is not. Confirm the checkpoints were saved with
    # the un-resized vocabulary, otherwise load_state_dict fails on shape mismatch.
    # map_location lets GPU-saved checkpoints load on a CPU-only machine.
    clf.load_state_dict(torch.load(checkpoint_path, map_location = device))
    clf.to(device)
    clf.eval()
    return clf


def _split_max_logits(logits, preds, labels, keep):
    """Return (correct, incorrect) lists of max logits for the samples selected by boolean mask `keep`."""
    top = logits.max(dim = 1).values
    hit = preds == labels
    return top[keep & hit].tolist(), top[keep & ~hit].tolist()


test_model = _load_checkpoint_model(state_dict_dir)
test_model2 = _load_checkpoint_model(state_dict_dir2)

corrects, incorrects = [], []      # model 1, samples it predicted as class 0
corrects2, incorrects2 = [], []    # model 2, samples it predicted as a non-zero class
val_loss_items, val_acc_items = [], []
label_list, pred_list = [], []

print('Calculating validation results')
with torch.no_grad():
    for val_batch in val_loader:
        val_inputs = {key : value.to(device) for key, value in val_batch.items() if key != 'labels'}
        labels = val_batch['labels'].to(device)

        outputs = test_model(**val_inputs)
        outputs2 = test_model2(**val_inputs)
        preds1 = torch.argmax(outputs.logits, dim = 1)
        preds2 = torch.argmax(outputs2.logits, dim = 1)

        # NOTE(review): pred_list (and therefore cr2 below) holds model-1
        # predictions, while val_acc / val_loss describe the ensemble — confirm
        # which one the report is meant to cover.
        label_list.extend(val_batch['labels'].tolist())
        pred_list.extend(preds1.tolist())

        # The undefined `cr` entry was dropped: nothing assigns it before this
        # point, so logging it raised NameError on a fresh kernel.
        wandb.log({
            "val_preds" : preds1.to('cpu'),
            "val_labels" : val_batch['labels']
        })

        right, wrong = _split_max_logits(outputs.logits, preds1, labels, preds1 == 0)
        corrects.extend(right)
        incorrects.extend(wrong)

        right2, wrong2 = _split_max_logits(outputs2.logits, preds2, labels, preds2 != 0)
        corrects2.extend(right2)
        incorrects2.extend(wrong2)

        # Ensemble: sum the two models' logits and score the combined prediction.
        outputs3 = outputs.logits + outputs2.logits
        preds = torch.argmax(outputs3, dim = 1)

        loss_item = criterion(outputs3, labels).item()
        acc_item = (labels == preds).sum().item()

        val_loss_items.append(loss_item)
        val_acc_items.append(acc_item)

val_loss = np.sum(val_loss_items) / len(val_loader)
val_acc = np.sum(val_acc_items) / len(RE_val_dataset)
cr2 = classification_report(label_list,pred_list)
wandb.log({
    "val_acc" : 100.*val_acc,
    "val_loss" : val_loss,
    "classification_report" : cr2
})

# No checkpointing here: this pass evaluates saved weights, so writing the
# training `model` under the ensemble accuracy (and updating best_val_acc)
# would mislabel the checkpoint and corrupt the training bookkeeping.
print(f'val_acc : {val_acc}')

# print('labels')
# print(torch.cat(label_list).unique(return_counts = True))
# print('preds')
# print(torch.cat(pred_list).unique(return_counts = True)        )
# output = pd.DataFrame(np.array(output_pred).flatten(), columns=['pred'])
# output.to_csv('./submission_{}.csv'.format(time.strftime('%H-%M', time.localtime(time.time()))), index = False)

# Max-logit statistics collected by the two-checkpoint validation pass.
#   corrects / incorrects   : model 1, samples it predicted as class 0
#   corrects2 / incorrects2 : model 2, samples it predicted as a non-zero class
for _right, _wrong in ((corrects, incorrects), (corrects2, incorrects2)):
    print(len(_right))
    print(len(_wrong))
    print(np.average(_right))   # mean max logit when the prediction was right
    print(np.average(_wrong))   # mean max logit when the prediction was wrong


incorrects

train_dataset['label'].value_counts()

# (predicted class, max logit) per sample for the most recent batch of each model.
_logits1 = np.array(outputs.logits.to('cpu'))
_logits2 = np.array(outputs2.logits.to('cpu'))
out1 = [(np.argmax(row), np.max(row)) for row in _logits1]
out2 = [(np.argmax(row), np.max(row)) for row in _logits2]
print(labels)
print(out1)
print(out2)

# Combine two models' per-sample results into one label list.
# Each element of output1/output2 is indexed as [0] = predicted label,
# [1] = score compared against the 4.6 threshold (presumably the max logit,
# matching the (argmax, max) tuples built elsewhere — TODO confirm).
# NOTE(review): output1 / output2 are only assigned further down in this file,
# so on a fresh top-to-bottom run this loop raises NameError; it relies on an
# earlier out-of-order execution. The loop variables also rebind the
# module-level out1 / out2 lists.
answer = []
for out1, out2 in zip(output1,output2):
#     print(out1,out2)
    # Both models agree: take the shared label.
    if out1[0] == out2[0]:
#         print(f'answer : {out1[1]}')
        answer.append(out1[0])
        
    # Disagreement and model 1's score is below the threshold: use model 2's label.
    elif out1[1]<4.6:
        answer.append(out2[0])
        
    # Disagreement but model 1's score is at or above the threshold: keep model 1's label.
    else:
        answer.append(out1[0])
answer




# Summed logits of the two models for the most recent batch (displayed only when
# this is the last expression of a notebook cell).
outputs.logits + outputs2.logits

from sklearn.metrics import classification_report

# cf = confusion_matrix(val_batch['labels'], preds.to('cpu'))
# classification_report takes (y_true, y_pred); the arguments were passed the
# other way round, which swaps precision and recall in the printed table.
cr = classification_report(val_batch['labels'], preds.to('cpu'))
print(cr)
# cf









# makedata
# Run two saved checkpoints over val_loader and collect their raw per-sample
# logits in output1 / output2 (consumed below to build stacked features).
# NOTE(review): the checkpoint directories are named after xlm-roberta-large,
# while the models below are built from `model_name`; if `model_name` is not the
# same architecture the checkpoints were trained with, load_state_dict will fail
# on shape mismatch — confirm before running.
state_dict_dir1 = os.path.join('/opt/ml','Pstage/0421/results_xlm-roberta-large11-40/009_accuracy_77.78%.ckpt')
state_dict_dir2 = os.path.join('/opt/ml','Pstage/0421/1~results_xlm-roberta-large09-36/004_accuracy_84.90%.ckpt')

# NOTE(review): the test set is loaded and tokenized here, but the loop below
# iterates val_loader, so these three values are unused in this block.
test_data = load_data("/opt/ml/input/data/test/test.tsv")
test_label = test_data['label'].values

tokenized_test = tokenized_dataset(test_data, tokenizer)

# config = AutoConfig.from_pretrained(model_name, num_labels = num_classes)
# test_model = BertForSequenceClassification.from_pretrained(model_name, config= config)

# First checkpoint.
config2 = AutoConfig.from_pretrained(model_name, num_labels = num_classes)
test_model1 = AutoModelForSequenceClassification.from_pretrained(model_name, config = config2)

test_model1.load_state_dict(torch.load(state_dict_dir1))
test_model1.to(device)


# Second checkpoint, built the same way.
config2 = AutoConfig.from_pretrained(model_name, num_labels = num_classes)
test_model2 = AutoModelForSequenceClassification.from_pretrained(model_name, config = config2)

test_model2.load_state_dict(torch.load(state_dict_dir2))
test_model2.to(device)


# test_dataset = RE_Dataset(tokenized_test, test_label)

# test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

test_model1.eval()
test_model2.eval()
output1, output2 = [], []

answer = []

for data in tqdm(val_loader):
    with torch.no_grad():
        # Everything except the labels is forwarded to the models.
        inputs = {key : value.to(device) for key, value in data.items() if key != 'labels'}
        pred1 = test_model1(**inputs)
        pred2 = test_model2(**inputs)
        
#         out1 = list(map(lambda x : (np.argmax(x), np.max(x)), np.array(pred1.logits.to('cpu'))))
#         out2 = list(map(lambda x : (np.argmax(x), np.max(x)), np.array(pred2.logits.to('cpu'))))
        
# #         result = np.argmax(pred.logits.detach().cpu().numpy(), axis = -1)
# #         print(result)
#         output1.extend(out1)
#         output2.extend(out2)
        
#         out1 = list(map(lambda x : (np.argmax(x), np.max(x)), np.array(pred1.logits.to('cpu'))))
#         out2 = list(map(lambda x : (np.argmax(x), np.max(x)), np.array(pred2.logits.to('cpu'))))
#         print(labels)
#         print(out1)
#         print(out2)
        # extend() iterates the batch tensor along its first axis, so each list
        # gains one logit row per sample.
        output1.extend(pred1.logits.to('cpu'))
        output2.extend(pred2.logits.to('cpu'))
        
        

from IPython.display import Audio
# Plays a beep in the notebook front-end once the forward passes above are done.
Audio("https://upload.wikimedia.org/wikipedia/commons/0/05/Beep-09.ogg", autoplay=True)

print(len(output1))
print(len(output2))

print(len(val_label))

# output1

# output1 / output2 are Python lists of per-sample logit tensors, so each must
# be stacked into one tensor before torch.cat; passing the lists directly
# raises TypeError.
torch.cat((torch.stack(output1), torch.stack(output2)), 0)

# One feature row per sample: model-1 logits followed by model-2 logits.
make_valset = []
for zero_data, nonzero_data in zip(output1,output2):
    make_valset.append(torch.cat((zero_data,nonzero_data), -1))
make_valset

# NOTE(review): make_trainset is not created anywhere in the visible code;
# presumably it was built by running the logits dump over the training loader in
# an earlier session — confirm, otherwise the lines below raise NameError.
len(make_trainset)
len(make_trainset[-1])

# NOTE(review): make_valset is a list of tensors; check that the CSV cells come
# out as plain numbers rather than tensor reprs before consuming these files.
pd.DataFrame(make_trainset).to_csv('./make_trainset.csv', index = False)
pd.DataFrame(train_label).to_csv('./train_label.csv', index = False)
pd.DataFrame(make_valset).to_csv('./make_valset.csv', index = False)
pd.DataFrame(val_label).to_csv('./val_label.csv', index = False)






# inference
# Predict the test set with two checkpoints, merge their labels with an
# agreement / confidence rule, and write the submission CSV.
state_dict_dir1 = os.path.join('/opt/ml','Pstage/0421/results_xlm-roberta-large11-40/009_accuracy_77.78%.ckpt')
state_dict_dir2 = os.path.join('/opt/ml','Pstage/0421/1~results_xlm-roberta-large09-36/004_accuracy_84.90%.ckpt')

# When the models disagree, model 1 is trusted only if its max logit reaches
# this value. NOTE(review): 4.6 is carried over unchanged; its origin is not
# visible here — presumably tuned on the validation statistics.
MODEL1_MIN_TOP_LOGIT = 4.6


def _load_eval_model(checkpoint_path):
    """Build a `model_name` classifier, load weights from `checkpoint_path`, return it in eval mode."""
    cfg = AutoConfig.from_pretrained(model_name, num_labels = num_classes)
    clf = AutoModelForSequenceClassification.from_pretrained(model_name, config = cfg)
    # NOTE(review): the checkpoint directories are named after xlm-roberta-large;
    # `model_name` must be the architecture they were trained with or
    # load_state_dict fails on shape mismatch.
    # map_location lets GPU-saved checkpoints load on a CPU-only machine.
    clf.load_state_dict(torch.load(checkpoint_path, map_location = device))
    clf.to(device)
    clf.eval()
    return clf


def _top_predictions(logits):
    """Return a list of (predicted class, max logit) pairs, one per row of `logits`."""
    rows = logits.to('cpu').numpy()
    return list(zip(rows.argmax(axis = 1), rows.max(axis = 1)))


test_data = load_data("/opt/ml/input/data/test/test.tsv")
test_label = test_data['label'].values

tokenized_test = tokenized_dataset(test_data, tokenizer)
test_dataset = RE_Dataset(tokenized_test, test_label)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

test_model1 = _load_eval_model(state_dict_dir1)
test_model2 = _load_eval_model(state_dict_dir2)

output1, output2 = [], []

with torch.no_grad():
    for data in tqdm(test_loader):
        inputs = {key : value.to(device) for key, value in data.items() if key != 'labels'}
        pred1 = test_model1(**inputs)
        pred2 = test_model2(**inputs)

        output1.extend(_top_predictions(pred1.logits))
        output2.extend(_top_predictions(pred2.logits))

# Merge rule: take model 1's label unless the models disagree AND model 1's max
# logit is below the threshold, in which case take model 2's label.
answer = []
for (label1, top1), (label2, _top2) in zip(output1, output2):
    if label1 != label2 and top1 < MODEL1_MIN_TOP_LOGIT:
        answer.append(label2)
    else:
        answer.append(label1)

# print(output1)
output = pd.DataFrame(np.array(answer).flatten(), columns=['pred'])
output.to_csv('./submission_{}.csv'.format(time.strftime('%H-%M', time.localtime(time.time()))), index = False)

# Scratch check of the merge rule on the first pair only (note the trailing `break`).
# NOTE(review): this resets `answer` after the submission above was built from it.
# NOTE(review): the inference loop stores (predicted class, max logit) tuples, so
# index 1 here is the max logit and index 0 the class; this block compares the
# logits for equality and the class ids with `<`, which looks inverted relative
# to the rule used for the submission — presumably written for an earlier
# (value, label) tuple layout. Confirm or delete.
answer = []
for out1, out2 in zip(output1,output2):
    print(out1,out2)
    if out1[1] == out2[1]:
        print(f'answer : {out1[1]}')
        answer.append(out1[1])
        
    elif out1[0]<out2[0]:
        answer.append(out2[1])
        
    break

# NOTE(review): output_pred is only assigned inside commented-out code in the
# visible file, so this line raises NameError on a fresh run.
np.array(output_pred).flatten()