In [2]:
# !pip install sentencepiece
# !pip install mxnet
# !pip install gluonnlp==0.8.0
# !pip install tqdm pandas
# !pip install torch
# !pip install sentencepiece
# !pip install transformers

In [3]:
# !pip install 'git+https://github.com/SKTBrain/KoBERT.git#egg=kobert_tokenizer&subdirectory=kobert_hf'

In [4]:
# !pip install openpyxl

In [5]:
# %pip install ipywidgets

In [6]:
!jupyter nbextension enable --py widgetsnbextension

Enabling notebook extension jupyter-js-widgets/extension...
      - Validating: [32mOK[0m


In [7]:
from PIL import Image
import numpy as np
import os
import glob

In [8]:
import torch
import torchvision
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, models
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.optim.lr_scheduler import ReduceLROnPlateau

In [9]:
from kobert_tokenizer import KoBERTTokenizer
from transformers import BertModel
from transformers import AdamW
from transformers.optimization import get_cosine_schedule_with_warmup

In [10]:
import gluonnlp as nlp
import numpy as np
import pandas as pd
from tqdm.notebook import tqdm
import glob
import os
import openpyxl



In [11]:
device_type = 'cuda' if torch.cuda.is_available() else 'cpu'
device = torch.device(device_type)

각 폴더에서 이미지와 텍스트 저장한 폴더 위치

In [12]:
section_folders = glob.glob('/home2/jh981017/myubai/NaverNews/*')
section_folders

['/home2/jh981017/myubai/NaverNews/economy',
 '/home2/jh981017/myubai/NaverNews/life',
 '/home2/jh981017/myubai/NaverNews/politics',
 '/home2/jh981017/myubai/NaverNews/science',
 '/home2/jh981017/myubai/NaverNews/society',
 '/home2/jh981017/myubai/NaverNews/world']

In [13]:
root = '/home2/jh981017/myubai/NaverNews'
sections = os.listdir(root)
sections

['economy', 'life', 'politics', 'science', 'society', 'world']

텍스트 위치

In [14]:
text_paths = []
for folder, section in zip(section_folders, sections):
  text_path = folder + '/' + section + 'text1' + '.xlsx'
  text_paths.append(text_path)

text_paths

['/home2/jh981017/myubai/NaverNews/economy/economytext1.xlsx',
 '/home2/jh981017/myubai/NaverNews/life/lifetext1.xlsx',
 '/home2/jh981017/myubai/NaverNews/politics/politicstext1.xlsx',
 '/home2/jh981017/myubai/NaverNews/science/sciencetext1.xlsx',
 '/home2/jh981017/myubai/NaverNews/society/societytext1.xlsx',
 '/home2/jh981017/myubai/NaverNews/world/worldtext1.xlsx']

각 섹션별 인덱스 딕셔너리

In [15]:
# 각 섹션별 사용할 데이터의 인덱스가 담겨 있는 딕셔너리

idx_dictionary = {}

for section, text_path in zip(sections, text_paths):
  text = pd.read_excel(text_path)
  idx_section = list(text['idx'])

  idx_dictionary[section] = idx_section

In [16]:
len(idx_dictionary['economy'])

1888

In [17]:
np.random.seed(602)

cv_idx_dictionary = {}

for section in sections:
  cv_idx_section = list(np.random.choice(idx_dictionary[section], size = 1200, replace = False))
  cv_idx_section.sort()

  cv_idx_dictionary[section] = cv_idx_section

In [18]:
cv_idx_dictionary.keys()

dict_keys(['economy', 'life', 'politics', 'science', 'society', 'world'])

In [19]:
len(cv_idx_dictionary['economy'])

1200

In [20]:
new_idx_dictionary = {}

for section in sections:
  new_idx_section = [i for i in idx_dictionary[section] if i not in cv_idx_dictionary[section]]

  new_idx_dictionary[section] = new_idx_section

In [21]:
len(new_idx_dictionary['economy'])

688

label - y 페어

In [22]:
# label_to_y = {section : idx for idx, section in enumerate(sections)}
# label_to_y

In [23]:
label_to_y = {
    'politics': 0,
    'society': 1,
    'science': 2,
    'life': 3,
    'world': 4,
    'economy': 5
}
label_to_y

{'politics': 0,
 'society': 1,
 'science': 2,
 'life': 3,
 'world': 4,
 'economy': 5}

분석에 사용할 이미지 데이터 경로

In [24]:
# 분석에 사용할 모든 데이터들의 경로를 불러온다.

cv_data = []
new_data = []

for section_folder in section_folders:

  # 각 섹션 이름 가져와서 인덱스랑 합하기
  section = os.path.basename(section_folder)
  section_textpath = section_folder + '/' + section + 'text1' + '.xlsx' #정제한걸로했음
  section_texts = pd.read_excel(section_textpath)


  cv_indicies = cv_idx_dictionary[section]
  new_indicies = new_idx_dictionary[section]

  cv_condition = section_texts['idx'].isin(cv_indicies)
  new_condition = section_texts['idx'].isin(new_indicies)

  cv_annotations = section_texts.loc[cv_condition, 'annotation']
  new_annotations = section_texts.loc[new_condition, 'annotation']



  y = label_to_y[section]


  for cv_idx, cv_annotation in zip(cv_indicies, cv_annotations):
    imgname = section + str(cv_idx) + '.jpg'
    imgpath = os.path.join(section_folder, imgname)

    data = []
    data.append(cv_idx)
    data.append(imgpath)
    data.append(cv_annotation)
    data.append(y)

    cv_data.append(data)


  for new_idx, new_annotation in zip(new_indicies, new_annotations):
    imgname = section + str(new_idx) + '.jpg'
    imgpath = os.path.join(section_folder, imgname)

    data = []
    data.append(new_idx)
    data.append(imgpath)
    data.append(new_annotation)
    data.append(y)

    new_data.append(data)

In [25]:
len(cv_data)

7200

In [26]:
df_cv_data = pd.DataFrame(cv_data)
df_cv_data.columns = ['idx', 'imgpath', 'annotation', 'y']

In [27]:
df_new_data = pd.DataFrame(new_data)
df_new_data.columns = ['idx', 'imgpath', 'annotation', 'y']

데이터셋 정의하기

In [28]:
# MobileNet image transform

train_imgtransform = transforms.Compose([
    transforms.Resize((224, 224)),
    #transforms.Resize(256),
    #transforms.CenterCrop((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

test_imgtransform = transforms.Compose([
    transforms.Resize((224, 224)),
    #transforms.Resize(256),
    #transforms.CenterCrop((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

In [29]:
# BERT transform

class BERTSentenceTransform:
    r"""BERT style data transformation.

    Parameters
    ----------
    tokenizer : BERTTokenizer.
        Tokenizer for the sentences.
    max_seq_length : int.
        Maximum sequence length of the sentences.
    pad : bool, default True
        Whether to pad the sentences to maximum length.
    pair : bool, default True
        Whether to transform sentences or sentence pairs.
    """

    def __init__(self, tokenizer, max_seq_length,vocab, pad=True, pair=True):
        self._tokenizer = tokenizer
        self._max_seq_length = max_seq_length
        self._pad = pad
        self._pair = pair
        self._vocab = vocab

    def __call__(self, line):
        """Perform transformation for sequence pairs or single sequences.

        The transformation is processed in the following steps:
        - tokenize the input sequences
        - insert [CLS], [SEP] as necessary
        - generate type ids to indicate whether a token belongs to the first
        sequence or the second sequence.
        - generate valid length

        For sequence pairs, the input is a tuple of 2 strings:
        text_a, text_b.

        Inputs:
            text_a: 'is this jacksonville ?'
            text_b: 'no it is not'
        Tokenization:
            text_a: 'is this jack ##son ##ville ?'
            text_b: 'no it is not .'
        Processed:
            tokens: '[CLS] is this jack ##son ##ville ? [SEP] no it is not . [SEP]'
            type_ids: 0     0  0    0    0     0       0 0     1  1  1  1   1 1
            valid_length: 14

        For single sequences, the input is a tuple of single string:
        text_a.

        Inputs:
            text_a: 'the dog is hairy .'
        Tokenization:
            text_a: 'the dog is hairy .'
        Processed:
            text_a: '[CLS] the dog is hairy . [SEP]'
            type_ids: 0     0   0   0  0     0 0
            valid_length: 7

        Parameters
        ----------
        line: tuple of str
            Input strings. For sequence pairs, the input is a tuple of 2 strings:
            (text_a, text_b). For single sequences, the input is a tuple of single
            string: (text_a,).

        Returns
        -------
        np.array: input token ids in 'int32', shape (batch_size, seq_length)
        np.array: valid length in 'int32', shape (batch_size,)
        np.array: input token type ids in 'int32', shape (batch_size, seq_length)

        """

        # convert to unicode
        text_a = line[0]
        if self._pair:
            assert len(line) == 2
            text_b = line[1]

        tokens_a = self._tokenizer.tokenize(text_a)
        tokens_b = None

        if self._pair:
            tokens_b = self._tokenizer(text_b)

        if tokens_b:
            # Modifies `tokens_a` and `tokens_b` in place so that the total
            # length is less than the specified length.
            # Account for [CLS], [SEP], [SEP] with "- 3"
            self._truncate_seq_pair(tokens_a, tokens_b,
                                    self._max_seq_length - 3)
        else:
            # Account for [CLS] and [SEP] with "- 2"
            if len(tokens_a) > self._max_seq_length - 2:
                tokens_a = tokens_a[0:(self._max_seq_length - 2)]

        # The embedding vectors for `type=0` and `type=1` were learned during
        # pre-training and are added to the wordpiece embedding vector
        # (and position vector). This is not *strictly* necessary since
        # the [SEP] token unambiguously separates the sequences, but it makes
        # it easier for the model to learn the concept of sequences.

        # For classification tasks, the first vector (corresponding to [CLS]) is
        # used as as the "sentence vector". Note that this only makes sense because
        # the entire model is fine-tuned.
        #vocab = self._tokenizer.vocab
        vocab = self._vocab
        tokens = []
        tokens.append(vocab.cls_token)
        tokens.extend(tokens_a)
        tokens.append(vocab.sep_token)
        segment_ids = [0] * len(tokens)

        if tokens_b:
            tokens.extend(tokens_b)
            tokens.append(vocab.sep_token)
            segment_ids.extend([1] * (len(tokens) - len(segment_ids)))

        input_ids = self._tokenizer.convert_tokens_to_ids(tokens)

        # The valid length of sentences. Only real  tokens are attended to.
        valid_length = len(input_ids)

        if self._pad:
            # Zero-pad up to the sequence length.
            padding_length = self._max_seq_length - valid_length
            # use padding tokens for the rest
            input_ids.extend([vocab[vocab.padding_token]] * padding_length)
            segment_ids.extend([0] * padding_length)

        return np.array(input_ids, dtype='int32'), np.array(valid_length, dtype='int32'),\
            np.array(segment_ids, dtype='int32')

In [30]:
class RoBaMFFusionDataset(Dataset):
  def __init__(self, dataset, imgtransform, bert_tokenizer, vocab, max_len, pad, pair):

    # for MobileNet
    self.imgpaths = dataset['imgpath']
    self.imgtransform = imgtransform


    # for KoBERT
    texttransform = BERTSentenceTransform(bert_tokenizer, max_seq_length=max_len,vocab=vocab, pad=pad, pair=pair)
    self.kobertdata = dataset[['annotation', 'y']].values.tolist()
    self.sentences = [texttransform([i[0]]) for i in self.kobertdata]


    self.labels = [np.int32(i[1]) for i in self.kobertdata]


  def __getitem__(self, i):
    imgpath = self.imgpaths.iloc[i]

    img = Image.open(imgpath).convert('RGB')
    img = self.imgtransform(img)

    sentence = self.sentences[i]

    target = self.labels[i]

    ## sentence : (token_ids, valid_length, segment_ids) tuple
    return img, sentence, target

  def __len__(self):
    return len(self.labels)


In [31]:
tokenizer = KoBERTTokenizer.from_pretrained('skt/kobert-base-v1')

The tokenizer class you load from this checkpoint is not the same type as the class this function is called from. It may result in unexpected tokenization. 
The tokenizer class you load from this checkpoint is 'XLNetTokenizer'. 
The class this function is called from is 'KoBERTTokenizer'.


In [32]:
# kobert 공식 git에 있는 get_kobert_model 선언
def get_kobert_model(model_path, vocab_file, ctx="cpu"):
    bertmodel = BertModel.from_pretrained(model_path)
    device = torch.device(ctx)
    bertmodel.to(device)
    bertmodel.eval()
    vocab_b_obj = nlp.vocab.BERTVocab.from_sentencepiece(vocab_file,
                                                         padding_token='[PAD]')
    return bertmodel, vocab_b_obj


In [33]:
from transformers import BertModel
bertmodel, vocab = get_kobert_model('skt/kobert-base-v1', tokenizer.vocab_file)
tok = nlp.data.BERTSPTokenizer(tokenizer, vocab, lower = False)

코버트 특징추출기

In [34]:
class BERTFeatureExtractor(nn.Module):
    def __init__(self,
                 bert,
                 hidden_size = 768,
                 num_classes = 1024,   # Feature length : 1024
                 dr_rate = None,
                 params = None):
        super(BERTFeatureExtractor, self).__init__()
        self.bert = bert
        self.dr_rate = dr_rate

        self.classifier = nn.Linear(hidden_size , num_classes)
        if dr_rate:
            self.dropout = nn.Dropout(p = dr_rate)

    def gen_attention_mask(self, token_ids, valid_length):
        attention_mask = torch.zeros_like(token_ids)
        for i, v in enumerate(valid_length):
            attention_mask[i][:v] = 1
        return attention_mask.float()

    def forward(self, token_ids, valid_length, segment_ids):
        attention_mask = self.gen_attention_mask(token_ids, valid_length)

        _, pooler = self.bert(input_ids = token_ids, token_type_ids = segment_ids.long(), attention_mask = attention_mask.float().to(token_ids.device),return_dict = False)
        if self.dr_rate:
            out = self.dropout(pooler)
        return self.classifier(out)

In [35]:
kobert = BERTFeatureExtractor(bertmodel,  dr_rate = 0.5).to(device)

모바일넷v2 특징추출기

In [36]:
mobilenetv2 = models.mobilenet_v2(pretrained = 'IMAGENET1K_V2')

In [37]:
# 이놈이 FusionModel에서 적용이 안 됨. 왜지?

#fc = nn.Sequential(
#    nn.Linear(1024, 1024),
#    nn.ReLU(),
#    nn.MaxPool1d(kernel_size=2, stride=2, padding=0),
#    nn.Linear(512, 512),
#    nn.ReLU(),
#    nn.MaxPool1d(kernel_size=2, stride=2, padding=0),
#    nn.Linear(256, 128),
#)

#mobilenetv2.fc = fc

In [38]:
mobilenetv2

MobileNetV2(
  (features): Sequential(
    (0): ConvBNActivation(
      (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU6(inplace=True)
    )
    (1): InvertedResidual(
      (conv): Sequential(
        (0): ConvBNActivation(
          (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
          (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (2): ReLU6(inplace=True)
        )
        (1): Conv2d(32, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
    )
    (2): InvertedResidual(
      (conv): Sequential(
        (0): ConvBNActivation(
          (0): Conv2d(16, 96, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (1): BatchNorm2d(96, eps=1e-05, momen

모델 정의 : Feature Fusion

In [39]:
class FusionModel(nn.Module):
  def __init__(self, mobilenetv2, kobert):
    super(FusionModel, self).__init__()

    self.mobilenetv2 = mobilenetv2
    self.kobert = kobert

    self.fc_image = nn.Linear(1000, 1024)
    self.fc_text = nn.Linear(1024, 1024)

    self.classifier = nn.Sequential(
        nn.Linear(2048, 1024),
        nn.ReLU(),
        nn.Linear(1024, 1024),
        nn.ReLU(),
        nn.Linear(1024, 6),
        nn.Softmax(1)
    )

  def forward(self, img, token_ids, valid_length, segment_ids):
        # 이미지 특징 추출
        image_feature = self.mobilenetv2(img)
        image_feature = self.fc_image(image_feature)

        # 텍스트 특징 추출
        text_feature = self.kobert(token_ids, valid_length, segment_ids)
        text_feature = self.fc_text(text_feature)

        # 두 특징을 결합
        x = torch.cat((image_feature, text_feature), dim=1)

        # 분류기 적용
        x = self.classifier(x)

        return x

In [40]:
def calc_accuracy(X,Y):
    max_vals, max_indices = torch.max(X, 1)
    train_acc = (max_indices == Y).sum().data.cpu().numpy()/max_indices.size()[0]
    return train_acc

Stratified K-Fold CV

In [41]:
from sklearn.model_selection import StratifiedKFold

cv = StratifiedKFold(n_splits = 5, shuffle = True, random_state = 602)

In [42]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [43]:
import torch, gc
gc.collect()
torch.cuda.empty_cache()

In [44]:
list_train_idx = []
list_test_idx = []

for train_idx, test_idx in cv.split(df_cv_data[['annotation', 'imgpath']], df_cv_data['y']):
  list_train_idx.append(train_idx)
  list_test_idx.append(test_idx)

In [45]:
max_len = 150

In [46]:
fold = 0

list_test_history = []


for train_idx, test_idx in zip(list_train_idx, list_test_idx):

  fold += 1

  warmup_ratio = 0.1
  num_epochs = 15
  max_grad_norm = 1
  log_interval = 200
  learning_rate =  5e-5


  bertmodel, vocab = get_kobert_model('skt/kobert-base-v1', tokenizer.vocab_file)
  kobert = BERTFeatureExtractor(bertmodel,  dr_rate = 0.5).to(device)
  mobilenetv2 = models.mobilenet_v2(pretrained = 'IMAGENET1K_V2')

  model = FusionModel(mobilenetv2, kobert).to(device)

  train_data = df_cv_data.iloc[train_idx]
  test_data = df_cv_data.iloc[test_idx]

  train_dataset = RoBaMFFusionDataset(train_data, imgtransform = train_imgtransform, bert_tokenizer = tokenizer , vocab = vocab, max_len = max_len, pad = True, pair = False)
  test_dataset = RoBaMFFusionDataset(test_data, imgtransform = test_imgtransform, bert_tokenizer = tokenizer , vocab = vocab, max_len = max_len, pad = True, pair = False)

  train_dataloader = DataLoader(train_dataset, batch_size = 32, shuffle = True)
  test_dataloader = DataLoader(test_dataset, batch_size = 32, shuffle = True)


  no_decay = ['bias', 'LayerNorm.weight']
  optimizer_grouped_parameters = [
      {'params': [p for n, p in model.named_parameters() if not any(nd in n for nd in no_decay)], 'weight_decay': 0.01},
      {'params': [p for n, p in model.named_parameters() if any(nd in n for nd in no_decay)], 'weight_decay': 0.0}
  ]

  optimizer = AdamW(optimizer_grouped_parameters, lr = learning_rate)
  loss_fn = nn.CrossEntropyLoss() # 다중분류를 위한 loss function

  t_total = len(train_dataloader) * num_epochs
  warmup_step = int(t_total * warmup_ratio)

  scheduler = get_cosine_schedule_with_warmup(optimizer, num_warmup_steps = warmup_step, num_training_steps = t_total)



  train_history = []
  test_history = []
  loss_history = []

  for e in range(num_epochs):
      train_acc = 0.0
      test_acc = 0.0
      model.train()
      for batch_id, (img, (token_ids, valid_length, segment_ids), label) in enumerate(tqdm(train_dataloader)):
          optimizer.zero_grad()

          img = img.to(device)
          token_ids = token_ids.long().to(device)
          segment_ids = segment_ids.long().to(device)
          valid_length= valid_length
          label = label.long().to(device)
          out = model(img, token_ids, valid_length, segment_ids)

          # print(label.shape, out.shape)
          loss = loss_fn(out, label)
          loss.backward()
          torch.nn.utils.clip_grad_norm_(model.parameters(), max_grad_norm)
          optimizer.step()
          scheduler.step()  # Update learning rate schedule
          train_acc += calc_accuracy(out, label)
          if batch_id % log_interval == 0:
              print("epoch {} batch id {} loss {} train acc {}".format(e+1, batch_id+1, loss.data.cpu().numpy(), train_acc / (batch_id+1)))
              train_history.append(train_acc / (batch_id+1))
              loss_history.append(loss.data.cpu().numpy())
      print("epoch {} train acc {}".format(e+1, train_acc / (batch_id+1)))
      # train_history.append(train_acc / (batch_id+1))

      # .eval() : nn.Module에서 train time과 eval time에서 수행하는 다른 작업을 수행할 수 있도록 switching 하는 함수
      # 즉, model이 Dropout이나 BatNorm2d를 사용하는 경우, train 시에는 사용하지만 evaluation을 할 때에는 사용하지 않도록 설정해주는 함수
      model.eval()
      for batch_id, (img, (token_ids, valid_length, segment_ids), label) in enumerate(tqdm(test_dataloader)):
          img = img.to(device)
          token_ids = token_ids.long().to(device)
          segment_ids = segment_ids.long().to(device)
          valid_length = valid_length
          label = label.long().to(device)
          out = model(img, token_ids, valid_length, segment_ids)
          test_acc += calc_accuracy(out, label)
      print("epoch {} test acc {}".format(e+1, test_acc / (batch_id+1)))
      test_history.append(test_acc / (batch_id+1))


  # 모형 가중치 저장
  torch.save(model.state_dict(), f'/home2/jh981017/myubai/machinelearning/Model Weights/AnnotationFusionWeight{fold}.pth')

  list_test_history.append(test_history)




  0%|          | 0/180 [00:00<?, ?it/s]

epoch 1 batch id 1 loss 1.788845181465149 train acc 0.15625




epoch 1 train acc 0.2777777777777778


  0%|          | 0/45 [00:00<?, ?it/s]

epoch 1 test acc 0.43125


  0%|          | 0/180 [00:00<?, ?it/s]

epoch 2 batch id 1 loss 1.520765781402588 train acc 0.5
epoch 2 train acc 0.47708333333333336


  0%|          | 0/45 [00:00<?, ?it/s]

epoch 2 test acc 0.5215277777777778


  0%|          | 0/180 [00:00<?, ?it/s]

epoch 3 batch id 1 loss 1.6176252365112305 train acc 0.375
epoch 3 train acc 0.56875


  0%|          | 0/45 [00:00<?, ?it/s]

epoch 3 test acc 0.5847222222222223


  0%|          | 0/180 [00:00<?, ?it/s]

epoch 4 batch id 1 loss 1.4121330976486206 train acc 0.625
epoch 4 train acc 0.609375


  0%|          | 0/45 [00:00<?, ?it/s]

epoch 4 test acc 0.6104166666666667


  0%|          | 0/180 [00:00<?, ?it/s]

epoch 5 batch id 1 loss 1.374448537826538 train acc 0.6875
epoch 5 train acc 0.621875


  0%|          | 0/45 [00:00<?, ?it/s]

epoch 5 test acc 0.5354166666666667


  0%|          | 0/180 [00:00<?, ?it/s]

epoch 6 batch id 1 loss 1.4496021270751953 train acc 0.59375
epoch 6 train acc 0.6487847222222223


  0%|          | 0/45 [00:00<?, ?it/s]

epoch 6 test acc 0.6076388888888888


  0%|          | 0/180 [00:00<?, ?it/s]

epoch 7 batch id 1 loss 1.2758359909057617 train acc 0.75
epoch 7 train acc 0.6833333333333333


  0%|          | 0/45 [00:00<?, ?it/s]

epoch 7 test acc 0.6104166666666667


  0%|          | 0/180 [00:00<?, ?it/s]

epoch 8 batch id 1 loss 1.3266969919204712 train acc 0.71875
epoch 8 train acc 0.7107638888888889


  0%|          | 0/45 [00:00<?, ?it/s]

epoch 8 test acc 0.6333333333333333


  0%|          | 0/180 [00:00<?, ?it/s]

epoch 9 batch id 1 loss 1.2942078113555908 train acc 0.75
epoch 9 train acc 0.7368055555555556


  0%|          | 0/45 [00:00<?, ?it/s]

epoch 9 test acc 0.6256944444444444


  0%|          | 0/180 [00:00<?, ?it/s]

epoch 10 batch id 1 loss 1.3580223321914673 train acc 0.6875
epoch 10 train acc 0.7520833333333333


  0%|          | 0/45 [00:00<?, ?it/s]

epoch 10 test acc 0.6236111111111111


  0%|          | 0/180 [00:00<?, ?it/s]

epoch 11 batch id 1 loss 1.2822178602218628 train acc 0.78125
epoch 11 train acc 0.7652777777777777


  0%|          | 0/45 [00:00<?, ?it/s]

epoch 11 test acc 0.6270833333333333


  0%|          | 0/180 [00:00<?, ?it/s]

epoch 12 batch id 1 loss 1.1688281297683716 train acc 0.875
epoch 12 train acc 0.7789930555555555


  0%|          | 0/45 [00:00<?, ?it/s]

epoch 12 test acc 0.6256944444444444


  0%|          | 0/180 [00:00<?, ?it/s]

epoch 13 batch id 1 loss 1.200809359550476 train acc 0.84375
epoch 13 train acc 0.7907986111111112


  0%|          | 0/45 [00:00<?, ?it/s]

epoch 13 test acc 0.6270833333333333


  0%|          | 0/180 [00:00<?, ?it/s]

epoch 14 batch id 1 loss 1.2623547315597534 train acc 0.78125
epoch 14 train acc 0.7934027777777778


  0%|          | 0/45 [00:00<?, ?it/s]

epoch 14 test acc 0.6229166666666667


  0%|          | 0/180 [00:00<?, ?it/s]

epoch 15 batch id 1 loss 1.2038949728012085 train acc 0.84375
epoch 15 train acc 0.7958333333333333


  0%|          | 0/45 [00:00<?, ?it/s]

epoch 15 test acc 0.6243055555555556




  0%|          | 0/180 [00:00<?, ?it/s]

epoch 1 batch id 1 loss 1.7942864894866943 train acc 0.0625




epoch 1 train acc 0.2767361111111111


  0%|          | 0/45 [00:00<?, ?it/s]

epoch 1 test acc 0.4076388888888889


  0%|          | 0/180 [00:00<?, ?it/s]

epoch 2 batch id 1 loss 1.6873104572296143 train acc 0.375
epoch 2 train acc 0.4420138888888889


  0%|          | 0/45 [00:00<?, ?it/s]

epoch 2 test acc 0.44722222222222224


  0%|          | 0/180 [00:00<?, ?it/s]

epoch 3 batch id 1 loss 1.3766860961914062 train acc 0.6875
epoch 3 train acc 0.5423611111111111


  0%|          | 0/45 [00:00<?, ?it/s]

epoch 3 test acc 0.5416666666666666


  0%|          | 0/180 [00:00<?, ?it/s]

epoch 4 batch id 1 loss 1.4564402103424072 train acc 0.59375
epoch 4 train acc 0.5911458333333334


  0%|          | 0/45 [00:00<?, ?it/s]

epoch 4 test acc 0.5458333333333333


  0%|          | 0/180 [00:00<?, ?it/s]

epoch 5 batch id 1 loss 1.420843482017517 train acc 0.59375
epoch 5 train acc 0.6614583333333334


  0%|          | 0/45 [00:00<?, ?it/s]

epoch 5 test acc 0.5805555555555556


  0%|          | 0/180 [00:00<?, ?it/s]

epoch 6 batch id 1 loss 1.3762383460998535 train acc 0.65625
epoch 6 train acc 0.6963541666666667


  0%|          | 0/45 [00:00<?, ?it/s]

epoch 6 test acc 0.575


  0%|          | 0/180 [00:00<?, ?it/s]

epoch 7 batch id 1 loss 1.5160917043685913 train acc 0.53125
epoch 7 train acc 0.7145833333333333


  0%|          | 0/45 [00:00<?, ?it/s]

epoch 7 test acc 0.5729166666666666


  0%|          | 0/180 [00:00<?, ?it/s]

epoch 8 batch id 1 loss 1.275832176208496 train acc 0.75
epoch 8 train acc 0.7324652777777778


  0%|          | 0/45 [00:00<?, ?it/s]

epoch 8 test acc 0.5680555555555555


  0%|          | 0/180 [00:00<?, ?it/s]

epoch 9 batch id 1 loss 1.2352688312530518 train acc 0.8125
epoch 9 train acc 0.7701388888888889


  0%|          | 0/45 [00:00<?, ?it/s]

epoch 9 test acc 0.5722222222222222


  0%|          | 0/180 [00:00<?, ?it/s]

epoch 10 batch id 1 loss 1.259718418121338 train acc 0.78125
epoch 10 train acc 0.790625


  0%|          | 0/45 [00:00<?, ?it/s]

epoch 10 test acc 0.5902777777777778


  0%|          | 0/180 [00:00<?, ?it/s]

epoch 11 batch id 1 loss 1.360174536705017 train acc 0.6875
epoch 11 train acc 0.796875


  0%|          | 0/45 [00:00<?, ?it/s]

epoch 11 test acc 0.5923611111111111


  0%|          | 0/180 [00:00<?, ?it/s]

epoch 12 batch id 1 loss 1.1890501976013184 train acc 0.84375
epoch 12 train acc 0.8137152777777777


  0%|          | 0/45 [00:00<?, ?it/s]

epoch 12 test acc 0.5923611111111111


  0%|          | 0/180 [00:00<?, ?it/s]

epoch 13 batch id 1 loss 1.1681592464447021 train acc 0.875
epoch 13 train acc 0.8213541666666667


  0%|          | 0/45 [00:00<?, ?it/s]

epoch 13 test acc 0.5895833333333333


  0%|          | 0/180 [00:00<?, ?it/s]

epoch 14 batch id 1 loss 1.3108365535736084 train acc 0.71875
epoch 14 train acc 0.825


  0%|          | 0/45 [00:00<?, ?it/s]

epoch 14 test acc 0.5909722222222222


  0%|          | 0/180 [00:00<?, ?it/s]

epoch 15 batch id 1 loss 1.2108880281448364 train acc 0.84375
epoch 15 train acc 0.8274305555555556


  0%|          | 0/45 [00:00<?, ?it/s]

epoch 15 test acc 0.5909722222222222




  0%|          | 0/180 [00:00<?, ?it/s]

epoch 1 batch id 1 loss 1.7946147918701172 train acc 0.15625




epoch 1 train acc 0.2838541666666667


  0%|          | 0/45 [00:00<?, ?it/s]

epoch 1 test acc 0.3854166666666667


  0%|          | 0/180 [00:00<?, ?it/s]

epoch 2 batch id 1 loss 1.7318332195281982 train acc 0.21875
epoch 2 train acc 0.44722222222222224


  0%|          | 0/45 [00:00<?, ?it/s]

epoch 2 test acc 0.4791666666666667


  0%|          | 0/180 [00:00<?, ?it/s]

epoch 3 batch id 1 loss 1.5638691186904907 train acc 0.46875
epoch 3 train acc 0.5328125


  0%|          | 0/45 [00:00<?, ?it/s]

epoch 3 test acc 0.5208333333333334


  0%|          | 0/180 [00:00<?, ?it/s]

epoch 4 batch id 1 loss 1.4887256622314453 train acc 0.5625
epoch 4 train acc 0.6201388888888889


  0%|          | 0/45 [00:00<?, ?it/s]

epoch 4 test acc 0.5673611111111111


  0%|          | 0/180 [00:00<?, ?it/s]

epoch 5 batch id 1 loss 1.2938337326049805 train acc 0.75
epoch 5 train acc 0.6592013888888889


  0%|          | 0/45 [00:00<?, ?it/s]

epoch 5 test acc 0.5861111111111111


  0%|          | 0/180 [00:00<?, ?it/s]

epoch 6 batch id 1 loss 1.4220232963562012 train acc 0.625
epoch 6 train acc 0.6958333333333333


  0%|          | 0/45 [00:00<?, ?it/s]

epoch 6 test acc 0.6076388888888888


  0%|          | 0/180 [00:00<?, ?it/s]

epoch 7 batch id 1 loss 1.3520524501800537 train acc 0.6875
epoch 7 train acc 0.7163194444444444


  0%|          | 0/45 [00:00<?, ?it/s]

epoch 7 test acc 0.6152777777777778


  0%|          | 0/180 [00:00<?, ?it/s]

epoch 8 batch id 1 loss 1.2183791399002075 train acc 0.8125
epoch 8 train acc 0.7423611111111111


  0%|          | 0/45 [00:00<?, ?it/s]

epoch 8 test acc 0.6048611111111111


  0%|          | 0/180 [00:00<?, ?it/s]

epoch 9 batch id 1 loss 1.324764370918274 train acc 0.71875
epoch 9 train acc 0.7628472222222222


  0%|          | 0/45 [00:00<?, ?it/s]

epoch 9 test acc 0.6118055555555556


  0%|          | 0/180 [00:00<?, ?it/s]

epoch 10 batch id 1 loss 1.1370139122009277 train acc 0.90625
epoch 10 train acc 0.7805555555555556


  0%|          | 0/45 [00:00<?, ?it/s]

epoch 10 test acc 0.6236111111111111


  0%|          | 0/180 [00:00<?, ?it/s]

epoch 11 batch id 1 loss 1.3368698358535767 train acc 0.6875
epoch 11 train acc 0.7932291666666667


  0%|          | 0/45 [00:00<?, ?it/s]

epoch 11 test acc 0.6194444444444445


  0%|          | 0/180 [00:00<?, ?it/s]

epoch 12 batch id 1 loss 1.2310588359832764 train acc 0.8125
epoch 12 train acc 0.8043402777777777


  0%|          | 0/45 [00:00<?, ?it/s]

epoch 12 test acc 0.6319444444444444


  0%|          | 0/180 [00:00<?, ?it/s]

epoch 13 batch id 1 loss 1.2321362495422363 train acc 0.8125
epoch 13 train acc 0.8126736111111111


  0%|          | 0/45 [00:00<?, ?it/s]

epoch 13 test acc 0.6277777777777778


  0%|          | 0/180 [00:00<?, ?it/s]

epoch 14 batch id 1 loss 1.1996310949325562 train acc 0.84375
epoch 14 train acc 0.8154513888888889


  0%|          | 0/45 [00:00<?, ?it/s]

epoch 14 test acc 0.6277777777777778


  0%|          | 0/180 [00:00<?, ?it/s]

epoch 15 batch id 1 loss 1.0852913856506348 train acc 0.96875
epoch 15 train acc 0.8168402777777778


  0%|          | 0/45 [00:00<?, ?it/s]

epoch 15 test acc 0.6243055555555556




  0%|          | 0/180 [00:00<?, ?it/s]

epoch 1 batch id 1 loss 1.7945036888122559 train acc 0.1875




epoch 1 train acc 0.2689236111111111


  0%|          | 0/45 [00:00<?, ?it/s]

epoch 1 test acc 0.4409722222222222


  0%|          | 0/180 [00:00<?, ?it/s]

epoch 2 batch id 1 loss 1.5959125757217407 train acc 0.4375
epoch 2 train acc 0.4366319444444444


  0%|          | 0/45 [00:00<?, ?it/s]

epoch 2 test acc 0.47291666666666665


  0%|          | 0/180 [00:00<?, ?it/s]

epoch 3 batch id 1 loss 1.4310173988342285 train acc 0.625
epoch 3 train acc 0.5644097222222222


  0%|          | 0/45 [00:00<?, ?it/s]

epoch 3 test acc 0.5659722222222222


  0%|          | 0/180 [00:00<?, ?it/s]

epoch 4 batch id 1 loss 1.280019760131836 train acc 0.75
epoch 4 train acc 0.6131944444444445


  0%|          | 0/45 [00:00<?, ?it/s]

epoch 4 test acc 0.5798611111111112


  0%|          | 0/180 [00:00<?, ?it/s]

epoch 5 batch id 1 loss 1.2883039712905884 train acc 0.78125
epoch 5 train acc 0.6588541666666666


  0%|          | 0/45 [00:00<?, ?it/s]

epoch 5 test acc 0.5958333333333333


  0%|          | 0/180 [00:00<?, ?it/s]

epoch 6 batch id 1 loss 1.5106310844421387 train acc 0.53125
epoch 6 train acc 0.7060763888888889


  0%|          | 0/45 [00:00<?, ?it/s]

epoch 6 test acc 0.6034722222222222


  0%|          | 0/180 [00:00<?, ?it/s]

epoch 7 batch id 1 loss 1.359185814857483 train acc 0.6875
epoch 7 train acc 0.7157986111111111


  0%|          | 0/45 [00:00<?, ?it/s]

epoch 7 test acc 0.5895833333333333


  0%|          | 0/180 [00:00<?, ?it/s]

epoch 8 batch id 1 loss 1.2280573844909668 train acc 0.8125
epoch 8 train acc 0.7454861111111111


  0%|          | 0/45 [00:00<?, ?it/s]

epoch 8 test acc 0.6222222222222222


  0%|          | 0/180 [00:00<?, ?it/s]

epoch 9 batch id 1 loss 1.2671951055526733 train acc 0.78125
epoch 9 train acc 0.7616319444444445


  0%|          | 0/45 [00:00<?, ?it/s]

epoch 9 test acc 0.6111111111111112


  0%|          | 0/180 [00:00<?, ?it/s]

epoch 10 batch id 1 loss 1.3646984100341797 train acc 0.6875
epoch 10 train acc 0.7814236111111111


  0%|          | 0/45 [00:00<?, ?it/s]

epoch 10 test acc 0.6159722222222223


  0%|          | 0/180 [00:00<?, ?it/s]

epoch 11 batch id 1 loss 1.2614083290100098 train acc 0.78125
epoch 11 train acc 0.7960069444444444


  0%|          | 0/45 [00:00<?, ?it/s]

epoch 11 test acc 0.6208333333333333


  0%|          | 0/180 [00:00<?, ?it/s]

epoch 12 batch id 1 loss 1.2398916482925415 train acc 0.8125
epoch 12 train acc 0.8043402777777777


  0%|          | 0/45 [00:00<?, ?it/s]

epoch 12 test acc 0.6159722222222223


  0%|          | 0/180 [00:00<?, ?it/s]

epoch 13 batch id 1 loss 1.1075440645217896 train acc 0.9375
epoch 13 train acc 0.8135416666666667


  0%|          | 0/45 [00:00<?, ?it/s]

epoch 13 test acc 0.6131944444444445


  0%|          | 0/180 [00:00<?, ?it/s]

epoch 14 batch id 1 loss 1.3231744766235352 train acc 0.71875
epoch 14 train acc 0.8168402777777778


  0%|          | 0/45 [00:00<?, ?it/s]

epoch 14 test acc 0.6145833333333334


  0%|          | 0/180 [00:00<?, ?it/s]

epoch 15 batch id 1 loss 1.1145575046539307 train acc 0.9375
epoch 15 train acc 0.8192708333333333


  0%|          | 0/45 [00:00<?, ?it/s]

epoch 15 test acc 0.6159722222222223




  0%|          | 0/180 [00:00<?, ?it/s]

epoch 1 batch id 1 loss 1.7956881523132324 train acc 0.09375




epoch 1 train acc 0.28194444444444444


  0%|          | 0/45 [00:00<?, ?it/s]

epoch 1 test acc 0.4041666666666667


  0%|          | 0/180 [00:00<?, ?it/s]

epoch 2 batch id 1 loss 1.5703402757644653 train acc 0.46875
epoch 2 train acc 0.4810763888888889


  0%|          | 0/45 [00:00<?, ?it/s]

epoch 2 test acc 0.51875


  0%|          | 0/180 [00:00<?, ?it/s]

epoch 3 batch id 1 loss 1.4077978134155273 train acc 0.625
epoch 3 train acc 0.5932291666666667


  0%|          | 0/45 [00:00<?, ?it/s]

epoch 3 test acc 0.5708333333333333


  0%|          | 0/180 [00:00<?, ?it/s]

epoch 4 batch id 1 loss 1.5320348739624023 train acc 0.46875
epoch 4 train acc 0.5920138888888888


  0%|          | 0/45 [00:00<?, ?it/s]

epoch 4 test acc 0.5402777777777777


  0%|          | 0/180 [00:00<?, ?it/s]

epoch 5 batch id 1 loss 1.4166157245635986 train acc 0.625
epoch 5 train acc 0.6359375


  0%|          | 0/45 [00:00<?, ?it/s]

epoch 5 test acc 0.5972222222222222


  0%|          | 0/180 [00:00<?, ?it/s]

epoch 6 batch id 1 loss 1.3641077280044556 train acc 0.6875
epoch 6 train acc 0.6717013888888889


  0%|          | 0/45 [00:00<?, ?it/s]

epoch 6 test acc 0.6215277777777778


  0%|          | 0/180 [00:00<?, ?it/s]

epoch 7 batch id 1 loss 1.3618711233139038 train acc 0.6875
epoch 7 train acc 0.7015625


  0%|          | 0/45 [00:00<?, ?it/s]

epoch 7 test acc 0.6159722222222223


  0%|          | 0/180 [00:00<?, ?it/s]

epoch 8 batch id 1 loss 1.2727162837982178 train acc 0.78125
epoch 8 train acc 0.7276041666666667


  0%|          | 0/45 [00:00<?, ?it/s]

epoch 8 test acc 0.625


  0%|          | 0/180 [00:00<?, ?it/s]

epoch 9 batch id 1 loss 1.28423011302948 train acc 0.75
epoch 9 train acc 0.7451388888888889


  0%|          | 0/45 [00:00<?, ?it/s]

epoch 9 test acc 0.6270833333333333


  0%|          | 0/180 [00:00<?, ?it/s]

epoch 10 batch id 1 loss 1.2617595195770264 train acc 0.78125
epoch 10 train acc 0.7534722222222222


  0%|          | 0/45 [00:00<?, ?it/s]

epoch 10 test acc 0.6159722222222223


  0%|          | 0/180 [00:00<?, ?it/s]

epoch 11 batch id 1 loss 1.3624929189682007 train acc 0.6875
epoch 11 train acc 0.7647569444444444


  0%|          | 0/45 [00:00<?, ?it/s]

epoch 11 test acc 0.6263888888888889


  0%|          | 0/180 [00:00<?, ?it/s]

epoch 12 batch id 1 loss 1.3251214027404785 train acc 0.71875
epoch 12 train acc 0.7753472222222222


  0%|          | 0/45 [00:00<?, ?it/s]

epoch 12 test acc 0.6215277777777778


  0%|          | 0/180 [00:00<?, ?it/s]

epoch 13 batch id 1 loss 1.2291791439056396 train acc 0.8125
epoch 13 train acc 0.7862847222222222


  0%|          | 0/45 [00:00<?, ?it/s]

epoch 13 test acc 0.6215277777777778


  0%|          | 0/180 [00:00<?, ?it/s]

epoch 14 batch id 1 loss 1.2586429119110107 train acc 0.78125
epoch 14 train acc 0.7902777777777777


  0%|          | 0/45 [00:00<?, ?it/s]

epoch 14 test acc 0.6236111111111111


  0%|          | 0/180 [00:00<?, ?it/s]

epoch 15 batch id 1 loss 1.323671579360962 train acc 0.71875
epoch 15 train acc 0.79375


  0%|          | 0/45 [00:00<?, ?it/s]

epoch 15 test acc 0.6215277777777778


In [47]:
df_test_history = pd.DataFrame(list_test_history)
df_test_history.to_csv('/home2/jh981017/myubai/machinelearning/Baseline CVs/Annotation Fusion.csv')

In [48]:
df_test_history

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14
0,0.43125,0.521528,0.584722,0.610417,0.535417,0.607639,0.610417,0.633333,0.625694,0.623611,0.627083,0.625694,0.627083,0.622917,0.624306
1,0.407639,0.447222,0.541667,0.545833,0.580556,0.575,0.572917,0.568056,0.572222,0.590278,0.592361,0.592361,0.589583,0.590972,0.590972
2,0.385417,0.479167,0.520833,0.567361,0.586111,0.607639,0.615278,0.604861,0.611806,0.623611,0.619444,0.631944,0.627778,0.627778,0.624306
3,0.440972,0.472917,0.565972,0.579861,0.595833,0.603472,0.589583,0.622222,0.611111,0.615972,0.620833,0.615972,0.613194,0.614583,0.615972
4,0.404167,0.51875,0.570833,0.540278,0.597222,0.621528,0.615972,0.625,0.627083,0.615972,0.626389,0.621528,0.621528,0.623611,0.621528


New Data Prediction

In [49]:
max_len = 150

In [50]:
warmup_ratio = 0.1
num_epochs = 12
max_grad_norm = 1
log_interval = 200
learning_rate =  5e-5

In [52]:
bertmodel, vocab = get_kobert_model('skt/kobert-base-v1', tokenizer.vocab_file)
kobert = BERTFeatureExtractor(bertmodel,  dr_rate = 0.5).to(device)
# mobilenetv2 = models.mobilenet_v2(weights = 'DEFAULT')
mobilenetv2 = models.mobilenet_v2(pretrained = 'IMAGENET1K_V2')

model = FusionModel(mobilenetv2, kobert).to(device)

train_data = df_cv_data
test_data = df_new_data

train_dataset = RoBaMFFusionDataset(train_data, imgtransform = train_imgtransform, bert_tokenizer = tokenizer , vocab = vocab, max_len = max_len, pad = True, pair = False)
test_dataset = RoBaMFFusionDataset(test_data, imgtransform = test_imgtransform, bert_tokenizer = tokenizer , vocab = vocab, max_len = max_len, pad = True, pair = False)

train_dataloader = DataLoader(train_dataset, batch_size = 32, shuffle = True)
test_dataloader = DataLoader(test_dataset, batch_size = 32, shuffle = True)


no_decay = ['bias', 'LayerNorm.weight']
optimizer_grouped_parameters = [
    {'params': [p for n, p in model.named_parameters() if not any(nd in n for nd in no_decay)], 'weight_decay': 0.01},
    {'params': [p for n, p in model.named_parameters() if any(nd in n for nd in no_decay)], 'weight_decay': 0.0}
]

optimizer = AdamW(optimizer_grouped_parameters, lr = learning_rate)
loss_fn = nn.CrossEntropyLoss() # 다중분류를 위한 loss function

t_total = len(train_dataloader) * num_epochs
warmup_step = int(t_total * warmup_ratio)

scheduler = get_cosine_schedule_with_warmup(optimizer, num_warmup_steps = warmup_step, num_training_steps = t_total)



train_history = []
test_history = []
loss_history = []

for e in range(num_epochs):
    train_acc = 0.0
    test_acc = 0.0
    model.train()
    for batch_id, (img, (token_ids, valid_length, segment_ids), label) in enumerate(tqdm(train_dataloader)):
        optimizer.zero_grad()

        img = img.to(device)
        token_ids = token_ids.long().to(device)
        segment_ids = segment_ids.long().to(device)
        valid_length= valid_length
        label = label.long().to(device)
        out = model(img, token_ids, valid_length, segment_ids)

        # print(label.shape, out.shape)
        loss = loss_fn(out, label)
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_grad_norm)
        optimizer.step()
        scheduler.step()  # Update learning rate schedule
        train_acc += calc_accuracy(out, label)
        if batch_id % log_interval == 0:
            print("epoch {} batch id {} loss {} train acc {}".format(e+1, batch_id+1, loss.data.cpu().numpy(), train_acc / (batch_id+1)))
            train_history.append(train_acc / (batch_id+1))
            loss_history.append(loss.data.cpu().numpy())
    print("epoch {} train acc {}".format(e+1, train_acc / (batch_id+1)))
    # train_history.append(train_acc / (batch_id+1))

    # .eval() : nn.Module에서 train time과 eval time에서 수행하는 다른 작업을 수행할 수 있도록 switching 하는 함수
    # 즉, model이 Dropout이나 BatNorm2d를 사용하는 경우, train 시에는 사용하지만 evaluation을 할 때에는 사용하지 않도록 설정해주는 함수
    model.eval()
    for batch_id, (img, (token_ids, valid_length, segment_ids), label) in enumerate(tqdm(test_dataloader)):
        img = img.to(device)
        token_ids = token_ids.long().to(device)
        segment_ids = segment_ids.long().to(device)
        valid_length = valid_length
        label = label.long().to(device)
        out = model(img, token_ids, valid_length, segment_ids)
        test_acc += calc_accuracy(out, label)
    print("epoch {} test acc {}".format(e+1, test_acc / (batch_id+1)))
    test_history.append(test_acc / (batch_id+1))



  0%|          | 0/225 [00:00<?, ?it/s]

epoch 1 batch id 1 loss 1.7909997701644897 train acc 0.1875




epoch 1 batch id 201 loss 1.5934830904006958 train acc 0.316386815920398
epoch 1 train acc 0.3308333333333333


  0%|          | 0/179 [00:00<?, ?it/s]

epoch 1 test acc 0.4248719739292365


  0%|          | 0/225 [00:00<?, ?it/s]

epoch 2 batch id 1 loss 1.4789628982543945 train acc 0.5625
epoch 2 batch id 201 loss 1.4176770448684692 train acc 0.5290733830845771
epoch 2 train acc 0.5320833333333334


  0%|          | 0/179 [00:00<?, ?it/s]

epoch 2 test acc 0.5268854748603352


  0%|          | 0/225 [00:00<?, ?it/s]

epoch 3 batch id 1 loss 1.335497260093689 train acc 0.71875
epoch 3 batch id 201 loss 1.3021266460418701 train acc 0.6047885572139303
epoch 3 train acc 0.6054166666666667


  0%|          | 0/179 [00:00<?, ?it/s]

epoch 3 test acc 0.5471368715083799


  0%|          | 0/225 [00:00<?, ?it/s]

epoch 4 batch id 1 loss 1.5135822296142578 train acc 0.5
epoch 4 batch id 201 loss 1.3833659887313843 train acc 0.6201803482587065
epoch 4 train acc 0.6152777777777778


  0%|          | 0/179 [00:00<?, ?it/s]

epoch 4 test acc 0.5420158286778398


  0%|          | 0/225 [00:00<?, ?it/s]

epoch 5 batch id 1 loss 1.322664499282837 train acc 0.71875
epoch 5 batch id 201 loss 1.3966560363769531 train acc 0.6551616915422885
epoch 5 train acc 0.6559722222222222


  0%|          | 0/179 [00:00<?, ?it/s]

epoch 5 test acc 0.5827513966480447


  0%|          | 0/225 [00:00<?, ?it/s]

epoch 6 batch id 1 loss 1.2477887868881226 train acc 0.8125
epoch 6 batch id 201 loss 1.4989666938781738 train acc 0.6777052238805971
epoch 6 train acc 0.6790277777777778


  0%|          | 0/179 [00:00<?, ?it/s]

epoch 6 test acc 0.589851024208566


  0%|          | 0/225 [00:00<?, ?it/s]

epoch 7 batch id 1 loss 1.292802095413208 train acc 0.75
epoch 7 batch id 201 loss 1.2635866403579712 train acc 0.6962064676616916
epoch 7 train acc 0.6956944444444444


  0%|          | 0/179 [00:00<?, ?it/s]

epoch 7 test acc 0.6109753258845437


  0%|          | 0/225 [00:00<?, ?it/s]

epoch 8 batch id 1 loss 1.184408187866211 train acc 0.84375
epoch 8 batch id 201 loss 1.3016223907470703 train acc 0.7232587064676617
epoch 8 train acc 0.7240277777777778


  0%|          | 0/179 [00:00<?, ?it/s]

epoch 8 test acc 0.6222067039106145


  0%|          | 0/225 [00:00<?, ?it/s]

epoch 9 batch id 1 loss 1.4197204113006592 train acc 0.625
epoch 9 batch id 201 loss 1.3766175508499146 train acc 0.7439365671641791
epoch 9 train acc 0.7452777777777778


  0%|          | 0/179 [00:00<?, ?it/s]

epoch 9 test acc 0.6285498137802608


  0%|          | 0/225 [00:00<?, ?it/s]

epoch 10 batch id 1 loss 1.3019431829452515 train acc 0.75
epoch 10 batch id 201 loss 1.2153455018997192 train acc 0.7548196517412935
epoch 10 train acc 0.7580555555555556


  0%|          | 0/179 [00:00<?, ?it/s]

epoch 10 test acc 0.6268621973929236


  0%|          | 0/225 [00:00<?, ?it/s]

epoch 11 batch id 1 loss 1.1341066360473633 train acc 0.90625
epoch 11 batch id 201 loss 1.411528468132019 train acc 0.7686567164179104
epoch 11 train acc 0.7679166666666667


  0%|          | 0/179 [00:00<?, ?it/s]

epoch 11 test acc 0.6278514897579144


  0%|          | 0/225 [00:00<?, ?it/s]

epoch 12 batch id 1 loss 1.0749378204345703 train acc 0.96875
epoch 12 batch id 201 loss 1.28717041015625 train acc 0.7761194029850746
epoch 12 train acc 0.7729166666666667


  0%|          | 0/179 [00:00<?, ?it/s]

epoch 12 test acc 0.6282006517690876


In [53]:
df_test_history = pd.DataFrame(test_history)
df_test_history.to_csv('/home2/jh981017/myubai/machinelearning/Baseline CVs/Annotation Fusion_new.csv')