### Setting.py

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
!pip install transformers

Collecting transformers
  Downloading transformers-4.16.2-py3-none-any.whl (3.5 MB)
[K     |████████████████████████████████| 3.5 MB 16.3 MB/s 
[?25hCollecting tokenizers!=0.11.3,>=0.10.1
  Downloading tokenizers-0.11.5-cp37-cp37m-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (6.8 MB)
[K     |████████████████████████████████| 6.8 MB 72.7 MB/s 
[?25hCollecting pyyaml>=5.1
  Downloading PyYAML-6.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (596 kB)
[K     |████████████████████████████████| 596 kB 62.7 MB/s 
Collecting huggingface-hub<1.0,>=0.1.0
  Downloading huggingface_hub-0.4.0-py3-none-any.whl (67 kB)
[K     |████████████████████████████████| 67 kB 5.9 MB/s 
Collecting sacremoses
  Downloading sacremoses-0.0.47-py2.py3-none-any.whl (895 kB)
[K     |████████████████████████████████| 895 kB 76.3 MB/s 
Installing collected packages: pyyaml, tokenizers, sacremoses, huggingface-hub, transformers
  Attempting uninstall: pyyaml
 

In [3]:
import pandas as pd 
import numpy as np 
import os
import torch
import torch.nn as nn

import warnings 
warnings.filterwarnings("ignore")
from tqdm import tqdm
from torch.nn import functional as F
from torch.utils.data import DataLoader, Dataset
from transformers import AutoModel, AutoTokenizer, AdamW
from transformers.optimization import get_cosine_schedule_with_warmup, get_linear_schedule_with_warmup
import re
from sklearn.model_selection import train_test_split

In [4]:
# Random Seed Fix
import random
def seed_everything(seed: int = 42):
    random.seed(seed)
    np.random.seed(seed)
    os.environ["PYTHONHASHSEED"] = str(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)  
    torch.backends.cudnn.deterministic = True  
    torch.backends.cudnn.benchmark = True  
seed_everything()

In [5]:
def load_data(path):
  TRAIN = os.path.join(path, 'train_data.csv')
  TEST = os.path.join(path, 'test_data.csv')
  SS = os.path.join(path, 'sample_submission.csv')
  label_dict = {"entailment" : 0, "contradiction" : 1, "neutral" : 2}
  train = pd.read_csv(TRAIN)
  test = pd.read_csv(TEST)
  sample_submission = pd.read_csv(SS)
  train['label'] = train['label'].map(label_dict)

  return train,test,sample_submission

def text_clean(df):
  #=df["premise_"] = "<s>" + df["premise"].astype(str) + "[SEP]"
  df["premise_"] = "[CLS]"+df["premise"].astype(str)
  #df["hypothesis_"] = df["hypothesis"].astype(str) + "[SEP]"
  df["hypothesis_"] = df["hypothesis"].astype(str) + "[SEP]"
  df["text_sum"] = df.premise_ + "[SEP]" + df.hypothesis_
  df = df[['text_sum','label']]
  return df 

def random_deletion(sentence, p=0.2):
    words = sentence.split ()
    n = len (words)
    if n == 1: # return if single word
        return words
    remaining = list(filter(lambda x: random.uniform(0,1) > p,words))
    #print (remaining) 
    if len(remaining) == 0: # if not left, sample a random word
        return ' '.join ([random.choice(words)])
    else:
        return ' '.join (remaining)

def random_swap(sentence, n=2):
    sentence = sentence.split () 
    length = range(len(sentence))
    swapped = []
    if len(sentence) >2:
      for _ in range(n):
          idx1, idx2 = random.sample(length, 2)
          swapped.append ([sentence[idx1], sentence[idx2]])
          sentence[idx1], sentence[idx2] = sentence[idx2], sentence[idx1] 
    return ' '.join (sentence)

def eda_aug(df):

    cache = {'premise':[], 'hypothesis':[], 'label':[]}
    for idx in tqdm(range(len(df))):
        premise = df.iloc[idx]['premise']
        hypothesis = df.iloc[idx]['hypothesis']
        label = df.iloc[idx]['label']
        cache['premise'].append(premise)
        cache['hypothesis'].append(hypothesis)
        cache['label'].append(label)
        flag = random.randrange(10)
        if flag < 2:
          cache['premise'].append(random_deletion(premise))
          cache['hypothesis'].append(random_deletion(hypothesis))
          cache['label'].append(label)
          cache['premise'].append(random_swap(premise))
          cache['hypothesis'].append(random_swap(hypothesis))
          cache['label'].append(label)
    
    return pd.DataFrame(cache)

### Dataset.py

In [6]:
#ROOT = '/content/drive/MyDrive/DACON_MONTHLYNLI'
DATA = '/content/drive/MyDrive/DACON/sentence_relation/'
train,test,sample_submission = load_data(DATA)
###### AUGMENTATION ######
#train = eda_aug(train)
###### AUGMENTATION ######

clean_train,clean_test  = text_clean(train),text_clean(test)
display(clean_train)

Unnamed: 0,text_sum,label
0,"[CLS]씨름은 상고시대로부터 전해져 내려오는 남자들의 대표적인 놀이로서, 소년이나...",1
1,[CLS]삼성은 자작극을 벌인 2명에게 형사 고소 등의 법적 대응을 검토 중이라고 ...,1
2,[CLS]이를 위해 예측적 범죄예방 시스템을 구축하고 고도화한다.[SEP]예측적 범...,0
3,[CLS]광주광역시가 재개발 정비사업 원주민들에 대한 종합대책을 마련하는 등 원주민...,2
4,"[CLS]진정 소비자와 직원들에게 사랑 받는 기업으로 오래 지속되고 싶으면, 이런 ...",2
...,...,...
24993,"[CLS]오페라에 비하여 오라토리오에서는 독창보다도 합창이 중시되며, 테스토 또는 ...",2
24994,[CLS]지하철역까지 걸어서 5분 정도 걸립니다.[SEP]지하철역까지 도보로 5분 ...,0
24995,[CLS]한편 이날 중앙방역대책본부는 집단 감염이 발생한 음식점 관련 역학조사 결과...,1
24996,[CLS]마미손이 랩을 하자 시청자들은 그의 정체를 파악했다.[SEP]시청자들은 마...,0


In [7]:
model_roberta = AutoModel.from_pretrained("klue/roberta-large")
tokenizer_roberta = AutoTokenizer.from_pretrained("klue/roberta-large")

Downloading:   0%|          | 0.00/547 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/1.25G [00:00<?, ?B/s]

Some weights of the model checkpoint at klue/roberta-large were not used when initializing RobertaModel: ['lm_head.layer_norm.bias', 'lm_head.layer_norm.weight', 'lm_head.dense.weight', 'lm_head.bias', 'lm_head.dense.bias', 'lm_head.decoder.weight', 'lm_head.decoder.bias']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaModel were not initialized from the model checkpoint at klue/roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it f

Downloading:   0%|          | 0.00/375 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/243k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/734k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/173 [00:00<?, ?B/s]

In [8]:
def roberta_transform(text):
  transform = tokenizer_roberta(text,
                                pad_to_max_length=True,
                               truncation=True,
                               max_length=256,
                               return_tensors='pt',
                                add_special_tokens=False)
  return transform

In [9]:
class customDataset(Dataset):
  def __init__(self,dataset,mode='train',transform=roberta_transform):
    super(customDataset, self).__init__()
    self.mode = mode
    self.dataset = dataset.reset_index(drop=True)
    self.transform = transform

  def __getitem__(self, idx):
    
    text = self.dataset['text_sum'][idx]
    tokens = self.transform(text)
    token_ids = tokens['input_ids'][0]  # tensor of token ids
    attn_masks = tokens['attention_mask'][0]  # binary tensor with "0" for padded values and "1" for the other values
    token_type_ids = tokens['token_type_ids'][0]  # binary tensor with "0" for the 1st sentence tokens & "1" for the 2nd sentence tokens

    if self.mode == 'test':
      return token_ids,attn_masks,token_type_ids
    else: 
      labels = self.dataset['label'][idx]
      return token_ids,attn_masks,token_type_ids, labels
  
  def __len__(self):
    return(len(self.dataset))

### Model.py

In [10]:
class ROBERTaClassifier(nn.Module):
    def __init__(self,
                 bert,
                 hidden_size = 1024,
                 num_classes=3,   ##클래스 수 조정##
                 params=None,
                 freeze_bert=False):
        super(ROBERTaClassifier, self).__init__()
        self.bert = bert
        self.freeze_bert=freeze_bert

        if self.freeze_bert:
            for p in self.bert.parameters():
                p.requires_grad = False

                 
        self.classifier = nn.Linear(hidden_size , 256)
        self.dropout = nn.Dropout(p=0.5)
        self.fc_layer1 = nn.Linear(256,128)
        self.fc_layer2 = nn.Linear(128,num_classes)
    

    def forward(self, input_ids, attn_masks):
        
        _,pooler = self.bert(input_ids, attn_masks, return_dict=False)
        output1 = self.classifier(pooler)
        output2 = self.fc_layer1(output1)
        output3 = self.fc_layer2(self.dropout(output2))
        return (output3)

### Train.py

In [11]:
device = torch.device("cuda")
!nvidia-smi

Sun Feb 27 00:55:13 2022       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 460.32.03    Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla V100-SXM2...  Off  | 00000000:00:04.0 Off |                    0 |
| N/A   34C    P0    24W / 300W |      0MiB / 16160MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [12]:
###### HYPERPARMS ######
lr = 2e-5

batch_size= 12
warmup_ratio = 0.06
num_epochs = 15

log_interval = 500
#learning_rate = 5e-6

In [13]:
############### CV ################
from sklearn.model_selection import StratifiedKFold

skf = StratifiedKFold(n_splits = 5,shuffle=True,random_state=42)
folds=[]
for trn_idx,val_idx in skf.split(clean_train['text_sum'], train['label']):
    folds.append((trn_idx,val_idx))

In [14]:


############### CV Training ###############
best_models = []


for i,fold in enumerate(range(0,5)):
    print('===============',i+1,'fold start===============')
    model = ROBERTaClassifier(model_roberta).to(device)
    model=nn.DataParallel(model).to(device)
    optimizer = AdamW(model.parameters(), lr=lr)

    train_idx = folds[fold][0]
    valid_idx = folds[fold][1]
    train_data = train.loc[train_idx]
    val_data = train.loc[valid_idx]
    train_dataset = customDataset(train_data,'train')
    valid_dataset = customDataset(val_data,'train')
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    valid_loader = DataLoader(valid_dataset, batch_size=batch_size, shuffle=False)
    total_steps = len(train_loader) * num_epochs
    warmup_step = int(total_steps * warmup_ratio)
    scheduler = get_cosine_schedule_with_warmup(optimizer, num_warmup_steps=5, num_training_steps=total_steps)

    
    for epoch in range(num_epochs):
        batches = 0
        total_loss = 0.0
        correct = 0
        total =0
        model.train()
        
        for input_ids_batch, attention_masks_batch, token_type_ids_batch, y_batch in tqdm(train_loader):
            optimizer.zero_grad()
            y_batch = y_batch.to(device)
            y_pred = model(input_ids_batch.to(device),attn_masks= attention_masks_batch.to(device))
            loss = F.cross_entropy(y_pred, y_batch)
            loss.backward()
            #torch.nn.utils.clip_grad_norm_(model.parameters(), max_grad_norm)
            optimizer.step()
            scheduler.step()
            total_loss += loss.item()
            _, predicted = torch.max(y_pred, 1)
            correct += (predicted == y_batch).sum()
            total += len(y_batch)
            batches += 1
            if batches % log_interval == 0:
                print("Batch Loss: ", total_loss / batches, "Accuracy: ", correct.float() / total)
        
        val_loss = []
        val_acc = []
        
        for input_ids_batch, attention_masks_batch, token_type_ids, y_batch in tqdm(valid_loader):
            
            model.eval()
            with torch.no_grad():
                
                y_pred = model(input_ids_batch.to(device),attn_masks= attention_masks_batch.to(device))
                valid_loss = F.cross_entropy(y_pred,y_batch.to(device)).cpu().detach().numpy()

                preds = torch.argmax(y_pred,1)
                preds = preds.cpu().detach().numpy()
                y_batch = y_batch.cpu().detach().numpy()
                batch_acc = (preds==y_batch).mean()
                val_loss.append(valid_loss)
                val_acc.append(batch_acc)
                
        
        val_loss = np.mean(val_loss)
        val_acc = np.mean(val_acc)
        
        print(f'Epoch: {epoch} - valid Loss: {val_loss:.6f} - valid_acc : {val_acc:.6f}')
        print(optimizer.param_groups[0]["lr"])
        if epoch % 3 == 0:
            valid_acc_max = val_acc
            best_models.append(model)
            torch.save(model.state_dict(), f'/content/drive/MyDrive/DACON/sentence_relation/roberta_large/ROBERTa_large_fold_{fold}_{epoch}.pth') 
            print('model save, model val acc : ',val_acc)
            print('best_models size : ',len(best_models))



 30%|██▉       | 500/1667 [04:37<10:55,  1.78it/s]

Batch Loss:  0.7149811048358679 Accuracy:  tensor(0.7002, device='cuda:0')


 60%|█████▉    | 1000/1667 [09:15<06:10,  1.80it/s]

Batch Loss:  0.5899571269229055 Accuracy:  tensor(0.7680, device='cuda:0')


 90%|████████▉ | 1500/1667 [13:53<01:33,  1.80it/s]

Batch Loss:  0.5278077414954702 Accuracy:  tensor(0.7979, device='cuda:0')


100%|██████████| 1667/1667 [15:26<00:00,  1.80it/s]
100%|██████████| 417/417 [01:17<00:00,  5.38it/s]


Epoch: 0 - valid Loss: 0.332835 - valid_acc : 0.878797
1.978269358317465e-05
model save, model val acc :  0.8787969624300559
best_models size :  1


 30%|██▉       | 500/1667 [04:38<10:51,  1.79it/s]

Batch Loss:  0.25897250760532914 Accuracy:  tensor(0.9135, device='cuda:0')


 60%|█████▉    | 1000/1667 [09:16<06:11,  1.79it/s]

Batch Loss:  0.26141204410512 Accuracy:  tensor(0.9141, device='cuda:0')


 90%|████████▉ | 1500/1667 [13:54<01:33,  1.79it/s]

Batch Loss:  0.2629968681304405 Accuracy:  tensor(0.9126, device='cuda:0')


100%|██████████| 1667/1667 [15:27<00:00,  1.80it/s]
100%|██████████| 417/417 [01:17<00:00,  5.38it/s]


Epoch: 1 - valid Loss: 0.307972 - valid_acc : 0.892686
1.913766807666949e-05


 30%|██▉       | 500/1667 [04:38<10:50,  1.80it/s]

Batch Loss:  0.17419662966392935 Accuracy:  tensor(0.9432, device='cuda:0')


 60%|█████▉    | 1000/1667 [09:16<06:11,  1.79it/s]

Batch Loss:  0.16883877022843807 Accuracy:  tensor(0.9456, device='cuda:0')


 90%|████████▉ | 1500/1667 [13:55<01:33,  1.80it/s]

Batch Loss:  0.16996211015277854 Accuracy:  tensor(0.9456, device='cuda:0')


100%|██████████| 1667/1667 [15:28<00:00,  1.80it/s]
100%|██████████| 417/417 [01:17<00:00,  5.38it/s]


Epoch: 2 - valid Loss: 0.332927 - valid_acc : 0.895983
1.8093123452515122e-05


 30%|██▉       | 500/1667 [04:38<10:50,  1.79it/s]

Batch Loss:  0.11328216261602939 Accuracy:  tensor(0.9652, device='cuda:0')


 60%|█████▉    | 1000/1667 [09:16<06:12,  1.79it/s]

Batch Loss:  0.11349625711026602 Accuracy:  tensor(0.9640, device='cuda:0')


 90%|████████▉ | 1500/1667 [13:55<01:33,  1.79it/s]

Batch Loss:  0.11320022135988499 Accuracy:  tensor(0.9637, device='cuda:0')


100%|██████████| 1667/1667 [15:28<00:00,  1.80it/s]
100%|██████████| 417/417 [01:17<00:00,  5.38it/s]


Epoch: 3 - valid Loss: 0.376900 - valid_acc : 0.901978
1.6694729518706965e-05
model save, model val acc :  0.9019784172661871
best_models size :  2


 30%|██▉       | 500/1667 [04:38<10:51,  1.79it/s]

Batch Loss:  0.0746842005988583 Accuracy:  tensor(0.9768, device='cuda:0')


 60%|█████▉    | 1000/1667 [09:17<06:12,  1.79it/s]

Batch Loss:  0.07531928739184514 Accuracy:  tensor(0.9773, device='cuda:0')


 90%|████████▉ | 1500/1667 [13:56<01:33,  1.79it/s]

Batch Loss:  0.07645559831879412 Accuracy:  tensor(0.9768, device='cuda:0')


100%|██████████| 1667/1667 [15:29<00:00,  1.79it/s]
100%|██████████| 417/417 [01:17<00:00,  5.39it/s]


Epoch: 4 - valid Loss: 0.392672 - valid_acc : 0.901978
1.500362715997331e-05


 30%|██▉       | 500/1667 [04:38<10:51,  1.79it/s]

Batch Loss:  0.050362049964722246 Accuracy:  tensor(0.9853, device='cuda:0')


 60%|█████▉    | 1000/1667 [09:17<06:12,  1.79it/s]

Batch Loss:  0.06545024614641443 Accuracy:  tensor(0.9807, device='cuda:0')


 90%|████████▉ | 1500/1667 [13:56<01:33,  1.79it/s]

Batch Loss:  0.06371009870781563 Accuracy:  tensor(0.9814, device='cuda:0')


100%|██████████| 1667/1667 [15:29<00:00,  1.79it/s]
100%|██████████| 417/417 [01:17<00:00,  5.39it/s]


Epoch: 5 - valid Loss: 0.399168 - valid_acc : 0.898781
1.3093755122671206e-05


 30%|██▉       | 500/1667 [04:39<10:51,  1.79it/s]

Batch Loss:  0.039423142169602216 Accuracy:  tensor(0.9888, device='cuda:0')


 60%|█████▉    | 1000/1667 [09:17<06:12,  1.79it/s]

Batch Loss:  0.03633697707520332 Accuracy:  tensor(0.9894, device='cuda:0')


 90%|████████▉ | 1500/1667 [13:56<01:33,  1.79it/s]

Batch Loss:  0.03621924653024568 Accuracy:  tensor(0.9896, device='cuda:0')


100%|██████████| 1667/1667 [15:29<00:00,  1.79it/s]
100%|██████████| 417/417 [01:17<00:00,  5.39it/s]


Epoch: 6 - valid Loss: 0.435754 - valid_acc : 0.902278
1.1048617248783491e-05
model save, model val acc :  0.9022781774580336
best_models size :  3


 30%|██▉       | 500/1667 [04:38<10:51,  1.79it/s]

Batch Loss:  0.024334978940780273 Accuracy:  tensor(0.9935, device='cuda:0')


 60%|█████▉    | 1000/1667 [09:17<06:12,  1.79it/s]

Batch Loss:  0.025308463590423345 Accuracy:  tensor(0.9923, device='cuda:0')


 90%|████████▉ | 1500/1667 [13:55<01:33,  1.79it/s]

Batch Loss:  0.02985953018348664 Accuracy:  tensor(0.9917, device='cuda:0')


100%|██████████| 1667/1667 [15:28<00:00,  1.80it/s]
100%|██████████| 417/417 [01:16<00:00,  5.43it/s]


Epoch: 7 - valid Loss: 0.454892 - valid_acc : 0.901878
8.957631502717603e-06


 30%|██▉       | 500/1667 [04:38<10:50,  1.79it/s]

Batch Loss:  0.020837711859494446 Accuracy:  tensor(0.9955, device='cuda:0')


 60%|█████▉    | 1000/1667 [09:16<06:11,  1.79it/s]

Batch Loss:  0.020281107237853577 Accuracy:  tensor(0.9956, device='cuda:0')


 90%|████████▉ | 1500/1667 [13:55<01:32,  1.80it/s]

Batch Loss:  0.01978312414692482 Accuracy:  tensor(0.9954, device='cuda:0')


100%|██████████| 1667/1667 [15:27<00:00,  1.80it/s]
100%|██████████| 417/417 [01:16<00:00,  5.43it/s]


Epoch: 8 - valid Loss: 0.584713 - valid_acc : 0.895484
6.91222041955317e-06


 30%|██▉       | 500/1667 [04:38<10:49,  1.80it/s]

Batch Loss:  0.01166512821233482 Accuracy:  tensor(0.9967, device='cuda:0')


 60%|█████▉    | 1000/1667 [09:16<06:12,  1.79it/s]

Batch Loss:  0.011964948497814475 Accuracy:  tensor(0.9965, device='cuda:0')


 90%|████████▉ | 1500/1667 [13:55<01:33,  1.79it/s]

Batch Loss:  0.011189258266211255 Accuracy:  tensor(0.9970, device='cuda:0')


100%|██████████| 1667/1667 [15:27<00:00,  1.80it/s]
100%|██████████| 417/417 [01:16<00:00,  5.42it/s]


Epoch: 9 - valid Loss: 0.566180 - valid_acc : 0.904876
5.001813909013246e-06
model save, model val acc :  0.9048760991207034
best_models size :  4


 30%|██▉       | 500/1667 [04:38<10:50,  1.79it/s]

Batch Loss:  0.009630309315863997 Accuracy:  tensor(0.9973, device='cuda:0')


 60%|█████▉    | 1000/1667 [09:16<06:12,  1.79it/s]

Batch Loss:  0.009992741602298337 Accuracy:  tensor(0.9972, device='cuda:0')


 90%|████████▉ | 1500/1667 [13:55<01:33,  1.79it/s]

Batch Loss:  0.009098429964098613 Accuracy:  tensor(0.9976, device='cuda:0')


100%|██████████| 1667/1667 [15:28<00:00,  1.80it/s]
100%|██████████| 417/417 [01:17<00:00,  5.41it/s]


Epoch: 10 - valid Loss: 0.553649 - valid_acc : 0.908973
3.309939181436199e-06


 30%|██▉       | 500/1667 [04:38<10:51,  1.79it/s]

Batch Loss:  0.0030897028126055373 Accuracy:  tensor(0.9993, device='cuda:0')


 60%|█████▉    | 1000/1667 [09:16<06:12,  1.79it/s]

Batch Loss:  0.002413703983249434 Accuracy:  tensor(0.9994, device='cuda:0')


 90%|████████▉ | 1500/1667 [13:55<01:33,  1.79it/s]

Batch Loss:  0.002652785499175176 Accuracy:  tensor(0.9992, device='cuda:0')


100%|██████████| 1667/1667 [15:28<00:00,  1.80it/s]
100%|██████████| 417/417 [01:16<00:00,  5.42it/s]


Epoch: 11 - valid Loss: 0.600708 - valid_acc : 0.907974
1.9105687528581995e-06


 30%|██▉       | 500/1667 [04:38<10:51,  1.79it/s]

Batch Loss:  0.0010610917617595987 Accuracy:  tensor(0.9997, device='cuda:0')


 60%|█████▉    | 1000/1667 [09:17<06:12,  1.79it/s]

Batch Loss:  0.0018394469172053505 Accuracy:  tensor(0.9997, device='cuda:0')


 90%|████████▉ | 1500/1667 [13:55<01:33,  1.80it/s]

Batch Loss:  0.002373626923945267 Accuracy:  tensor(0.9996, device='cuda:0')


100%|██████████| 1667/1667 [15:28<00:00,  1.80it/s]
100%|██████████| 417/417 [01:16<00:00,  5.43it/s]


Epoch: 12 - valid Loss: 0.627398 - valid_acc : 0.908173
8.648862025249527e-07
model save, model val acc :  0.9081734612310152
best_models size :  5


 30%|██▉       | 500/1667 [04:38<10:50,  1.79it/s]

Batch Loss:  0.0008074049342540093 Accuracy:  tensor(0.9998, device='cuda:0')


 60%|█████▉    | 1000/1667 [09:16<06:11,  1.80it/s]

Batch Loss:  0.0013538454674053356 Accuracy:  tensor(0.9997, device='cuda:0')


 90%|████████▉ | 1500/1667 [13:54<01:33,  1.79it/s]

Batch Loss:  0.001329106022232736 Accuracy:  tensor(0.9997, device='cuda:0')


100%|██████████| 1667/1667 [15:27<00:00,  1.80it/s]
100%|██████████| 417/417 [01:16<00:00,  5.43it/s]


Epoch: 13 - valid Loss: 0.633508 - valid_acc : 0.908573
2.1861109108858526e-07


 30%|██▉       | 500/1667 [04:38<10:50,  1.79it/s]

Batch Loss:  0.0010340892757521943 Accuracy:  tensor(0.9998, device='cuda:0')


 60%|█████▉    | 1000/1667 [09:16<06:12,  1.79it/s]

Batch Loss:  0.0007702508129223133 Accuracy:  tensor(0.9999, device='cuda:0')


 90%|████████▉ | 1500/1667 [13:55<01:33,  1.79it/s]

Batch Loss:  0.0006245667173837622 Accuracy:  tensor(0.9999, device='cuda:0')


100%|██████████| 1667/1667 [15:28<00:00,  1.80it/s]
100%|██████████| 417/417 [01:17<00:00,  5.39it/s]


Epoch: 14 - valid Loss: 0.635514 - valid_acc : 0.908773
0.0


 30%|██▉       | 500/1667 [04:38<10:50,  1.79it/s]

Batch Loss:  0.18245358183979987 Accuracy:  tensor(0.9527, device='cuda:0')


 60%|█████▉    | 1000/1667 [09:17<06:12,  1.79it/s]

Batch Loss:  0.17563869568891824 Accuracy:  tensor(0.9527, device='cuda:0')


 90%|████████▉ | 1500/1667 [13:56<01:33,  1.79it/s]

Batch Loss:  0.16857708060058454 Accuracy:  tensor(0.9532, device='cuda:0')


100%|██████████| 1667/1667 [15:29<00:00,  1.79it/s]
100%|██████████| 417/417 [01:17<00:00,  5.39it/s]


Epoch: 0 - valid Loss: 0.036073 - valid_acc : 0.993006
1.978269358317465e-05
model save, model val acc :  0.9930055955235811
best_models size :  6


 30%|██▉       | 500/1667 [04:38<10:51,  1.79it/s]

Batch Loss:  0.09951113776816055 Accuracy:  tensor(0.9738, device='cuda:0')


 60%|█████▉    | 1000/1667 [09:17<06:11,  1.79it/s]

Batch Loss:  0.10243729341542349 Accuracy:  tensor(0.9722, device='cuda:0')


 90%|████████▉ | 1500/1667 [13:56<01:33,  1.79it/s]

Batch Loss:  0.1106179235521704 Accuracy:  tensor(0.9697, device='cuda:0')


100%|██████████| 1667/1667 [15:29<00:00,  1.79it/s]
100%|██████████| 417/417 [01:17<00:00,  5.40it/s]


Epoch: 1 - valid Loss: 0.066122 - valid_acc : 0.977618
1.913766807666949e-05


 30%|██▉       | 500/1667 [04:38<10:51,  1.79it/s]

Batch Loss:  0.06908212940930389 Accuracy:  tensor(0.9808, device='cuda:0')


 60%|█████▉    | 1000/1667 [09:17<06:12,  1.79it/s]

Batch Loss:  0.07055524801986758 Accuracy:  tensor(0.9797, device='cuda:0')


 90%|████████▉ | 1500/1667 [13:56<01:33,  1.79it/s]

Batch Loss:  0.07587540784330728 Accuracy:  tensor(0.9783, device='cuda:0')


100%|██████████| 1667/1667 [15:29<00:00,  1.79it/s]
100%|██████████| 417/417 [01:17<00:00,  5.39it/s]


Epoch: 2 - valid Loss: 0.039836 - valid_acc : 0.987110
1.8093123452515122e-05


 30%|██▉       | 500/1667 [04:38<10:51,  1.79it/s]

Batch Loss:  0.04436799634178169 Accuracy:  tensor(0.9875, device='cuda:0')


 60%|█████▉    | 1000/1667 [09:17<06:11,  1.79it/s]

Batch Loss:  0.04981268663826632 Accuracy:  tensor(0.9857, device='cuda:0')


 90%|████████▉ | 1500/1667 [13:56<01:33,  1.79it/s]

Batch Loss:  0.054737440828427984 Accuracy:  tensor(0.9844, device='cuda:0')


100%|██████████| 1667/1667 [15:29<00:00,  1.79it/s]
100%|██████████| 417/417 [01:17<00:00,  5.39it/s]


Epoch: 3 - valid Loss: 0.054446 - valid_acc : 0.983513
1.6694729518706965e-05
model save, model val acc :  0.9835131894484412
best_models size :  7


 30%|██▉       | 500/1667 [04:38<10:52,  1.79it/s]

Batch Loss:  0.047969752885517664 Accuracy:  tensor(0.9850, device='cuda:0')


 60%|█████▉    | 1000/1667 [09:17<06:12,  1.79it/s]

Batch Loss:  0.04795510912215104 Accuracy:  tensor(0.9862, device='cuda:0')


 90%|████████▉ | 1500/1667 [13:56<01:33,  1.79it/s]

Batch Loss:  0.04851346544793341 Accuracy:  tensor(0.9862, device='cuda:0')


100%|██████████| 1667/1667 [15:29<00:00,  1.79it/s]
100%|██████████| 417/417 [01:17<00:00,  5.39it/s]


Epoch: 4 - valid Loss: 0.074756 - valid_acc : 0.978217
1.500362715997331e-05


 30%|██▉       | 500/1667 [04:38<10:53,  1.79it/s]

Batch Loss:  0.028835663808567915 Accuracy:  tensor(0.9927, device='cuda:0')


 60%|█████▉    | 1000/1667 [09:18<06:12,  1.79it/s]

Batch Loss:  0.031658431114396084 Accuracy:  tensor(0.9915, device='cuda:0')


 90%|████████▉ | 1500/1667 [13:57<01:33,  1.79it/s]

Batch Loss:  0.03231242403271608 Accuracy:  tensor(0.9908, device='cuda:0')


100%|██████████| 1667/1667 [15:30<00:00,  1.79it/s]
100%|██████████| 417/417 [01:17<00:00,  5.39it/s]


Epoch: 5 - valid Loss: 0.065304 - valid_acc : 0.979816
1.3093755122671206e-05


 30%|██▉       | 500/1667 [04:39<10:52,  1.79it/s]

Batch Loss:  0.020058601241849827 Accuracy:  tensor(0.9952, device='cuda:0')


 60%|█████▉    | 1000/1667 [09:18<06:13,  1.78it/s]

Batch Loss:  0.021167726621351902 Accuracy:  tensor(0.9938, device='cuda:0')


 90%|████████▉ | 1500/1667 [13:57<01:33,  1.79it/s]

Batch Loss:  0.022281832727176758 Accuracy:  tensor(0.9934, device='cuda:0')


100%|██████████| 1667/1667 [15:30<00:00,  1.79it/s]
100%|██████████| 417/417 [01:17<00:00,  5.39it/s]


Epoch: 6 - valid Loss: 0.086868 - valid_acc : 0.977218
1.1048617248783491e-05
model save, model val acc :  0.9772182254196643
best_models size :  8


 30%|██▉       | 500/1667 [04:39<10:51,  1.79it/s]

Batch Loss:  0.019380151533987373 Accuracy:  tensor(0.9955, device='cuda:0')


 60%|█████▉    | 1000/1667 [09:18<06:12,  1.79it/s]

Batch Loss:  0.020998339886791655 Accuracy:  tensor(0.9948, device='cuda:0')


 90%|████████▉ | 1500/1667 [13:57<01:33,  1.79it/s]

Batch Loss:  0.022912247419871468 Accuracy:  tensor(0.9944, device='cuda:0')


100%|██████████| 1667/1667 [15:30<00:00,  1.79it/s]
100%|██████████| 417/417 [01:17<00:00,  5.39it/s]


Epoch: 7 - valid Loss: 0.068952 - valid_acc : 0.982214
8.957631502717603e-06


 30%|██▉       | 500/1667 [04:39<10:52,  1.79it/s]

Batch Loss:  0.010047125337034231 Accuracy:  tensor(0.9975, device='cuda:0')


 60%|█████▉    | 1000/1667 [09:18<06:13,  1.79it/s]

Batch Loss:  0.015865988251185625 Accuracy:  tensor(0.9953, device='cuda:0')


 90%|████████▉ | 1500/1667 [13:56<01:33,  1.79it/s]

Batch Loss:  0.01478719466367814 Accuracy:  tensor(0.9958, device='cuda:0')


100%|██████████| 1667/1667 [15:29<00:00,  1.79it/s]
100%|██████████| 417/417 [01:17<00:00,  5.39it/s]


Epoch: 8 - valid Loss: 0.084995 - valid_acc : 0.979416
6.91222041955317e-06


 30%|██▉       | 500/1667 [04:39<10:52,  1.79it/s]

Batch Loss:  0.0098214849671931 Accuracy:  tensor(0.9970, device='cuda:0')


 60%|█████▉    | 1000/1667 [09:18<06:12,  1.79it/s]

Batch Loss:  0.011835256524209399 Accuracy:  tensor(0.9969, device='cuda:0')


 90%|████████▉ | 1500/1667 [13:57<01:33,  1.79it/s]

Batch Loss:  0.010099229394822032 Accuracy:  tensor(0.9973, device='cuda:0')


100%|██████████| 1667/1667 [15:30<00:00,  1.79it/s]
100%|██████████| 417/417 [01:17<00:00,  5.39it/s]


Epoch: 9 - valid Loss: 0.062322 - valid_acc : 0.985612
5.001813909013246e-06
model save, model val acc :  0.9856115107913669
best_models size :  9


 30%|██▉       | 500/1667 [04:38<10:52,  1.79it/s]

Batch Loss:  0.002263976142377942 Accuracy:  tensor(0.9995, device='cuda:0')


 60%|█████▉    | 1000/1667 [09:17<06:12,  1.79it/s]

Batch Loss:  0.004307570863042201 Accuracy:  tensor(0.9989, device='cuda:0')


 90%|████████▉ | 1500/1667 [13:56<01:33,  1.79it/s]

Batch Loss:  0.005326790460031285 Accuracy:  tensor(0.9988, device='cuda:0')


100%|██████████| 1667/1667 [15:29<00:00,  1.79it/s]
100%|██████████| 417/417 [01:17<00:00,  5.39it/s]


Epoch: 10 - valid Loss: 0.069871 - valid_acc : 0.985811
3.309939181436199e-06


 30%|██▉       | 500/1667 [04:39<10:52,  1.79it/s]

Batch Loss:  0.002466863144436502 Accuracy:  tensor(0.9995, device='cuda:0')


 60%|█████▉    | 1000/1667 [09:18<06:12,  1.79it/s]

Batch Loss:  0.003812008208293264 Accuracy:  tensor(0.9992, device='cuda:0')


 90%|████████▉ | 1500/1667 [13:57<01:33,  1.79it/s]

Batch Loss:  0.0031809679964038273 Accuracy:  tensor(0.9993, device='cuda:0')


100%|██████████| 1667/1667 [15:30<00:00,  1.79it/s]
100%|██████████| 417/417 [01:17<00:00,  5.38it/s]


Epoch: 11 - valid Loss: 0.078019 - valid_acc : 0.982814
1.9105687528581995e-06


 30%|██▉       | 500/1667 [04:39<10:51,  1.79it/s]

Batch Loss:  0.001713585180856171 Accuracy:  tensor(0.9997, device='cuda:0')


 60%|█████▉    | 1000/1667 [09:18<06:12,  1.79it/s]

Batch Loss:  0.001438984709500801 Accuracy:  tensor(0.9997, device='cuda:0')


 90%|████████▉ | 1500/1667 [13:57<01:33,  1.79it/s]

Batch Loss:  0.0018733016763580962 Accuracy:  tensor(0.9995, device='cuda:0')


100%|██████████| 1667/1667 [15:29<00:00,  1.79it/s]
100%|██████████| 417/417 [01:17<00:00,  5.39it/s]


Epoch: 12 - valid Loss: 0.073529 - valid_acc : 0.983413
8.648862025249527e-07
model save, model val acc :  0.9834132693844924
best_models size :  10


 30%|██▉       | 500/1667 [04:38<10:51,  1.79it/s]

Batch Loss:  0.001507682607643801 Accuracy:  tensor(0.9995, device='cuda:0')


 60%|█████▉    | 1000/1667 [09:17<06:12,  1.79it/s]

Batch Loss:  0.0015209444438842184 Accuracy:  tensor(0.9996, device='cuda:0')


 90%|████████▉ | 1500/1667 [13:56<01:33,  1.79it/s]

Batch Loss:  0.001146595078769072 Accuracy:  tensor(0.9997, device='cuda:0')


100%|██████████| 1667/1667 [15:29<00:00,  1.79it/s]
100%|██████████| 417/417 [01:17<00:00,  5.39it/s]


Epoch: 13 - valid Loss: 0.072584 - valid_acc : 0.985012
2.1861109108858526e-07


 30%|██▉       | 500/1667 [04:38<10:51,  1.79it/s]

Batch Loss:  0.0005594258510172949 Accuracy:  tensor(0.9998, device='cuda:0')


 60%|█████▉    | 1000/1667 [09:17<06:12,  1.79it/s]

Batch Loss:  0.0005278743921189743 Accuracy:  tensor(0.9998, device='cuda:0')


 90%|████████▉ | 1500/1667 [13:56<01:33,  1.79it/s]

Batch Loss:  0.001166299417688909 Accuracy:  tensor(0.9998, device='cuda:0')


100%|██████████| 1667/1667 [15:29<00:00,  1.79it/s]
100%|██████████| 417/417 [01:17<00:00,  5.39it/s]


Epoch: 14 - valid Loss: 0.073127 - valid_acc : 0.984612
0.0


 30%|██▉       | 500/1667 [04:38<10:52,  1.79it/s]

Batch Loss:  0.11202053240919485 Accuracy:  tensor(0.9742, device='cuda:0')


 60%|█████▉    | 1000/1667 [09:17<06:12,  1.79it/s]

Batch Loss:  0.09511046201176941 Accuracy:  tensor(0.9760, device='cuda:0')


 90%|████████▉ | 1500/1667 [13:56<01:33,  1.79it/s]

Batch Loss:  0.08646566950335788 Accuracy:  tensor(0.9773, device='cuda:0')


100%|██████████| 1667/1667 [15:29<00:00,  1.79it/s]
100%|██████████| 417/417 [01:17<00:00,  5.38it/s]


Epoch: 0 - valid Loss: 0.048073 - valid_acc : 0.987410
1.978269358317465e-05
model save, model val acc :  0.987410071942446
best_models size :  11


 30%|██▉       | 500/1667 [04:39<10:52,  1.79it/s]

Batch Loss:  0.09261936854384839 Accuracy:  tensor(0.9725, device='cuda:0')


 60%|█████▉    | 1000/1667 [09:18<06:13,  1.79it/s]

Batch Loss:  0.0728360999173019 Accuracy:  tensor(0.9782, device='cuda:0')


 90%|████████▉ | 1500/1667 [13:57<01:33,  1.79it/s]

Batch Loss:  0.06987252826651093 Accuracy:  tensor(0.9800, device='cuda:0')


100%|██████████| 1667/1667 [15:30<00:00,  1.79it/s]
100%|██████████| 417/417 [01:17<00:00,  5.38it/s]


Epoch: 1 - valid Loss: 0.021253 - valid_acc : 0.994105
1.913766807666949e-05


 30%|██▉       | 500/1667 [04:39<10:52,  1.79it/s]

Batch Loss:  0.05287433857982978 Accuracy:  tensor(0.9853, device='cuda:0')


 60%|█████▉    | 1000/1667 [09:18<06:12,  1.79it/s]

Batch Loss:  0.0529159262109315 Accuracy:  tensor(0.9845, device='cuda:0')


 90%|████████▉ | 1500/1667 [13:57<01:33,  1.79it/s]

Batch Loss:  0.05374922911318329 Accuracy:  tensor(0.9851, device='cuda:0')


100%|██████████| 1667/1667 [15:29<00:00,  1.79it/s]
100%|██████████| 417/417 [01:17<00:00,  5.38it/s]


Epoch: 2 - valid Loss: 0.069835 - valid_acc : 0.981015
1.8093123452515122e-05


 30%|██▉       | 500/1667 [04:38<10:51,  1.79it/s]

Batch Loss:  0.05644238537014462 Accuracy:  tensor(0.9838, device='cuda:0')


 60%|█████▉    | 1000/1667 [09:17<06:12,  1.79it/s]

Batch Loss:  0.050214000435255 Accuracy:  tensor(0.9857, device='cuda:0')


 90%|████████▉ | 1500/1667 [13:56<01:33,  1.79it/s]

Batch Loss:  0.04915787917080646 Accuracy:  tensor(0.9862, device='cuda:0')


100%|██████████| 1667/1667 [15:29<00:00,  1.79it/s]
100%|██████████| 417/417 [01:17<00:00,  5.38it/s]


Epoch: 3 - valid Loss: 0.065505 - valid_acc : 0.981015
1.6694729518706965e-05
model save, model val acc :  0.9810151878497202
best_models size :  12


 30%|██▉       | 500/1667 [04:38<10:51,  1.79it/s]

Batch Loss:  0.0381775827340316 Accuracy:  tensor(0.9898, device='cuda:0')


 60%|█████▉    | 1000/1667 [09:17<06:12,  1.79it/s]

Batch Loss:  0.037253714667545866 Accuracy:  tensor(0.9906, device='cuda:0')


 90%|████████▉ | 1500/1667 [13:56<01:33,  1.79it/s]

Batch Loss:  0.03935153141415988 Accuracy:  tensor(0.9896, device='cuda:0')


100%|██████████| 1667/1667 [15:29<00:00,  1.79it/s]
100%|██████████| 417/417 [01:17<00:00,  5.39it/s]


Epoch: 4 - valid Loss: 0.050147 - valid_acc : 0.986011
1.500362715997331e-05


 30%|██▉       | 500/1667 [04:38<10:51,  1.79it/s]

Batch Loss:  0.028186237939022248 Accuracy:  tensor(0.9917, device='cuda:0')


 60%|█████▉    | 1000/1667 [09:17<06:12,  1.79it/s]

Batch Loss:  0.025187551738490584 Accuracy:  tensor(0.9928, device='cuda:0')


 90%|████████▉ | 1500/1667 [13:56<01:33,  1.79it/s]

Batch Loss:  0.02542870791935517 Accuracy:  tensor(0.9923, device='cuda:0')


100%|██████████| 1667/1667 [15:29<00:00,  1.79it/s]
100%|██████████| 417/417 [01:17<00:00,  5.38it/s]


Epoch: 5 - valid Loss: 0.032487 - valid_acc : 0.990008
1.3093755122671206e-05


 30%|██▉       | 500/1667 [04:38<10:51,  1.79it/s]

Batch Loss:  0.023297443233197556 Accuracy:  tensor(0.9928, device='cuda:0')


 60%|█████▉    | 1000/1667 [09:17<06:12,  1.79it/s]

Batch Loss:  0.025440114101118523 Accuracy:  tensor(0.9925, device='cuda:0')


 90%|████████▉ | 1500/1667 [13:56<01:33,  1.79it/s]

Batch Loss:  0.025009827258831743 Accuracy:  tensor(0.9926, device='cuda:0')


100%|██████████| 1667/1667 [15:29<00:00,  1.79it/s]
100%|██████████| 417/417 [01:17<00:00,  5.38it/s]


Epoch: 6 - valid Loss: 0.030775 - valid_acc : 0.991807
1.1048617248783491e-05
model save, model val acc :  0.9918065547561952
best_models size :  13


 30%|██▉       | 500/1667 [04:38<10:51,  1.79it/s]

Batch Loss:  0.01330717210311559 Accuracy:  tensor(0.9963, device='cuda:0')


 60%|█████▉    | 1000/1667 [09:17<06:12,  1.79it/s]

Batch Loss:  0.014394703309473698 Accuracy:  tensor(0.9963, device='cuda:0')


 90%|████████▉ | 1500/1667 [13:56<01:33,  1.79it/s]

Batch Loss:  0.0158740494857484 Accuracy:  tensor(0.9956, device='cuda:0')


100%|██████████| 1667/1667 [15:29<00:00,  1.79it/s]
100%|██████████| 417/417 [01:17<00:00,  5.39it/s]


Epoch: 7 - valid Loss: 0.043691 - valid_acc : 0.989209
8.957631502717603e-06


 30%|██▉       | 500/1667 [04:38<10:51,  1.79it/s]

Batch Loss:  0.009804118393993122 Accuracy:  tensor(0.9970, device='cuda:0')


 60%|█████▉    | 1000/1667 [09:17<06:12,  1.79it/s]

Batch Loss:  0.00990178513724095 Accuracy:  tensor(0.9971, device='cuda:0')


 90%|████████▉ | 1500/1667 [13:56<01:33,  1.79it/s]

Batch Loss:  0.00983561364460426 Accuracy:  tensor(0.9972, device='cuda:0')


100%|██████████| 1667/1667 [15:29<00:00,  1.79it/s]
100%|██████████| 417/417 [01:17<00:00,  5.39it/s]


Epoch: 8 - valid Loss: 0.027513 - valid_acc : 0.994205
6.91222041955317e-06


 30%|██▉       | 500/1667 [04:38<10:51,  1.79it/s]

Batch Loss:  0.007642147902122815 Accuracy:  tensor(0.9982, device='cuda:0')


 60%|█████▉    | 1000/1667 [09:17<06:12,  1.79it/s]

Batch Loss:  0.004676117642375175 Accuracy:  tensor(0.9989, device='cuda:0')


 90%|████████▉ | 1500/1667 [13:56<01:33,  1.79it/s]

Batch Loss:  0.004897037124343721 Accuracy:  tensor(0.9986, device='cuda:0')


100%|██████████| 1667/1667 [15:29<00:00,  1.79it/s]
100%|██████████| 417/417 [01:17<00:00,  5.38it/s]


Epoch: 9 - valid Loss: 0.026509 - valid_acc : 0.993605
5.001813909013246e-06
model save, model val acc :  0.9936051159072743
best_models size :  14


 30%|██▉       | 500/1667 [04:38<10:50,  1.79it/s]

Batch Loss:  0.0028809475833768373 Accuracy:  tensor(0.9993, device='cuda:0')


 60%|█████▉    | 1000/1667 [09:17<06:13,  1.79it/s]

Batch Loss:  0.006290029961914115 Accuracy:  tensor(0.9985, device='cuda:0')


 90%|████████▉ | 1500/1667 [13:57<01:33,  1.79it/s]

Batch Loss:  0.004563975736518235 Accuracy:  tensor(0.9989, device='cuda:0')


100%|██████████| 1667/1667 [15:30<00:00,  1.79it/s]
100%|██████████| 417/417 [01:17<00:00,  5.38it/s]


Epoch: 10 - valid Loss: 0.029507 - valid_acc : 0.994205
3.309939181436199e-06


 30%|██▉       | 500/1667 [04:39<10:52,  1.79it/s]

Batch Loss:  0.0009189406694531499 Accuracy:  tensor(0.9997, device='cuda:0')


 60%|█████▉    | 1000/1667 [09:18<06:12,  1.79it/s]

Batch Loss:  0.0007925495086819864 Accuracy:  tensor(0.9997, device='cuda:0')


 90%|████████▉ | 1500/1667 [13:57<01:33,  1.79it/s]

Batch Loss:  0.0008622873229263253 Accuracy:  tensor(0.9996, device='cuda:0')


100%|██████████| 1667/1667 [15:29<00:00,  1.79it/s]
100%|██████████| 417/417 [01:17<00:00,  5.40it/s]


Epoch: 11 - valid Loss: 0.026712 - valid_acc : 0.995604
1.9105687528581995e-06


 30%|██▉       | 500/1667 [04:38<10:49,  1.80it/s]

Batch Loss:  0.0011664388231256452 Accuracy:  tensor(0.9998, device='cuda:0')


 60%|█████▉    | 1000/1667 [09:17<06:11,  1.79it/s]

Batch Loss:  0.0012953309770764464 Accuracy:  tensor(0.9997, device='cuda:0')


 90%|████████▉ | 1500/1667 [13:55<01:33,  1.79it/s]

Batch Loss:  0.001005702522105518 Accuracy:  tensor(0.9998, device='cuda:0')


100%|██████████| 1667/1667 [15:28<00:00,  1.80it/s]
100%|██████████| 417/417 [01:17<00:00,  5.41it/s]


Epoch: 12 - valid Loss: 0.027786 - valid_acc : 0.995404
8.648862025249527e-07
model save, model val acc :  0.9954036770583534
best_models size :  15


 30%|██▉       | 500/1667 [04:38<10:51,  1.79it/s]

Batch Loss:  0.0020911237901273124 Accuracy:  tensor(0.9997, device='cuda:0')


 60%|█████▉    | 1000/1667 [09:16<06:11,  1.79it/s]

Batch Loss:  0.001973874115155013 Accuracy:  tensor(0.9995, device='cuda:0')


 90%|████████▉ | 1500/1667 [13:55<01:33,  1.79it/s]

Batch Loss:  0.0016880320421890549 Accuracy:  tensor(0.9996, device='cuda:0')


100%|██████████| 1667/1667 [15:28<00:00,  1.80it/s]
100%|██████████| 417/417 [01:17<00:00,  5.39it/s]


Epoch: 13 - valid Loss: 0.025743 - valid_acc : 0.994804
2.1861109108858526e-07


 30%|██▉       | 500/1667 [04:38<10:51,  1.79it/s]

Batch Loss:  0.00024068433678257862 Accuracy:  tensor(1., device='cuda:0')


 60%|█████▉    | 1000/1667 [09:17<06:12,  1.79it/s]

Batch Loss:  0.00043860354316166194 Accuracy:  tensor(0.9999, device='cuda:0')


 90%|████████▉ | 1500/1667 [13:56<01:33,  1.79it/s]

Batch Loss:  0.0008119598369557934 Accuracy:  tensor(0.9998, device='cuda:0')


100%|██████████| 1667/1667 [15:29<00:00,  1.79it/s]
100%|██████████| 417/417 [01:17<00:00,  5.39it/s]


Epoch: 14 - valid Loss: 0.025635 - valid_acc : 0.995004
0.0


 30%|██▉       | 500/1667 [04:38<10:51,  1.79it/s]

Batch Loss:  0.09798711074260064 Accuracy:  tensor(0.9777, device='cuda:0')


 60%|█████▉    | 1000/1667 [09:17<06:12,  1.79it/s]

Batch Loss:  0.0876061295174295 Accuracy:  tensor(0.9778, device='cuda:0')


 90%|████████▉ | 1500/1667 [13:56<01:33,  1.79it/s]

Batch Loss:  0.08641652288800106 Accuracy:  tensor(0.9772, device='cuda:0')


100%|██████████| 1667/1667 [15:29<00:00,  1.79it/s]
100%|██████████| 417/417 [01:17<00:00,  5.39it/s]


Epoch: 0 - valid Loss: 0.013547 - valid_acc : 0.996003
1.978269358317465e-05
model save, model val acc :  0.9960031974420464
best_models size :  16


 30%|██▉       | 500/1667 [04:38<10:51,  1.79it/s]

Batch Loss:  0.0419403660891112 Accuracy:  tensor(0.9865, device='cuda:0')


 60%|█████▉    | 1000/1667 [09:17<06:12,  1.79it/s]

Batch Loss:  0.044215581665863285 Accuracy:  tensor(0.9868, device='cuda:0')


 90%|████████▉ | 1500/1667 [13:56<01:33,  1.79it/s]

Batch Loss:  0.047598885028079775 Accuracy:  tensor(0.9862, device='cuda:0')


100%|██████████| 1667/1667 [15:29<00:00,  1.79it/s]
100%|██████████| 417/417 [01:17<00:00,  5.39it/s]


Epoch: 1 - valid Loss: 0.022806 - valid_acc : 0.994205
1.913766807666949e-05


 30%|██▉       | 500/1667 [04:38<10:51,  1.79it/s]

Batch Loss:  0.0466583854847122 Accuracy:  tensor(0.9850, device='cuda:0')


 60%|█████▉    | 1000/1667 [09:17<06:12,  1.79it/s]

Batch Loss:  0.04636764623955241 Accuracy:  tensor(0.9855, device='cuda:0')


 90%|████████▉ | 1500/1667 [13:56<01:33,  1.79it/s]

Batch Loss:  0.049529404484763895 Accuracy:  tensor(0.9848, device='cuda:0')


100%|██████████| 1667/1667 [15:29<00:00,  1.79it/s]
100%|██████████| 417/417 [01:17<00:00,  5.39it/s]


Epoch: 2 - valid Loss: 0.041059 - valid_acc : 0.987610
1.8093123452515122e-05


 30%|██▉       | 500/1667 [04:38<10:52,  1.79it/s]

Batch Loss:  0.03491831720143091 Accuracy:  tensor(0.9910, device='cuda:0')


 60%|█████▉    | 1000/1667 [09:17<06:12,  1.79it/s]

Batch Loss:  0.03593751906768011 Accuracy:  tensor(0.9897, device='cuda:0')


 90%|████████▉ | 1500/1667 [13:56<01:33,  1.79it/s]

Batch Loss:  0.03834894079906129 Accuracy:  tensor(0.9891, device='cuda:0')


100%|██████████| 1667/1667 [15:29<00:00,  1.79it/s]
100%|██████████| 417/417 [01:17<00:00,  5.38it/s]


Epoch: 3 - valid Loss: 0.022053 - valid_acc : 0.993205
1.6694729518706965e-05
model save, model val acc :  0.9932054356514787
best_models size :  17


 30%|██▉       | 500/1667 [04:39<10:52,  1.79it/s]

Batch Loss:  0.030569187887129375 Accuracy:  tensor(0.9907, device='cuda:0')


 60%|█████▉    | 1000/1667 [09:17<06:12,  1.79it/s]

Batch Loss:  0.03360077651201573 Accuracy:  tensor(0.9903, device='cuda:0')


 90%|████████▉ | 1500/1667 [13:56<01:33,  1.79it/s]

Batch Loss:  0.03133464945455974 Accuracy:  tensor(0.9911, device='cuda:0')


100%|██████████| 1667/1667 [15:29<00:00,  1.79it/s]
100%|██████████| 417/417 [01:17<00:00,  5.39it/s]


Epoch: 4 - valid Loss: 0.028144 - valid_acc : 0.991807
1.500362715997331e-05


 30%|██▉       | 500/1667 [04:38<10:51,  1.79it/s]

Batch Loss:  0.042558695145475216 Accuracy:  tensor(0.9878, device='cuda:0')


 60%|█████▉    | 1000/1667 [09:17<06:12,  1.79it/s]

Batch Loss:  0.03543463729854557 Accuracy:  tensor(0.9898, device='cuda:0')


 90%|████████▉ | 1500/1667 [13:56<01:33,  1.79it/s]

Batch Loss:  0.03253767482594897 Accuracy:  tensor(0.9906, device='cuda:0')


100%|██████████| 1667/1667 [15:29<00:00,  1.79it/s]
100%|██████████| 417/417 [01:17<00:00,  5.39it/s]


Epoch: 5 - valid Loss: 0.023620 - valid_acc : 0.994404
1.3093755122671206e-05


 30%|██▉       | 500/1667 [04:39<10:51,  1.79it/s]

Batch Loss:  0.012916057358903344 Accuracy:  tensor(0.9973, device='cuda:0')


 60%|█████▉    | 1000/1667 [09:17<06:12,  1.79it/s]

Batch Loss:  0.019592432805715362 Accuracy:  tensor(0.9947, device='cuda:0')


 90%|████████▉ | 1500/1667 [13:56<01:33,  1.79it/s]

Batch Loss:  0.018073241665251163 Accuracy:  tensor(0.9953, device='cuda:0')


100%|██████████| 1667/1667 [15:29<00:00,  1.79it/s]
100%|██████████| 417/417 [01:17<00:00,  5.39it/s]


Epoch: 6 - valid Loss: 0.024572 - valid_acc : 0.994804
1.1048617248783491e-05
model save, model val acc :  0.9948041566746603
best_models size :  18


 30%|██▉       | 500/1667 [04:39<10:53,  1.79it/s]

Batch Loss:  0.007194323435076512 Accuracy:  tensor(0.9980, device='cuda:0')


 60%|█████▉    | 1000/1667 [09:18<06:12,  1.79it/s]

Batch Loss:  0.010669915274760569 Accuracy:  tensor(0.9972, device='cuda:0')


 90%|████████▉ | 1500/1667 [13:57<01:33,  1.79it/s]

Batch Loss:  0.011378478608157215 Accuracy:  tensor(0.9972, device='cuda:0')


100%|██████████| 1667/1667 [15:30<00:00,  1.79it/s]
100%|██████████| 417/417 [01:17<00:00,  5.38it/s]


Epoch: 7 - valid Loss: 0.032227 - valid_acc : 0.992806
8.957631502717603e-06


 30%|██▉       | 500/1667 [04:39<10:51,  1.79it/s]

Batch Loss:  0.011284621144732228 Accuracy:  tensor(0.9967, device='cuda:0')


 60%|█████▉    | 1000/1667 [09:18<06:13,  1.79it/s]

Batch Loss:  0.009585854182209004 Accuracy:  tensor(0.9972, device='cuda:0')


 90%|████████▉ | 1500/1667 [13:57<01:33,  1.79it/s]

Batch Loss:  0.00969427862726055 Accuracy:  tensor(0.9972, device='cuda:0')


100%|██████████| 1667/1667 [15:30<00:00,  1.79it/s]
100%|██████████| 417/417 [01:17<00:00,  5.39it/s]


Epoch: 8 - valid Loss: 0.042885 - valid_acc : 0.990807
6.91222041955317e-06


 30%|██▉       | 500/1667 [04:39<10:52,  1.79it/s]

Batch Loss:  0.010669288719567703 Accuracy:  tensor(0.9975, device='cuda:0')


 60%|█████▉    | 1000/1667 [09:18<06:12,  1.79it/s]

Batch Loss:  0.009214835501043126 Accuracy:  tensor(0.9976, device='cuda:0')


 90%|████████▉ | 1500/1667 [13:57<01:33,  1.79it/s]

Batch Loss:  0.009863363168105328 Accuracy:  tensor(0.9977, device='cuda:0')


100%|██████████| 1667/1667 [15:30<00:00,  1.79it/s]
100%|██████████| 417/417 [01:17<00:00,  5.39it/s]


Epoch: 9 - valid Loss: 0.026138 - valid_acc : 0.993805
5.001813909013246e-06
model save, model val acc :  0.9938049560351718
best_models size :  19


 30%|██▉       | 500/1667 [04:39<10:53,  1.79it/s]

Batch Loss:  0.007012516175818746 Accuracy:  tensor(0.9982, device='cuda:0')


 60%|█████▉    | 1000/1667 [09:18<06:12,  1.79it/s]

Batch Loss:  0.005724100553692551 Accuracy:  tensor(0.9984, device='cuda:0')


 90%|████████▉ | 1500/1667 [13:57<01:33,  1.79it/s]

Batch Loss:  0.0055412135002309995 Accuracy:  tensor(0.9986, device='cuda:0')


100%|██████████| 1667/1667 [15:30<00:00,  1.79it/s]
100%|██████████| 417/417 [01:17<00:00,  5.39it/s]


Epoch: 10 - valid Loss: 0.025016 - valid_acc : 0.994205
3.309939181436199e-06


 30%|██▉       | 500/1667 [04:39<10:51,  1.79it/s]

Batch Loss:  0.003055879601655761 Accuracy:  tensor(0.9993, device='cuda:0')


 60%|█████▉    | 1000/1667 [09:18<06:13,  1.79it/s]

Batch Loss:  0.0024079426745083764 Accuracy:  tensor(0.9996, device='cuda:0')


 90%|████████▉ | 1500/1667 [13:57<01:33,  1.79it/s]

Batch Loss:  0.002427355125032288 Accuracy:  tensor(0.9994, device='cuda:0')


100%|██████████| 1667/1667 [15:30<00:00,  1.79it/s]
100%|██████████| 417/417 [01:17<00:00,  5.39it/s]


Epoch: 11 - valid Loss: 0.023463 - valid_acc : 0.995204
1.9105687528581995e-06


 30%|██▉       | 500/1667 [04:39<10:51,  1.79it/s]

Batch Loss:  0.0007503787527311942 Accuracy:  tensor(0.9998, device='cuda:0')


 60%|█████▉    | 1000/1667 [09:18<06:12,  1.79it/s]

Batch Loss:  0.0012745623545088165 Accuracy:  tensor(0.9997, device='cuda:0')


 90%|████████▉ | 1500/1667 [13:57<01:33,  1.79it/s]

Batch Loss:  0.0010137453177570326 Accuracy:  tensor(0.9998, device='cuda:0')


100%|██████████| 1667/1667 [15:30<00:00,  1.79it/s]
100%|██████████| 417/417 [01:17<00:00,  5.38it/s]


Epoch: 12 - valid Loss: 0.025067 - valid_acc : 0.995604
8.648862025249527e-07
model save, model val acc :  0.995603517186251
best_models size :  20


 30%|██▉       | 500/1667 [04:39<10:52,  1.79it/s]

Batch Loss:  0.002066898849836434 Accuracy:  tensor(0.9995, device='cuda:0')


 60%|█████▉    | 1000/1667 [09:18<06:12,  1.79it/s]

Batch Loss:  0.0012557459947202005 Accuracy:  tensor(0.9998, device='cuda:0')


 90%|████████▉ | 1500/1667 [13:56<01:33,  1.79it/s]

Batch Loss:  0.0012858992081589047 Accuracy:  tensor(0.9997, device='cuda:0')


100%|██████████| 1667/1667 [15:29<00:00,  1.79it/s]
100%|██████████| 417/417 [01:17<00:00,  5.38it/s]


Epoch: 13 - valid Loss: 0.025061 - valid_acc : 0.995803
2.1861109108858526e-07


 30%|██▉       | 500/1667 [04:39<10:51,  1.79it/s]

Batch Loss:  0.0005892397887291736 Accuracy:  tensor(0.9998, device='cuda:0')


 60%|█████▉    | 1000/1667 [09:17<06:12,  1.79it/s]

Batch Loss:  0.0014387436962879291 Accuracy:  tensor(0.9996, device='cuda:0')


 90%|████████▉ | 1500/1667 [13:57<01:33,  1.79it/s]

Batch Loss:  0.0012863951655793548 Accuracy:  tensor(0.9996, device='cuda:0')


100%|██████████| 1667/1667 [15:30<00:00,  1.79it/s]
100%|██████████| 417/417 [01:17<00:00,  5.39it/s]


Epoch: 14 - valid Loss: 0.025188 - valid_acc : 0.996003
0.0


 30%|██▉       | 500/1667 [04:39<10:51,  1.79it/s]

Batch Loss:  0.10167889298172668 Accuracy:  tensor(0.9768, device='cuda:0')


 60%|█████▉    | 1000/1667 [09:18<06:12,  1.79it/s]

Batch Loss:  0.08368684410909191 Accuracy:  tensor(0.9796, device='cuda:0')


 90%|████████▉ | 1500/1667 [13:57<01:33,  1.79it/s]

Batch Loss:  0.06907636900335395 Accuracy:  tensor(0.9826, device='cuda:0')


100%|██████████| 1667/1667 [15:30<00:00,  1.79it/s]
100%|██████████| 417/417 [01:17<00:00,  5.38it/s]


Epoch: 0 - valid Loss: 0.041024 - valid_acc : 0.987810
1.978269358317465e-05
model save, model val acc :  0.9878097521982413
best_models size :  21


 30%|██▉       | 500/1667 [04:39<10:52,  1.79it/s]

Batch Loss:  0.05599273914156947 Accuracy:  tensor(0.9840, device='cuda:0')


 60%|█████▉    | 1000/1667 [09:18<06:12,  1.79it/s]

Batch Loss:  0.05258230571210151 Accuracy:  tensor(0.9859, device='cuda:0')


 90%|████████▉ | 1500/1667 [13:57<01:33,  1.79it/s]

Batch Loss:  0.04631219951317568 Accuracy:  tensor(0.9873, device='cuda:0')


100%|██████████| 1667/1667 [15:30<00:00,  1.79it/s]
100%|██████████| 417/417 [01:17<00:00,  5.39it/s]


Epoch: 1 - valid Loss: 0.015953 - valid_acc : 0.995004
1.913766807666949e-05


 30%|██▉       | 500/1667 [04:39<10:51,  1.79it/s]

Batch Loss:  0.030942543455865234 Accuracy:  tensor(0.9922, device='cuda:0')


 60%|█████▉    | 1000/1667 [09:18<06:12,  1.79it/s]

Batch Loss:  0.0349568651515292 Accuracy:  tensor(0.9917, device='cuda:0')


 90%|████████▉ | 1500/1667 [13:57<01:33,  1.79it/s]

Batch Loss:  0.03854834093357203 Accuracy:  tensor(0.9901, device='cuda:0')


100%|██████████| 1667/1667 [15:30<00:00,  1.79it/s]
100%|██████████| 417/417 [01:17<00:00,  5.39it/s]


Epoch: 2 - valid Loss: 0.012515 - valid_acc : 0.996803
1.8093123452515122e-05


 30%|██▉       | 500/1667 [04:39<10:52,  1.79it/s]

Batch Loss:  0.03614470838918351 Accuracy:  tensor(0.9907, device='cuda:0')


 60%|█████▉    | 1000/1667 [09:18<06:12,  1.79it/s]

Batch Loss:  0.04366836675017839 Accuracy:  tensor(0.9877, device='cuda:0')


 90%|████████▉ | 1500/1667 [13:57<01:33,  1.79it/s]

Batch Loss:  0.04286417538141055 Accuracy:  tensor(0.9877, device='cuda:0')


100%|██████████| 1667/1667 [15:30<00:00,  1.79it/s]
100%|██████████| 417/417 [01:17<00:00,  5.39it/s]


Epoch: 3 - valid Loss: 0.015436 - valid_acc : 0.996603
1.6694729518706965e-05
model save, model val acc :  0.9966027178257394
best_models size :  22


 30%|██▉       | 500/1667 [04:39<10:52,  1.79it/s]

Batch Loss:  0.040272688589786415 Accuracy:  tensor(0.9895, device='cuda:0')


 60%|█████▉    | 1000/1667 [09:18<06:12,  1.79it/s]

Batch Loss:  0.0337669373111421 Accuracy:  tensor(0.9914, device='cuda:0')


 90%|████████▉ | 1500/1667 [13:57<01:33,  1.79it/s]

Batch Loss:  0.032251239682382825 Accuracy:  tensor(0.9916, device='cuda:0')


100%|██████████| 1667/1667 [15:30<00:00,  1.79it/s]
100%|██████████| 417/417 [01:17<00:00,  5.38it/s]


Epoch: 4 - valid Loss: 0.016714 - valid_acc : 0.994861
1.500362715997331e-05


 30%|██▉       | 500/1667 [04:39<10:53,  1.79it/s]

Batch Loss:  0.027220913709839804 Accuracy:  tensor(0.9937, device='cuda:0')


 60%|█████▉    | 1000/1667 [09:18<06:12,  1.79it/s]

Batch Loss:  0.02873169143398991 Accuracy:  tensor(0.9929, device='cuda:0')


 90%|████████▉ | 1500/1667 [13:57<01:33,  1.79it/s]

Batch Loss:  0.025562892219759912 Accuracy:  tensor(0.9935, device='cuda:0')


100%|██████████| 1667/1667 [15:30<00:00,  1.79it/s]
100%|██████████| 417/417 [01:17<00:00,  5.39it/s]


Epoch: 5 - valid Loss: 0.016231 - valid_acc : 0.994604
1.3093755122671206e-05


 30%|██▉       | 500/1667 [04:39<10:52,  1.79it/s]

Batch Loss:  0.008509161015637801 Accuracy:  tensor(0.9972, device='cuda:0')


 60%|█████▉    | 1000/1667 [09:18<06:12,  1.79it/s]

Batch Loss:  0.018312241436789917 Accuracy:  tensor(0.9945, device='cuda:0')


 90%|████████▉ | 1500/1667 [13:57<01:33,  1.79it/s]

Batch Loss:  0.019123337495029168 Accuracy:  tensor(0.9945, device='cuda:0')


100%|██████████| 1667/1667 [15:30<00:00,  1.79it/s]
100%|██████████| 417/417 [01:17<00:00,  5.39it/s]


Epoch: 6 - valid Loss: 0.013722 - valid_acc : 0.995604
1.1048617248783491e-05
model save, model val acc :  0.995603517186251
best_models size :  23


 30%|██▉       | 500/1667 [04:38<10:51,  1.79it/s]

Batch Loss:  0.012888691210901016 Accuracy:  tensor(0.9963, device='cuda:0')


 60%|█████▉    | 1000/1667 [09:17<06:11,  1.79it/s]

Batch Loss:  0.013461059492008645 Accuracy:  tensor(0.9961, device='cuda:0')


 90%|████████▉ | 1500/1667 [13:55<01:33,  1.79it/s]

Batch Loss:  0.012594148132072103 Accuracy:  tensor(0.9967, device='cuda:0')


100%|██████████| 1667/1667 [15:28<00:00,  1.79it/s]
100%|██████████| 417/417 [01:17<00:00,  5.41it/s]


Epoch: 7 - valid Loss: 0.016514 - valid_acc : 0.997202
8.957631502717603e-06


 30%|██▉       | 500/1667 [04:38<10:51,  1.79it/s]

Batch Loss:  0.010451312467077515 Accuracy:  tensor(0.9958, device='cuda:0')


 60%|█████▉    | 1000/1667 [09:17<06:11,  1.79it/s]

Batch Loss:  0.011224267742560187 Accuracy:  tensor(0.9962, device='cuda:0')


 90%|████████▉ | 1500/1667 [13:55<01:33,  1.79it/s]

Batch Loss:  0.010605651746562217 Accuracy:  tensor(0.9969, device='cuda:0')


100%|██████████| 1667/1667 [15:28<00:00,  1.80it/s]
100%|██████████| 417/417 [01:17<00:00,  5.41it/s]


Epoch: 8 - valid Loss: 0.009135 - valid_acc : 0.996803
6.91222041955317e-06


 30%|██▉       | 500/1667 [04:38<10:50,  1.79it/s]

Batch Loss:  0.0075440556064422706 Accuracy:  tensor(0.9985, device='cuda:0')


 60%|█████▉    | 1000/1667 [09:17<06:12,  1.79it/s]

Batch Loss:  0.006178565314912703 Accuracy:  tensor(0.9985, device='cuda:0')


 90%|████████▉ | 1500/1667 [13:55<01:32,  1.80it/s]

Batch Loss:  0.006169767875044878 Accuracy:  tensor(0.9986, device='cuda:0')


100%|██████████| 1667/1667 [15:28<00:00,  1.79it/s]
100%|██████████| 417/417 [01:17<00:00,  5.41it/s]


Epoch: 9 - valid Loss: 0.007021 - valid_acc : 0.998201
5.001813909013246e-06
model save, model val acc :  0.9982014388489209
best_models size :  24


 30%|██▉       | 500/1667 [04:38<10:50,  1.79it/s]

Batch Loss:  0.006237055938108824 Accuracy:  tensor(0.9987, device='cuda:0')


 60%|█████▉    | 1000/1667 [09:17<06:12,  1.79it/s]

Batch Loss:  0.00469598451546699 Accuracy:  tensor(0.9989, device='cuda:0')


 90%|████████▉ | 1500/1667 [13:55<01:33,  1.79it/s]

Batch Loss:  0.004333142660490315 Accuracy:  tensor(0.9990, device='cuda:0')


100%|██████████| 1667/1667 [15:28<00:00,  1.79it/s]
100%|██████████| 417/417 [01:17<00:00,  5.40it/s]


Epoch: 10 - valid Loss: 0.007687 - valid_acc : 0.997402
3.309939181436199e-06


 30%|██▉       | 500/1667 [04:38<10:51,  1.79it/s]

Batch Loss:  0.0032741082666689183 Accuracy:  tensor(0.9992, device='cuda:0')


 60%|█████▉    | 1000/1667 [09:17<06:11,  1.79it/s]

Batch Loss:  0.003409036817472952 Accuracy:  tensor(0.9992, device='cuda:0')


 90%|████████▉ | 1500/1667 [13:55<01:33,  1.79it/s]

Batch Loss:  0.00316807723340753 Accuracy:  tensor(0.9993, device='cuda:0')


100%|██████████| 1667/1667 [15:28<00:00,  1.79it/s]
100%|██████████| 417/417 [01:16<00:00,  5.42it/s]


Epoch: 11 - valid Loss: 0.004402 - valid_acc : 0.997802
1.9105687528581995e-06


 30%|██▉       | 500/1667 [04:38<10:51,  1.79it/s]

Batch Loss:  0.002680709287425998 Accuracy:  tensor(0.9993, device='cuda:0')


 60%|█████▉    | 1000/1667 [09:17<06:12,  1.79it/s]

Batch Loss:  0.002283078806422054 Accuracy:  tensor(0.9994, device='cuda:0')


 90%|████████▉ | 1500/1667 [13:55<01:33,  1.79it/s]

Batch Loss:  0.0017268269836216252 Accuracy:  tensor(0.9996, device='cuda:0')


100%|██████████| 1667/1667 [15:28<00:00,  1.79it/s]
100%|██████████| 417/417 [01:17<00:00,  5.41it/s]


Epoch: 12 - valid Loss: 0.003589 - valid_acc : 0.998002
8.648862025249527e-07
model save, model val acc :  0.9980015987210231
best_models size :  25


 30%|██▉       | 500/1667 [04:38<10:50,  1.79it/s]

Batch Loss:  0.0019199855608057986 Accuracy:  tensor(0.9997, device='cuda:0')


 60%|█████▉    | 1000/1667 [09:17<06:12,  1.79it/s]

Batch Loss:  0.0011453694068968617 Accuracy:  tensor(0.9998, device='cuda:0')


 90%|████████▉ | 1500/1667 [13:55<01:33,  1.79it/s]

Batch Loss:  0.0008587540164116945 Accuracy:  tensor(0.9998, device='cuda:0')


100%|██████████| 1667/1667 [15:28<00:00,  1.79it/s]
100%|██████████| 417/417 [01:17<00:00,  5.41it/s]


Epoch: 13 - valid Loss: 0.002898 - valid_acc : 0.998801
2.1861109108858526e-07


 30%|██▉       | 500/1667 [04:38<10:50,  1.79it/s]

Batch Loss:  0.0017676565324436524 Accuracy:  tensor(0.9998, device='cuda:0')


 60%|█████▉    | 1000/1667 [09:17<06:12,  1.79it/s]

Batch Loss:  0.0011603254877663857 Accuracy:  tensor(0.9998, device='cuda:0')


 90%|████████▉ | 1500/1667 [13:56<01:33,  1.79it/s]

Batch Loss:  0.00092947426380609 Accuracy:  tensor(0.9998, device='cuda:0')


100%|██████████| 1667/1667 [15:29<00:00,  1.79it/s]
100%|██████████| 417/417 [01:17<00:00,  5.40it/s]

Epoch: 14 - valid Loss: 0.002886 - valid_acc : 0.998801
0.0



