# 2-7 学習と検証の実施

## プログラムに実装
前節までに実装した内容を用いて学習プログラムを実装する．
流れは次の通り．

1. DataLoader の実装
1. ネットワークモデルの作成
1. 損失関数の定義
1. 最適化手法の設定
1. 学習・検証の実施

## DataLoader
2.2，2.3節の実装を用いる．

In [1]:
import time
import pandas as pd
import torch
import torch.utils.data as data
from torch import nn
from utils.dataloader import od_collate_fn, make_VOC_dataloader
from utils.dataset import make_datapath_list, VOCDataset, DataTransform, Anno_xml2list
from utils.forward import SSD
from utils.loss import MultiBoxLoss

In [2]:
# Collect the image / annotation file paths returned for the train and val splits
rootpath = "./data/VOCdevkit/VOC2012/"
(
    train_img_list,
    train_anno_list,
    val_img_list,
    val_anno_list,
) = make_datapath_list(rootpath)

# Settings shared by the training and validation datasets
voc_classes = [
    'aeroplane', 'bicycle', 'bird', 'boat', 'bottle',
    'bus', 'car', 'cat', 'chair', 'cow',
    'diningtable', 'dog', 'horse', 'motorbike', 'person',
    'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor',
]
color_mean = (104, 117, 123)  # mean colour, in BGR order
input_size = 300  # input image size is 300x300

# Datasets (each one gets its own transform instances)
train_dataset = VOCDataset(
    train_img_list,
    train_anno_list,
    phase="train",
    transform=DataTransform(input_size, color_mean),
    transform_anno=Anno_xml2list(voc_classes),
)
val_dataset = VOCDataset(
    val_img_list,
    val_anno_list,
    phase="val",
    transform=DataTransform(input_size, color_mean),
    transform_anno=Anno_xml2list(voc_classes),
)

# Data loaders; od_collate_fn is the project's custom collate function
batch_size = 8
train_dataloader = data.DataLoader(
    train_dataset,
    batch_size=batch_size,
    shuffle=True,
    collate_fn=od_collate_fn,
)
val_dataloader = data.DataLoader(
    val_dataset,
    batch_size=batch_size,
    shuffle=True,
    collate_fn=od_collate_fn,
)

# Bundle both loaders in a dict keyed by phase name
dataloaders_dict = {
    "train": train_dataloader,
    "val": val_dataloader,
}

## ネットワークモデルの作成
2.4節で実装したネットワークを用いる．
vgg モジュールの初期値には ImageNet で事前に学習した結合パラメータを用いる．
ここでは，weights/vgg16_reducedfc.pth にダウンロード済みのものを用いる．
それ以外のモジュールの初期値は He の初期値を用いる．
He の初期値では，各層の入力ユニット数を input_n（畳み込み層では 入力チャネル数 × カーネルの高さ × カーネルの幅）として，結合パラメータの初期値に平均 0，分散 2/input_n（標準偏差 $\sqrt{2/\mathrm{input\_n}}$）のガウス分布に従う乱数を用いる．

In [3]:
# SSD300 configuration
ssd_cfg = {
    'num_classes': 21,  # total number of classes, including the background class
    'input_size': 300,  # input image size
    'bbox_aspect_num': [4, 6, 6, 6, 4, 4],      # number of default-box aspect-ratio variants to output
    'feature_maps': [38, 19, 10, 5, 3, 1],      # image size of each source
    'steps': [8, 16, 32, 64, 100, 300],         # determines the default-box sizes
    'min_sizes': [30, 60, 111, 162, 213, 264],  # determines the default-box sizes
    'max_sizes': [60, 111, 162, 213, 264, 315], # determines the default-box sizes
    'aspect_ratios': [[2], [2, 3], [2, 3], [2, 3], [2], [2]]
}

# SSD network model (training phase)
net = SSD(phase="train", cfg=ssd_cfg)

# Set the initial values of the SSD weights.
# Load the initial weights of the vgg module.
# map_location="cpu" keeps the load working on CPU-only machines even if the
# checkpoint was saved from a GPU; load_state_dict then copies the values
# into the module's own parameters.
vgg_weights = torch.load("./weights/vgg16_reducedfc.pth", map_location="cpu")
net.vgg.load_state_dict(vgg_weights)

# SSD のその他のネットワークの重みは He の初期値で初期化
def weights_init(m):
    """Initialise a Conv2d module in place; leave every other module untouched.

    The weight is filled with ``nn.init.kaiming_normal_`` (He initialisation)
    and the bias, when the layer has one, is set to 0. Intended to be passed
    to ``Module.apply``.

    Args:
        m: The module to (possibly) initialise.
    """
    # Only convolution layers are re-initialised
    if not isinstance(m, nn.Conv2d):
        return

    nn.init.kaiming_normal_(m.weight.data)

    bias = m.bias
    if bias is not None:  # the layer was built with a bias term
        nn.init.constant_(bias, 0.0)
            
# Apply the He initialisation to the extras, loc and conf sub-networks
for _submodule in (net.extras, net.loc, net.conf):
    _submodule.apply(weights_init)

# Use the first GPU when CUDA is available, otherwise fall back to the CPU
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
print("使用デバイス：", device)
print("ネットワーク設定完了：学習済みの重みをロードしました")

使用デバイス： cuda:0
ネットワーク設定完了：学習済みの重みをロードしました


## 損失関数と最適化手法の設定

In [4]:
# Loss function
criterion = MultiBoxLoss(
    jaccard_threshold=0.5,
    neg_pos=3,
    device=device,
)

# Optimisation method: SGD with momentum and weight decay
optimizer = torch.optim.SGD(
    net.parameters(),
    lr=1e-3,
    momentum=0.9,
    weight_decay=5e-4,
)

## 学習と検証の実施
学習と検証を行う関数を定義する．
検証は 10 epoch に 1 回だけ行い，学習と検証の loss の値は epoch ごとに log_output.csv に保存する（検証を行わない epoch の検証 loss は 0 として記録される）．
ネットワークの結合パラメータも 10 epoch ごとに保存する．

In [5]:
# モデルを学習させる関数を作成
def train_model(net, dataloaders_dict, criterion, optimizer, num_epochs):
    """Train ``net``, validating and checkpointing every 10th epoch.

    Runs exactly ``num_epochs`` epochs. Every epoch performs a training pass
    over ``dataloaders_dict["train"]``. On every 10th epoch a validation pass
    over ``dataloaders_dict["val"]`` is also run and the network's
    ``state_dict`` is saved to ``weights/ssd300_<epoch>.pth``. After each
    epoch the accumulated per-epoch losses are rewritten to
    ``log_output.csv``.

    Args:
        net: Network to train; it is moved to the selected device and called
            as ``net(images)``.
        dataloaders_dict: Mapping with ``"train"`` and ``"val"`` keys whose
            values yield ``(images, targets)`` mini-batches, where
            ``targets`` is a list of tensors.
        criterion: Callable invoked as ``criterion(outputs, targets)`` and
            returning the pair ``(loss_l, loss_c)``; their sum is optimised.
        optimizer: Optimizer used to update the network parameters.
        num_epochs: Number of epochs to run.

    Returns:
        None.
    """
    # Use the GPU when available
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print("使用デバイス：", device)

    # Move the network to the selected device
    net.to(device)

    # Speeds things up when the network is reasonably fixed
    torch.backends.cudnn.benchmark = True

    # Counters
    iteration = 1           # global training-iteration counter (never reset)
    epoch_train_loss = 0.0  # sum of the training losses over the current epoch
    epoch_val_loss = 0.0    # sum of the validation losses over the current epoch
    logs = []

    # Epoch loop. range(num_epochs) runs exactly num_epochs epochs;
    # range(num_epochs + 1) would run one extra ("Epoch 51/50").
    for epoch in range(num_epochs):
        # Timer for the "10 iterations" report; restarted at every epoch
        t_iter_start = time.time()

        print("-------------------")
        print('Epoch {}/{}'.format(epoch + 1, num_epochs))
        print("-------------------")

        # Training phase, then (every 10th epoch) validation phase
        for phase in ["train", "val"]:
            if phase == "train":
                net.train()  # put the model in training mode
                print("-------------------")
                print(" (train) ")
            else:
                if (epoch + 1) % 10 == 0:
                    net.eval()  # put the model in evaluation mode
                    print("-------------------")
                    print(" (val) ")
                else:
                    # Validation is skipped on all other epochs
                    continue

            # Loop over the mini-batches of the current phase
            for images, targets in dataloaders_dict[phase]:
                # Send the data to the selected device
                images = images.to(device)
                targets = [ann.to(device) for ann in targets]  # move each tensor in the list

                # Reset the gradients
                optimizer.zero_grad()

                # Gradients are tracked only during training
                with torch.set_grad_enabled(phase == 'train'):
                    # Forward pass
                    outputs = net(images)

                    # Loss computation
                    loss_l, loss_c = criterion(outputs, targets)
                    loss = loss_l + loss_c

                    if phase == "train":
                        # Back-propagation
                        loss.backward()
                        # Clamp every gradient value to [-2.0, 2.0]; overly
                        # large gradients make training unstable
                        nn.utils.clip_grad_value_(net.parameters(), clip_value=2.0)
                        # Parameter update
                        optimizer.step()

                        # Report the loss once every 10 iterations
                        if iteration % 10 == 0:
                            t_iter_finish = time.time()
                            duration = t_iter_finish - t_iter_start
                            print('iteration {} || Loss: {:.4f} || 10iter:{:.4f} sec.'.format(iteration, loss.item(), duration))
                            t_iter_start = time.time()

                        epoch_train_loss += loss.item()
                        iteration += 1
                    else:
                        # Validation: only accumulate the loss
                        epoch_val_loss += loss.item()

        # Per-epoch summary; the validation loss stays 0.0 on epochs where
        # validation was skipped
        print("-------------------")
        print('epoch {} || Epoch_TRAIN_Loss:{:.4f} || Epoch_VAL_Loss:{:.4f}'.format(epoch + 1, epoch_train_loss, epoch_val_loss))

        # Save the log (the whole CSV is rewritten every epoch)
        log_epoch = {"epoch": epoch + 1, "train_loss": epoch_train_loss, "val_loss": epoch_val_loss}
        logs.append(log_epoch)
        df = pd.DataFrame(logs)
        df.to_csv("log_output.csv")

        # Reset the loss accumulators for the next epoch
        epoch_train_loss = 0.0
        epoch_val_loss = 0.0

        # Save the network weights every 10 epochs
        if (epoch + 1) % 10 == 0:
            torch.save(net.state_dict(), 'weights/ssd300_' + str(epoch+1) + '.pth')

In [6]:
# Number of epochs requested for the training run
num_epochs = 50

# Launch training and validation
train_model(
    net,
    dataloaders_dict,
    criterion,
    optimizer,
    num_epochs=num_epochs,
)

使用デバイス： cuda:0
-------------------
Epoch 1/50
-------------------
-------------------
 (train) 
iteration 10 || Loss: 15.2293 || 10iter:11.3841 sec.
iteration 20 || Loss: 13.6413 || 10iter:7.8792 sec.
iteration 30 || Loss: 14.4254 || 10iter:7.7383 sec.
iteration 40 || Loss: 9.7031 || 10iter:7.7602 sec.
iteration 50 || Loss: 11.6924 || 10iter:7.7592 sec.
iteration 60 || Loss: 10.1063 || 10iter:7.7779 sec.
iteration 70 || Loss: 10.8501 || 10iter:7.8928 sec.
iteration 80 || Loss: 7.9851 || 10iter:7.8083 sec.
iteration 90 || Loss: 9.8848 || 10iter:8.0706 sec.
iteration 100 || Loss: 9.9436 || 10iter:8.2000 sec.
iteration 110 || Loss: 8.4661 || 10iter:7.9685 sec.
iteration 120 || Loss: 8.2492 || 10iter:7.8529 sec.
iteration 130 || Loss: 9.4068 || 10iter:8.0069 sec.
iteration 140 || Loss: 7.9618 || 10iter:7.8789 sec.
iteration 150 || Loss: 9.2625 || 10iter:7.8460 sec.
iteration 160 || Loss: 10.8993 || 10iter:7.9483 sec.
iteration 170 || Loss: 8.4994 || 10iter:8.1447 sec.
iteration 180 || Loss

iteration 1490 || Loss: 6.5225 || 10iter:8.0989 sec.
iteration 1500 || Loss: 7.7073 || 10iter:7.9287 sec.
iteration 1510 || Loss: 7.0649 || 10iter:8.2696 sec.
iteration 1520 || Loss: 6.3736 || 10iter:8.0390 sec.
iteration 1530 || Loss: 6.4012 || 10iter:8.2679 sec.
iteration 1540 || Loss: 8.5400 || 10iter:8.0381 sec.
iteration 1550 || Loss: 8.2234 || 10iter:7.9244 sec.
iteration 1560 || Loss: 8.3384 || 10iter:7.8214 sec.
iteration 1570 || Loss: 8.7888 || 10iter:7.9256 sec.
iteration 1580 || Loss: 8.1114 || 10iter:7.8121 sec.
iteration 1590 || Loss: 6.9493 || 10iter:7.9816 sec.
iteration 1600 || Loss: 8.8552 || 10iter:7.8491 sec.
iteration 1610 || Loss: 6.4098 || 10iter:7.7929 sec.
iteration 1620 || Loss: 10.6343 || 10iter:7.8728 sec.
iteration 1630 || Loss: 9.3367 || 10iter:7.9640 sec.
iteration 1640 || Loss: 7.4395 || 10iter:7.9645 sec.
iteration 1650 || Loss: 9.3289 || 10iter:7.9667 sec.
iteration 1660 || Loss: 9.3559 || 10iter:7.9214 sec.
iteration 1670 || Loss: 9.8283 || 10iter:7.88

iteration 2970 || Loss: 6.7068 || 10iter:7.9372 sec.
iteration 2980 || Loss: 8.8598 || 10iter:8.0227 sec.
iteration 2990 || Loss: 8.4985 || 10iter:7.9660 sec.
iteration 3000 || Loss: 6.6875 || 10iter:7.9178 sec.
iteration 3010 || Loss: 7.6920 || 10iter:7.8602 sec.
iteration 3020 || Loss: 7.0075 || 10iter:7.8251 sec.
iteration 3030 || Loss: 9.7056 || 10iter:7.9332 sec.
iteration 3040 || Loss: 6.4420 || 10iter:7.8441 sec.
iteration 3050 || Loss: 8.9670 || 10iter:7.8265 sec.
iteration 3060 || Loss: 5.1964 || 10iter:7.8961 sec.
iteration 3070 || Loss: 9.4408 || 10iter:7.9173 sec.
iteration 3080 || Loss: 7.6450 || 10iter:7.8456 sec.
iteration 3090 || Loss: 10.1844 || 10iter:7.8211 sec.
iteration 3100 || Loss: 6.3175 || 10iter:7.8938 sec.
iteration 3110 || Loss: 9.1922 || 10iter:7.8135 sec.
iteration 3120 || Loss: 7.8163 || 10iter:7.8980 sec.
iteration 3130 || Loss: 6.9840 || 10iter:7.7785 sec.
iteration 3140 || Loss: 6.4152 || 10iter:7.8841 sec.
iteration 3150 || Loss: 7.1860 || 10iter:7.83

iteration 4460 || Loss: 6.0655 || 10iter:7.9440 sec.
iteration 4470 || Loss: 7.5246 || 10iter:7.9462 sec.
iteration 4480 || Loss: 7.0708 || 10iter:7.8630 sec.
iteration 4490 || Loss: 8.2861 || 10iter:7.9180 sec.
iteration 4500 || Loss: 8.7318 || 10iter:7.8754 sec.
iteration 4510 || Loss: 7.2961 || 10iter:7.7836 sec.
iteration 4520 || Loss: 7.0793 || 10iter:7.7525 sec.
iteration 4530 || Loss: 7.1745 || 10iter:7.8311 sec.
iteration 4540 || Loss: 6.6808 || 10iter:7.8763 sec.
iteration 4550 || Loss: 6.1350 || 10iter:7.8630 sec.
iteration 4560 || Loss: 6.9599 || 10iter:7.9714 sec.
iteration 4570 || Loss: 5.9796 || 10iter:7.8952 sec.
iteration 4580 || Loss: 6.0352 || 10iter:7.8581 sec.
iteration 4590 || Loss: 6.7193 || 10iter:7.8452 sec.
iteration 4600 || Loss: 6.4083 || 10iter:7.9866 sec.
iteration 4610 || Loss: 6.5827 || 10iter:7.8003 sec.
iteration 4620 || Loss: 6.7247 || 10iter:7.8533 sec.
iteration 4630 || Loss: 6.0411 || 10iter:7.8736 sec.
iteration 4640 || Loss: 7.6450 || 10iter:7.902

iteration 5950 || Loss: 6.6091 || 10iter:7.8000 sec.
iteration 5960 || Loss: 6.3295 || 10iter:7.8808 sec.
iteration 5970 || Loss: 6.2572 || 10iter:7.9762 sec.
iteration 5980 || Loss: 5.5530 || 10iter:7.8583 sec.
iteration 5990 || Loss: 6.0298 || 10iter:7.8700 sec.
iteration 6000 || Loss: 7.1839 || 10iter:7.8505 sec.
iteration 6010 || Loss: 5.3397 || 10iter:7.8919 sec.
iteration 6020 || Loss: 7.2483 || 10iter:7.8508 sec.
iteration 6030 || Loss: 5.5321 || 10iter:7.8559 sec.
iteration 6040 || Loss: 6.4940 || 10iter:7.7316 sec.
iteration 6050 || Loss: 7.8727 || 10iter:7.8447 sec.
iteration 6060 || Loss: 5.4328 || 10iter:7.9060 sec.
iteration 6070 || Loss: 6.0718 || 10iter:7.7242 sec.
iteration 6080 || Loss: 6.2748 || 10iter:7.7754 sec.
iteration 6090 || Loss: 5.6395 || 10iter:7.8277 sec.
iteration 6100 || Loss: 5.1661 || 10iter:7.9305 sec.
iteration 6110 || Loss: 4.8291 || 10iter:7.8224 sec.
iteration 6120 || Loss: 5.2806 || 10iter:7.8562 sec.
iteration 6130 || Loss: 7.6413 || 10iter:7.820

iteration 7430 || Loss: 5.6898 || 10iter:7.8829 sec.
iteration 7440 || Loss: 5.9564 || 10iter:7.9191 sec.
iteration 7450 || Loss: 5.7991 || 10iter:7.8485 sec.
iteration 7460 || Loss: 5.6913 || 10iter:7.9787 sec.
iteration 7470 || Loss: 4.9518 || 10iter:7.8531 sec.
iteration 7480 || Loss: 5.5692 || 10iter:7.7846 sec.
iteration 7490 || Loss: 5.8146 || 10iter:7.8136 sec.
iteration 7500 || Loss: 7.2918 || 10iter:7.7594 sec.
iteration 7510 || Loss: 5.3724 || 10iter:7.9430 sec.
iteration 7520 || Loss: 5.8719 || 10iter:7.9501 sec.
iteration 7530 || Loss: 7.1959 || 10iter:7.9535 sec.
iteration 7540 || Loss: 6.2414 || 10iter:7.9816 sec.
iteration 7550 || Loss: 5.9936 || 10iter:7.9251 sec.
iteration 7560 || Loss: 5.7473 || 10iter:7.8961 sec.
iteration 7570 || Loss: 6.6906 || 10iter:7.8989 sec.
iteration 7580 || Loss: 6.4399 || 10iter:7.9877 sec.
iteration 7590 || Loss: 5.3701 || 10iter:7.9196 sec.
iteration 7600 || Loss: 6.8769 || 10iter:7.8842 sec.
iteration 7610 || Loss: 5.3424 || 10iter:7.939

iteration 8920 || Loss: 4.7686 || 10iter:7.8349 sec.
iteration 8930 || Loss: 5.8658 || 10iter:7.9068 sec.
iteration 8940 || Loss: 5.8775 || 10iter:7.8379 sec.
iteration 8950 || Loss: 6.3655 || 10iter:7.8975 sec.
iteration 8960 || Loss: 4.3361 || 10iter:7.9611 sec.
iteration 8970 || Loss: 6.7494 || 10iter:8.0214 sec.
iteration 8980 || Loss: 6.0154 || 10iter:7.9646 sec.
iteration 8990 || Loss: 7.0313 || 10iter:7.7731 sec.
iteration 9000 || Loss: 4.8700 || 10iter:7.8002 sec.
iteration 9010 || Loss: 6.7328 || 10iter:7.9028 sec.
iteration 9020 || Loss: 6.2732 || 10iter:7.8599 sec.
iteration 9030 || Loss: 8.0023 || 10iter:7.8644 sec.
iteration 9040 || Loss: 5.7469 || 10iter:7.9703 sec.
iteration 9050 || Loss: 6.2861 || 10iter:7.8972 sec.
iteration 9060 || Loss: 6.0497 || 10iter:7.8365 sec.
iteration 9070 || Loss: 6.2982 || 10iter:7.7494 sec.
iteration 9080 || Loss: 6.1102 || 10iter:7.8248 sec.
iteration 9090 || Loss: 5.5262 || 10iter:7.9625 sec.
iteration 9100 || Loss: 6.0190 || 10iter:7.853

iteration 10400 || Loss: 5.8411 || 10iter:7.9178 sec.
iteration 10410 || Loss: 6.0003 || 10iter:7.7887 sec.
iteration 10420 || Loss: 5.6101 || 10iter:7.9565 sec.
iteration 10430 || Loss: 5.3410 || 10iter:7.8057 sec.
iteration 10440 || Loss: 4.5810 || 10iter:7.8737 sec.
iteration 10450 || Loss: 6.1474 || 10iter:7.8924 sec.
iteration 10460 || Loss: 4.7376 || 10iter:7.9575 sec.
iteration 10470 || Loss: 7.8178 || 10iter:7.8351 sec.
iteration 10480 || Loss: 5.6750 || 10iter:7.8086 sec.
iteration 10490 || Loss: 5.2591 || 10iter:7.7707 sec.
iteration 10500 || Loss: 5.0169 || 10iter:7.8794 sec.
iteration 10510 || Loss: 4.8718 || 10iter:7.8127 sec.
iteration 10520 || Loss: 4.7549 || 10iter:8.0444 sec.
iteration 10530 || Loss: 5.6662 || 10iter:7.9557 sec.
iteration 10540 || Loss: 5.0258 || 10iter:7.9056 sec.
iteration 10550 || Loss: 4.6254 || 10iter:7.9490 sec.
iteration 10560 || Loss: 5.9840 || 10iter:7.8090 sec.
iteration 10570 || Loss: 4.4983 || 10iter:7.9177 sec.
iteration 10580 || Loss: 5.9

iteration 11860 || Loss: 4.4885 || 10iter:7.8595 sec.
iteration 11870 || Loss: 4.7477 || 10iter:7.8501 sec.
iteration 11880 || Loss: 5.4381 || 10iter:7.8099 sec.
iteration 11890 || Loss: 6.0897 || 10iter:7.8986 sec.
iteration 11900 || Loss: 6.1897 || 10iter:7.9715 sec.
iteration 11910 || Loss: 5.0142 || 10iter:7.8489 sec.
iteration 11920 || Loss: 5.9279 || 10iter:7.8709 sec.
iteration 11930 || Loss: 4.3580 || 10iter:7.8565 sec.
iteration 11940 || Loss: 4.4612 || 10iter:7.7943 sec.
iteration 11950 || Loss: 5.4573 || 10iter:7.8569 sec.
iteration 11960 || Loss: 4.6470 || 10iter:7.8032 sec.
iteration 11970 || Loss: 5.6565 || 10iter:7.7712 sec.
iteration 11980 || Loss: 5.2328 || 10iter:7.8011 sec.
iteration 11990 || Loss: 5.0861 || 10iter:7.8456 sec.
iteration 12000 || Loss: 4.7411 || 10iter:7.8671 sec.
iteration 12010 || Loss: 6.5140 || 10iter:7.9722 sec.
iteration 12020 || Loss: 5.2929 || 10iter:7.8908 sec.
iteration 12030 || Loss: 4.8673 || 10iter:7.7895 sec.
iteration 12040 || Loss: 5.9

iteration 13320 || Loss: 4.9781 || 10iter:7.7922 sec.
iteration 13330 || Loss: 5.5015 || 10iter:7.9145 sec.
iteration 13340 || Loss: 4.8557 || 10iter:7.8427 sec.
iteration 13350 || Loss: 4.9001 || 10iter:7.8039 sec.
iteration 13360 || Loss: 4.9284 || 10iter:7.8640 sec.
iteration 13370 || Loss: 4.4625 || 10iter:7.8699 sec.
iteration 13380 || Loss: 3.7474 || 10iter:7.8665 sec.
iteration 13390 || Loss: 5.9021 || 10iter:7.9558 sec.
iteration 13400 || Loss: 4.7497 || 10iter:7.8794 sec.
iteration 13410 || Loss: 5.2627 || 10iter:7.8581 sec.
iteration 13420 || Loss: 4.5542 || 10iter:7.8679 sec.
iteration 13430 || Loss: 5.7732 || 10iter:7.8352 sec.
iteration 13440 || Loss: 6.1476 || 10iter:7.8831 sec.
iteration 13450 || Loss: 5.4821 || 10iter:7.8712 sec.
iteration 13460 || Loss: 5.2343 || 10iter:7.8048 sec.
iteration 13470 || Loss: 6.7340 || 10iter:7.8782 sec.
iteration 13480 || Loss: 5.8285 || 10iter:7.9484 sec.
iteration 13490 || Loss: 5.4105 || 10iter:7.7711 sec.
iteration 13500 || Loss: 5.4

iteration 14770 || Loss: 5.7121 || 10iter:7.9196 sec.
iteration 14780 || Loss: 3.8943 || 10iter:7.9927 sec.
iteration 14790 || Loss: 6.0242 || 10iter:7.8002 sec.
iteration 14800 || Loss: 5.4931 || 10iter:7.8852 sec.
iteration 14810 || Loss: 4.4432 || 10iter:7.9561 sec.
iteration 14820 || Loss: 4.3878 || 10iter:7.9111 sec.
iteration 14830 || Loss: 4.9275 || 10iter:7.8214 sec.
iteration 14840 || Loss: 4.9526 || 10iter:7.8339 sec.
iteration 14850 || Loss: 4.8886 || 10iter:7.8395 sec.
iteration 14860 || Loss: 4.7943 || 10iter:7.9790 sec.
iteration 14870 || Loss: 4.3321 || 10iter:7.8354 sec.
iteration 14880 || Loss: 4.0009 || 10iter:7.8012 sec.
iteration 14890 || Loss: 4.7364 || 10iter:7.9341 sec.
iteration 14900 || Loss: 4.6797 || 10iter:7.7974 sec.
iteration 14910 || Loss: 5.3834 || 10iter:7.7667 sec.
iteration 14920 || Loss: 4.9057 || 10iter:7.7550 sec.
iteration 14930 || Loss: 5.6889 || 10iter:7.8778 sec.
iteration 14940 || Loss: 6.1298 || 10iter:7.9088 sec.
iteration 14950 || Loss: 5.2

iteration 16230 || Loss: 4.2264 || 10iter:7.9232 sec.
iteration 16240 || Loss: 4.4769 || 10iter:7.7439 sec.
iteration 16250 || Loss: 3.8010 || 10iter:7.8887 sec.
iteration 16260 || Loss: 5.5133 || 10iter:7.8481 sec.
iteration 16270 || Loss: 5.5987 || 10iter:7.8193 sec.
iteration 16280 || Loss: 6.3616 || 10iter:7.9672 sec.
iteration 16290 || Loss: 4.6808 || 10iter:7.9348 sec.
iteration 16300 || Loss: 4.3247 || 10iter:7.9721 sec.
iteration 16310 || Loss: 4.7898 || 10iter:7.9311 sec.
iteration 16320 || Loss: 5.7481 || 10iter:7.8188 sec.
iteration 16330 || Loss: 5.3180 || 10iter:7.9041 sec.
iteration 16340 || Loss: 4.1007 || 10iter:7.8497 sec.
iteration 16350 || Loss: 5.7248 || 10iter:7.8676 sec.
iteration 16360 || Loss: 5.3950 || 10iter:7.9119 sec.
iteration 16370 || Loss: 5.2011 || 10iter:7.9049 sec.
iteration 16380 || Loss: 5.6527 || 10iter:7.8707 sec.
iteration 16390 || Loss: 5.4762 || 10iter:7.7985 sec.
iteration 16400 || Loss: 4.3466 || 10iter:7.7845 sec.
iteration 16410 || Loss: 5.6

iteration 17690 || Loss: 3.6955 || 10iter:7.8888 sec.
iteration 17700 || Loss: 4.7683 || 10iter:7.9083 sec.
iteration 17710 || Loss: 4.8337 || 10iter:7.8078 sec.
iteration 17720 || Loss: 4.9308 || 10iter:7.8683 sec.
iteration 17730 || Loss: 4.7263 || 10iter:7.7567 sec.
iteration 17740 || Loss: 4.5197 || 10iter:7.9293 sec.
iteration 17750 || Loss: 4.6765 || 10iter:7.9570 sec.
iteration 17760 || Loss: 5.9615 || 10iter:7.8331 sec.
iteration 17770 || Loss: 4.4306 || 10iter:7.8449 sec.
iteration 17780 || Loss: 4.0522 || 10iter:7.9432 sec.
iteration 17790 || Loss: 3.8226 || 10iter:7.9179 sec.
iteration 17800 || Loss: 6.1673 || 10iter:7.7602 sec.
iteration 17810 || Loss: 4.6141 || 10iter:7.8332 sec.
iteration 17820 || Loss: 4.7176 || 10iter:7.8853 sec.
iteration 17830 || Loss: 5.4819 || 10iter:7.9315 sec.
iteration 17840 || Loss: 5.7478 || 10iter:7.8484 sec.
iteration 17850 || Loss: 4.8595 || 10iter:7.8810 sec.
iteration 17860 || Loss: 3.9477 || 10iter:7.8267 sec.
iteration 17870 || Loss: 4.7

iteration 19150 || Loss: 4.0436 || 10iter:7.9221 sec.
iteration 19160 || Loss: 3.9897 || 10iter:7.6324 sec.
iteration 19170 || Loss: 4.5373 || 10iter:7.8271 sec.
iteration 19180 || Loss: 5.3137 || 10iter:7.8136 sec.
iteration 19190 || Loss: 4.3889 || 10iter:7.8903 sec.
iteration 19200 || Loss: 4.0549 || 10iter:7.9338 sec.
iteration 19210 || Loss: 4.2942 || 10iter:7.9051 sec.
iteration 19220 || Loss: 4.4814 || 10iter:7.8022 sec.
iteration 19230 || Loss: 3.8768 || 10iter:8.0055 sec.
iteration 19240 || Loss: 4.7639 || 10iter:7.9263 sec.
iteration 19250 || Loss: 4.1622 || 10iter:7.8325 sec.
iteration 19260 || Loss: 4.8812 || 10iter:7.8498 sec.
iteration 19270 || Loss: 4.6545 || 10iter:7.8497 sec.
iteration 19280 || Loss: 3.9747 || 10iter:7.9005 sec.
iteration 19290 || Loss: 4.8967 || 10iter:7.9417 sec.
iteration 19300 || Loss: 4.7719 || 10iter:8.0069 sec.
-------------------
epoch 27 || Epoch_TRAIN_Loss:3290.1990 || Epoch_VAL_Loss:0.0000
-------------------
Epoch 28/50
-------------------


iteration 20610 || Loss: 4.2541 || 10iter:7.8450 sec.
iteration 20620 || Loss: 4.2200 || 10iter:7.8680 sec.
iteration 20630 || Loss: 4.2362 || 10iter:7.7893 sec.
iteration 20640 || Loss: 4.9019 || 10iter:7.8907 sec.
iteration 20650 || Loss: 4.4951 || 10iter:7.8400 sec.
iteration 20660 || Loss: 4.6419 || 10iter:7.9078 sec.
iteration 20670 || Loss: 5.4173 || 10iter:7.8396 sec.
iteration 20680 || Loss: 3.5860 || 10iter:7.8483 sec.
iteration 20690 || Loss: 3.9087 || 10iter:8.0125 sec.
iteration 20700 || Loss: 4.6575 || 10iter:7.8649 sec.
iteration 20710 || Loss: 4.2602 || 10iter:7.8655 sec.
iteration 20720 || Loss: 5.0546 || 10iter:7.8143 sec.
iteration 20730 || Loss: 4.0661 || 10iter:7.8838 sec.
-------------------
epoch 29 || Epoch_TRAIN_Loss:3204.3970 || Epoch_VAL_Loss:0.0000
-------------------
Epoch 30/50
-------------------
-------------------
 (train) 
iteration 20740 || Loss: 5.5031 || 10iter:3.7851 sec.
iteration 20750 || Loss: 4.7158 || 10iter:8.0186 sec.
iteration 20760 || Loss:

iteration 22060 || Loss: 3.9157 || 10iter:7.8088 sec.
iteration 22070 || Loss: 4.4158 || 10iter:7.7890 sec.
iteration 22080 || Loss: 4.8023 || 10iter:7.7853 sec.
iteration 22090 || Loss: 3.1511 || 10iter:7.8904 sec.
iteration 22100 || Loss: 3.7627 || 10iter:7.8726 sec.
iteration 22110 || Loss: 5.1495 || 10iter:7.7996 sec.
iteration 22120 || Loss: 4.2626 || 10iter:7.9134 sec.
iteration 22130 || Loss: 5.3490 || 10iter:7.8923 sec.
iteration 22140 || Loss: 4.1053 || 10iter:7.8988 sec.
iteration 22150 || Loss: 4.1340 || 10iter:7.8770 sec.
iteration 22160 || Loss: 4.3058 || 10iter:7.8786 sec.
-------------------
epoch 31 || Epoch_TRAIN_Loss:3155.1897 || Epoch_VAL_Loss:0.0000
-------------------
Epoch 32/50
-------------------
-------------------
 (train) 
iteration 22170 || Loss: 3.9382 || 10iter:3.7047 sec.
iteration 22180 || Loss: 3.0570 || 10iter:7.9731 sec.
iteration 22190 || Loss: 3.6268 || 10iter:7.8648 sec.
iteration 22200 || Loss: 3.7162 || 10iter:7.8431 sec.
iteration 22210 || Loss:

iteration 23520 || Loss: 3.0630 || 10iter:7.8535 sec.
iteration 23530 || Loss: 3.3813 || 10iter:8.0319 sec.
iteration 23540 || Loss: 4.4257 || 10iter:7.9651 sec.
iteration 23550 || Loss: 4.5321 || 10iter:7.8378 sec.
iteration 23560 || Loss: 4.9927 || 10iter:7.8356 sec.
iteration 23570 || Loss: 5.9328 || 10iter:7.9534 sec.
iteration 23580 || Loss: 3.9993 || 10iter:7.8294 sec.
iteration 23590 || Loss: 3.7237 || 10iter:7.8991 sec.
-------------------
epoch 33 || Epoch_TRAIN_Loss:3078.7099 || Epoch_VAL_Loss:0.0000
-------------------
Epoch 34/50
-------------------
-------------------
 (train) 
iteration 23600 || Loss: 3.9849 || 10iter:3.7467 sec.
iteration 23610 || Loss: 4.1835 || 10iter:7.8305 sec.
iteration 23620 || Loss: 3.7427 || 10iter:7.9440 sec.
iteration 23630 || Loss: 3.7783 || 10iter:7.9450 sec.
iteration 23640 || Loss: 4.6925 || 10iter:7.9796 sec.
iteration 23650 || Loss: 4.0865 || 10iter:7.8930 sec.
iteration 23660 || Loss: 4.4406 || 10iter:7.8468 sec.
iteration 23670 || Loss:

iteration 24980 || Loss: 3.2372 || 10iter:7.8917 sec.
iteration 24990 || Loss: 3.6644 || 10iter:7.8295 sec.
iteration 25000 || Loss: 3.5169 || 10iter:7.8601 sec.
iteration 25010 || Loss: 3.7363 || 10iter:7.8871 sec.
iteration 25020 || Loss: 4.0214 || 10iter:7.7816 sec.
-------------------
epoch 35 || Epoch_TRAIN_Loss:2993.1037 || Epoch_VAL_Loss:0.0000
-------------------
Epoch 36/50
-------------------
-------------------
 (train) 
iteration 25030 || Loss: 3.9550 || 10iter:3.7001 sec.
iteration 25040 || Loss: 4.3221 || 10iter:7.8315 sec.
iteration 25050 || Loss: 4.6340 || 10iter:8.0624 sec.
iteration 25060 || Loss: 4.8530 || 10iter:7.7773 sec.
iteration 25070 || Loss: 5.2028 || 10iter:7.8474 sec.
iteration 25080 || Loss: 4.2365 || 10iter:7.7788 sec.
iteration 25090 || Loss: 5.6688 || 10iter:7.8864 sec.
iteration 25100 || Loss: 4.7396 || 10iter:7.8781 sec.
iteration 25110 || Loss: 4.7139 || 10iter:7.7525 sec.
iteration 25120 || Loss: 3.2537 || 10iter:7.7557 sec.
iteration 25130 || Loss:

iteration 26440 || Loss: 4.8443 || 10iter:7.8353 sec.
iteration 26450 || Loss: 4.1605 || 10iter:7.8977 sec.
-------------------
epoch 37 || Epoch_TRAIN_Loss:2950.1833 || Epoch_VAL_Loss:0.0000
-------------------
Epoch 38/50
-------------------
-------------------
 (train) 
iteration 26460 || Loss: 3.8310 || 10iter:3.6708 sec.
iteration 26470 || Loss: 4.0404 || 10iter:7.8136 sec.
iteration 26480 || Loss: 3.6865 || 10iter:7.9179 sec.
iteration 26490 || Loss: 4.4274 || 10iter:7.8981 sec.
iteration 26500 || Loss: 5.0757 || 10iter:7.8792 sec.
iteration 26510 || Loss: 4.0977 || 10iter:7.9604 sec.
iteration 26520 || Loss: 4.5905 || 10iter:7.9080 sec.
iteration 26530 || Loss: 4.0698 || 10iter:7.8539 sec.
iteration 26540 || Loss: 3.0167 || 10iter:7.8114 sec.
iteration 26550 || Loss: 3.4063 || 10iter:7.8655 sec.
iteration 26560 || Loss: 3.8829 || 10iter:7.9157 sec.
iteration 26570 || Loss: 4.1207 || 10iter:7.7651 sec.
iteration 26580 || Loss: 4.2802 || 10iter:8.0157 sec.
iteration 26590 || Loss:

iteration 27890 || Loss: 4.9227 || 10iter:3.7367 sec.
iteration 27900 || Loss: 3.6415 || 10iter:7.8780 sec.
iteration 27910 || Loss: 3.2413 || 10iter:7.8936 sec.
iteration 27920 || Loss: 4.4786 || 10iter:7.9163 sec.
iteration 27930 || Loss: 5.0458 || 10iter:7.8448 sec.
iteration 27940 || Loss: 2.5348 || 10iter:7.8116 sec.
iteration 27950 || Loss: 3.8162 || 10iter:8.0014 sec.
iteration 27960 || Loss: 4.9365 || 10iter:7.9374 sec.
iteration 27970 || Loss: 3.2389 || 10iter:7.9489 sec.
iteration 27980 || Loss: 3.3724 || 10iter:7.9994 sec.
iteration 27990 || Loss: 4.3577 || 10iter:7.7635 sec.
iteration 28000 || Loss: 3.5424 || 10iter:7.7949 sec.
iteration 28010 || Loss: 3.4481 || 10iter:7.9680 sec.
iteration 28020 || Loss: 4.6072 || 10iter:7.8350 sec.
iteration 28030 || Loss: 3.4481 || 10iter:7.8353 sec.
iteration 28040 || Loss: 4.0902 || 10iter:7.9143 sec.
iteration 28050 || Loss: 4.1211 || 10iter:7.8303 sec.
iteration 28060 || Loss: 3.8789 || 10iter:7.8789 sec.
iteration 28070 || Loss: 4.3

iteration 29340 || Loss: 2.9244 || 10iter:7.9814 sec.
iteration 29350 || Loss: 4.8834 || 10iter:7.9309 sec.
iteration 29360 || Loss: 3.4122 || 10iter:7.8755 sec.
iteration 29370 || Loss: 3.6185 || 10iter:7.8402 sec.
iteration 29380 || Loss: 3.9857 || 10iter:7.9016 sec.
iteration 29390 || Loss: 4.2109 || 10iter:7.9797 sec.
iteration 29400 || Loss: 3.1566 || 10iter:7.9608 sec.
iteration 29410 || Loss: 4.1209 || 10iter:8.0315 sec.
iteration 29420 || Loss: 3.9366 || 10iter:7.9312 sec.
iteration 29430 || Loss: 4.1159 || 10iter:7.9787 sec.
iteration 29440 || Loss: 3.8136 || 10iter:7.8219 sec.
iteration 29450 || Loss: 4.5473 || 10iter:7.9792 sec.
iteration 29460 || Loss: 5.2258 || 10iter:7.9849 sec.
iteration 29470 || Loss: 2.8274 || 10iter:7.8037 sec.
iteration 29480 || Loss: 4.0416 || 10iter:7.9455 sec.
iteration 29490 || Loss: 3.8087 || 10iter:7.9298 sec.
iteration 29500 || Loss: 3.7669 || 10iter:7.8347 sec.
iteration 29510 || Loss: 4.6277 || 10iter:7.8666 sec.
iteration 29520 || Loss: 5.1

iteration 30800 || Loss: 4.0112 || 10iter:7.8110 sec.
iteration 30810 || Loss: 3.8586 || 10iter:7.9054 sec.
iteration 30820 || Loss: 3.2176 || 10iter:7.7174 sec.
iteration 30830 || Loss: 3.9157 || 10iter:7.8640 sec.
iteration 30840 || Loss: 3.9153 || 10iter:7.8245 sec.
iteration 30850 || Loss: 4.1232 || 10iter:7.7755 sec.
iteration 30860 || Loss: 4.1392 || 10iter:7.8403 sec.
iteration 30870 || Loss: 4.6893 || 10iter:7.9559 sec.
iteration 30880 || Loss: 3.9300 || 10iter:7.8897 sec.
iteration 30890 || Loss: 2.7949 || 10iter:7.8382 sec.
iteration 30900 || Loss: 3.6569 || 10iter:7.9167 sec.
iteration 30910 || Loss: 4.1503 || 10iter:7.9000 sec.
iteration 30920 || Loss: 4.9536 || 10iter:7.8635 sec.
iteration 30930 || Loss: 4.5077 || 10iter:7.8586 sec.
iteration 30940 || Loss: 3.6928 || 10iter:7.9844 sec.
iteration 30950 || Loss: 3.4461 || 10iter:7.8855 sec.
iteration 30960 || Loss: 3.9630 || 10iter:7.9297 sec.
iteration 30970 || Loss: 3.7434 || 10iter:7.8932 sec.
iteration 30980 || Loss: 2.9

iteration 32260 || Loss: 3.3920 || 10iter:7.9343 sec.
iteration 32270 || Loss: 4.1701 || 10iter:7.9036 sec.
iteration 32280 || Loss: 3.8287 || 10iter:7.8800 sec.
iteration 32290 || Loss: 4.3346 || 10iter:7.9383 sec.
iteration 32300 || Loss: 3.5365 || 10iter:7.9164 sec.
iteration 32310 || Loss: 3.8773 || 10iter:7.9173 sec.
iteration 32320 || Loss: 3.5265 || 10iter:7.9178 sec.
iteration 32330 || Loss: 3.4438 || 10iter:7.8816 sec.
iteration 32340 || Loss: 4.6182 || 10iter:7.9908 sec.
iteration 32350 || Loss: 3.3058 || 10iter:7.8972 sec.
iteration 32360 || Loss: 3.6754 || 10iter:7.8277 sec.
iteration 32370 || Loss: 3.5025 || 10iter:7.9114 sec.
iteration 32380 || Loss: 3.4778 || 10iter:8.0007 sec.
iteration 32390 || Loss: 3.7568 || 10iter:7.9764 sec.
iteration 32400 || Loss: 4.7126 || 10iter:7.9222 sec.
iteration 32410 || Loss: 3.4332 || 10iter:7.9051 sec.
iteration 32420 || Loss: 3.8258 || 10iter:7.9598 sec.
iteration 32430 || Loss: 2.8636 || 10iter:7.9809 sec.
iteration 32440 || Loss: 2.4

iteration 33720 || Loss: 3.3330 || 10iter:8.0327 sec.
iteration 33730 || Loss: 3.4981 || 10iter:7.8459 sec.
iteration 33740 || Loss: 2.8920 || 10iter:7.7973 sec.
iteration 33750 || Loss: 4.4048 || 10iter:7.9839 sec.
iteration 33760 || Loss: 4.3375 || 10iter:7.9566 sec.
iteration 33770 || Loss: 3.7253 || 10iter:7.9883 sec.
iteration 33780 || Loss: 3.8667 || 10iter:7.8452 sec.
iteration 33790 || Loss: 3.3553 || 10iter:7.7922 sec.
iteration 33800 || Loss: 4.0714 || 10iter:7.8893 sec.
iteration 33810 || Loss: 4.8243 || 10iter:7.8637 sec.
iteration 33820 || Loss: 3.8792 || 10iter:7.9015 sec.
iteration 33830 || Loss: 4.4430 || 10iter:7.8341 sec.
iteration 33840 || Loss: 4.7149 || 10iter:7.9698 sec.
iteration 33850 || Loss: 4.6001 || 10iter:7.9268 sec.
iteration 33860 || Loss: 3.0143 || 10iter:7.9726 sec.
iteration 33870 || Loss: 3.7538 || 10iter:7.9294 sec.
iteration 33880 || Loss: 4.0569 || 10iter:7.9495 sec.
iteration 33890 || Loss: 3.4847 || 10iter:7.8391 sec.
iteration 33900 || Loss: 3.7

iteration 35180 || Loss: 3.4507 || 10iter:7.9795 sec.
iteration 35190 || Loss: 4.6380 || 10iter:7.8513 sec.
iteration 35200 || Loss: 3.2867 || 10iter:7.9942 sec.
iteration 35210 || Loss: 3.3933 || 10iter:8.0478 sec.
iteration 35220 || Loss: 3.4519 || 10iter:7.8348 sec.
iteration 35230 || Loss: 2.3306 || 10iter:7.9054 sec.
iteration 35240 || Loss: 3.7508 || 10iter:8.0127 sec.
iteration 35250 || Loss: 3.2243 || 10iter:7.9015 sec.
iteration 35260 || Loss: 3.7075 || 10iter:7.8579 sec.
iteration 35270 || Loss: 2.9057 || 10iter:7.8094 sec.
iteration 35280 || Loss: 3.8047 || 10iter:7.8066 sec.
iteration 35290 || Loss: 3.8987 || 10iter:7.9055 sec.
iteration 35300 || Loss: 3.5411 || 10iter:8.0266 sec.
iteration 35310 || Loss: 3.5757 || 10iter:7.9389 sec.
iteration 35320 || Loss: 3.4338 || 10iter:7.9155 sec.
iteration 35330 || Loss: 4.3760 || 10iter:7.8957 sec.
iteration 35340 || Loss: 4.5024 || 10iter:7.9921 sec.
iteration 35350 || Loss: 3.5631 || 10iter:7.8468 sec.
iteration 35360 || Loss: 4.3