In [1]:
# パッケージのimport
import os.path as osp
import random
import time

import cv2
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.init as init
import torch.optim as optim
import torch.utils.data as data

In [2]:
# 乱数のシードを設定
torch.manual_seed(1234)
np.random.seed(1234)
random.seed(1234)

In [3]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print("使用デバイス：", device)

使用デバイス： cuda:0


In [8]:
from utils.ssd_model import make_datapath_list, VOCDataset, DataTransform, Anno_xml2list, od_collate_fn

root_path = './data/VOC2012_train_val/'
train_img_list, train_anno_list, val_img_list, val_anno_list = make_datapath_list(root_path)

voc_classes = ['aeroplane', 'bicycle', 'bird', 'boat',
               'bottle', 'bus', 'car', 'cat', 'chair',
               'cow', 'diningtable', 'dog', 'horse',
               'motorbike', 'person', 'pottedplant',
               'sheep', 'sofa', 'train', 'tvmonitor']

color_mean = (104, 117, 123)
input_size = 300

train_dataset = VOCDataset(train_img_list, train_anno_list, phase="train",
transform=DataTransform(input_size, color_mean), transform_anno=Anno_xml2list(voc_classes))

val_dataset = VOCDataset(val_img_list, val_anno_list, phase="val",
transform=DataTransform(input_size, color_mean), transform_anno=Anno_xml2list(voc_classes))

batch_size = 32

train_dataloader = data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True, collate_fn=od_collate_fn)
val_dataloader = data.DataLoader(val_dataset, batch_size=batch_size, shuffle=False, collate_fn=od_collate_fn)

dataloaders_dict = {"train": train_dataloader, "val": val_dataloader}

In [9]:
from utils.ssd_model import SSD

ssd_cfg = {
    'num_classes': 21,
    'input_size': 300,
    'bbox_aspect_num': [4, 6, 6, 6, 4, 4],
    'feature_maps': [38, 19, 10, 5, 3, 1],
    'steps': [8, 16, 32, 64, 100, 300],
    'min_sizes': [30, 60, 111, 162, 213, 264],
    'max_sizes': [60, 111, 162, 213, 264, 315],
    'aspect_ratios': [[2], [2, 3], [2, 3], [2, 3], [2], [2]],
}

net = SSD(phase='train', cfg=ssd_cfg)

vgg_weights = torch.load('./weights/vgg16_reducedfc.pth')
net.vgg.load_state_dict(vgg_weights)

def weights_init(m):
    if isinstance(m, nn.Conv2d):
        init.kaiming_normal_(m.weight.data)
        if m.bias is not None:
            nn.init.constant_(m.bias, 0.0)

net.extras.apply(weights_init)
net.loc.apply(weights_init)
net.conf.apply(weights_init)


ModuleList(
  (0): Conv2d(512, 84, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (1): Conv2d(1024, 126, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (2): Conv2d(512, 126, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (3): Conv2d(256, 126, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (4-5): 2 x Conv2d(256, 84, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
)

In [10]:
from utils.ssd_model import MultiBoxLoss

criterion = MultiBoxLoss(jaccard_thresh=0.5, neg_pos=3, device=device)

optimizer = optim.SGD(net.parameters(), lr=1e-3, momentum=0.9, weight_decay=5e-4)

In [11]:
def train_model(net, dataloaders_dict, criterion, optimizer, num_epochs):
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print("使用デバイス：", device)

    net.to(device)

    torch.backends.cudnn.benchmark = True

    iteration = 1
    epoch_train_loss = 0.0
    epoch_val_loss = 0.0
    logs = []

    for epoch in range(num_epochs+1):
        t_epoch_start = time.time()
        t_iter_start = time.time()

        print('----------')
        print(f'Epoch {epoch+1}/{num_epochs}')
        print('----------')

        for phase in ['train', 'val']:
            if phase == 'train':
                net.train()
                print(' (train) ')
            else:
                if ((epoch+1) % 10 == 0):
                    net.eval()
                    print('----------')
                    print(' (val) ')
                else:
                    continue
                    
            for images, targets in dataloaders_dict[phase]:

                images = images.to(device)
                targets = [ann.to(device) for ann in targets]

                optimizer.zero_grad()

                with torch.set_grad_enabled(phase == 'train'):
                    outputs = net(images)

                    loss_l, loss_c = criterion(outputs, targets)
                    loss = loss_l + loss_c

                    if phase == 'train':
                        loss.backward()

                        nn.utils.clip_grad_value_(net.parameters(), clip_value=2.0)

                        optimizer.step()

                        if (iteration % 10 == 0):
                            t_iter_finish = time.time()
                            duration = t_iter_finish - t_iter_start
                            print(f'イテレーション {iteration} || Loss: {loss.item():.4f} || 10iter: {duration:.4f}')

                            t_iter_start = time.time()

                        epoch_train_loss += loss.item()
                        iteration += 1

                    else:
                        epoch_val_loss += loss.item()
        
            t_epoch_finish = time.time()
            print('----------')
            print(f'epoch {epoch+1} || Epoch_TRAIN_Loss:{epoch_train_loss:.4f} || Epoch_VAL_Loss:{epoch_val_loss:.4f}')
            print(f'timer: {(t_epoch_finish - t_epoch_start):.4f} sec.')
            t_epoch_start = time.time()

            log_epoch = {'epoch': epoch+1, 'train_loss': epoch_train_loss, 'val_loss': epoch_val_loss}

            logs.append(log_epoch)
            df = pd.DataFrame(logs)
            df.to_csv("log_output.csv")

            epoch_train_loss = 0.0
            epoch_val_loss = 0.0

            if ((epoch+1) % 10 == 0):
                torch.save(net.state_dict(), 'weights/ssd300_' + str(epoch+1) + '.pth')


In [12]:
num_epoch = 1
train_model(net, dataloaders_dict, criterion, optimizer, num_epochs=num_epoch)

使用デバイス： cuda:0
----------
Epoch 1/1
----------
 (train) 
イテレーション 10 || Loss: 16.0777 || 10iter: 169.1657
イテレーション 20 || Loss: 14.2178 || 10iter: 154.7143
イテレーション 30 || Loss: 11.3827 || 10iter: 125.1569
イテレーション 40 || Loss: 10.2027 || 10iter: 142.8153
イテレーション 50 || Loss: 9.7231 || 10iter: 125.9625
イテレーション 60 || Loss: 9.5663 || 10iter: 161.4027
イテレーション 70 || Loss: 9.7920 || 10iter: 138.5169
イテレーション 80 || Loss: 8.7647 || 10iter: 138.0119
イテレーション 90 || Loss: 11.0954 || 10iter: 149.4548
イテレーション 100 || Loss: 9.0079 || 10iter: 146.5158
イテレーション 110 || Loss: 10.3550 || 10iter: 170.1505
イテレーション 120 || Loss: 9.3338 || 10iter: 138.3532
イテレーション 130 || Loss: 11.9199 || 10iter: 122.5581
イテレーション 140 || Loss: 11.9709 || 10iter: 130.8086
イテレーション 150 || Loss: 9.5958 || 10iter: 116.6570
イテレーション 160 || Loss: 10.5256 || 10iter: 128.5467
イテレーション 170 || Loss: 9.3137 || 10iter: 114.3947
----------
epoch 1 || Epoch_TRAIN_Loss:1946.0578 || Epoch_VAL_Loss:0.0000
timer: 2653.5224 sec.
----------
Epoch 2/1
----------