In [1]:
import torch
from data import LoadData
from model import VGG16
from torch.utils import data
from utils import *
from loss import EdgeSaliencyLoss
import os

torch.__version__

'1.6.0+cu101'

In [2]:
if torch.cuda.is_available():
    device = torch.device(device='cuda')
else:
    device = torch.device(device='cpu')
device

device(type='cuda')

In [3]:
path_image = "./DUTS/DUTS-TR/DUTS-TR-Image/"
path_mask = "./DUTS/DUTS-TR/DUTS-TR-Mask/"

In [4]:
len(os.listdir(path_image))

10553

In [4]:
batch_size = 4
learning_rate = 1e-3
target_size = 256
epochs = 1

In [5]:
total_batch = len(os.listdir(path_image)) // batch_size
total_batch

2638

In [6]:
data_loader = data.DataLoader(LoadData(path_image, path_mask, target_size),
                            batch_size=batch_size,
                            shuffle=True)

In [7]:
model = VGG16()

In [8]:
model.load_state_dict(torch.load("./model/vgg16_no_top.pth"), strict=False)

_IncompatibleKeys(missing_keys=['mfe1.conv11.weight', 'mfe1.conv11.bias', 'mfe1.conv33.weight', 'mfe1.conv33.bias', 'mfe1.conv55.weight', 'mfe1.conv55.bias', 'mfe2.conv11.weight', 'mfe2.conv11.bias', 'mfe2.conv33.weight', 'mfe2.conv33.bias', 'mfe2.conv55.weight', 'mfe2.conv55.bias', 'mfe3.conv11.weight', 'mfe3.conv11.bias', 'mfe3.conv33.weight', 'mfe3.conv33.bias', 'mfe3.conv55.weight', 'mfe3.conv55.bias', 'mfe4.conv11.weight', 'mfe4.conv11.bias', 'mfe4.conv33.weight', 'mfe4.conv33.bias', 'mfe4.conv55.weight', 'mfe4.conv55.bias', 'mfe5.conv11.weight', 'mfe5.conv11.bias', 'mfe5.conv33.weight', 'mfe5.conv33.bias', 'mfe5.conv55.weight', 'mfe5.conv55.bias', 'mfe6.conv11.weight', 'mfe6.conv11.bias', 'mfe6.conv33.weight', 'mfe6.conv33.bias', 'mfe6.conv55.weight', 'mfe6.conv55.bias', 'mfe7.conv11.weight', 'mfe7.conv11.bias', 'mfe7.conv33.weight', 'mfe7.conv33.bias', 'mfe7.conv55.weight', 'mfe7.conv55.bias', 'mfe8.conv11.weight', 'mfe8.conv11.bias', 'mfe8.conv33.weight', 'mfe8.conv33.bias', 'm

In [9]:
criterion = EdgeSaliencyLoss(device)
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

In [10]:
model.to(device)

VGG16(
  (conv1_1): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv1_2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (maxpooling1): MaxPool2d(kernel_size=2, stride=(2, 2), padding=0, dilation=1, ceil_mode=False)
  (conv2_1): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv2_2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (maxpooling2): MaxPool2d(kernel_size=2, stride=(2, 2), padding=0, dilation=1, ceil_mode=False)
  (conv3_1): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv3_2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv3_3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (maxpooling3): MaxPool2d(kernel_size=2, stride=(2, 2), padding=0, dilation=1, ceil_mode=False)
  (conv4_1): Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv4_2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1,

In [48]:
for epoch in range(epochs):
    model.train()
    for batch_n, (image, mask) in enumerate(data_loader, start=1):
        
        image = image.to(device)
        mask = mask.to(device)
        
        optimizer.zero_grad()
        predict = model(image)
        loss = criterion(predict, mask)
        
        loss.backward()
        optimizer.step()
        
        if batch_n % 100 == 0:
            acc = accuracy(predict, mask)
            pre = precision(predict, mask)
            rec = recall(predict, mask)
            f_score = (1 + 0.3) * pre * rec / (0.3 * pre + rec)
            print("Epoch:{} loss:{} Batch:{}/{}".format(epoch + 1, loss.item(), batch_n, total_batch), end="")
            print(" acc:{} pre:{} recall:{} F-measure:{}".format(acc, pre, rec, f_score))

RuntimeError: CUDA out of memory. Tried to allocate 2.00 MiB (GPU 0; 6.00 GiB total capacity; 4.44 GiB already allocated; 844.80 KiB free; 4.52 GiB reserved in total by PyTorch)

In [13]:
torch.save(model.state_dict(),"./model/model_1.pth")

In [17]:
import cv2
import numpy as np
from torchvision import transforms

In [16]:
def pad_resize_image(inp_img, out_img=None, target_size=None):
    h, w, c = inp_img.shape
    size = max(h, w)

    padding_h = (size - h) // 2
    padding_w = (size - w) // 2

    if out_img is None:
        # For inference
        temp_x = cv2.copyMakeBorder(inp_img, top=padding_h, bottom=padding_h, left=padding_w, right=padding_w,
                                    borderType=cv2.BORDER_CONSTANT, value=[0, 0, 0])
        if target_size is not None:
            temp_x = cv2.resize(temp_x, (target_size, target_size), interpolation=cv2.INTER_AREA)
        return temp_x
    else:
        # For training and testing
        temp_x = cv2.copyMakeBorder(inp_img, top=padding_h, bottom=padding_h, left=padding_w, right=padding_w,
                                    borderType=cv2.BORDER_CONSTANT, value=[0, 0, 0])
        temp_y = cv2.copyMakeBorder(out_img, top=padding_h, bottom=padding_h, left=padding_w, right=padding_w,
                                    borderType=cv2.BORDER_CONSTANT, value=[0, 0, 0])
        # print(inp_img.shape, temp_x.shape, out_img.shape, temp_y.shape)

        if target_size is not None:
            temp_x = cv2.resize(temp_x, (target_size, target_size), interpolation=cv2.INTER_AREA)
            temp_y = cv2.resize(temp_y, (target_size, target_size), interpolation=cv2.INTER_AREA)
        return temp_x, temp_y


In [24]:
def getInput(img_path):
    img = cv2.imread(img_path)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = img.astype('float32')
    
    img = pad_resize_image(img, target_size=256)
    img /= 255.
    img = np.transpose(img, axes=(2, 0, 1))
    img = torch.from_numpy(img).float()
    
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                    std=[0.229, 0.224, 0.225]) 
    
    img = normalize(img)
    return img

In [25]:
img = getInput("./image/1.jpg")

In [26]:
img.shape

torch.Size([3, 256, 256])

In [32]:
img = img.reshape(1, 3, 256, 256)
img = img.to(device)

In [33]:
predict = model(img)

In [35]:
predict.shape

torch.Size([1, 1, 256, 256])

In [43]:
msk = predict.reshape(256, 256, 1)
msk = msk.cpu().detach()

In [46]:
msk.shape

torch.Size([256, 256, 1])

In [47]:
cv2.imshow("test", np.array(msk))
cv2.waitKey(0)
cv2.destroyAllWindows()