In [None]:
#라이브러리 임포트
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.init as init

import torchvision.datasets as datasets
import torchvision.transforms as transforms

from torch.utils.data import DataLoader

import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm

In [None]:
# Hyperparameters used by the data loaders, the optimizer and the training loop
epoch = 10            # number of passes over the training set
learning_rate = 1e-4  # optimizer step size
batch_size = 5        # samples per mini-batch

In [None]:
# Input-image preprocessing: resize to 224x224, then convert to a tensor
transform = transforms.Compose(
    [transforms.Resize(size=(224, 224)), transforms.ToTensor()]
)

In [None]:
from PIL import Image
# Datasets and data loaders


def mask_to_class_indices(mask):
    """Convert a PIL segmentation mask into an (H, W) int64 tensor of class indices.

    The mask must NOT go through ``ToTensor()``: that rescales the stored class
    indices to floats in [0, 1] and adds a channel dimension, while
    ``nn.CrossEntropyLoss`` expects integer class indices of shape (H, W).
    """
    return torch.from_numpy(np.array(mask, dtype=np.int64))


# Mask preprocessing: same spatial size as the images, but resized with
# nearest-neighbour interpolation so no new (blended) label values are created.
target_transform = transforms.Compose([
    transforms.Resize((224, 224), interpolation=transforms.InterpolationMode.NEAREST),
    transforms.Lambda(mask_to_class_indices),
])

voc_train = datasets.VOCSegmentation(
    root='../Data/', year='2012', image_set='train',
    transform=transform, target_transform=target_transform, download=False,
)
voc_test = datasets.VOCSegmentation(
    root='../Data/', year='2012', image_set='val',
    transform=transform, target_transform=target_transform, download=False,
)

train_loader = DataLoader(voc_train, batch_size=batch_size, shuffle=True, num_workers=1, drop_last=True)
test_loader = DataLoader(voc_test, batch_size=batch_size, shuffle=False, num_workers=1, drop_last=True)


In [None]:
class FCN(nn.Module):
    """Fully convolutional network for semantic segmentation.

    A VGG16-style stack of five conv/pool stages (each halving the spatial
    size) is followed by two 1x1 "fully connected" convolutions, a 1x1
    per-class scoring convolution and a single stride-32 transposed
    convolution that upsamples the class scores.

    Args:
        num_classes: Number of segmentation classes; also the number of
            output channels.
        verbose: If True, print the tensor size after every stage on each
            forward pass (debugging aid). Off by default.

    Shape:
        Input:  (N, 3, H, W)
        Output: (N, num_classes, H', W'); H' == H and W' == W when H and W
        are multiples of 32.
    """

    def __init__(self, num_classes=21, verbose=False):
        super().__init__()
        self.verbose = verbose

        self.conv1 = self._conv_block(3, 64, num_convs=2)      # 1/2
        self.conv2 = self._conv_block(64, 128, num_convs=2)    # 1/4
        self.conv3 = self._conv_block(128, 256, num_convs=3)   # 1/8
        self.conv4 = self._conv_block(256, 512, num_convs=3)   # 1/16
        self.conv5 = self._conv_block(512, 512, num_convs=3)   # 1/32

        # Fully connected layers expressed as 1x1 convolutions.
        self.fc1 = nn.Sequential(
            nn.Conv2d(in_channels=512, out_channels=4096, kernel_size=1),
            nn.ReLU(),
            nn.Dropout2d()
        )

        self.fc2 = nn.Sequential(
            nn.Conv2d(in_channels=4096, out_channels=4096, kernel_size=1),
            nn.ReLU(),
            nn.Dropout2d()
        )

        # Per-class scores, then 32x upsampling. The transposed convolution
        # must keep num_classes channels: the loss and the argmax over dim 1
        # both need one score map per class.
        self.score = nn.Sequential(
            nn.Conv2d(in_channels=4096, out_channels=num_classes, kernel_size=1),
            nn.ConvTranspose2d(in_channels=num_classes, out_channels=num_classes,
                               kernel_size=64, stride=32, padding=16)
        )

    @staticmethod
    def _conv_block(in_channels, out_channels, num_convs):
        """Build `num_convs` 3x3 conv+ReLU pairs followed by a 2x2 max-pool."""
        layers = []
        for _ in range(num_convs):
            layers.append(nn.Conv2d(in_channels=in_channels, out_channels=out_channels,
                                    kernel_size=3, padding=1))
            layers.append(nn.ReLU())
            in_channels = out_channels
        # ceil_mode=True rounds the pooled size up instead of down.
        layers.append(nn.MaxPool2d(kernel_size=2, stride=2, ceil_mode=True))
        return nn.Sequential(*layers)

    def forward(self, x):
        """Return per-pixel class scores for a batch of images."""
        stages = (
            ('conv1', self.conv1),
            ('conv2', self.conv2),
            ('conv3', self.conv3),
            ('conv4', self.conv4),
            ('conv5', self.conv5),
            ('fc1', self.fc1),
            ('fc2', self.fc2),
            ('score', self.score),
        )
        for name, stage in stages:
            x = stage(x)
            if self.verbose:
                print('{} = {}'.format(name, x.size()))
        return x

In [None]:
# Device, model, loss function and optimizer
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

model = FCN(21).to(device)

# Pascal VOC segmentation masks mark void/boundary pixels with the value 255.
# That value is not one of the 21 classes, so it is excluded from the loss.
loss_func = nn.CrossEntropyLoss(ignore_index=255)
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

In [None]:
# Training
loss_arr = []  # loss values (Python floats) sampled every 1000 iterations

model.train()  # make sure dropout is active while training
for i in range(epoch):
    # Wrap the loader itself (not enumerate) so tqdm knows the number of batches.
    for j, (image, label) in enumerate(tqdm(train_loader)):
        image = image.to(device)
        label = label.to(device)

        optimizer.zero_grad()

        # Call the module rather than .forward() so registered hooks run.
        output = model(image)
        loss = loss_func(output, label)
        loss.backward()
        optimizer.step()

        if j % 1000 == 0:
            # .item() yields a plain float; extending the list with a 0-d
            # NumPy array (the previous `+=`) raises TypeError.
            loss_value = loss.item()
            tqdm.write('epoch {} iter {} loss {}'.format(i, j, loss_value))
            loss_arr.append(loss_value)

In [None]:
# Evaluation: pixel accuracy over the test loader
correct = 0  # correctly classified pixels
total = 0    # pixels that take part in the score

model.eval()

with torch.no_grad():
    for image, label in test_loader:
        image = image.to(device)
        label = label.to(device)

        output = model(image)
        output_index = output.argmax(dim=1)  # (N, H, W): predicted class per pixel

        # Drop a singleton channel dimension so the comparison below is
        # element-wise instead of silently broadcasting to (N, N, H, W).
        if label.dim() == output_index.dim() + 1:
            label = label.squeeze(1)

        # Pascal VOC marks void/boundary pixels with 255; leave them out.
        valid = label != 255

        # Count pixels, not images: `correct` is a pixel count, so the
        # denominator must be one too.
        total += valid.sum().item()
        correct += ((output_index == label) & valid).sum().item()

accuracy = 100 * correct / total if total else float('nan')
print("Accuracy of Test Data: {}%".format(accuracy))