In [0]:
import torch
import torchvision
import torchvision.transforms as transforms
from torchvision.utils import save_image
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import numpy as np
import os
import glob
import PIL
from PIL import Image
from torch.utils import data as D
from torch.utils.data.sampler import SubsetRandomSampler
import random
import torchsummary
import time

__all__ = ['ResNet', 'resnet18', 'resnet34', 'resnet50', 'resnet101',
           'resnet152']


model_urls = {
    'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth',
    'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth',
    'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth',
    'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth',
    'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth',
}

In [0]:
batch_size = 64 
validation_ratio = 0.1
random_seed = 10

In [0]:
#데이터셋 설정
transform_train = transforms.Compose([
        transforms.Resize(224),
        ### 오버피팅을 방지하기 위해 RandomCrop과 Randam HorizontalFlip같은 노이즈 추가.
        transforms.RandomCrop(224, padding=28), #오버피팅 막으려고 랜덤으로 잘라서 이미지 만든다,,,,(?)
        transforms.RandomHorizontalFlip(), # 오버피팅 막으려고 이미지 반전시켜서 만든다,,,(?)
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2470, 0.2435, 0.2616))])

#validation이나 test는 그런 것 필요 없음
transform_validation = transforms.Compose([
        transforms.Resize(224),
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2470, 0.2435, 0.2616))])


transform_test = transforms.Compose([
        transforms.Resize(224),
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2470, 0.2435, 0.2616))])


#데이터 세트 다운로드

trainset = torchvision.datasets.CIFAR10(
    root='./data', train=True, download=True, transform=transform_train)

validset = torchvision.datasets.CIFAR10(
    root='./data', train=True, download=True, transform=transform_validation)

testset = torchvision.datasets.CIFAR10(
    root='./data', train=False, download=True, transform=transform_test)

#trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size,
#                                          shuffle=True, num_workers=0)

num_train = len(trainset)
indices = list(range(num_train))
split = int(np.floor(validation_ratio * num_train))

np.random.seed(random_seed)
np.random.shuffle(indices)

train_idx, valid_idx = indices[split:], indices[:split]
train_sampler = SubsetRandomSampler(train_idx)
valid_sampler = SubsetRandomSampler(valid_idx)


# train_loader에 데이터를 로드 해오는 코드들
train_loader = torch.utils.data.DataLoader(
    trainset, batch_size=batch_size, sampler=train_sampler, num_workers=0
)

valid_loader = torch.utils.data.DataLoader(
    validset, batch_size=batch_size, sampler=valid_sampler, num_workers=0
)

test_loader = torch.utils.data.DataLoader(
    testset, batch_size=batch_size, shuffle=False, num_workers=0
)

#10개 클래스로 구분
classes = ('plane', 'car', 'bird', 'cat',
           'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

# 초기 학습률
initial_lr = 0.1

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting ./data/cifar-10-python.tar.gz to ./data
Files already downloaded and verified
Files already downloaded and verified


In [0]:
class depthwise_conv(nn.Module):
    # __init__()에서 모델의 구조와 동작을 정의하는 생성자를 정의(속성값을 초기화하는 역할로, 객체가 생성될 때 자동으호 호출)
    def __init__(self, nin, kernel_size, padding, bias=False, stride=1):
        super(depthwise_conv, self).__init__() # super() 함수를 부르면 여기서 만든 클래스는 nn.Module 클래스의 속성들을 가지고 초기화
        #nn.conv2D 모듈 : 첫번째는 입력 채널 수, 두번째변수는 출력 채널 수 세번째는 커널 사이즈(숫자하나만 지정하면 정사각형으로 간주)
        self.depthwise = nn.Conv2d(nin, nin, kernel_size=kernel_size, stride=stride, padding=padding, groups=nin, bias=bias)
        #self.depthwise는 이제 nin 크기의 받아서 nin 크기의 출력을 하는 conv2D 함수가 됨.

    #foward() 함수는 모델이 학습데이터를 입력받아서 forward 연산을 진행시키는 함수
    def forward(self, x):
        out = self.depthwise(x)  #self.depthwise 실행하고 반환
        return out

In [0]:
class dw_block(nn.Module):
    def __init__(self, nin, kernel_size, padding=1, bias=False, stride=1):
        super(dw_block, self).__init__()
        self.dw_block = nn.Sequential(
            depthwise_conv(nin, kernel_size, padding, bias, stride),
            #BatchNorm2d(배치 정규화): 학습률을 너무 높게 잡으면 기울기가 소실되거나 발산하는 증상을 예방하여 학습과정을 안정화하는 방법
            nn.BatchNorm2d(nin),
            ##distribution을 평균 0, 표준편차 1인 input으로 normalize시키는 방법
            ##Training 할 때는 batch의 평균과 분산으로 normalize 하고, Test 할 때는 계산해놓은 이동 평균(training 때 계산)으로 normalize
            nn.ReLU()
        )
    def forward(self, x):
        out = self.dw_block(x)
        return out

In [0]:
class one_by_one_block(nn.Module):
    def __init__(self, nin, nout, padding=0, bias=False, stride=1):
        super(one_by_one_block, self).__init__()
        self.one_by_one_block = nn.Sequential(
            #커널 사이즈 1x1 로 컨벌루션 진행
            nn.Conv2d(nin, nout, kernel_size=1, stride=stride, padding=padding, bias=bias),
            nn.BatchNorm2d(nout),
            nn.ReLU()
        )
    def forward(self, x):
        out = self.one_by_one_block(x)
        return out

In [0]:
class MobileNet(nn.Module):
  
    def __init__(self, input_channel, num_classes=10):
        super(MobileNet, self).__init__()
        
        self.block1 = nn.Sequential(
            #nn.conv2D 모듈 : 첫번째는 입력 채널 수, 두번째변수는 출력 채널 수 세번째는 커널 사이즈(숫자하나만 지정하면 정사각형으로 간주)
            #BatchNorm2d(배치 정규화): 학습률을 너무 높게 잡으면 기울기가 소실되거나 발산하는 증상을 예방하여 학습과정을 안정화하는 방법
            #계층에 들어가는 입력을 평균과 분산으로 정규화함.
            nn.Conv2d(input_channel, 32, kernel_size=3, stride=2, padding=1, bias=False),
            nn.BatchNorm2d(32),
            nn.ReLU(),
        )
        #112x112x32
        ###identity 저장
        self.block2=nn.Sequential(
            dw_block(32, kernel_size=3), #depthwise convolution / stride=1
        )
        ###resnet 작용하기 위해 downsampling넣기
        self.block3=nn.Sequential( 
            nn.ReLU(),  
            one_by_one_block(32, 64), #one_by_one convolution
            #112x112x64
            dw_block(64, kernel_size=3, stride=2), 
        )
        #56x56x64
        ###identity 저장
        self.block4=nn.Sequential(
            one_by_one_block(64, 64),
            dw_block(64, kernel_size=3),
        )
        ###resnet 작용하기 위해 downsampling넣기
        self.block5=nn.Sequential(    
            one_by_one_block(64, 128),
            #56x56x128
            nn.ReLU(),
            dw_block(128, kernel_size=3, stride=2),
        )
        #28x28x128
        ###identity 저장
        self.block6=nn.Sequential( 
            one_by_one_block(128, 128),
            dw_block(128, kernel_size=3),
        )
        #resnet 작용하기 위해 downsampling넣기
        self.block7=nn.Sequential(
            one_by_one_block(128, 256),
            #28x28x256
            nn.ReLU(),
            dw_block(256, kernel_size=3, stride=2),
        )
        #14x14x256
        #identity 저장
        self.block8=nn.Sequential(
            one_by_one_block(256, 128),
            #14x14x128   
            # 5 times 
            dw_block(128, kernel_size=3),
            one_by_one_block(128, 128),
            dw_block(128, kernel_size=3),
            one_by_one_block(128, 128),
            dw_block(128, kernel_size=3),
            one_by_one_block(128, 128),
            dw_block(128, kernel_size=3),
            one_by_one_block(128, 128),
            dw_block(128, kernel_size=3),
            one_by_one_block(128, 256),
        )
        #14x14x256
        #resnet 작용하기 위해 downsampling넣기
        self.block9=nn.Sequential(
            one_by_one_block(256, 512),
            nn.ReLU(),
            dw_block(512, kernel_size=3, stride=2),
        )
        #7x7x512
        #identity 저장
        self.block10=nn.Sequential(
            one_by_one_block(512, 512),
            #7x7x512
            dw_block(512, kernel_size=3, padding=4, stride=2),
        )
        #7x7x512
        #resnet 작용하기 위해 downsampling넣기
        self.block11=nn.Sequential(
            one_by_one_block(512, 1024),
        )
        #7x7x1024
        #avgPool->1x1x1024  

        #Fully Connected layer      
        self.fc_v2 = nn.Conv2d(1024, num_classes, 1, 1, groups=2)
        
    def forward(self, x):
        x = self.block1(x)
        identity = x
        x = self.block2(x)
        #112x112x32
        x += identity

        x = self.block3(x)
        identity = x
        x = self.block4(x)
        x += identity

        x = self.block5(x)
        identity = x
        x = self.block6(x)
        x += identity

        x = self.block7(x)
        identity = x
        x = self.block8(x)
        x += identity

        x = self.block9(x)
        identity = x
        x = self.block10(x)
        x += identity
        x = self.block11(x)
        
        body_output = x
        
        avg_pool_output = F.adaptive_avg_pool2d(body_output, (1, 1))
        output = self.fc_v2(avg_pool_output)
        output = output.view(output.size(0), -1)
        
        return output

In [0]:
net = MobileNet(3, 10) #아마도 인풋채널3개(RGB), 클래스 10개 로 추정됨!

In [0]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") #GPU있고 cuda를 쓸수 있으면 쿠다를 쓰게 하고 없으면 cpu 쓰게함
print(device)

cuda:0


In [0]:
net.to(device) #이용가능한 device(cpu or Gpu)에 네트워크 전송

MobileNet(
  (block1): Sequential(
    (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
    (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
  )
  (block2): Sequential(
    (0): dw_block(
      (dw_block): Sequential(
        (0): depthwise_conv(
          (depthwise): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
        )
        (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): ReLU()
      )
    )
  )
  (block3): Sequential(
    (0): ReLU()
    (1): one_by_one_block(
      (one_by_one_block): Sequential(
        (0): Conv2d(32, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): ReLU()
      )
    )
    (2): dw_block(
      (dw_block): Sequential(
        (0): depthwise_conv(
          (depthwise): Conv2d(

In [0]:
torchsummary.summary(net, (3, 224, 224))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 32, 112, 112]             864
       BatchNorm2d-2         [-1, 32, 112, 112]              64
              ReLU-3         [-1, 32, 112, 112]               0
            Conv2d-4         [-1, 32, 112, 112]             288
    depthwise_conv-5         [-1, 32, 112, 112]               0
       BatchNorm2d-6         [-1, 32, 112, 112]              64
              ReLU-7         [-1, 32, 112, 112]               0
          dw_block-8         [-1, 32, 112, 112]               0
              ReLU-9         [-1, 32, 112, 112]               0
           Conv2d-10         [-1, 64, 112, 112]           2,048
      BatchNorm2d-11         [-1, 64, 112, 112]             128
             ReLU-12         [-1, 64, 112, 112]               0
 one_by_one_block-13         [-1, 64, 112, 112]               0
           Conv2d-14           [-1, 64,

In [0]:
start = time.time()  # 시작 시간 저장
criterion = nn.CrossEntropyLoss() #(binary 아닌)여러 클래스의 loss구하는 함수
optimizer = optim.SGD(net.parameters(), lr=initial_lr, momentum=0.9) ##optimizer(SGD방식): 역전파과정에서 loss function의 값을 줄여나가며 학습시킴(가중치 업데이트)
torch.autograd.set_detect_anomaly(True)
for epoch in range(100):  
    if epoch == 0:
        lr = initial_lr
    elif epoch % 2 == 0 and epoch != 0:
        lr *= 0.94
        optimizer = optim.SGD(net.parameters(), lr=lr, momentum=0.9)
    
    running_loss = 0.0
    for i, data in enumerate(train_loader, 0):
        inputs, labels = data  #데이터의 인풋 받아온다.s
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()

        outputs = net(inputs)
        loss = criterion(outputs, labels) #결과와 라벨을 비교해서 loss를 구한다.
        loss.backward() #loss를 backward해서 기울기(미분치)를 구한다. -> 맞나?
        optimizer.step() #한 스텝 이동
        running_loss += loss.item()
        
        show_period = 250
        if i % show_period == show_period-1:    # print every "show_period" mini-batches
            print('[%d, %5d] loss: %.7f' %
                  (epoch + 1, i + 1, running_loss / show_period))
            running_loss = 0.0
       
    total = 0
    correct = 0
    for i, data in enumerate(valid_loader, 0):
        inputs, labels = data
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = net(inputs)
        
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct = correct + (predicted == labels).sum().item() #맞으면 correct하고 추가함
        
    print('[%d epoch] Accuracy of the network on the validation images: %d %%' % 
          (epoch+1, 100 * correct / total)
         )

print('Finished Training')
print("time :", time.time() - start)  # 현재시각 - 시작시간 = 실행 시간

[1,   250] loss: 1.8506125
[1,   500] loss: 1.4230518
[1 epoch] Accuracy of the network on the validation images: 55 %
[2,   250] loss: 1.0747075
[2,   500] loss: 0.9801188
[2 epoch] Accuracy of the network on the validation images: 65 %
[3,   250] loss: 0.8303629
[3,   500] loss: 0.7667795
[3 epoch] Accuracy of the network on the validation images: 70 %
[4,   250] loss: 0.6778940
[4,   500] loss: 0.6443451
[4 epoch] Accuracy of the network on the validation images: 76 %
[5,   250] loss: 0.5910960
[5,   500] loss: 0.5706277
[5 epoch] Accuracy of the network on the validation images: 77 %
[6,   250] loss: 0.5244013
[6,   500] loss: 0.5244776
[6 epoch] Accuracy of the network on the validation images: 79 %
[7,   250] loss: 0.4674361
[7,   500] loss: 0.4754478
[7 epoch] Accuracy of the network on the validation images: 79 %
[8,   250] loss: 0.4498245
[8,   500] loss: 0.4271771
[8 epoch] Accuracy of the network on the validation images: 81 %
[9,   250] loss: 0.3948988
[9,   500] loss: 0.39