In [1]:
!nvidia-smi

Sun Jul  5 13:13:58 2020       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 440.33.01    Driver Version: 440.33.01    CUDA Version: 10.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|   0  GeForce RTX 208...  Off  | 00000000:3B:00.0 Off |                  N/A |
| 31%   47C    P0    64W / 250W |      0MiB / 11019MiB |      1%      Default |
+-------------------------------+----------------------+----------------------+
|   1  GeForce RTX 208...  Off  | 00000000:5E:00.0 Off |                  N/A |
| 33%   41C    P0    64W / 250W |      0MiB / 11019MiB |      1%      Default |
+-------------------------------+----------------------+----------------------+
|   2  GeForce RTX 208...  Off  | 00000000:86:00.0 Off |                  N/

In [2]:
import torch
import torchvision
import torchvision.transforms as transforms

In [3]:
# Per-image preprocessing shared by both splits: convert to a tensor, then
# normalize each of the three channels with mean 0.5 and std 0.5.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
])

# Training split, served in shuffled mini-batches of 128.
trainset = torchvision.datasets.CIFAR10(
    root='./data', train=True, download=True, transform=transform,
)
trainloader = torch.utils.data.DataLoader(
    trainset, batch_size=128, shuffle=True, num_workers=2,
)

# Test split, served in a fixed (unshuffled) order.
testset = torchvision.datasets.CIFAR10(
    root='./data', train=False, download=True, transform=transform,
)
testloader = torch.utils.data.DataLoader(
    testset, batch_size=128, shuffle=False, num_workers=2,
)

# Human-readable class names, one per label index.
classes = (
    'plane', 'car', 'bird', 'cat', 'deer',
    'dog', 'frog', 'horse', 'ship', 'truck',
)

Files already downloaded and verified
Files already downloaded and verified


In [4]:
import torch.nn as nn
import torch.nn.functional as F

class MBConv(nn.Module):
    """Three-stage convolutional block: 1x1 expansion, depthwise conv, 1x1 projection.

    Stage 1 widens ``in_channels`` to ``round(in_channels * expand)`` channels
    with a 1x1 convolution, followed by BatchNorm and ReLU6.
    Stage 2 applies a ``kernel_size`` x ``kernel_size`` convolution with one
    group per channel (depthwise), the given ``stride`` and padding
    ``kernel_size // 2``, followed by BatchNorm and ReLU6.
    Stage 3 projects to ``out_channels`` with a 1x1 convolution and BatchNorm;
    no activation and no skip connection is applied to the result.

    Args:
        in_channels: number of channels of the input feature map.
        out_channels: number of channels of the returned feature map.
        kernel_size: side length of the depthwise kernel.
        stride: stride of the depthwise convolution.
        expand: channel multiplier used for the widened intermediate map.
    """

    def __init__(self,in_channels,out_channels,kernel_size=3,stride=1,expand=3):
        super().__init__()

        # Keep the constructor arguments around for inspection.
        self.in_channels, self.out_channels = in_channels, out_channels
        self.kernel_size, self.stride, self.expand = kernel_size, stride, expand

        hidden = round(in_channels * expand)

        # Stage 1: pointwise expansion.
        self.inv_conv = nn.Conv2d(in_channels, hidden, kernel_size=1, stride=1,
                                  padding=0, bias=False)
        self.inv_bn = nn.BatchNorm2d(hidden)
        self.inv_relu6 = nn.ReLU6()

        # Stage 2: depthwise convolution (groups == channels).
        self.wide_conv = nn.Conv2d(hidden, hidden, kernel_size=kernel_size,
                                   stride=stride, padding=kernel_size // 2,
                                   groups=hidden, bias=False)
        self.wide_bn = nn.BatchNorm2d(hidden)
        self.wide_relu6 = nn.ReLU6()

        # Stage 3: pointwise projection, left linear.
        self.out_conv = nn.Conv2d(hidden, out_channels, kernel_size=1, stride=1,
                                  padding=0, bias=False)
        self.out_bn = nn.BatchNorm2d(out_channels)

    def forward(self,x):
        """Run the three stages in order and return the projected feature map."""
        expanded = self.inv_relu6(self.inv_bn(self.inv_conv(x)))
        filtered = self.wide_relu6(self.wide_bn(self.wide_conv(expanded)))
        return self.out_bn(self.out_conv(filtered))

In [5]:
class zero_layer(nn.Module):
    """Candidate operation that ignores its input values and returns all zeros.

    The output has the batch size of the input, ``out_channels`` channels and,
    when ``stride > 1``, spatial dimensions reduced by ``stride``.

    Args:
        in_channels: channels of the incoming feature map (stored only).
        out_channels: channels of the zero tensor that is returned.
        stride: spatial down-sampling factor applied to dims 2 and 3.
    """
    def __init__(self,in_channels,out_channels,stride):
        super(zero_layer,self).__init__()

        self.in_channels = in_channels
        self.out_channels = out_channels
        self.stride = stride

    def forward(self,x):
        """Return a zero tensor shaped like the strided output for ``x``."""
        size = list(x.size())
        size[1] = self.out_channels
        if self.stride>1:
            # Ceil division: a convolution with odd kernel k, padding k//2 and
            # this stride produces ceil(H / stride) rows, so flooring would
            # yield a smaller map than the MBConv candidates for odd H.
            size[2] = -(-size[2]//self.stride)
            size[3] = -(-size[3]//self.stride)

        # new_zeros allocates directly with x's dtype and device, avoiding a
        # CPU allocation + copy and keeping the dtype consistent with x.
        return x.new_zeros(size)

# zero = zero_layer(3,6,2)
# temp = torch.ones([4,3,224,224]).to(device)
# zero(temp).size()

In [6]:
# Scratch check: torch.Tensor(n) allocates n floats without initializing them,
# so the values displayed below are arbitrary leftovers from memory.
torch.Tensor(6)

tensor([4.2039e-45, 2.6709e-42, 6.8374e-40, 1.8206e-34, 2.5353e+30, 1.1210e-44])

In [7]:
import torch.nn.functional as F
import numpy as np

class random_durum(nn.Module):
    """Layer holding several candidate operations, exactly one of which runs per forward pass.

    The candidates are six MBConv blocks, one for every combination of
    expand in (3, 6) and kernel_size in (3, 5, 7), in the order
    (3,3),(3,5),(3,7),(6,3),(6,5),(6,7). When ``include_zero`` is True a
    zero_layer is appended as a seventh candidate.

    Args:
        in_channels: channels of the input feature map.
        out_channels: channels every candidate produces.
        stride: stride passed to every candidate.
        include_zero: whether to add the all-zero candidate.

    Attributes:
        all_edges: ModuleList with every candidate (all are registered, so all
            of their parameters are visible to an optimizer).
        num_edges: number of candidates.
        prob_edges: one value per candidate, initialised to zero. It is not
            consulted by select_active, which samples uniformly.
        active: one-element list holding the index of the candidate used by forward.
        inactive: one-element list, initialised to [1]; not read in this class.
    """
    def __init__(self,in_channels,out_channels,stride,include_zero = False):
        super(random_durum,self).__init__()
        self.all_edges = nn.ModuleList()

        #(3,3),(3,5),(3,7),(6,3),(6,5),(6,7)
        for expand in [3,6]:
            for kernel_size in [3,5,7]:
                self.all_edges.append(MBConv(in_channels,out_channels,kernel_size,stride,expand))

        if include_zero:
            self.all_edges.append(zero_layer(in_channels,out_channels,stride))

        # Derive the count from the list so the two can never disagree.
        self.num_edges = len(self.all_edges)

        # torch.Tensor(n) would hand back uninitialised memory (possibly huge
        # or denormal values); start from well-defined zeros instead.
        self.prob_edges = torch.zeros(self.num_edges)

        self.active = [0]
        self.inactive = [1]

    def select_active(self):
        """Pick one candidate uniformly at random and make it the active one."""
        edge = np.random.choice(self.num_edges,1)[0]
        # Store a plain Python int rather than a numpy scalar.
        self.active[0] = int(edge)

    def forward(self,x):
        """Apply the currently active candidate to ``x``."""
        return self.all_edges[self.active[0]](x)

# temp = torch.ones([4,3,224,224])
# random_durum(3,6,1,False)(temp)

In [8]:
import torch.nn as nn
import torch.nn.functional as F


class Net(nn.Module):
    """Small classifier: two random_durum layers, each followed by 2x2 max-pooling, then three linear layers.

    The flattening step uses 16 * 8 * 8 features per sample, and the final
    linear layer produces 10 outputs.

    Attributes:
        edges: plain list of every random_durum submodule, used by
            select_active to resample all of them at once.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = random_durum(3, 6,1)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = random_durum(6, 16,1)
        self.pool2 = nn.MaxPool2d(2, 2)
        self.fc1 = nn.Linear(16 * 8 * 8, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

        # Collect the searchable layers once so they need not be rediscovered later.
        self.edges = [mod for mod in self.modules() if isinstance(mod, random_durum)]

    def select_active(self):
        """Ask every random_durum layer to pick a new active candidate."""
        for edge in self.edges:
            edge.select_active()

    def forward(self, x):
        """Return the 10 output scores for each sample in ``x``."""
        feats = self.conv1(x)
        feats = self.pool(feats)
        feats = self.conv2(feats)
        feats = self.pool2(feats)
        flat = feats.view(-1, 16 * 8 * 8)
        hidden = F.relu(self.fc1(flat))
        hidden = F.relu(self.fc2(hidden))
        return self.fc3(hidden)


# Build the model; every random_durum layer starts with candidate 0 active.
net = Net()
# temp = torch.ones([4,3,224,224])
# net.select_active()
# net.select_active()
# net.select_active()
# net.select_active()
# net.select_active()

In [9]:
# Use the first CUDA GPU when one is available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

# If a CUDA device exists, the line below prints the CUDA device:

print(device)

cuda:0


In [10]:
# Move the model's parameters and buffers to the selected device. As the last
# expression in the cell, the returned module's repr is displayed.
net.to(device)

Net(
  (conv1): random_durum(
    (all_edges): ModuleList(
      (0): MBConv(
        (inv_conv): Conv2d(3, 9, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (inv_bn): BatchNorm2d(9, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (inv_relu6): ReLU6()
        (wide_conv): Conv2d(9, 9, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=9, bias=False)
        (wide_bn): BatchNorm2d(9, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (wide_relu6): ReLU6()
        (out_conv): Conv2d(9, 6, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (out_bn): BatchNorm2d(6, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
      (1): MBConv(
        (inv_conv): Conv2d(3, 9, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (inv_bn): BatchNorm2d(9, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (inv_relu6): ReLU6()
        (wide_conv): Conv2d(9, 9, kernel_size=(5, 5), stride=(1, 1), pa

In [11]:
import torch.optim as optim

# Classification loss computed from the network outputs and integer class labels.
criterion = nn.CrossEntropyLoss()
# SGD with momentum over every parameter registered on `net`; this includes all
# candidate ops inside each random_durum layer, not only the active one.
optimizer = optim.SGD(net.parameters(), lr=0.01, momentum=0.9)

In [12]:
from tqdm import tqdm

num_classes = 10
trn_loss_list = []   # mean training loss per epoch
val_loss_list = []   # mean validation loss per epoch (plain floats)
acc_list = []        # per-epoch list of per-class accuracies (percent)
acc_list_aug = []
num_batches=len(trainloader)
total_epoch = 10

for epoch in range(total_epoch):   # loop over the dataset multiple times
    net.train()
    running_loss = 0.0
    for data in tqdm(trainloader, total=num_batches):
        # Resample the active candidate of every searchable layer for each step.
        net.select_active()
        # data is a list of [inputs, labels]; move both to the training device
        inputs, labels = data[0].to(device), data[1].to(device)

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

    net.eval()
    # NOTE(review): select_active() is not called here, so evaluation uses
    # whichever candidates were sampled on the last training step - confirm
    # that this is the intended evaluation protocol.
    with torch.no_grad():  # no autograd bookkeeping is needed while evaluating
        val_loss = 0.0
        correct_per_class = torch.zeros(num_classes, dtype=torch.long, device=device)
        total_per_class = torch.zeros(num_classes, dtype=torch.long, device=device)
        for val in testloader:
            v1 = val[0].to(device)
            val_labels = val[1].to(device)
            val_output = net(v1)
            # .item() keeps the running sum (and the history lists) as Python
            # floats instead of device tensors.
            val_loss += criterion(val_output, val_labels).item()
            _, predicted = torch.max(val_output, 1)
            # Boolean mask of correct predictions; no squeeze(), which would
            # produce a 0-d tensor for a batch of size 1.
            hits = predicted == val_labels
            # Per-class tallies in one vectorized call instead of a Python
            # loop issuing one .item() synchronisation per sample.
            total_per_class += torch.bincount(val_labels, minlength=num_classes)
            correct_per_class += torch.bincount(val_labels[hits], minlength=num_classes)

        class_correct = [float(v) for v in correct_per_class.tolist()]
        class_total = [float(v) for v in total_per_class.tolist()]
        correct = int(sum(class_correct))
        total = int(sum(class_total))

        print("epoch:",str(epoch))
        temp_acc = []
        for cls in range(num_classes):
            if class_total[cls]==0:
                print('class_total = 0',class_correct,class_total)
            else:
                cls_acc = 100 * class_correct[cls] / class_total[cls]
                print('Accuracy of %5s : %2d %%' % (cls, cls_acc))
                temp_acc.append(cls_acc)
        acc_list.append(temp_acc)

    epoch_trn_loss = running_loss / num_batches
    epoch_val_loss = val_loss / len(testloader)

    # The summary is printed after the full epoch, so the step shown is the
    # number of batches processed (previously a clobbered loop index).
    print("epoch: {}/{} | step: {}/{} | trn loss: {:.4f} | val loss: {:.4f}".format(
        epoch+1, total_epoch, num_batches, num_batches, epoch_trn_loss, epoch_val_loss
    ))
    print('Accuracy of the network on the test images: %d %%' % (100 * correct / total))

    # Average over the real number of batches rather than a hard-coded 1875,
    # so the stored value matches the loss printed above.
    trn_loss_list.append(epoch_trn_loss)
    val_loss_list.append(epoch_val_loss)
    running_loss = 0.0

print('Finished Training')

100%|██████████| 391/391 [00:04<00:00, 81.67it/s]


epoch: 0
Accuracy of     0 : 42 %
Accuracy of     1 : 76 %
Accuracy of     2 : 30 %
Accuracy of     3 : 15 %
Accuracy of     4 : 15 %
Accuracy of     5 : 33 %
Accuracy of     6 : 41 %
Accuracy of     7 : 43 %
Accuracy of     8 : 54 %
Accuracy of     9 : 27 %
epoch: 1/10 | step: 10/391 | trn loss: 1.9824 | val loss: 1.7208
Accuracy of the network on the test images: 37 %


100%|██████████| 391/391 [00:05<00:00, 73.12it/s]


epoch: 1
Accuracy of     0 : 59 %
Accuracy of     1 : 50 %
Accuracy of     2 : 42 %
Accuracy of     3 : 24 %
Accuracy of     4 : 19 %
Accuracy of     5 : 38 %
Accuracy of     6 : 45 %
Accuracy of     7 : 38 %
Accuracy of     8 : 65 %
Accuracy of     9 : 48 %
epoch: 2/10 | step: 10/391 | trn loss: 1.5933 | val loss: 1.5726
Accuracy of the network on the test images: 43 %


100%|██████████| 391/391 [00:05<00:00, 75.03it/s]


epoch: 2
Accuracy of     0 : 52 %
Accuracy of     1 : 59 %
Accuracy of     2 : 32 %
Accuracy of     3 : 13 %
Accuracy of     4 : 51 %
Accuracy of     5 : 44 %
Accuracy of     6 : 59 %
Accuracy of     7 : 55 %
Accuracy of     8 : 66 %
Accuracy of     9 : 57 %
epoch: 3/10 | step: 10/391 | trn loss: 1.4621 | val loss: 1.3981
Accuracy of the network on the test images: 49 %


100%|██████████| 391/391 [00:05<00:00, 75.68it/s]


epoch: 3
Accuracy of     0 : 47 %
Accuracy of     1 : 34 %
Accuracy of     2 : 43 %
Accuracy of     3 : 41 %
Accuracy of     4 : 34 %
Accuracy of     5 : 45 %
Accuracy of     6 : 61 %
Accuracy of     7 : 63 %
Accuracy of     8 : 72 %
Accuracy of     9 : 63 %
epoch: 4/10 | step: 10/391 | trn loss: 1.3670 | val loss: 1.3581
Accuracy of the network on the test images: 50 %


100%|██████████| 391/391 [00:05<00:00, 67.48it/s]


epoch: 4
Accuracy of     0 : 60 %
Accuracy of     1 : 68 %
Accuracy of     2 : 31 %
Accuracy of     3 : 27 %
Accuracy of     4 : 34 %
Accuracy of     5 : 59 %
Accuracy of     6 : 82 %
Accuracy of     7 : 51 %
Accuracy of     8 : 69 %
Accuracy of     9 : 51 %
epoch: 5/10 | step: 10/391 | trn loss: 1.2889 | val loss: 1.3090
Accuracy of the network on the test images: 53 %


100%|██████████| 391/391 [00:04<00:00, 83.22it/s] 


epoch: 5
Accuracy of     0 : 72 %
Accuracy of     1 : 44 %
Accuracy of     2 : 41 %
Accuracy of     3 : 31 %
Accuracy of     4 : 34 %
Accuracy of     5 : 41 %
Accuracy of     6 : 70 %
Accuracy of     7 : 56 %
Accuracy of     8 : 81 %
Accuracy of     9 : 59 %
epoch: 6/10 | step: 10/391 | trn loss: 1.2332 | val loss: 1.3175
Accuracy of the network on the test images: 53 %


100%|██████████| 391/391 [00:05<00:00, 70.58it/s]


epoch: 6
Accuracy of     0 : 66 %
Accuracy of     1 : 67 %
Accuracy of     2 : 31 %
Accuracy of     3 : 43 %
Accuracy of     4 : 43 %
Accuracy of     5 : 52 %
Accuracy of     6 : 64 %
Accuracy of     7 : 72 %
Accuracy of     8 : 63 %
Accuracy of     9 : 72 %
epoch: 7/10 | step: 10/391 | trn loss: 1.1754 | val loss: 1.1787
Accuracy of the network on the test images: 57 %


100%|██████████| 391/391 [00:05<00:00, 70.93it/s]


epoch: 7
Accuracy of     0 : 58 %
Accuracy of     1 : 69 %
Accuracy of     2 : 38 %
Accuracy of     3 : 41 %
Accuracy of     4 : 55 %
Accuracy of     5 : 39 %
Accuracy of     6 : 68 %
Accuracy of     7 : 69 %
Accuracy of     8 : 76 %
Accuracy of     9 : 67 %
epoch: 8/10 | step: 10/391 | trn loss: 1.1322 | val loss: 1.1676
Accuracy of the network on the test images: 58 %


100%|██████████| 391/391 [00:05<00:00, 77.68it/s]


epoch: 8
Accuracy of     0 : 66 %
Accuracy of     1 : 51 %
Accuracy of     2 : 38 %
Accuracy of     3 : 51 %
Accuracy of     4 : 54 %
Accuracy of     5 : 46 %
Accuracy of     6 : 66 %
Accuracy of     7 : 71 %
Accuracy of     8 : 73 %
Accuracy of     9 : 63 %
epoch: 9/10 | step: 10/391 | trn loss: 1.0893 | val loss: 1.1683
Accuracy of the network on the test images: 58 %


100%|██████████| 391/391 [00:04<00:00, 81.08it/s]


epoch: 9
Accuracy of     0 : 60 %
Accuracy of     1 : 79 %
Accuracy of     2 : 51 %
Accuracy of     3 : 39 %
Accuracy of     4 : 53 %
Accuracy of     5 : 32 %
Accuracy of     6 : 75 %
Accuracy of     7 : 57 %
Accuracy of     8 : 77 %
Accuracy of     9 : 72 %
epoch: 10/10 | step: 10/391 | trn loss: 1.0527 | val loss: 1.1415
Accuracy of the network on the test images: 59 %
Finished Training
