In [1]:
# Shell escape: show the GPUs on this machine with their current memory use and
# utilisation, so the hard-coded GPU indices used further down can be checked.
!nvidia-smi

Tue Jul 14 09:47:06 2020       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 440.33.01    Driver Version: 440.33.01    CUDA Version: 10.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|   0  GeForce RTX 208...  Off  | 00000000:3B:00.0 Off |                  N/A |
| 31%   51C    P0    64W / 250W |      0MiB / 11019MiB |      0%      Default |
+-------------------------------+----------------------+----------------------+
|   1  GeForce RTX 208...  Off  | 00000000:5E:00.0 Off |                  N/A |
| 34%   53C    P0    69W / 250W |      0MiB / 11019MiB |      1%      Default |
+-------------------------------+----------------------+----------------------+
|   2  GeForce RTX 208...  Off  | 00000000:86:00.0 Off |                  N/

In [2]:
import torch
import torchvision
import torchvision.transforms as transforms

In [3]:
# Convert PIL images to tensors, then normalise every RGB channel with
# mean 0.5 and std 0.5.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
])


def _build_cifar10_split(is_train):
    """Return ``(dataset, loader)`` for the CIFAR-10 train or test split.

    The data is stored under ``./data`` (downloaded if missing). Only the
    training split is shuffled; both splits use batches of 256 and two
    worker processes.
    """
    dataset = torchvision.datasets.CIFAR10(
        root='./data',
        train=is_train,
        download=True,
        transform=transform,
    )
    loader = torch.utils.data.DataLoader(
        dataset,
        batch_size=256,
        shuffle=is_train,
        num_workers=2,
    )
    return dataset, loader


trainset, trainloader = _build_cifar10_split(True)
testset, testloader = _build_cifar10_split(False)

# Human-readable class names, indexed by label id.
classes = (
    'plane', 'car', 'bird', 'cat', 'deer',
    'dog', 'frog', 'horse', 'ship', 'truck',
)

Files already downloaded and verified
Files already downloaded and verified


In [4]:
# Scratch check: the legacy `torch.Tensor(n)` constructor allocates an
# *uninitialised* tensor with n elements, so the eight values displayed are
# arbitrary memory contents, not the number 8.
torch.Tensor(8)

tensor([-5.2286e-24,  4.5731e-41, -5.3081e-24,  4.5731e-41, -2.7301e-09,
         4.5730e-41, -5.2832e-24,  4.5731e-41])

In [5]:
from torch.nn.parallel.data_parallel import DataParallel
from torch.nn.parallel import DistributedDataParallel as DDP
import torch.distributed as dist
import os

# Rendezvous settings for a single-process "distributed" run on localhost;
# the default env:// init method reads these two variables.
os.environ['MASTER_ADDR'] = '127.0.0.1'
os.environ['MASTER_PORT'] = '29500'

# init_process_group() raises if the default group already exists, so guard it
# to keep this cell re-runnable without restarting the kernel.
if not dist.is_initialized():
    dist.init_process_group(
        # NCCL only works with CUDA devices; fall back to gloo on CPU-only hosts.
        backend='nccl' if torch.cuda.is_available() else 'gloo',
        rank=0,
        world_size=1,
    )

In [6]:
from darts import Darts

# Build the search network from the project-local `darts.Darts` class.
# NOTE(review): `C` is presumably the initial channel count and `num_layers`
# the number of stacked cells -- confirm against darts.py.
net = Darts(
    num_layers=8,
    num_classes=10,
    C=36,
)



In [7]:
# Train on GPU index 2 whenever CUDA is usable; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:2")
else:
    device = torch.device("cpu")

# If a CUDA device exists, this prints the CUDA device:
print(device)

cuda:2


In [8]:
# GPU indices handed to DistributedDataParallel.
# NOTE(review): recent PyTorch releases accept only a single entry in
# `device_ids` (one process per GPU); confirm the installed version still
# supports this single-process multi-GPU mode.
ddp_device_ids = [2, 3]

# Keep only indices that actually exist on this machine. The previous
# `device_count() > 1` check did not guarantee that GPUs 2 and 3 are present.
usable_device_ids = [idx for idx in ddp_device_ids
                     if idx < torch.cuda.device_count()]

net.to(device)

# Skip wrapping when the model is already a DDP instance, so re-running this
# cell does not nest wrappers (which would break `net.module.<attr>` lookups).
if usable_device_ids and not isinstance(net, DDP):
    net = DDP(net, device_ids=usable_device_ids)

In [9]:
# Partition the model's parameters by name: every parameter whose name
# contains 'arch' goes to the architecture group, all others to the weight
# group. Each group gets its own optimizer later on.
_named_params = list(net.named_parameters())
arch_params = [param for name, param in _named_params if 'arch' in name]
weight_params = [param for name, param in _named_params if 'arch' not in name]


In [10]:
import torch.optim as optim
import torch
import torch.nn as nn

# Classification loss used for every forward pass.
criterion = nn.CrossEntropyLoss()

# Network weights: SGD with momentum.
optimizer_weight = optim.SGD(
    params=weight_params,
    momentum=0.9,
    lr=0.01,
)

# Architecture parameters: Adam with a smaller learning rate.
optimizer_arch = optim.Adam(
    params=arch_params,
    lr=0.001,
)

In [None]:
from tqdm import tqdm
import torch.nn.functional as F

# Alternating weight / architecture training with per-epoch evaluation.
#
# Per-epoch results are collected in:
#   trn_loss_list - mean training loss per batch
#   val_loss_list - mean test-set loss per batch (plain Python floats)
#   acc_list      - list of per-class test accuracies (percent)
num_classes = 10
trn_loss_list = []
val_loss_list = []
acc_list = []
acc_list_aug = []
num_batches = len(trainloader)
total_epoch = 10

# Every `arch_update_interval`-th mini-batch (from the second epoch on) updates
# the architecture parameters instead of the network weights.
arch_update_interval = 6
# Number of entries printed from each architecture-parameter container.
num_arch_entries = 4

# The architecture parameters live on the underlying model; unwrap the
# DDP/DataParallel wrapper only when one is actually present.
core_net = net.module if isinstance(net, (DDP, DataParallel)) else net

for epoch in range(total_epoch):   # loop over the dataset multiple times
    # ---- training ---------------------------------------------------------
    net.train()
    running_loss = 0.0
    for step, batch in enumerate(tqdm(trainloader, total=num_batches)):
        # `batch` is a list of [inputs, labels]
        inputs, labels = batch[0].to(device), batch[1].to(device)

        # The first epoch trains weights only; afterwards every
        # `arch_update_interval`-th batch is an architecture step.
        # NOTE(review): the architecture step uses a training batch, not a
        # held-out split -- confirm this is intended.
        is_arch_step = epoch != 0 and step % arch_update_interval == 0
        optimizer = optimizer_arch if is_arch_step else optimizer_weight

        # Zero the gradients of the group being stepped. Gradients that
        # backward() leaves on the other group are cleared by that group's own
        # zero_grad() before its next step.
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

    # ---- evaluation -------------------------------------------------------
    net.eval()
    val_loss = 0.0
    class_correct = [0.] * num_classes
    class_total = [0.] * num_classes
    correct = 0
    total = 0
    with torch.no_grad():  # no autograd graph is needed for evaluation
        for val_batch in testloader:
            val_inputs = val_batch[0].to(device)
            val_labels = val_batch[1].to(device)
            val_output = net(val_inputs)

            # .item() keeps the running sum a Python float, so val_loss_list
            # never holds device tensors (which matplotlib cannot plot).
            val_loss += criterion(val_output, val_labels).item()

            _, predicted = torch.max(val_output, 1)
            hits = predicted == val_labels
            total += val_labels.size(0)
            correct += hits.sum().item()

            # Per-class counts for the whole batch at once, instead of one
            # device->host sync per sample.
            batch_total = torch.bincount(val_labels, minlength=num_classes).tolist()
            batch_hits = torch.bincount(val_labels[hits], minlength=num_classes).tolist()
            for cls in range(num_classes):
                class_total[cls] += batch_total[cls]
                class_correct[cls] += batch_hits[cls]

    print("epoch:",str(epoch))
    temp_acc = []
    for cls in range(num_classes):
        if class_total[cls]==0:
            print('class_total = 0',class_correct,class_total)
        else:
            cls_acc = 100 * class_correct[cls] / class_total[cls]
            print('Accuracy of %5s : %2d %%' % (cls, cls_acc))
            temp_acc.append(cls_acc)
    acc_list.append(temp_acc)

    # Index of the currently strongest candidate in every row of each
    # architecture-parameter entry.
    print('arch_param_normal')
    for entry in range(num_arch_entries):
        print(torch.max(F.softmax(core_net.arch_param_normal[entry],dim=1),1)[1])

    print()
    print('arch_param_reduc')
    for entry in range(num_arch_entries):
        print(torch.max(F.softmax(core_net.arch_param_reduc[entry],dim=1),1)[1])

    mean_trn_loss = running_loss / num_batches
    mean_val_loss = val_loss / len(testloader)

    # The summary is printed after the epoch's last batch, so the step counter
    # is num_batches (previously a clobbered loop index printed "4").
    print("epoch: {}/{} | step: {}/{} | trn loss: {:.4f} | val loss: {:.4f}".format(
        epoch+1, total_epoch, num_batches, num_batches, mean_trn_loss, mean_val_loss
    ))
    print('Accuracy of the network on the test images: %d %%' % (100 * correct / total))

    # Same normalisation as the printed value (previously a hard-coded 1875).
    trn_loss_list.append(mean_trn_loss)
    val_loss_list.append(mean_val_loss)

print('Finished Training')

100%|██████████| 196/196 [03:15<00:00,  1.00it/s]


epoch: 0
Accuracy of     0 : 41 %
Accuracy of     1 : 93 %
Accuracy of     2 : 17 %
Accuracy of     3 : 18 %
Accuracy of     4 : 28 %
Accuracy of     5 : 16 %
Accuracy of     6 : 56 %
Accuracy of     7 : 42 %
Accuracy of     8 : 52 %
Accuracy of     9 : 37 %
arch_param_normal
tensor([0, 0], device='cuda:2')
tensor([5, 1, 0], device='cuda:2')
tensor([0, 0, 0, 0], device='cuda:2')
tensor([0, 0, 0, 0, 0], device='cuda:2')

arch_param_reduc
tensor([0, 0], device='cuda:2')
tensor([0, 0, 7], device='cuda:2')
tensor([0, 0, 0, 0], device='cuda:2')
tensor([0, 0, 0, 0, 0], device='cuda:2')
epoch: 1/10 | step: 4/196 | trn loss: 1.5228 | val loss: 1.8531
Accuracy of the network on the test images: 40 %


100%|██████████| 196/196 [03:12<00:00,  1.02it/s]


epoch: 1
Accuracy of     0 : 57 %
Accuracy of     1 : 79 %
Accuracy of     2 : 50 %
Accuracy of     3 : 45 %
Accuracy of     4 : 45 %
Accuracy of     5 : 48 %
Accuracy of     6 : 67 %
Accuracy of     7 : 76 %
Accuracy of     8 : 79 %
Accuracy of     9 : 33 %
arch_param_normal
tensor([0, 4], device='cuda:2')
tensor([4, 1, 4], device='cuda:2')
tensor([3, 4, 0, 5], device='cuda:2')
tensor([3, 2, 1, 1, 1], device='cuda:2')

arch_param_reduc
tensor([1, 7], device='cuda:2')
tensor([6, 5, 7], device='cuda:2')
tensor([0, 3, 4, 0], device='cuda:2')
tensor([7, 7, 2, 1, 0], device='cuda:2')
epoch: 2/10 | step: 4/196 | trn loss: 1.1395 | val loss: 1.1543
Accuracy of the network on the test images: 58 %


100%|██████████| 196/196 [03:09<00:00,  1.03it/s]


epoch: 2
Accuracy of     0 : 69 %
Accuracy of     1 : 71 %
Accuracy of     2 : 53 %
Accuracy of     3 : 54 %
Accuracy of     4 : 28 %
Accuracy of     5 : 29 %
Accuracy of     6 : 93 %
Accuracy of     7 : 45 %
Accuracy of     8 : 66 %
Accuracy of     9 : 59 %
arch_param_normal
tensor([1, 4], device='cuda:2')
tensor([4, 1, 4], device='cuda:2')
tensor([4, 4, 4, 4], device='cuda:2')
tensor([3, 2, 1, 4, 1], device='cuda:2')

arch_param_reduc
tensor([1, 7], device='cuda:2')
tensor([4, 7, 7], device='cuda:2')
tensor([0, 6, 4, 2], device='cuda:2')
tensor([1, 7, 1, 3, 1], device='cuda:2')
epoch: 3/10 | step: 4/196 | trn loss: 0.9652 | val loss: 1.2180
Accuracy of the network on the test images: 57 %


 16%|█▌        | 31/196 [00:31<02:45,  1.00s/it]

In [None]:
import matplotlib.pyplot as plt
import numpy as np

# Mean of the per-class test accuracies for every completed epoch.
mean_val_acc = [np.mean(epoch_acc) for epoch_acc in acc_list]
# float() turns 0-dim tensors (including CUDA ones) into host scalars, which
# matplotlib can plot; plain floats pass through unchanged.
val_loss_values = [float(epoch_loss) for epoch_loss in val_loss_list]

plt.title('val_acc')
# A label is required, otherwise legend() has nothing to show and only warns.
plt.plot(mean_val_acc, label='val_acc')
plt.xlabel('epoch')
plt.ylabel('mean per-class accuracy (%)')
plt.legend()
plt.show()

plt.title('val_loss')
plt.plot(val_loss_values, label='val_loss')
plt.xlabel('epoch')
plt.ylabel('mean loss per batch')
plt.legend()
plt.show()

In [None]:
import torch.nn.functional as F

# The architecture parameters live on the underlying model. `net` is only
# wrapped when the DDP cell found suitable GPUs, so unwrap conditionally
# instead of assuming `.module` exists.
_core_net = net.module if isinstance(net, (DDP, DataParallel)) else net

# For each of the two parameter containers, print the index of the strongest
# candidate in every row of its first four entries.
for _group_pos, _group_name in enumerate(('arch_param_normal', 'arch_param_reduc')):
    if _group_pos:
        print()  # blank line between the two groups
    print(_group_name)
    _arch_group = getattr(_core_net, _group_name)
    for _entry in range(4):
        print(torch.max(F.softmax(_arch_group[_entry],dim=1),1)[1])

In [None]:
# Display the raw `arch_param_reduc` container of the wrapped model.
# NOTE(review): `.module` only exists when `net` was wrapped (the DDP cell
# wraps it conditionally); on an unwrapped model this raises AttributeError.
net.module.arch_param_reduc