In [1]:
# 에포크마다 사진으로 출력하여 변화 관찰

In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
import torchvision
import matplotlib.pyplot as plt
import numpy as np
import math
import os


In [3]:
if torch.cuda.is_available():
    device = torch.device('cuda')

print(device)

cuda


In [4]:
#dataset
transform = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

train_dataset = torchvision.datasets.CIFAR10(root='./data',
                                        train=True,
                                        download=True,
                                        transform=transform)
test_dataset = torchvision.datasets.CIFAR10(root='./data',
                                       train=False,
                                       download=True,
                                       transform=transform)

train_loader = torch.utils.data.DataLoader(train_dataset,
                                          batch_size=4,
                                          shuffle=True,
                                          num_workers=4)
test_loader = torch.utils.data.DataLoader(test_dataset,
                                         batch_size=4,
                                         shuffle=False,
                                         num_workers=4)

Files already downloaded and verified
Files already downloaded and verified


In [5]:
class BasicBlock(nn.Module):
    def __init__(self, in_planes, planes, stride=1):
        super(BasicBlock, self).__init__()
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes) 
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes) 
        self.shortcut = nn.Sequential() 
        if stride != 1: 
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, planes, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(planes)
            )

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        out += self.shortcut(x) 
        out = F.relu(out)
        return out


In [6]:
# ResNet class
class ResNet(nn.Module):
    def __init__(self, block, num_blocks, num_classes=2):
        super(ResNet, self).__init__()
        self.in_planes = 16
        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(16)
        self.layer1 = self._make_layer(block, 16, num_blocks[0], stride=1)
        self.layer2 = self._make_layer(block, 32, num_blocks[1], stride=2)
        self.layer3 = self._make_layer(block, 64, num_blocks[2], stride=2)
        self.linear = nn.Linear(64, num_classes)
        
        self.classifier = nn.Linear(2, 10)
        

    def _make_layer(self, block, planes, num_blocks, stride):
        strides = [stride] + [1] * (num_blocks - 1)
        layers = []
        for stride in strides:
            layers.append(BasicBlock(self.in_planes, planes, stride))
            self.in_planes = planes # 다음 레이어를 위해 채널 수 변경
        return nn.Sequential(*layers)

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = F.avg_pool2d(out, 8)
        out = out.view(out.size(0), -1)
        out = self.linear(out)
        out = self.classifier(out)
        return out

    
    def get_feature(self, x):
        with torch.no_grad():
            out = F.relu(self.bn1(self.conv1(x)))
            out = self.layer1(out)
            out = self.layer2(out)
            out = self.layer3(out)
            out = F.avg_pool2d(out, 8)
            out = out.view(out.size(0), -1)
            out = self.linear(out)
            return out

In [7]:
def ResNet20():
    return ResNet(BasicBlock, [3, 3, 3])
def ResNet32():
    return ResNet(BasicBlock, [5, 5, 5])
def ResNet44():
    return ResNet(BasicBlock, [7, 7, 7])
def ResNet56():
    return ResNet(BasicBlock, [9, 9, 9])

In [8]:
class CenterLoss(nn.Module):
    """Center loss.
    
    Reference:
    Wen et al. A Discriminative Feature Learning Approach for Deep Face Recognition. ECCV 2016.
    
    Args:
        num_classes (int): number of classes.
        feat_dim (int): feature dimension.
    """
    def __init__(self, num_classes=10, feat_dim=2, use_gpu=True):
        super(CenterLoss, self).__init__()
        self.num_classes = num_classes
        self.feat_dim = feat_dim
        self.use_gpu = use_gpu

        if self.use_gpu:
            self.centers = nn.Parameter(torch.randn(self.num_classes, self.feat_dim).cuda())
        else:
            self.centers = nn.Parameter(torch.randn(self.num_classes, self.feat_dim))

    def forward(self, x, labels):
        """
        Args:
            x: feature matrix with shape (batch_size, feat_dim).
            labels: ground truth labels with shape (batch_size).
        """
        batch_size = x.size(0)
        distmat = torch.pow(x, 2).sum(dim=1, keepdim=True).expand(batch_size, self.num_classes) + \
                  torch.pow(self.centers, 2).sum(dim=1, keepdim=True).expand(self.num_classes, batch_size).t()
        distmat.addmm_(1, -2, x, self.centers.t())

        classes = torch.arange(self.num_classes).long()
        if self.use_gpu: classes = classes.cuda()
        labels = labels.unsqueeze(1).expand(batch_size, self.num_classes)
        mask = labels.eq(classes.expand(batch_size, self.num_classes))

        dist = distmat * mask.float()
        loss = dist.clamp(min=1e-12, max=1e+12).sum() / batch_size

        return loss

In [9]:
center_loss = CenterLoss(num_classes=10, feat_dim=2, use_gpu=True)


In [10]:
optimizer_centloss = torch.optim.SGD(center_loss.parameters(), lr=0.5)


In [11]:
lenet = ResNet20().to(device)
learning_rate = 0.001
criterion = nn.CrossEntropyLoss() # softmax 이미 포함
optimizer = optim.SGD(lenet.parameters(), lr=learning_rate)

In [12]:
train_losses = []
train_accuracy = []

In [13]:
colors = ['C0', 'C1', 'C2', 'C3', 'C4', 'C5', 'C6', 'C7', 'C8', 'C9']

In [14]:
def get_samples_vector(net, dataloader, num_samples=1000):
    samples = []
    labels = []
    for x, y in dataloader:
        sample = net.get_feature(x.cuda()) # normalized vector
        samples.extend(sample.tolist())
        labels.extend(y.tolist())
        if len(samples) > num_samples:
            break
    samples = list(zip(samples, labels))
    return samples, labels



In [15]:
def show_2d_space(samples, labels, w,count):
    plt.figure(figsize=(10,10))
    for i in range(10):
        samples_i = list(filter(lambda x : x[1] == i, samples))
        samples_feat = np.ndarray.flatten(np.array([s[0] for s in samples_i]))
        plt.plot(*samples_feat, marker="o", color=colors[i], markersize=1)
        plt.plot([w[i][0], 0], [w[i][1], 0], color=colors[i])
    # plt.plot(*zip(w, [[0,0] for i in range(10)]))
    plt.savefig(count+'.png')
    #plt.show()

In [16]:
def train(net,epoch):
    print('\n[ Train epoch: %d ]' % epoch)
    net.train()
    train_loss = 0
    correct = 0
    total = 0
    for batch_idx, (images, labels) in enumerate(train_loader):
        images = images.to(device)
        labels = labels.to(device)
        optimizer.zero_grad()

        outputs = net(images)
        other_loss = criterion(outputs, labels)
        
        features = net.get_feature(images.cuda())
        alpha = 0.9
        loss = center_loss(features, labels) * alpha + other_loss
        optimizer_centloss.zero_grad()
        loss.backward()
# multiple (1./alpha) in order to remove the effect of alpha on updating centers
        for param in center_loss.parameters():
            param.grad.data *= (1./alpha)
        optimizer_centloss.step()
        
        
        
        
        
        
        optimizer.step()

        train_loss += loss.item()
        _, prediction = outputs.max(1)

        total += labels.size(0)
        correct += prediction.eq(labels).sum().item()

        if batch_idx % 500 == 0:
            print('\nCurrent batch:', str(batch_idx))
            print('Current train accuracy:', str(prediction.eq(labels).sum().item() / labels.size(0)))
            print('Current train average loss:', loss.item() / 100)


    train_losses.append(train_loss / total)
    train_accuracy.append(100. * correct / total)
    print('\nTrain accuarcy:', 100. * correct / total)
    print('Train average loss:', train_loss / total)
    
    lenet_samples, lenet_labels = get_samples_vector(lenet, dataloader=train_loader, num_samples=1000)
    lenet_w = F.normalize(lenet.classifier.weight).tolist()
    show_2d_space(lenet_samples, lenet_labels, lenet_w,str(epoch))


def test(net,epoch):
    print('\n[ Test epoch: %d ]' % epoch)
    net.eval()
    loss = 0
    correct = 0
    total = 0

    for batch_idx, (images, labels) in enumerate(test_loader):
        images = images.to(device)
        labels = labels.to(device)
        total += labels.size(0)

        outputs = net(images)
        loss += criterion(outputs, labels).item()

        _, prediction = outputs.max(1)
        correct += prediction.eq(labels).sum().item()

    print('\nTest accuarcy:', 100. * correct / total)
    print('Test average loss:', loss / total)

In [17]:
for epoch in range(0, 250) :
    train(lenet,epoch)


[ Train epoch: 0 ]


	addmm_(Number beta, Number alpha, Tensor mat1, Tensor mat2)
Consider using one of the following signatures instead:
	addmm_(Tensor mat1, Tensor mat2, *, Number beta, Number alpha) (Triggered internally at  ..\torch\csrc\utils\python_arg_parser.cpp:1025.)
  distmat.addmm_(1, -2, x, self.centers.t())


RuntimeError: CUDA error: CUBLAS_STATUS_ALLOC_FAILED when calling `cublasCreate(handle)`