In [1]:
import torch
import torch.nn as nn
import numpy as np
import torch.optim as optim
from torch.utils.data import DataLoader
import torchvision
from torchsummary import summary
import torchvision.transforms as transforms

In [None]:
# import visdom

# vis = visdom.Visdom()
# vis.close(env="main")

In [None]:
# def loss_tracker(loss_plot, loss_value, num):
#     '''num, loss_value, are Tensor'''
#     vis.line(X=num, Y=loss_value, win=loss_plot,update='append')

In [2]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'

torch.manual_seed(110)
if device == 'cuda':
    torch.cuda.manual_seed_all(110)

In [3]:
train_ds = torchvision.datasets.ImageFolder(
    root="../Lab_05_(inception)/baby_data/train", transform=transforms.ToTensor())
val_ds = torchvision.datasets.ImageFolder(
    root="../Lab_05_(inception)/baby_data/val", transform=transforms.ToTensor())
test_ds = torchvision.datasets.ImageFolder(
    root="../Lab_05_(inception)/baby_data/test", transform=transforms.ToTensor())
print(len(train_ds))
print(len(val_ds))
print(len(test_ds))

7730
300
432


In [4]:
train_meanRGB = [np.mean(x.numpy(), axis=(1, 2)) for x, _ in train_ds]
train_stdRGB = [np.std(x.numpy(), axis=(1, 2)) for x, _ in train_ds]

train_meanR = np.mean([m[0] for m in train_meanRGB])
train_meanG = np.mean([m[1] for m in train_meanRGB])
train_meanB = np.mean([m[2] for m in train_meanRGB])
train_stdR = np.mean([s[0] for s in train_stdRGB])
train_stdG = np.mean([s[1] for s in train_stdRGB])
train_stdB = np.mean([s[2] for s in train_stdRGB])


val_meanRGB = [np.mean(x.numpy(), axis=(1, 2)) for x, _ in val_ds]
val_stdRGB = [np.std(x.numpy(), axis=(1, 2)) for x, _ in val_ds]

val_meanR = np.mean([m[0] for m in val_meanRGB])
val_meanG = np.mean([m[1] for m in val_meanRGB])
val_meanB = np.mean([m[2] for m in val_meanRGB])

val_stdR = np.mean([s[0] for s in val_stdRGB])
val_stdG = np.mean([s[1] for s in val_stdRGB])
val_stdB = np.mean([s[2] for s in val_stdRGB])

print(train_meanR, train_meanG, train_meanB)
print(val_meanR, val_meanG, val_meanB)

0.20443186 0.622418 0.4943464
0.1723546 0.5275659 0.5340862


In [5]:
test_meanRGB = [np.mean(x.numpy(), axis=(1, 2)) for x, _ in test_ds]
test_stdRGB = [np.std(x.numpy(), axis=(1, 2)) for x, _ in test_ds]

test_meanR = np.mean([m[0] for m in test_meanRGB])
test_meanG = np.mean([m[1] for m in test_meanRGB])
test_meanB = np.mean([m[2] for m in test_meanRGB])

test_stdR = np.mean([s[0] for s in test_stdRGB])
test_stdG = np.mean([s[1] for s in test_stdRGB])
test_stdB = np.mean([s[2] for s in test_stdRGB])

print(test_meanR, test_meanG, test_meanB)

0.17287017 0.56349987 0.52504873


In [6]:
train_transformation = transforms.Compose([
    transforms.ToTensor(),
    transforms.Resize((300, 300)),
    transforms.Normalize([train_meanR, train_meanG, train_meanB], [
                         train_stdR, train_stdG, train_stdB]),
    transforms.RandomHorizontalFlip()
])

val_transformation = transforms.Compose([
    transforms.ToTensor(),
    transforms.Resize((300, 300)),
    transforms.Normalize([val_meanR, val_meanG, val_meanB], [
                         val_stdR, val_stdG, val_stdB]),
])

test_transformation = transforms.Compose([
    transforms.ToTensor(),
    transforms.Resize((300, 300)),
    transforms.Normalize([test_meanR, test_meanG, test_meanB], [
                         test_stdR, test_stdG, test_stdB]),
])

train_ds.transform = train_transformation
val_ds.transform = val_transformation
test_ds.transform = test_transformation

train_dl = DataLoader(train_ds, batch_size=4, shuffle=True)
val_dl = DataLoader(val_ds, batch_size=4, shuffle=True)
test_dl = DataLoader(test_ds, batch_size=4, shuffle=True)

classes = ('bgs', 'crying')

In [None]:
# import matplotlib.pyplot as plt
# import numpy as np
# %matplotlib inline

# def imshow(img):
#     img = img / 2 + 0.5 
#     npimg = img.numpy()
#     plt.imshow(np.transpose(npimg, (1, 2, 0)))
#     plt.show()

# dataiter = iter(train_dl)
# images, labels = dataiter.next()
# vis.images(images/2 + 0.5)

# imshow(torchvision.utils.make_grid(images))

# print(' '.join('%5s' % classes[labels[j]] for j in range(4)))

In [7]:
vgg16_layer = [64,64, 'M', 128, 128, 'M', 256, 256,256, 'M', 512,512,512, 'M',512,512,512,'M']

In [8]:
def make_Architecture(vgg16_layer, batch_norm=False):
    layers = []
    in_channels = 3

    for t in vgg16_layer:
        if t == 'M':
            layers += [nn.MaxPool2d(kernel_size=2, stride=2)]
        else:
            conv2d = nn.Conv2d(in_channels, t, kernel_size=3, padding=1)
            if batch_norm:
                layers += [conv2d, nn.BatchNorm2d(t), nn.ReLU(True)]
            else:
                layers += [conv2d, nn.ReLU(True)]
            in_channels = t

    return nn.Sequential(*layers)

In [9]:
class VGG(nn.Module):
    
    def __init__(self, Architecture, num_classes=2, init_weights=True):
        super().__init__()
        self.Architecture = Architecture
        
        self.classifier = nn.Sequential(
            nn.Linear(512 * 9 * 9, 4096),
            nn.ReLU(True),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(True),
            nn.Dropout(),
            nn.Linear(4096, num_classes),
        )
        if init_weights:
            self._initialize_weights()

    def forward(self, x):
        x = self.Architecture(x)
        x = x.view(x.size(0), -1)
        x = self.classifier(x)
        return x

    def _initialize_weights(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, 0, 0.01)
                nn.init.constant_(m.bias, 0)


In [10]:
vgg16 = VGG(make_Architecture(vgg16_layer), 2, True).to(device)
vgg16

VGG(
  (Architecture): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilati

In [11]:
a = torch.Tensor(8, 3, 300, 300).to(device)
out = vgg16(a)

In [12]:
criterion = nn.CrossEntropyLoss().to(device)
optimizer = torch.optim.SGD(vgg16.parameters(), lr=0.005, momentum=0.9)

lr_sche = optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.9)

In [13]:
summary(vgg16, input_size=(3, 300, 300), device=device)

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 64, 300, 300]           1,792
              ReLU-2         [-1, 64, 300, 300]               0
            Conv2d-3         [-1, 64, 300, 300]          36,928
              ReLU-4         [-1, 64, 300, 300]               0
         MaxPool2d-5         [-1, 64, 150, 150]               0
            Conv2d-6        [-1, 128, 150, 150]          73,856
              ReLU-7        [-1, 128, 150, 150]               0
            Conv2d-8        [-1, 128, 150, 150]         147,584
              ReLU-9        [-1, 128, 150, 150]               0
        MaxPool2d-10          [-1, 128, 75, 75]               0
           Conv2d-11          [-1, 256, 75, 75]         295,168
             ReLU-12          [-1, 256, 75, 75]               0
           Conv2d-13          [-1, 256, 75, 75]         590,080
             ReLU-14          [-1, 256,

In [None]:
# loss_plt = vis.line(Y=torch.Tensor(1).zero_(), opts=dict( title='loss_tracker', legend=['loss'], showlegend=True ))

In [14]:
import matplotlib.pyplot as plt


In [15]:
epochs = 20

train_loss = []

for epoch in range(epochs):  
    running_loss = 0.0
    lr_sche.step()
    for i, data in enumerate(train_dl, 0):
        
        inputs, labels = data
        inputs = inputs.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()

        outputs = vgg16(inputs)
        loss = criterion(outputs, labels)
        train_loss.append(loss)
        
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        if i % 30 == 29: 
            # loss_tracker(loss_plt, torch.Tensor([running_loss/30]), torch.Tensor([i + epoch*len(train_dl)]))
            print('[%d, %5d] loss: %.3f' % (epoch + 1, i + 1, running_loss / 30 ))
            running_loss = 0.0

print('Finished Training')




[1,    30] loss: 0.727
[1,    60] loss: 0.638
[1,    90] loss: 0.666
[1,   120] loss: 0.582
[1,   150] loss: 0.610
[1,   180] loss: 0.630
[1,   210] loss: 0.630
[1,   240] loss: 0.592
[1,   270] loss: 0.530
[1,   300] loss: 0.683
[1,   330] loss: 0.626
[1,   360] loss: 0.658
[1,   390] loss: 0.613
[1,   420] loss: 0.664
[1,   450] loss: 0.590
[1,   480] loss: 0.680
[1,   510] loss: 0.633
[1,   540] loss: 0.642
[1,   570] loss: 0.644
[1,   600] loss: 0.627
[1,   630] loss: 0.655
[1,   660] loss: 0.573
[1,   690] loss: 0.659
[1,   720] loss: 0.654
[1,   750] loss: 0.662
[1,   780] loss: 0.605
[1,   810] loss: 0.680
[1,   840] loss: 0.657
[1,   870] loss: 0.628
[1,   900] loss: 0.636
[1,   930] loss: 0.679
[1,   960] loss: 0.634
[1,   990] loss: 0.600
[1,  1020] loss: 0.719
[1,  1050] loss: 0.678
[1,  1080] loss: 0.663
[1,  1110] loss: 0.671
[1,  1140] loss: 0.618
[1,  1170] loss: 0.579
[1,  1200] loss: 0.633
[1,  1230] loss: 0.654
[1,  1260] loss: 0.608
[1,  1290] loss: 0.664
[1,  1320] 

In [17]:
val_loss = []
test_loss = []

val_correct = 0
val_total = 0

test_correct = 0
test_total = 0

# val_dl 뽑아서 loss plot 만들기
with torch.no_grad():
    for data in test_dl:
        images, labels = data
        images = images.to(device)
        labels = labels.to(device)
        outputs = vgg16(images)

        _, predicted = torch.max(outputs.data, 1)

        val_total += labels.size(0)

        val_correct += (predicted == labels).sum().item()

print('Accuracy of the network on the 300 test images: %d %%' %(100 * val_correct / val_total))

# test_dl 뽑아서 loss plot 만들기
with torch.no_grad():
    for data in test_dl:
        images, labels = data
        images = images.to(device)
        labels = labels.to(device)
        outputs = vgg16(images)

        _, predicted = torch.max(outputs.data, 1)

        test_total += labels.size(0)

        test_correct += (predicted == labels).sum().item()

print('Accuracy of the network on the 300 test images: %d %%' % (100 * test_correct / test_total))


Accuracy of the network on the 300 test images: 57 %
Accuracy of the network on the 300 test images: 57 %


In [None]:
dataiter = iter(val_dl)
images, labels = dataiter.next()

imshow(torchvision.utils.make_grid(images))
print('Class: ', ' '.join('%5s' % classes[labels[j]] for j in range(8)))

In [None]:
outputs = vgg16(images.to(device))

In [None]:
_, predicted = torch.max(outputs, 1)

print('Predicted: ', ' '.join( '%5s' % classes[predicted[j]] for j in range(8) ))

In [None]:
correct = 0
total = 0

with torch.no_grad():
    for data in test_dl:
        images, labels = data
        images = images.to(device)
        labels = labels.to(device)
        outputs = vgg16(images)

        _, predicted = torch.max(outputs.data, 1)

        total += labels.size(0)

        correct += (predicted == labels).sum().item()

print('Accuracy of the network on the 300 test images: %d %%' % (100 * correct / total))
