In [1]:
import torch as t
import os
from torch.utils import data
from PIL import Image
import numpy as np
import torch.nn as nn
import time
import random

random.seed(20190412)
t.manual_seed(20190412)
t.cuda.manual_seed(20190412)


path = '/media/wcw/SeaGate316G: Data/kaggle/data/Dogs_vs_Cats/data/'# + 'train'

# 读取数据

In [2]:
import torchvision.transforms as T
img_shape = (3, 224, 224)

def read_raw_img(path, resize, L=False):
    img = Image.open(path)
    if resize:
        img = img.resize(resize)
    if L:
        img = img.convert('L')
    return np.asarray(img)

class DogCat(data.Dataset):
    def __init__(self,path, batch_size, img_shape):
        self.batch_size = batch_size
        self.img_shape = img_shape
        imgs = os.listdir(path)
        random.shuffle(imgs)
        self.imgs = [os.path.join(path, img) for img in imgs]
        
#         normalize = T.Normalize(mean = [0.485, 0.456, 0.406],
#                                 std = [0.229, 0.224, 0.225])
#         self.transforms = T.Compose([T.Resize(224), 
#                                      T.CenterCrop(224),
#                                      T.ToTensor(),
#                                      normalize])
        
    def __getitem__(self, index):
        start = index * self.batch_size
        end = min(start + self.batch_size, len(self.imgs))
        size = end - start
        assert size > 0
        
        img_paths = self.imgs[start:end]
        a = t.zeros((size,) + self.img_shape, requires_grad=True)
        b = t.zeros((size, 1))
        
        for i in range(size):
            img = read_raw_img(img_paths[i], self.img_shape[1:], L=False).transpose((2,1,0))
#             img = Image.open(img_paths[i])
            a[i] = t.from_numpy(img)
#             a[i] = self.transforms(img)
            b[i] = 1 if 'dog' in img_paths[i].split('/')[-1].split('.')[0] else 0
        return a, b
    
    def __len__(self):
        return (len(self.imgs) + self.batch_size - 1)// self.batch_size
    
train = DogCat(path+'train', 32, img_shape)    

# 构建模型

In [3]:
import math
class Vgg16(nn.Module):

    def __init__(self, features, num_classes=1, init_weights=True):
        super(Vgg16, self).__init__()
        self.features = features
        self.classifier = nn.Sequential(
            nn.Linear(512 * 7 * 7, 4096),
            nn.ReLU(True),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(True),
            nn.Dropout(),
            nn.Linear(4096, num_classes),
        )
        if init_weights:
            self._initialize_weights()

    def forward(self, x):
        x = self.features(x)
        x = x.view(x.size(0), -1)
        x = self.classifier(x)
        x = t.sigmoid(x)
        return x

    def _initialize_weights(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
                if m.bias is not None:
                    m.bias.data.zero_()
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()
            elif isinstance(m, nn.Linear):
                m.weight.data.normal_(0, 0.01)
                m.bias.data.zero_()


def make_layers(cfg, mode, batch_norm=False):
    layers = []
    if mode == 'RGB':
        in_channels = 3
    elif mode == 'L':
        in_channels = 1
    else:
        print('only RGB or L mode')
        
    for v in cfg:
        if v == 'M':
            layers += [nn.MaxPool2d(kernel_size=2, stride=2)]
        else:
            conv2d = nn.Conv2d(in_channels, v, kernel_size=3, padding=1)
            if batch_norm:
                layers += [conv2d, nn.BatchNorm2d(v), nn.ReLU(inplace=True)]
            else:
                layers += [conv2d, nn.ReLU(inplace=True)]
            in_channels = v
    return nn.Sequential(*layers)

cfg = [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M']

# 损失函数与优化器

In [4]:
vgg = Vgg16(make_layers(cfg, 'RGB')).cuda()
print(vgg)
criterion = nn.BCELoss()
optimizer = t.optim.Adam(vgg.parameters(),lr=0.001)

train = DogCat(path+'train', batch_size=32, img_shape=img_shape) 

Vgg16(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (17): Conv

# 模型训练

In [23]:
tic = time.time()

for step, (x, y_) in enumerate(train):
    optimizer.zero_grad()

    x = x.cuda()
    y_ = y_.cuda()

    y = vgg(x)


#     loss = -t.mean(y_ * t.log(y) + (1 - y_) * t.log(1 - y))
    loss = criterion(y.squeeze(), y_)

    loss.backward()
    optimizer.step()
toc = time.time()

RuntimeError: leaf variable has been moved into the graph interior

# 模型评估

In [None]:
accs = []
test = DogCat(path+'test', batch_size=32, img_shape=img_shape)

for step, (x, y_) in enumerate(test): 
#     x = x.cuda()
#     y_ = y_.cuda()
    
    y = vgg(x)
    y[y>0.5] = 1
    y[y<=0.5]=0
    acc = y.eq(y_).sum().item()/y.shape[0]
#     acc = t.max(y, 1)[1].eq(t.max(y_, 1)[1]).sum().item()/y.shape[0]

    accs.append(acc)
    
np.mean(accs)    