解压数据集
训练数据集train.zip和测试数据集test.zip都是压缩格式，下载后它们的路径可以如下：

../data/kaggle_dog/train.zip
../data/kaggle_dog/test.zip
../data/kaggle_dog/labels.csv.zip
为了使网页编译快一点，我们在git repo里仅仅存放小数据样本（'train_valid_test_tiny.zip'）。执行以下代码会从git repo里解压生成小数据样本。

In [1]:
# Set demo to False when training on the full Kaggle dataset that was
# downloaded; with demo=True only the tiny sample archive is used.
demo = False
data_dir = './'
# Archives are unpacked only when needZip is True.
needZip = False

zipfiles = (['train_valid_test_tiny.zip'] if demo
            else ['train.zip', 'test.zip', 'labels.csv.zip'])
if needZip:
    import zipfile
    for fin in zipfiles:
        # Extract every archive next to where it is stored.
        with zipfile.ZipFile('/'.join((data_dir, fin)), 'r') as zin:
            zin.extractall(data_dir)

In [2]:
# Settings for the cells below: the data root directory and the flag that
# selects the tiny demo dataset (False selects the full dataset names).
data_dir = '../data'
demo = False

整理数据集
对于Kaggle的完整数据集，我们需要定义下面的reorg_dog_data函数来整理一下。整理后，同一类狗的图片将出现在同一个文件夹下，便于Gluon稍后读取。

函数中的参数如data_dir、train_dir和test_dir对应上述数据存放路径及原始训练和测试的图片集文件夹名称。参数label_file为训练数据标签的文件名称。参数input_dir是整理后数据集文件夹名称。参数valid_ratio是验证集中每类狗的数量占原始训练集中数量最少一类的狗的数量（66）的比重。

In [3]:
import math
import os
import shutil
from collections import Counter

def reorg_dog_data(data_dir, label_file, train_dir, test_dir, input_dir,
                   valid_ratio):
    """Copy the raw images into one sub-folder per label.

    The following tree is created below ``data_dir/input_dir``:

    * ``train_valid/<label>/``: every training image.
    * ``valid/<label>/``: the first ``num_valid_per_label`` images of each
      label (in sorted file-name order), where ``num_valid_per_label`` is
      ``floor(smallest_class_size * valid_ratio)``.
    * ``train/<label>/``: the remaining training images.
    * ``test/unknown/``: every test image.

    Args:
        data_dir: Root directory holding the label file and image folders.
        label_file: CSV file name (relative to ``data_dir``) whose first
            line is a header and whose other lines are ``id,label``.
        train_dir: Folder name of the raw training images.
        test_dir: Folder name of the raw test images.
        input_dir: Folder name under which the reorganized tree is written.
        valid_ratio: Fraction of the smallest class size that is held out
            for validation from every label. A value that yields zero
            images creates no validation copies at all.

    Raises:
        KeyError: If a training file's name (up to the first ``.``) has no
            entry in the label file.
    """
    # Read the id -> label mapping, skipping the header row.
    with open(os.path.join(data_dir, label_file), 'r') as f:
        rows = f.readlines()[1:]
    idx_label = {}
    for row in rows:
        row = row.rstrip()
        if not row:
            # Tolerate blank lines such as a trailing newline at the end.
            continue
        idx, label = row.split(',')
        idx_label[idx] = label

    # Number of images in the smallest class of the training labels.
    min_num_train_per_label = min(Counter(idx_label.values()).values(),
                                  default=0)
    # Number of images per label that go to the validation set.
    num_valid_per_label = math.floor(min_num_train_per_label * valid_ratio)
    label_count = {}

    src_train = os.path.join(data_dir, train_dir)
    src_test = os.path.join(data_dir, test_dir)
    out_root = os.path.join(data_dir, input_dir)

    def copy_to(src, *dst_parts):
        # Copy one file into out_root/<dst_parts>, creating the folder.
        dst = os.path.join(out_root, *dst_parts)
        os.makedirs(dst, exist_ok=True)
        shutil.copy(src, dst)

    # Organize the training and validation sets. Sorting makes the split
    # reproducible; os.listdir returns entries in arbitrary order.
    for train_file in sorted(os.listdir(src_train)):
        idx = train_file.split('.')[0]
        label = idx_label[idx]
        src = os.path.join(src_train, train_file)
        copy_to(src, 'train_valid', label)
        # Use .get so that a quota of zero really means "no validation
        # images" instead of always taking the first image of a label.
        if label_count.get(label, 0) < num_valid_per_label:
            copy_to(src, 'valid', label)
            label_count[label] = label_count.get(label, 0) + 1
        else:
            copy_to(src, 'train', label)

    # Organize the test set; the folder is created even if it stays empty.
    os.makedirs(os.path.join(out_root, 'test', 'unknown'), exist_ok=True)
    for test_file in sorted(os.listdir(src_test)):
        copy_to(os.path.join(src_test, test_file), 'test', 'unknown')

再次强调，为了使网页编译快一点，我们在这里仅仅使用小数据样本。相应地，我们仅将批量大小设为2。实际训练和测试时应使用Kaggle的完整数据集，并调用reorg_dog_data函数将其整理成便于Gluon读取的格式。由于数据集较大，批量大小batch_size可设为一个较大的整数，例如128。

In [4]:
if not demo:
    # Full Kaggle dataset: label file, raw image folders and the folder
    # that holds the reorganized data.
    label_file = 'labels.csv'
    train_dir = 'train'
    test_dir = 'test'
    input_dir = 'train_valid_test'
    batch_size = 64
    valid_ratio = 0.1
    # The reorganization call is kept disabled here; uncomment to run it.
    # reorg_dog_data(data_dir, label_file, train_dir, test_dir, input_dir,
    #                valid_ratio)
else:
    # Note: the tiny dataset is used here to keep the web page build fast.
    input_dir = 'train_valid_test_tiny'
    # Note: a small batch goes with the tiny dataset. For the full Kaggle
    # dataset a larger integer, e.g. 128, can be used.
    batch_size = 2

使用Gluon读取整理后的数据集
为避免过拟合，我们在这里使用image.CreateAugmenter来增广数据集。例如我们设rand_mirror=True即可随机对每张图片做镜面反转。以下我们列举了该函数里的所有参数，这些参数都是可以调的。

In [5]:
from mxnet import autograd
from mxnet import gluon
from mxnet import image
from mxnet import init
from mxnet import nd
from mxnet.gluon.data import vision
import numpy as np

def transform_train(data, label):
    """Prepare one training sample.

    The image is cast to float32, divided by 255 and resized with
    ``image.imresize(..., 363, 363)``. It is then passed through the
    augmenters returned by ``image.CreateAugmenter`` (random mirroring
    enabled, per-channel mean/std supplied, all colour jitter set to 0)
    and finally transposed from height*width*channel to
    channel*height*width.

    Returns:
        A ``(image, label)`` tuple; the label is converted to a float32
        scalar.
    """
    scaled = data.astype('float32') / 255
    im = image.imresize(scaled, 363, 363)
    augmenters = image.CreateAugmenter(
        data_shape=(3, 363, 363),
        resize=0,
        rand_crop=False,
        rand_resize=False,
        rand_mirror=True,
        mean=np.array([0.485, 0.456, 0.406]),
        std=np.array([0.229, 0.224, 0.225]),
        brightness=0,
        contrast=0,
        saturation=0,
        hue=0,
        pca_noise=0,
        rand_gray=0,
        inter_method=2)
    for augment in augmenters:
        im = augment(im)
    # Change the layout from "height*width*channel" to
    # "channel*height*width".
    chw = nd.transpose(im, (2, 0, 1))
    target = nd.array([label]).asscalar().astype('float32')
    return (chw, target)

def transform_test(data, label):
    """Prepare one validation/test sample.

    Same scaling and resize as the training transform, but the augmenter
    list is built only from the target shape and the per-channel
    mean/std; none of the random options are passed.

    Returns:
        A ``(image, label)`` tuple with the image in
        channel*height*width layout and the label as a float32 scalar.
    """
    scaled = data.astype('float32') / 255
    im = image.imresize(scaled, 363, 363)
    augmenters = image.CreateAugmenter(
        data_shape=(3, 363, 363),
        mean=np.array([0.485, 0.456, 0.406]),
        std=np.array([0.229, 0.224, 0.225]))
    for augment in augmenters:
        im = augment(im)
    chw = nd.transpose(im, (2, 0, 1))
    target = nd.array([label]).asscalar().astype('float32')
    return (chw, target)

接下来，我们可以使用Gluon中的ImageFolderDataset类来读取整理后的数据集。

In [6]:
# Root of the reorganized dataset; the split name is appended below.
input_str = '/'.join((data_dir, input_dir, ''))

# Read the raw image files. flag=1 means the input images have three
# channels (colour).
train_ds = vision.ImageFolderDataset(
    input_str + 'train', flag=1, transform=transform_train)
valid_ds = vision.ImageFolderDataset(
    input_str + 'valid', flag=1, transform=transform_test)
train_valid_ds = vision.ImageFolderDataset(
    input_str + 'train_valid', flag=1, transform=transform_train)
test_ds = vision.ImageFolderDataset(
    input_str + 'test', flag=1, transform=transform_test)

# Mini-batch loaders; only the test loader keeps the dataset order.
loader = gluon.data.DataLoader
train_data = loader(
    train_ds, batch_size, shuffle=True, last_batch='keep')
valid_data = loader(
    valid_ds, batch_size, shuffle=True, last_batch='keep')
train_valid_data = loader(
    train_valid_ds, batch_size, shuffle=True, last_batch='keep')
test_data = loader(
    test_ds, batch_size, shuffle=False, last_batch='keep')

定义模型

In [7]:
from mxnet import gluon
from mxnet import init
#from mxnet.gluon.data import vision
from mxnet.gluon.model_zoo import vision
from mxnet.gluon import nn

def get_features(ctx):
    """Return the ``features`` block of a pretrained Inception v3.

    Args:
        ctx: Context passed to the model-zoo constructor together with
            ``pretrained=True``.

    Returns:
        The ``features`` attribute of the constructed network.
    """
    # Import locally under an unambiguous alias: this file binds the
    # global name ``vision`` to ``mxnet.gluon.data.vision`` in some cells
    # and to ``mxnet.gluon.model_zoo.vision`` in others, so the global may
    # not be the model zoo at the time this function is called.
    from mxnet.gluon.model_zoo import vision as model_zoo_vision

    inception = model_zoo_vision.inception_v3(pretrained=True, ctx=ctx)
    return inception.features


def get_output(ctx, ParamsName=None):
    """Build the classifier head placed on top of the extracted features.

    The head is Dropout(0.2) -> Dense(256, relu) -> Dropout(0.6) ->
    Dense(120).

    Args:
        ctx: Context on which the parameters are loaded or initialized.
        ParamsName: Optional parameter file. When given, the parameters
            are loaded from it with ``load_params``; otherwise they are
            Xavier-initialized.

    Returns:
        The ``nn.HybridSequential`` head.
    """
    head = nn.HybridSequential()
    with head.name_scope():
        # Layers are created inside the name scope, in network order.
        for layer in (nn.Dropout(.2),
                      nn.Dense(256, activation="relu"),
                      nn.Dropout(.6),
                      nn.Dense(120)):
            head.add(layer)
    if ParamsName is None:
        head.initialize(init=init.Xavier(), ctx=ctx)
    else:
        head.load_params(ParamsName, ctx)
    return head

def get_net(ParamsName, ctx):
    """Chain the pretrained feature extractor and the classifier head.

    Args:
        ParamsName: Parameter file forwarded to ``get_output`` for the
            head.
        ctx: Context used for both parts.

    Returns:
        A ``nn.HybridSequential`` that applies the features block first
        and the head second.
    """
    # Build the head before the feature extractor, as the original does.
    head = get_output(ctx, ParamsName)
    backbone = get_features(ctx)
    combined = nn.HybridSequential()
    with combined.name_scope():
        for part in (backbone, head):
            combined.add(part)
    return combined

In [None]:
import mxnet as mx
# Instantiate the pretrained feature extractor on the GPU and print its
# layer structure.
net2=get_features(mx.gpu())
print(net2)

HybridSequential(
  (0): HybridSequential(
    (0): Conv2D(3 -> 32, kernel_size=(3, 3), stride=(2, 2), bias=False)
    (1): BatchNorm(momentum=0.9, axis=1, fix_gamma=False, eps=0.001, in_channels=32)
    (2): Activation(relu)
  )
  (1): HybridSequential(
    (0): Conv2D(32 -> 32, kernel_size=(3, 3), stride=(1, 1), bias=False)
    (1): BatchNorm(momentum=0.9, axis=1, fix_gamma=False, eps=0.001, in_channels=32)
    (2): Activation(relu)
  )
  (2): HybridSequential(
    (0): Conv2D(32 -> 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (1): BatchNorm(momentum=0.9, axis=1, fix_gamma=False, eps=0.001, in_channels=64)
    (2): Activation(relu)
  )
  (3): MaxPool2D(size=(3, 3), stride=(2, 2), padding=(0, 0), ceil_mode=False)
  (4): HybridSequential(
    (0): Conv2D(64 -> 80, kernel_size=(1, 1), stride=(1, 1), bias=False)
    (1): BatchNorm(momentum=0.9, axis=1, fix_gamma=False, eps=0.001, in_channels=80)
    (2): Activation(relu)
  )
  (5): HybridSequential(
    (0): Con

In [None]:
from mxnet import nd
import numpy as np
import mxnet as mx
import pandas as pd
import pickle
from tqdm import tqdm

# Feature extractor handed to the SaveNd calls below; it is hybridized
# before the first forward pass.
net = get_features(mx.gpu())
net.hybridize()

def SaveNd(data, net, name, ctx=None):
    """Run ``net`` over every batch of ``data`` and save features + labels.

    Args:
        data: Iterable yielding ``(features, label)`` batches.
        net: Network applied to each feature batch.
        name: File name passed to ``nd.save``; the file stores the list
            ``[features, labels]``, each concatenated along axis 0.
        ctx: Context on which ``net`` is evaluated. Defaults to
            ``mx.gpu()``, the context that was previously hard-coded.
    """
    if ctx is None:
        ctx = mx.gpu()
    features = []
    labels = []
    print('提取特征 %s' % name)
    for batch, label in tqdm(data):
        # Copy each result to the CPU before accumulating it.
        out = net(batch.as_in_context(ctx))
        features.append(out.as_in_context(mx.cpu()))
        labels.append(label)
    features = nd.concat(*features, dim=0)
    labels = nd.concat(*labels, dim=0)
    print('保存特征 %s' % name)
    nd.save(name, [features, labels])


# Extract and cache features for the train, valid and train_valid splits.
SaveNd(train_data, net, 'train_inception_v3.nd')
SaveNd(valid_data, net, 'valid_inception_v3.nd')
SaveNd(train_valid_data, net, 'input_inception_v3.nd')
# Test-set feature extraction is disabled:
# SaveNd(test_data,net,'test_resnet152_v1.nd')

# Store the sorted test file names together with the class names so the
# prediction step can write the submission header and row ids.
ids = sorted(os.listdir(os.path.join(data_dir, input_dir, 'test/unknown')))
synsets = train_valid_ds.synsets
# The context manager closes the file even if pickling fails.
with open('ids_synsets', 'wb') as f:
    pickle.dump([ids, synsets], f)

提取特征 train_inception_v3.nd


100%|████████████████████████████████████████████████████████████████████████████████| 149/149 [05:39<00:00,  2.17s/it]


保存特征 train_inception_v3.nd
提取特征 valid_inception_v3.nd


100%|██████████████████████████████████████████████████████████████████████████████████| 12/12 [00:17<00:00,  1.16s/it]


保存特征 valid_inception_v3.nd
提取特征 input_inception_v3.nd


 95%|████████████████████████████████████████████████████████████████████████████    | 152/160 [05:27<00:16,  2.02s/it]

In [None]:
import datetime
import matplotlib
matplotlib.use('agg')
import matplotlib.pyplot as plt
from mxnet import autograd
from mxnet import gluon
from mxnet import image
from mxnet import init
from mxnet import nd
from mxnet.gluon.data import vision
from mxnet.gluon import nn
from mxnet import nd
import pandas as pd
import mxnet as mx
import pickle

# Load the cached feature files; each one was written by nd.save as the
# list [features, labels].
train_nd = nd.load('train_inception_v3.nd')
valid_nd = nd.load('valid_inception_v3.nd')
input_nd = nd.load('input_inception_v3.nd')

# Load the pickled [ids, synsets] list. pickle must only be used on files
# from a trusted source. The context manager closes the file even if
# unpickling fails.
with open('ids_synsets', 'rb') as f:
    ids_synsets = pickle.load(f)

# Hyper-parameters for training the classifier head.
num_epochs = 73
# Batch size of the DataLoaders built right below.
batch_size = 64
learning_rate = 1e-4
weight_decay = 1e-5
# lr_period and lr_decay are passed to train(), but the step-decay code
# that would use them is commented out there.
lr_period = 40
lr_decay = 0.1
# File name handed to plt.savefig() inside train().
pngname='1'
# Only referenced by a commented-out save call inside train().
modelparams='1'

# Wrap the cached (features, labels) pairs in shuffled mini-batch loaders.
train_data_d = gluon.data.DataLoader(
    gluon.data.ArrayDataset(train_nd[0], train_nd[1]),
    batch_size=batch_size, shuffle=True)
valid_data_d = gluon.data.DataLoader(
    gluon.data.ArrayDataset(valid_nd[0], valid_nd[1]),
    batch_size=batch_size, shuffle=True)
input_data_d = gluon.data.DataLoader(
    gluon.data.ArrayDataset(input_nd[0], input_nd[1]),
    batch_size=batch_size, shuffle=True)


def get_loss(data, net, ctx):
    """Return the mean of the per-batch mean cross-entropy losses.

    Args:
        data: Iterable of ``(features, label)`` batches that supports
            ``len()``.
        net: Network producing the class scores.
        ctx: Context on which the batches are evaluated.

    Returns:
        Sum of the per-batch mean losses divided by ``len(data)``.
    """
    total = 0.0
    for batch, target in data:
        target = target.as_in_context(ctx)
        scores = net(batch.as_in_context(ctx))
        batch_losses = softmax_cross_entropy(scores, target)
        total += nd.mean(batch_losses).asscalar()
    return total / len(data)

# Module-level loss object used by get_loss() and train().
softmax_cross_entropy = gluon.loss.SoftmaxCrossEntropyLoss()

def train(net, train_data, valid_data, num_epochs, lr, wd, ctx, lr_period,
          lr_decay):
    """Train ``net`` with Adam, plot the loss curves and save the parameters.

    Args:
        net: Network to train.
        train_data: Iterable of ``(data, label)`` training batches; must
            support ``len()``.
        valid_data: Iterable of validation batches, or ``None`` to skip
            validation.
        num_epochs: Number of passes over ``train_data``.
        lr: Learning rate given to the optimizer.
        wd: Weight decay given to the optimizer.
        ctx: Context on which the batches are placed.
        lr_period: Currently unused; step decay is disabled (see below).
        lr_decay: Currently unused; step decay is disabled (see below).

    Side effects:
        Prints one line per epoch, saves the loss plot to the file named
        by the module-level ``pngname`` and writes the parameters to
        ``./inception_v3.params``.
    """
    trainer = gluon.Trainer(
        net.collect_params(), 'adam', {'learning_rate': lr, 'wd': wd})
    # Alternative optimizer kept for reference:
    # trainer = gluon.Trainer(
    #     net.collect_params(), 'sgd', {'learning_rate': lr, 'momentum': 0.9,
    #                                   'wd': wd})
    train_loss = []
    test_loss = []

    prev_time = datetime.datetime.now()
    for epoch in range(num_epochs):
        _loss = 0.
        # Step learning-rate decay is currently disabled; re-enable with:
        # if epoch > 0 and epoch % lr_period == 0:
        #     trainer.set_learning_rate(trainer.learning_rate * lr_decay)
        for data, label in train_data:
            label = label.as_in_context(ctx)
            with autograd.record():
                output = net(data.as_in_context(ctx))
                loss = softmax_cross_entropy(output, label)
            loss.backward()
            # Normalize the gradient by the actual size of this batch: the
            # last batch can be smaller than the nominal batch size, and
            # the function should not depend on a global batch_size.
            trainer.step(data.shape[0])
            _loss += nd.mean(loss).asscalar()
        cur_time = datetime.datetime.now()
        h, remainder = divmod((cur_time - prev_time).seconds, 3600)
        m, s = divmod(remainder, 60)
        time_str = "Time %02d:%02d:%02d" % (h, m, s)
        # Epoch loss is the mean of the per-batch mean losses.
        __loss = _loss / len(train_data)
        train_loss.append(__loss)

        if valid_data is not None:
            valid_loss = get_loss(valid_data, net, ctx)
            epoch_str = ("Epoch %d. Train loss: %f, Valid loss %f, "
                         % (epoch, __loss, valid_loss))
            test_loss.append(valid_loss)
        else:
            epoch_str = ("Epoch %d. Train loss: %f, "
                         % (epoch, __loss))

        prev_time = cur_time
        print(epoch_str + time_str + ', lr ' + str(trainer.learning_rate))

    # Only label the curves that are actually drawn.
    plt.plot(train_loss, 'r')
    legend_labels = ['Train_Loss']
    if valid_data is not None:
        plt.plot(test_loss, 'g')
        legend_labels.append('Test_Loss')
    plt.legend(legend_labels, loc=2)

    plt.savefig(pngname, dpi=1000)
    savefilename = "./inception_v3.params"
    net.save_params(savefilename)

# Build a freshly initialized classifier head on the GPU.
ctx = mx.gpu()
net = get_output(ctx)
net.hybridize()

# Alternative run on all labelled features without a validation set:
# train(net, input_data_d, None, num_epochs, learning_rate, weight_decay,
#       ctx, lr_period, lr_decay)
train(net, train_data_d, valid_data_d, num_epochs,
      lr=learning_rate, wd=weight_decay, ctx=ctx,
      lr_period=lr_period, lr_decay=lr_decay)

In [None]:
import datetime
import matplotlib
matplotlib.use('agg')
import matplotlib.pyplot as plt
from mxnet import autograd
from mxnet import gluon
from mxnet import image
from mxnet import init
from mxnet import nd
#from mxnet.gluon.data import vision
from mxnet.gluon.model_zoo import vision
from mxnet.gluon import nn
from mxnet import nd
import pandas as pd
import mxnet as mx
import pickle
import numpy as np
from tqdm import tqdm
#from model import get_net

# Dataset locations for the prediction step.
data_dir = './'
test_dir = 'test'
input_dir = 'train_valid_test'
valid_dir = 'valid'
input_str = '/'.join((data_dir, input_dir, ''))

# Parameter file of the trained head, name of the submission CSV and name
# of the pickle file holding [ids, synsets].
netparams = "./inception_v3.params"
csvname = 'p2_2.csv'
ids_synsets_name = 'ids_synsets'

# Load the pickled [ids, synsets] list. pickle must only be used on files
# from a trusted source. The context manager closes the file even if
# unpickling fails.
with open(ids_synsets_name, 'rb') as f:
    ids_synsets = pickle.load(f)

def SaveTest(test_data, net, ctx, name, ids, synsets):
    """Write the softmax outputs of ``net`` on ``test_data`` to a CSV file.

    Args:
        test_data: Iterable of ``(data, label)`` batches; the label is
            ignored.
        net: Network producing the class scores.
        ctx: Context on which the batches are evaluated.
        name: Path of the CSV file to write.
        ids: File names, one per output row; the part before the first
            ``.`` becomes the row id.
        synsets: Class names used for the header columns after ``id``.
    """
    rows = []
    for batch, _ in tqdm(test_data):
        probabilities = nd.softmax(net(batch.as_in_context(ctx)))
        rows.extend(probabilities.asnumpy())
    with open(name, 'w') as out:
        out.write('id,' + ','.join(synsets) + '\n')
        # zip pairs each file name with its row of class probabilities.
        for file_name, row in zip(ids, rows):
            row_id = file_name.split('.')[0]
            values = ','.join(str(value) for value in row)
            out.write(row_id + ',' + values + '\n')

# Assemble the full network: the pretrained feature extractor followed by
# the classifier head loaded from netparams.
net = get_net(netparams,mx.gpu())
net.hybridize()

softmax_cross_entropy = gluon.loss.SoftmaxCrossEntropyLoss()
# Optional validation-loss check (disabled):
#print(get_loss(valid_data,net,mx.gpu()))

# Write the softmax outputs for the test set to csvname, using the file
# names and class names loaded from the pickle file.
SaveTest(test_data,net,mx.gpu(),csvname,ids_synsets[0],ids_synsets[1])