In [1]:
import pandas as pd
import os
import itertools
import datetime
import sys
sys.path.append('../')
%run ../utils.ipynb
from functools import partial

# Dataset root and the sub-directories used for the raw data, the saved
# network parameters and the per-checkpoint dumps written by train().
root_dir = '/data/ai/data/kaggle_dog-breed-identification'
data_dir = os.path.join(root_dir, "data")
params_dir = os.path.join(root_dir, "params")
grads_dir = os.path.join(root_dir, "grads")

# makedirs(exist_ok=True) also creates missing parent directories (os.mkdir
# fails when root_dir itself does not exist yet) and is safe to re-run.
os.makedirs(params_dir, exist_ok=True)
os.makedirs(grads_dir, exist_ok=True)

# demo:  True selects the tiny sample dataset and a tiny batch size.
# first: True (re)builds the train/valid/test folder layout via
#        reorg_dog_data (not defined in this notebook; presumably provided by
#        the `%run ../utils.ipynb` line above - confirm).
demo = False
first = False
if demo:
    # Note: the small dataset is used here to keep web-page builds fast.
    input_dir = 'train_valid_test_tiny'
    # Note: a correspondingly small batch is used here. For the full Kaggle
    # dataset a larger integer can be used, e.g. 128.
    batch_size = 2
else:
    label_file = 'labels.csv'
    train_dir = 'train'
    test_dir = 'test'
    input_dir = 'train_valid_test'
    batch_size = 128
    valid_ratio = 0.1
    if first:
        reorg_dog_data(data_dir, label_file, train_dir, test_dir, input_dir, valid_ratio)

05:24:26 INFO:load utils


In [2]:
from mxnet import autograd
from mxnet import gluon
from mxnet import image
from mxnet import init
from mxnet import nd
from mxnet.gluon.data import vision
import numpy as np
from mxnet.gluon import nn
from mxnet import nd
import mxnet as mx
import mxnet.gluon as gl

# Three-element normalisation constants handed to image.CreateAugmenter as
# `mean` / `std` by both transform functions.
# Presumably per-channel statistics of the training images on the 0-1 scale -
# TODO confirm how they were computed.
mean = np.array([0.39186783, 0.45182955, 0.47607605])
std = np.array([0.26173923, 0.2573802, 0.26252426])

# Integer in [256, 480) used as the `resize` argument in transform_train.
# NOTE(review): it is drawn once, when this cell runs, so every training sample
# of a session shares the same value - confirm a per-sample draw was not
# intended.
random_shape = int(256 + 224 * np.random.uniform())

def transform_train(data, label):
    """Augment and normalise one training sample.

    Args:
        data: image array for one sample; it is cast to float32 and divided
            by 255 (presumably an HxWxC uint8 image as produced by
            ImageFolderDataset - TODO confirm).
        label: class index of the sample.

    Returns:
        Tuple ``(image, label)`` where ``image`` has its axes reordered with
        ``(2, 0, 1)`` (height*width*channel -> channel*height*width) and
        ``label`` is a float32 scalar.
    """
    # A 224x224 variant was used previously: data_shape=(3, 224, 224).
    augmenters = image.CreateAugmenter(
        data_shape=(3, 96, 96), resize=random_shape,
        rand_crop=True, rand_resize=True, rand_mirror=True,
        mean=mean, std=std,
        brightness=0, contrast=0, saturation=0, hue=0,
        pca_noise=0.01, rand_gray=0, inter_method=2)

    im = data.astype('float32') / 255
    for augment in augmenters:
        im = augment(im)

    # Change the layout from "height*width*channel" to "channel*height*width".
    chw_image = nd.transpose(im, (2, 0, 1))
    float_label = nd.array([label]).asscalar().astype('float32')
    return (chw_image, float_label)

def transform_test(data, label):
    """Normalise one validation/test sample without random augmentation.

    Args:
        data: image array for one sample; it is cast to float32 and divided
            by 255 (presumably an HxWxC uint8 image - TODO confirm).
        label: class index of the sample.

    Returns:
        Tuple ``(image, label)`` with the image axes reordered by
        ``(2, 0, 1)`` and the label converted to a float32 scalar.

    NOTE(review): unlike transform_train, no ``resize`` is passed to
    CreateAugmenter here, so the 96x96 crop is taken from the image at its
    original scale - confirm this train/test difference is intended.
    """
    # A 224x224 variant was used previously: data_shape=(3, 224, 224).
    augmenters = image.CreateAugmenter(data_shape=(3, 96, 96),
                                       mean=mean,
                                       std=std)

    im = data.astype('float32') / 255
    for augment in augmenters:
        im = augment(im)

    chw_image = nd.transpose(im, (2, 0, 1))
    float_label = nd.array([label]).asscalar().astype('float32')
    return (chw_image, float_label)

## Load data

In [3]:
# Directory prefix that contains the train / valid / train_valid / test folders.
input_str = data_dir + '/' + input_dir + '/'

# Read the raw image files. flag=1 means the input images have three
# channels (colour). Training splits get the augmenting transform, the
# validation and test splits the plain one.
train_ds, valid_ds, train_valid_ds, test_ds = (
    vision.ImageFolderDataset(input_str + split, flag=1, transform=transform)
    for split, transform in (
        ('train', transform_train),
        ('valid', transform_test),
        ('train_valid', transform_train),
        ('test', transform_test),
    )
)

# Batch iterators. Only the test loader keeps the dataset order, which the
# submission cell relies on when pairing predictions with file names.
loader = gluon.data.DataLoader
train_data, valid_data, train_valid_data = (
    loader(dataset, batch_size, shuffle=True, last_batch='keep')
    for dataset in (train_ds, valid_ds, train_valid_ds)
)
test_data = loader(test_ds, batch_size, shuffle=False, last_batch='keep')


print("batch size", batch_size)
# Pull a single batch to check shapes and dtypes.
data, label = list(itertools.islice(train_data, 1))[0]

#ctx = [mx.gpu(i) for i in range(1)]
ctx = mx.cpu(0)
label = label.as_in_context(ctx)
print(label.shape, label.dtype)
data = data.as_in_context(ctx)
print(data.shape, data.dtype)
print("total train data", len(train_ds))

# Load one sample straight from the dataset (bypassing the loader) as a
# smoke test of the transform; the image itself is not used further.
for img, _ in train_ds:
    #print(img)
    break


batch size 128
(128,) <class 'numpy.float32'>
(128, 3, 96, 96) <class 'numpy.float32'>
total train data 9502


In [4]:
#for _, i in net.features.collect_params().items():
#    i.lr_mult = 0.1

In [5]:

def get_loss(data, net, ctx):
    """Return the average of the per-batch mean cross-entropy over `data`.

    Args:
        data: iterable of ``(features, label)`` batches that also supports
            ``len()`` (the number of batches).
        net: callable network applied to each feature batch.
        ctx: context the features and labels are copied to before use.

    Returns:
        Sum of each batch's mean loss divided by ``len(data)``.
    """
    batch_means = []
    for feas, label in data:
        target = label.as_in_context(ctx)
        prediction = net(feas.as_in_context(ctx))
        batch_means.append(
            nd.mean(softmax_cross_entropy(prediction, target)).asscalar())
    return sum(batch_means, 0.0) / len(data)

def get_acc(output, label):
    """Count how many rows of `output` have their arg-max equal to `label`.

    Despite the name this returns a count, not a ratio; callers divide by the
    number of samples themselves.

    Args:
        output: network output; the arg-max is taken along axis 1.
        label: target class indices compared element-wise with the arg-max.

    Returns:
        The number of matching predictions as a scalar.
    """
    matches = nd.argmax(output, axis=1) == label
    return matches.sum().asscalar()

# Shared loss object used by get_loss() and train(); it is looked up as a
# module-level global when those functions run.
softmax_cross_entropy = gluon.loss.SoftmaxCrossEntropyLoss()

def train(model_name, net, train_data, valid_data, trainer, ctx, start_epoch=0, end_epoch=3, lr_period=80, lr_decay=0.1):
    """Train `net` and checkpoint it every second epoch.

    Args:
        model_name: name used in log lines, checkpoint file names and as the
            sub-directory of ``params_dir`` / ``grads_dir``.
        net: network to train. Nothing is trained when it is None.
        train_data: iterable of ``(data, label)`` batches supporting ``len()``.
        valid_data: same kind of iterable for validation, or None to skip
            validation.
        trainer: object exposing ``step(batch_size)`` and ``learning_rate``.
            Nothing is trained when it is None.
        ctx: context each batch is copied to.
        start_epoch: first epoch number (inclusive).
        end_epoch: last epoch number (exclusive).
        lr_period: accepted for compatibility; not used by this function.
        lr_decay: accepted for compatibility; not used by this function.

    Returns:
        None. Progress is printed. On even epoch numbers the parameters are
        saved under ``params_dir/model_name`` and ``write_net_params`` is
        called with a path under ``grads_dir/model_name``.
    """
    print("model", model_name)
    if trainer is None or net is None:
        print("Need trainer")
        return

    # Make sure the checkpoint directories exist so that saving cannot fail
    # after a long epoch just because a folder is missing.
    params_model_dir = os.path.join(params_dir, model_name)
    grads_model_dir = os.path.join(grads_dir, model_name)
    os.makedirs(params_model_dir, exist_ok=True)
    os.makedirs(grads_model_dir, exist_ok=True)

    prev_time = datetime.datetime.now()
    for epoch in range(start_epoch, end_epoch):
        print("epoch", epoch, "learning rate", trainer.learning_rate)
        train_loss = 0.0
        correct = 0
        total = 0
        for data, label in train_data:
            label = label.as_in_context(ctx)
            batch_size = label.shape[0]
            with autograd.record():
                output = net(data.as_in_context(ctx))
                loss = softmax_cross_entropy(output, label)
            loss.backward()
            trainer.step(batch_size)
            train_loss += nd.mean(loss).asscalar()
            correct += get_acc(output, label)
            total += batch_size
        train_loss_avg = train_loss / len(train_data)

        cur_time = datetime.datetime.now()
        # total_seconds() includes whole days; `.seconds` alone would wrap
        # around for an epoch longer than 24 hours.
        elapsed = int((cur_time - prev_time).total_seconds())
        h, remainder = divmod(elapsed, 3600)
        m, s = divmod(remainder, 60)
        time_str = "Time %02d:%02d:%02d" % (h, m, s)
        print('total', total, 'batches', len(train_data))

        if valid_data is not None:
            valid_correct = 0
            valid_total = 0
            valid_loss = 0
            for data, label in valid_data:
                batch_size = data.shape[0]
                data = data.as_in_context(ctx)
                label = label.as_in_context(ctx)
                output = net(data)
                loss = softmax_cross_entropy(output, label)
                valid_loss += nd.mean(loss).asscalar()
                valid_correct += get_acc(output, label)
                valid_total += batch_size
            valid_acc = valid_correct / valid_total
            valid_loss_avg = valid_loss / len(valid_data)
            # Trailing ", " keeps the accuracy separated from the time string
            # (it used to be printed as "...0.3583Time 00:02:38").
            epoch_str = ("Epoch %d. Train loss: %f, Valid loss: %f, Train accuracy: %.4f, Valid accuracy: %.4f, "
                         % (epoch, train_loss_avg, valid_loss_avg, correct / total, valid_acc))
        else:
            # No validation set: keep a placeholder so the checkpoint name
            # below can still be built instead of raising.
            valid_loss_avg = float('nan')
            epoch_str = ("Epoch %d. Train loss: %f, "
                         % (epoch, train_loss_avg))
        prev_time = cur_time
        print(epoch_str + time_str + ', lr ' + str(trainer.learning_rate))

        if (epoch % 2 == 0):
            file = "m_%s_e_%s_v_%s_t_%s_lr_%s" % (model_name, epoch, valid_loss_avg, train_loss_avg, trainer.learning_rate)
            params_file = os.path.join(params_model_dir, file + ".params")
            net.save_params(params_file)
            grads_file = os.path.join(grads_model_dir, file + ".h5")
            print(grads_file)
            # write_net_params is not defined in this notebook; presumably it
            # comes from ../utils.ipynb - confirm.
            write_net_params(net, grads_file)

## Test learning rate

In [6]:
# Run on the default GPU from here on (this replaces the CPU context that was
# set while the data was being inspected).
#ctx = utils.try_gpu()
ctx = mx.gpu()

# Default hyper-parameters for the learning-rate experiments.
num_epochs, learning_rate, weight_decay = 1000, 1e-4, 5e-4
lr_period, lr_decay = 200, 0.1

# Registries keyed by model name.
#net = get_net(ctx)
pretrained_net = {}
finetune_net = {}

#for _, i in net.features.collect_params().items():
#   i.lr_mult = 0.1

#trainer = gluon.Trainer( net.collect_params(), 'sgd', {'learning_rate': learning_rate, 'momentum': 0.9, 'wd': weight_decay})

# train() with the data loaders, context and decay factor pre-bound; callers
# still supply model_name, net, trainer and the epoch range.
train_lr = partial(
    train,
    train_data=train_data,
    valid_data=valid_data,
    ctx=ctx,
    lr_decay=lr_decay,
)
"""
for learning_rate in [1e-2, 1e-3, 1e-4]:
    pretrained_net[model] = gl.model_zoo.vision.resnet152_v1(pretrained=True)
    finetune_net[model] = gl.model_zoo.vision.resnet152_v1(classes=120)
    finetune_net[model].features = pretrained_net[model].features
    finetune_net[model].classifier.initialize(init.Xavier())
    
    trainer = gluon.Trainer(finetune_net[model].collect_params(), 'adam', {'learning_rate': learning_rate, 'wd': weight_decay})
    train_lr(net=finetune_net[model], trainer=trainer)
"""



"\nfor learning_rate in [1e-2, 1e-3, 1e-4]:\n    pretrained_net[model] = gl.model_zoo.vision.resnet152_v1(pretrained=True)\n    finetune_net[model] = gl.model_zoo.vision.resnet152_v1(classes=120)\n    finetune_net[model].features = pretrained_net[model].features\n    finetune_net[model].classifier.initialize(init.Xavier())\n    \n    trainer = gluon.Trainer(finetune_net[model].collect_params(), 'adam', {'learning_rate': learning_rate, 'wd': weight_decay})\n    train_lr(net=finetune_net[model], trainer=trainer)\n"

In [7]:
# Model-zoo identifiers, used as dictionary keys and as parameter prefixes.
res152_v1_model = "resnet152_v1"
dense161_model = "densenet161"
vgg16_bn_model = "vgg16_bn"
inception_v3_model = "inception_v3"
res50_v2_model = "resnet50_v2"
res18_v2_model = "resnet18_v2"

# Pretrained networks keyed by model name; only ResNet-152 v1 is enabled.
# Other candidates that were tried:
#pretrained_net[res18_v2_model] = gl.model_zoo.vision.resnet18_v2(prefix=res18_v2_model + "_", pretrained=True)
#pretrained_net[res50_v2_model] = gl.model_zoo.vision.resnet50_v2(pretrained=True)
#print(pretrained_net[res50_v2_model].features[1].weight.data()[0][0])
#pretrained_net[dense161_model] = gl.model_zoo.vision.densenet161(pretrained=True)
#pretrained_net[vgg16_bn_model] = gl.model_zoo.vision.vgg16_bn(pretrained=True)
#pretrained_net[inception_v3_model] = gl.model_zoo.vision.inception_v3(pretrained=True)
pretrained_net = {
    res152_v1_model: gl.model_zoo.vision.resnet152_v1(
        prefix=res152_v1_model + "_", pretrained=True),
}

"""
# Train indivisually
    if (model == res152_v1_model):
        finetune_net = gl.model_zoo.vision.resnet152_v1(classes=120, ctx=ctx)
    if (model == res50_v2_model):
        finetune_net = gl.model_zoo.vision.resnet50_v2(classes=120, ctx=ctx)
    if (model == vgg16_bn_model):
        finetune_net = gl.model_zoo.vision.vgg16_bn(classes=120, ctx=ctx)
    if (model == dense161_model):
        finetune_net = gl.model_zoo.vision.dense161(classes=120, ctx=ctx)
    if (model == inception_v3_model):
        finetune_net = gl.model_zoo.vision.inception_v3(classes=120, ctx=ctx)
        
    finetune_net.features = pretrained_net[model].features
    finetune_net.features = pretrained_net[model].features
    finetune_net.classifier.initialize(init.Xavier(), ctx=ctx)
    return finetune_net
"""
# Train together
class AllInOneModel(nn.HybridBlock):
    """Concatenate the outputs of several feature networks and classify them.

    Every network in `networks` is applied to the same input; the results are
    concatenated along axis 1 and passed through a small classifier
    (Dense(256, relu) -> Dropout(0.5) -> Dense(120)).

    Args:
        name: accepted for compatibility; not used by the constructor (the
            block prefix is taken from ``kwargs``).
        networks: sequence of callables (feature extractors) applied to the
            input in order.
        **kwargs: forwarded to ``nn.HybridBlock.__init__``.

    NOTE(review): `networks` is kept as a plain Python list. Gluon presumably
    does not register blocks held in a list as children, so their parameters
    would not appear in ``collect_params()`` and only the classifier would be
    trained/saved - confirm this is intended.
    """

    def __init__(self, name, networks, ** kwargs):
        print("kwargs", kwargs)
        super(AllInOneModel, self).__init__(**kwargs)
        self.networks = networks
        self.dense_adapters = dict()
        with self.name_scope():
            net = self.classifier = nn.HybridSequential()
            with net.name_scope():
                net.add(nn.Dense(256, activation='relu'))
                net.add(nn.Dropout(0.5))
                net.add(nn.Dense(120))

    def _concat_features(self, F, x):
        """Apply every feature network to `x` and join the results on axis 1.

        `F` is the operator namespace used for the concatenation (``mx.nd``
        or the namespace passed to ``hybrid_forward``).
        """
        features = [network(x) for network in self.networks]
        if not features:
            raise ValueError("AllInOneModel needs at least one feature network")
        if len(features) == 1:
            # A single network needs no concatenation.
            return features[0]
        return F.concat(*features, dim=1)

    def hybrid_forward(self, F, x):
        """Symbolic/imperative forward pass using the operator namespace `F`.

        Because ``forward`` is overridden below, this method is presumably
        not reached through a normal call - confirm. It is kept consistent
        with ``forward`` (it previously referenced an undefined ``self.net``).
        """
        print("hybrid")
        return self.classifier(self._concat_features(F, x))

    def forward(self, x):
        """Imperative forward pass: concatenated features -> classifier."""
        return self.classifier(self._concat_features(mx.nd, x))

def get_net(name):
    """Build an AllInOneModel on top of the loaded pretrained feature networks.

    Every entry of the module-level ``pretrained_net`` except the one keyed
    by ``dense161_model`` contributes its ``.features`` block.

    Args:
        name: passed to AllInOneModel both as its `name` argument and as the
            block prefix.

    Returns:
        The hybridized model, with a freshly Xavier-initialised classifier
        living on the module-level ``ctx``.
    """
    networks = [
        pretrained.features
        for model_key, pretrained in pretrained_net.items()
        if model_key != dense161_model
    ]
    # The classifier is initialised on `ctx`; move the pretrained feature
    # extractors to the same context so a forward pass does not depend on
    # another cell having relocated them first.
    for feature_net in networks:
        feature_net.collect_params().reset_ctx(ctx)

    net = AllInOneModel(name, networks, prefix=name)
    net.classifier.initialize(ctx=ctx, init=mx.init.Xavier())
    net.hybridize()
    return net


In [None]:
# The run name is derived from the loaded pretrained models and selects the
# checkpoint sub-directories.
#model = res50_v2_model
model_name =  "_".join(pretrained_net.keys())
#net=get_net(model_name)
#net.hybridize()
params_model_dir = os.path.join(params_dir, model_name)
grads_model_dir = os.path.join(grads_dir, model_name)
# makedirs(exist_ok=True) is re-runnable and creates missing parents.
os.makedirs(params_model_dir, exist_ok=True)
os.makedirs(grads_model_dir, exist_ok=True)

# Checkpoint to resume from (only used by the commented-out load_params call
# below). Earlier candidates are kept for reference.
# NOTE(review): the active name is a resnet18_v2 checkpoint while the network
# built below is resnet152_v1 - pick a matching file before enabling the load.
#file= "m_resnet152_v1_e_18_v_2.81106563409_t_3.09018225034_lr_0.0001.params"
#file = "m_resnet152_v1_e_188_v_2.22175089518_t_0.272732603749_lr_1e-05.params"
#file = "m_resnet152_v1_e_248_v_2.21313051383_t_0.274849429925_lr_1.0000000000000002e-06.params"
#file = "m_resnet152_v1_e_250_v_2.80912895997_t_3.26908252398_lr_0.0001.params"
#file = "m_resnet50_v2_e_113_v_2.66837227345_t_2.69866662979_lr_0.001.params"
#file = "m_resnet18_v2_e_6_v_1.63639547427_t_2.00945680173_lr_0.01.params"
file = "m_resnet18_v2_e_1008_v_1.50832762321_t_1.32757116804_lr_0.1.params"
params_file = os.path.join(params_model_dir, file)
#print(params_file)

# Fine-tuning network: a 120-class ResNet-152 v1 that reuses the pretrained
# feature extractor and gets a freshly initialised classifier.
from mxnet.gluon.model_zoo import vision as models
#finetune_net = models.resnet18_v2(prefix=res18_v2_model + "_", classes=120)
finetune_net = models.resnet152_v1(prefix=res152_v1_model + "_", classes=120)
finetune_net.features = pretrained_net[res152_v1_model].features
finetune_net.classifier.initialize(init.Xavier())
net = finetune_net
#print(net.collect_params())
#net.load_params(params_file, ctx=ctx)
net.collect_params().reset_ctx(ctx)
net.hybridize()

# Hyper-parameters of this run. learning_rate and weight_decay now hold the
# values that are actually given to the optimizer (they used to be declared
# as 1e-2 / 5e-4 while the Trainer hard-coded 1e-4 / 1e-5).
start_epoch = 0
end_epoch = 100
learning_rate = 1e-4
lr_period = 200
weight_decay = 1e-5
#trainer = gluon.Trainer( net.collect_params(), 'adam', {'learning_rate': learning_rate, 'wd': weight_decay})
#trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': learning_rate, 'wd': weight_decay})
lr_sch = mx.lr_scheduler.FactorScheduler(step=400, factor=0.9)
trainer = gluon.Trainer(net.collect_params(), 'adam',
                        {'learning_rate': learning_rate, 'wd': weight_decay, 'lr_scheduler': lr_sch})
train_lr(model_name=model_name, net=net, trainer=trainer, start_epoch=start_epoch,
         end_epoch=end_epoch, lr_period=lr_period)

model resnet152_v1
epoch 0 learning rate 0.0001
total 9502 batches 75
Epoch 0. Train loss: 3.260848, Valid loss: 2.594458, Train accuracy: 0.2784, Valid accuracy: 0.3583Time 00:02:38, lr 0.0001
/data/ai/data/kaggle_dog-breed-identification/grads/resnet152_v1/m_resnet152_v1_e_0_v_2.59445750713_t_3.26084752083_lr_0.0001.h5
epoch 1 learning rate 0.0001
total 9502 batches 75
Epoch 1. Train loss: 1.805435, Valid loss: 2.331380, Train accuracy: 0.5304, Valid accuracy: 0.3972Time 00:02:42, lr 0.0001
epoch 2 learning rate 0.0001
total 9502 batches 75
Epoch 2. Train loss: 1.561534, Valid loss: 2.199375, Train accuracy: 0.5838, Valid accuracy: 0.4278Time 00:02:39, lr 0.0001
/data/ai/data/kaggle_dog-breed-identification/grads/resnet152_v1/m_resnet152_v1_e_2_v_2.19937547048_t_1.56153410435_lr_0.0001.h5
epoch 3 learning rate 0.0001
total 9502 batches 75
Epoch 3. Train loss: 1.394785, Valid loss: 2.236949, Train accuracy: 0.6226, Valid accuracy: 0.4097Time 00:02:40, lr 0.0001
epoch 4 learning rate 0

In [None]:
# Scratch cell: `??` is IPython introspection syntax that shows the source of
# nd.waitall. It is not valid plain Python and can be dropped from a final run.
import mxnet.ndarray as nd
nd.waitall??

In [None]:
# Collect one row of softmax scores per test image, in loader order.
outputs = []
for data, label in test_data:
    output = nd.softmax(net(data.as_in_context(ctx)))
    outputs.extend(output.asnumpy())

# File names of the test images, sorted; each row of `outputs` is paired with
# the name at the same position.
ids = sorted(os.listdir(os.path.join(data_dir, input_dir, 'test/unknown')))

# Write the CSV: a header of 'id' plus the class names, then one line per
# image holding the file name up to its first '.' and the class scores.
with open('submission.csv', 'w') as f:
    f.write('id,' + ','.join(train_valid_ds.synsets) + '\n')
    for i, output in zip(ids, outputs):
        f.write('%s,%s\n' % (i.split('.')[0], ','.join(map(str, output))))


In [None]:
# get_net requires the model name; calling it without arguments raises
# TypeError. model_name is the run name built in the training cell.
get_net(model_name)

In [None]:
# Scratch cell: alias the fine-tuning network as `net`, call initialize() on
# its classifier again and list all of its parameters.
net = finetune_net
net.classifier.initialize(init.Xavier())
#net.save_params("/tmp/t")

print(net.collect_params())

In [None]:
# Scratch cell: list only the parameters of the classifier head.
print(finetune_net.classifier.collect_params())

In [None]:
# Leftover scratch expression: it only evaluates the built-in `range`; safe to delete.
range