In [1]:
import mxnet as mx
from mxnet import gluon
from mxnet import ndarray as nd
from mxnet import autograd
from mxnet.gluon import nn

import os
import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split

ctx = mx.gpu()

In [2]:
root_dir = "/data/ai/data/kaggle_dog-breed-identification/"
data_dir = os.path.join(root_dir, "data")
features_dir = os.path.join(root_dir, "features")

In [3]:
softmax_cross_entropy = gluon.loss.SoftmaxCrossEntropyLoss()

def accuracy(output, labels):
    return nd.mean(nd.argmax(output, axis=1) == labels).asscalar()

def evaluate(net, data_iter):
    loss, acc, n = 0., 0., 0.
    steps = len(data_iter)
    for data, label in data_iter:
        data, label = data.as_in_context(ctx), label.as_in_context(ctx)
        output = net(data)
        acc += accuracy(output, label)
        loss += nd.mean(softmax_cross_entropy(output, label)).asscalar()
    return loss/steps, acc/steps

In [4]:
def load_data(model_name, batch_size=128, train_size=0.8):
    features = nd.load(os.path.join(features_dir,'features_train_%s.nd' % model_name))[0]
    labels = nd.load(os.path.join(data_dir,'labels.nd'))[0]
    print("features", features.shape, "labels", labels.shape)

    n_train = int(features.shape[0]*train_size)

    X_train = features[:n_train]
    y_train = labels[:n_train]

    X_val = features[n_train:]
    y_val = labels[n_train:]

    dataset_train = gluon.data.ArrayDataset(X_train, y_train)
    dataset_val = gluon.data.ArrayDataset(X_val, y_val)

    data_iter_train = gluon.data.DataLoader(dataset_train, batch_size)
    data_iter_val = gluon.data.DataLoader(dataset_val, batch_size)
    
    return data_iter_train, data_iter_val

In [47]:
# D1:512 No activation, drop 0.9 : 0.4647
# D1:512 No activation, drop 0.8 : 0.4430
# D1:512 No activation, drop 0.5 : 0.4941 
# D1:512 No activation, drop 0.3 : 0.5275
# D1:512 activation, drop 0.8 : 0.4667
# D1:512 activation, drop 0.5 : 0.4476
# D1:256 activation, drop 0.5 : 0.4540
# D1:512 No activation, Batch Norm, activation, drop 0.8 : 0.4649
# D1:512 No activation, Batch Norm,  drop 0.8 : 0.4594
# D1:1024 No activation, drop 0.8 : 0.5080
# D1:1024 activation, drop 0.8: 0.4444
# D1:1024 activation, drop 0.9: 0.4794
# No D1, No drop: 0.4929 
# No D1, Drop 0.8: 0.5119
# No D1, Batch norm, activation, drop 0.8: 0.5338

def build_model():
    net = nn.Sequential()
    with net.name_scope():
        net.add(nn.Dense(512, activation='relu'))
        #net.add(nn.BatchNorm(axis=1))
        #net.add(nn.Dropout(0.5))
        net.add(nn.Dropout(0.85))
        net.add(nn.Dense(120))

    net.initialize(ctx=ctx)
    return net

In [32]:
def train_model(model_name):
    epochs = 50
    batch_size = 128
    
    data_iter_train, data_iter_val = load_data(model_name, batch_size)
    net = build_model()
    
    trainer = gluon.Trainer(net.collect_params(), 'adam', {'learning_rate': 1e-4, 'wd': 1e-5})
    
    for epoch in range(epochs):
        train_loss = 0.
        train_acc = 0.
        steps = len(data_iter_train)
        for data, label in data_iter_train:
            data, label = data.as_in_context(ctx), label.as_in_context(ctx)

            with autograd.record():
                output = net(data)
                loss = softmax_cross_entropy(output, label)

            loss.backward()
            trainer.step(batch_size)

            train_loss += nd.mean(loss).asscalar()
            train_acc += accuracy(output, label)

        val_loss, val_acc = evaluate(net, data_iter_val)
    
    print("Epoch %d. loss: %.4f, acc: %.2f%%, val_loss %.4f, val_acc %.2f%% Model: %s" % (
        epoch+1, train_loss/steps, train_acc/steps*100, val_loss, val_acc*100, model_name))
    
    return val_loss

In [33]:
from mxnet.gluon.model_zoo.model_store import _model_sha1

losses = []

for model_name in sorted(_model_sha1.keys()):
    if model_name in {"vgg11", "vgg11_bn", "vgg13", "vgg13_bn", 'vgg16', 'vgg16_bn', 'vgg19', 'vgg19_bn', 
                     "densenet161", "densenet201" }:
        pass
    else:
        print("training", model_name)
        val_loss = train_model(model_name)
        losses.append((model_name, val_loss))

training alexnet
features (10222, 9216) labels (10222,)


KeyboardInterrupt: 

In [9]:
df = pd.DataFrame(losses, columns=['model', 'val_loss'])
df = df.sort_values('val_loss')
df.head(20)

Unnamed: 0,model,val_loss
3,inceptionv3,0.284268
5,resnet152_v1,0.402438
4,resnet101_v1,0.412058
10,resnet50_v1,0.512563
2,densenet169,0.516145
11,resnet50_v2,0.597712
8,resnet34_v1,0.620733
1,densenet121,0.622118
9,resnet34_v2,0.676297
6,resnet18_v1,0.871806


In [10]:
df.to_csv('models.csv', index=None)
pd.read_csv('models.csv')
for i, (model_name, val_loss) in df.iterrows():
    print('%s | %s' % (model_name, val_loss))
df.head(10)

inceptionv3 | 0.28426791448146105
resnet152_v1 | 0.40243762359023094
resnet101_v1 | 0.4120584465563297
resnet50_v1 | 0.5125632397830486
densenet169 | 0.5161446258425713
resnet50_v2 | 0.5977117847651243
resnet34_v1 | 0.6207333207130432
densenet121 | 0.6221175324171782
resnet34_v2 | 0.6762965656816959
resnet18_v1 | 0.8718055002391338
resnet18_v2 | 1.023909479379654
squeezenet1.1 | 1.9452572986483574
squeezenet1.0 | 1.9783090502023697
alexnet | 3.152370259165764


Unnamed: 0,model,val_loss
3,inceptionv3,0.284268
5,resnet152_v1,0.402438
4,resnet101_v1,0.412058
10,resnet50_v1,0.512563
2,densenet169,0.516145
11,resnet50_v2,0.597712
8,resnet34_v1,0.620733
1,densenet121,0.622118
9,resnet34_v2,0.676297
6,resnet18_v1,0.871806


In [11]:
def load_models_data(model_names, batch_size=128, train_size=0.8):
    features = [nd.load(os.path.join(features_dir, 'features_train_%s.nd' % model_name))[0] for model_name in model_names]
    features = nd.concat(*features, dim=1)
    labels = nd.load(os.path.join(data_dir, 'labels.nd'))[0]
    
    #X_train, X_val, y_train, y_val = train_test_split(features, labels, test_size= 1 - train_size)
    n_train = int(features.shape[0]*train_size)
    
    X_train = features[:n_train]
    y_train = labels[:n_train]

    X_val = features[n_train:]
    y_val = labels[n_train:]
    
    dataset_train = gluon.data.ArrayDataset(X_train, y_train)
    dataset_val = gluon.data.ArrayDataset(X_val, y_val)

    data_iter_train = gluon.data.DataLoader(dataset_train, batch_size, shuffle=True)
    data_iter_val = gluon.data.DataLoader(dataset_val, batch_size)
    
    return data_iter_train, data_iter_val

In [48]:
net = build_model()

epochs = 500
batch_size = 128

#model_names = ['inceptionv3', 'resnet152_v1']
model_names = ['inceptionv3', 'resnet152_v1', 'resnet101_v1']#, 'resnet50_v1'] #, 'densenet169'] #, 'resnet101_v1']#, 'resnet152_v1'] #resnet34_v2']#, 'resnet50_v2']
data_iter_train, data_iter_val = load_models_data(model_names, batch_size=batch_size)


lr_sch = mx.lr_scheduler.FactorScheduler(step=400, factor=0.9)
trainer = gluon.Trainer(net.collect_params(), 'adam', 
                        {'learning_rate': 1e-4, 'wd': 1e-5, 'lr_scheduler': lr_sch})


In [49]:
for epoch in range(epochs):
    train_loss = 0.
    train_acc = 0.
    steps = len(data_iter_train)
    for data, label in data_iter_train:
        data, label = data.as_in_context(ctx), label.as_in_context(ctx)

        with autograd.record():
            output = net(data)
            loss = softmax_cross_entropy(output, label)

        loss.backward()
        trainer.step(batch_size)

        train_loss += nd.mean(loss).asscalar()
        train_acc += accuracy(output, label)

    val_loss, val_acc = evaluate(net, data_iter_val)

    print("Epoch %d. loss: %.4f, acc: %.2f%%, val_loss %.4f, val_acc %.2f%%" % (
        epoch+1, train_loss/steps, train_acc/steps*100, val_loss, val_acc*100))

Epoch 1. loss: 5.2794, acc: 1.99%, val_loss 4.4801, val_acc 17.06%
Epoch 2. loss: 4.4706, acc: 5.23%, val_loss 3.9625, val_acc 39.76%
Epoch 3. loss: 4.0767, acc: 10.44%, val_loss 3.2703, val_acc 59.13%
Epoch 4. loss: 3.6066, acc: 18.44%, val_loss 2.5628, val_acc 69.54%
Epoch 5. loss: 3.1294, acc: 26.96%, val_loss 1.9564, val_acc 76.43%
Epoch 6. loss: 2.7592, acc: 33.36%, val_loss 1.5276, val_acc 80.54%
Epoch 7. loss: 2.4338, acc: 39.86%, val_loss 1.2044, val_acc 82.93%
Epoch 8. loss: 2.2081, acc: 44.57%, val_loss 1.0068, val_acc 85.23%
Epoch 9. loss: 1.9641, acc: 49.65%, val_loss 0.8403, val_acc 85.62%
Epoch 10. loss: 1.8241, acc: 52.88%, val_loss 0.7373, val_acc 86.70%
Epoch 11. loss: 1.6895, acc: 55.90%, val_loss 0.6559, val_acc 87.24%
Epoch 12. loss: 1.5929, acc: 57.77%, val_loss 0.5862, val_acc 88.12%
Epoch 13. loss: 1.4800, acc: 60.31%, val_loss 0.5438, val_acc 88.17%
Epoch 14. loss: 1.3733, acc: 63.00%, val_loss 0.5020, val_acc 88.51%
Epoch 15. loss: 1.3105, acc: 64.45%, val_loss

Epoch 120. loss: 0.3847, acc: 88.54%, val_loss 0.2702, val_acc 90.86%
Epoch 121. loss: 0.3943, acc: 87.78%, val_loss 0.2703, val_acc 91.10%
Epoch 122. loss: 0.3920, acc: 88.38%, val_loss 0.2702, val_acc 91.10%
Epoch 123. loss: 0.3857, acc: 88.60%, val_loss 0.2712, val_acc 91.00%
Epoch 124. loss: 0.3856, acc: 88.57%, val_loss 0.2709, val_acc 91.05%
Epoch 125. loss: 0.3883, acc: 88.42%, val_loss 0.2695, val_acc 91.15%
Epoch 126. loss: 0.3798, acc: 88.68%, val_loss 0.2688, val_acc 91.00%
Epoch 127. loss: 0.3839, acc: 88.89%, val_loss 0.2701, val_acc 91.00%
Epoch 128. loss: 0.3777, acc: 88.65%, val_loss 0.2699, val_acc 91.20%
Epoch 129. loss: 0.3732, acc: 88.75%, val_loss 0.2711, val_acc 90.85%
Epoch 130. loss: 0.3779, acc: 88.12%, val_loss 0.2704, val_acc 90.90%
Epoch 131. loss: 0.3809, acc: 88.59%, val_loss 0.2693, val_acc 91.05%
Epoch 132. loss: 0.3856, acc: 88.30%, val_loss 0.2689, val_acc 91.15%
Epoch 133. loss: 0.3804, acc: 88.67%, val_loss 0.2688, val_acc 90.95%
Epoch 134. loss: 0.3

Epoch 238. loss: 0.3291, acc: 90.30%, val_loss 0.2675, val_acc 91.25%
Epoch 239. loss: 0.3390, acc: 89.89%, val_loss 0.2676, val_acc 91.25%
Epoch 240. loss: 0.3388, acc: 89.36%, val_loss 0.2673, val_acc 91.25%
Epoch 241. loss: 0.3347, acc: 89.80%, val_loss 0.2673, val_acc 91.29%
Epoch 242. loss: 0.3271, acc: 90.33%, val_loss 0.2672, val_acc 91.25%
Epoch 243. loss: 0.3416, acc: 89.34%, val_loss 0.2672, val_acc 91.29%
Epoch 244. loss: 0.3344, acc: 89.48%, val_loss 0.2672, val_acc 91.29%
Epoch 245. loss: 0.3215, acc: 90.74%, val_loss 0.2673, val_acc 91.29%
Epoch 246. loss: 0.3305, acc: 90.33%, val_loss 0.2671, val_acc 91.29%
Epoch 247. loss: 0.3318, acc: 89.75%, val_loss 0.2672, val_acc 91.29%
Epoch 248. loss: 0.3487, acc: 89.44%, val_loss 0.2671, val_acc 91.29%
Epoch 249. loss: 0.3329, acc: 90.07%, val_loss 0.2671, val_acc 91.29%
Epoch 250. loss: 0.3337, acc: 89.97%, val_loss 0.2673, val_acc 91.34%
Epoch 251. loss: 0.3320, acc: 90.27%, val_loss 0.2673, val_acc 91.34%
Epoch 252. loss: 0.3

Epoch 356. loss: 0.3274, acc: 90.00%, val_loss 0.2666, val_acc 91.29%
Epoch 357. loss: 0.3218, acc: 90.14%, val_loss 0.2666, val_acc 91.25%
Epoch 358. loss: 0.3282, acc: 90.46%, val_loss 0.2666, val_acc 91.29%
Epoch 359. loss: 0.3149, acc: 90.42%, val_loss 0.2667, val_acc 91.25%
Epoch 360. loss: 0.3323, acc: 89.94%, val_loss 0.2666, val_acc 91.25%
Epoch 361. loss: 0.3263, acc: 90.64%, val_loss 0.2666, val_acc 91.25%
Epoch 362. loss: 0.3117, acc: 90.66%, val_loss 0.2666, val_acc 91.25%
Epoch 363. loss: 0.3322, acc: 89.98%, val_loss 0.2666, val_acc 91.25%
Epoch 364. loss: 0.3241, acc: 90.37%, val_loss 0.2666, val_acc 91.25%
Epoch 365. loss: 0.3202, acc: 90.13%, val_loss 0.2666, val_acc 91.29%
Epoch 366. loss: 0.3157, acc: 90.87%, val_loss 0.2666, val_acc 91.29%
Epoch 367. loss: 0.3230, acc: 90.21%, val_loss 0.2666, val_acc 91.25%
Epoch 368. loss: 0.3346, acc: 89.85%, val_loss 0.2666, val_acc 91.25%
Epoch 369. loss: 0.3365, acc: 89.86%, val_loss 0.2666, val_acc 91.25%
Epoch 370. loss: 0.3

Epoch 474. loss: 0.3250, acc: 90.58%, val_loss 0.2666, val_acc 91.25%
Epoch 475. loss: 0.3356, acc: 90.15%, val_loss 0.2666, val_acc 91.25%
Epoch 476. loss: 0.3254, acc: 90.22%, val_loss 0.2666, val_acc 91.25%
Epoch 477. loss: 0.3178, acc: 90.85%, val_loss 0.2666, val_acc 91.25%
Epoch 478. loss: 0.3250, acc: 90.62%, val_loss 0.2666, val_acc 91.25%
Epoch 479. loss: 0.3264, acc: 89.96%, val_loss 0.2666, val_acc 91.25%
Epoch 480. loss: 0.3153, acc: 90.48%, val_loss 0.2666, val_acc 91.25%
Epoch 481. loss: 0.3219, acc: 90.27%, val_loss 0.2666, val_acc 91.25%
Epoch 482. loss: 0.3217, acc: 90.28%, val_loss 0.2666, val_acc 91.25%
Epoch 483. loss: 0.3312, acc: 90.16%, val_loss 0.2666, val_acc 91.25%
Epoch 484. loss: 0.3284, acc: 90.25%, val_loss 0.2666, val_acc 91.25%
Epoch 485. loss: 0.3402, acc: 89.81%, val_loss 0.2666, val_acc 91.25%
Epoch 486. loss: 0.3202, acc: 90.42%, val_loss 0.2666, val_acc 91.25%
Epoch 487. loss: 0.3129, acc: 90.85%, val_loss 0.2666, val_acc 91.25%
Epoch 488. loss: 0.3

In [37]:
features_test = [nd.load(os.path.join(features_dir,'features_test_%s.nd' % model_name))[0] for model_name in model_names]
features_test = nd.concat(*features_test, dim=1)

In [38]:
output = nd.softmax(net(features_test.as_in_context(ctx))).asnumpy()
df_pred = pd.read_csv(os.path.join(data_dir,'sample_submission.csv'))
for i, c in enumerate(df_pred.columns[1:]):
    df_pred[c] = output[:,i]

df_pred.to_csv('pred_0.260.csv', index=None)

In [15]:
zip(np.argmax(pd.read_csv('pred_0.28.csv').values[:,1:], axis=-1), np.argmax(df_pred.values[:,1:], axis=-1))[:10]

FileNotFoundError: File b'pred_0.28.csv' does not exist