In [1]:
!df -h

Filesystem      Size  Used Avail Use% Mounted on
overlay         359G   23G  318G   7% /
tmpfs           6.4G     0  6.4G   0% /dev
tmpfs           6.4G     0  6.4G   0% /sys/fs/cgroup
tmpfs           6.4G   12K  6.4G   1% /var/colab
/dev/sda1       365G   27G  339G   8% /opt/bin
shm             6.0G     0  6.0G   0% /dev/shm
tmpfs           6.4G     0  6.4G   0% /sys/firmware


In [2]:
!free -h

              total        used        free      shared  buff/cache   available
Mem:            12G        535M         10G        900K        1.7G         11G
Swap:            0B          0B          0B


In [3]:
!pip install mxnet-cu100 gluoncv

Collecting mxnet-cu100
[?25l  Downloading https://files.pythonhosted.org/packages/ae/36/40b6d201b46495513f7a7fa25fe8b7d85b3602a22efba119e8146d5f1601/mxnet_cu100-1.4.0.post0-py2.py3-none-manylinux1_x86_64.whl (487.9MB)
[K    100% |████████████████████████████████| 487.9MB 37kB/s 
[?25hCollecting gluoncv
[?25l  Downloading https://files.pythonhosted.org/packages/7b/2b/73c2cfd4fc72101f9087bf1d2290286418a1428274d5c3d022146baed021/gluoncv-0.3.0-py2.py3-none-any.whl (238kB)
[K    100% |████████████████████████████████| 245kB 10.4MB/s 
Collecting requests>=2.20.0 (from mxnet-cu100)
[?25l  Downloading https://files.pythonhosted.org/packages/7d/e3/20f3d364d6c8e5d2353c72a67778eb189176f08e873c9900e10c0287b84b/requests-2.21.0-py2.py3-none-any.whl (57kB)
[K    100% |████████████████████████████████| 61kB 22.4MB/s 
[?25hCollecting graphviz<0.9.0,>=0.8.1 (from mxnet-cu100)
  Downloading https://files.pythonhosted.org/packages/53/39/4ab213673844e0c004bed8a0781a0721a3f6bb23eb8854ee75c236428892/

In [0]:
from __future__ import division

import argparse, time, logging, random, math

import numpy as np
import mxnet as mx

from mxnet import gluon, nd
from mxnet import autograd as ag
from mxnet.gluon import nn
from mxnet.gluon.data.vision import transforms

from gluoncv.model_zoo import get_model
from gluoncv.utils import makedirs, TrainingHistory

In [0]:
# !pip install mxnet-cu100

In [0]:
# number of GPUs to use
num_gpus = 1
ctx = [mx.gpu(i) for i in range(num_gpus)]

# Get the model CIFAR_ResNet20_v1, with 10 output classes, without pre-trained weights
net = get_model('cifar_resnet20_v1', classes=10, pretrained=False)
# net.collect_params().reset_ctx(ctx)

net.initialize(mx.init.Xavier(), ctx = ctx)
# net.hybridize()


In [0]:
transform_train = transforms.Compose([
    # Randomly crop an area, and then resize it to be 32x32
#     transforms.RandomResizedCrop(28),
    # Randomly flip the image horizontally
    transforms.RandomFlipLeftRight(),
    # Randomly jitter the brightness, contrast and saturation of the image
    transforms.RandomColorJitter(brightness=0.1, contrast=0.1, saturation=0.1),
    # Randomly adding noise to the image
    transforms.RandomLighting(0.1),
    # Transpose the image from height*width*num_channels to num_channels*height*width
    # and map values from [0, 255] to [0,1]
    transforms.ToTensor(),
    # Normalize the image with mean and standard deviation calculated across all images
#     transforms.Normalize([0.4914, 0.4822, 0.4465], [0.2023, 0.1994, 0.2010])
    # one channel
    transforms.Normalize([0.4914], [0.2023])

])

In [0]:
transform_test = transforms.Compose([
#     transforms.Resize(28),
    transforms.ToTensor(),
#     transforms.Normalize([0.4914, 0.4822, 0.4465], [0.2023, 0.1994, 0.2010])
    transforms.Normalize([0.4914], [0.2023])
])

In [0]:
# Batch Size for Each GPU
per_device_batch_size = 64
# Number of data loader workers
num_workers = 8
# Calculate effective total batch size
batch_size = per_device_batch_size * num_gpus

# Set train=True for training data
# Set shuffle=True to shuffle the training data
train_data = gluon.data.DataLoader(
    gluon.data.vision.FashionMNIST(train=True).transform_first(transform_train),
    batch_size=batch_size, shuffle=True, last_batch='discard', num_workers=num_workers)

# Set train=False for validation data
val_data = gluon.data.DataLoader(
    gluon.data.vision.FashionMNIST(train=False).transform_first(transform_test),
    batch_size=batch_size, shuffle=False, num_workers=num_workers)

In [0]:
# Learning rate decay factor
lr_decay = 0.1
# Epochs where learning rate decays
lr_decay_epoch = [80, 150, np.inf]

# Nesterov accelerated gradient descent
optimizer = 'nag'
# Set parameters
optimizer_params = {'learning_rate': 0.1, 'wd': 0.0001, 'momentum': 0.9}

# Define our trainer for net
trainer = gluon.Trainer(net.collect_params(), optimizer, optimizer_params)

In [0]:
loss_fn = gluon.loss.SoftmaxCrossEntropyLoss()
# print(net)

In [0]:
train_metric = mx.metric.Accuracy()
train_history = TrainingHistory(['training-error', 'validation-error'])

In [0]:
def test(ctx, val_data):
    metric = mx.metric.Accuracy()
    for i, batch in enumerate(val_data):
        data = gluon.utils.split_and_load(batch[0], ctx_list=ctx, batch_axis=0)
        label = gluon.utils.split_and_load(batch[1], ctx_list=ctx, batch_axis=0)
        outputs = [net(X) for X in data]
        metric.update(label, outputs)
    return metric.get()

In [0]:
epochs=200
lr_decay_count = 0

for epoch in range(epochs):
    tic = time.time()
    train_metric.reset()
    train_loss = 0

    # Learning rate decay
    if epoch == lr_decay_epoch[lr_decay_count]:
        trainer.set_learning_rate(trainer.learning_rate*lr_decay)
        lr_decay_count += 1

    # Loop through each batch of training data
    for i, batch in enumerate(train_data):
        # Extract data and label
        data = gluon.utils.split_and_load(batch[0], ctx_list=ctx, batch_axis=0)
        label = gluon.utils.split_and_load(batch[1], ctx_list=ctx, batch_axis=0)

        # AutoGrad
        with ag.record():
            output = [net(X) for X in data]
            loss = [loss_fn(yhat, y) for yhat, y in zip(output, label)]

        # Backpropagation
        for l in loss:
            l.backward()

        # Optimize
        trainer.step(batch_size)

        # Update metrics
        train_loss += sum([l.sum().asscalar() for l in loss])
        train_metric.update(label, output)

    name, acc = train_metric.get()
    # Evaluate on Validation data
    name, val_acc = test(ctx, val_data)

    # Update history and print metrics
    train_history.update([1-acc, 1-val_acc])
    print('[Epoch %d] train=%f val=%f loss=%f time: %f learning_rate: %f'  %
        (epoch, acc, val_acc, train_loss, time.time()-tic ,trainer.learning_rate))

# We can plot the metric scores with:

train_history.plot()

[Epoch 0] train=0.835762 val=0.870200 loss=27173.214324 time: 74.967051 learning_rate: 0.100000
[Epoch 1] train=0.892910 val=0.896000 loss=17529.751056 time: 71.130558 learning_rate: 0.100000
[Epoch 2] train=0.907217 val=0.910900 loss=15300.600327 time: 71.327368 learning_rate: 0.100000
[Epoch 3] train=0.914805 val=0.915100 loss=14002.874303 time: 71.472617 learning_rate: 0.100000
[Epoch 4] train=0.919574 val=0.921600 loss=13249.398613 time: 76.085972 learning_rate: 0.100000
[Epoch 5] train=0.921525 val=0.921200 loss=12808.759727 time: 73.957966 learning_rate: 0.100000
[Epoch 6] train=0.926261 val=0.920300 loss=12164.984082 time: 74.551862 learning_rate: 0.100000
[Epoch 7] train=0.928862 val=0.926500 loss=11907.413461 time: 73.861351 learning_rate: 0.100000
[Epoch 8] train=0.932030 val=0.925200 loss=11416.310748 time: 70.457989 learning_rate: 0.100000
[Epoch 9] train=0.931547 val=0.920700 loss=11368.430602 time: 70.854014 learning_rate: 0.100000
[Epoch 10] train=0.933198 val=0.913900 l

In [0]:
# from google.colab import drive
# drive.mount('/content/gdrive')

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3Aietf%3Awg%3Aoauth%3A2.0%3Aoob&scope=email%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdocs.test%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive.photos.readonly%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fpeopleapi.readonly&response_type=code

Enter your authorization code:
··········
Mounted at /content/gdrive


In [0]:
net.save_parameters('/content/gdrive/My Drive/dive_deep_cifar10_resnet110_v2.params')

In [0]:
net.load_parameters('/content/gdrive/My Drive/dive_deep_cifar10_resnet56_v1.params', ctx=ctx)