In [0]:
# Tiny ImageNet has 200 categories and all of its images are 64x64.
# The network is fed 56x56 images because the training data
# augmentation takes random crops out of the original images.
IMAGE_SIZE = 56
NUM_CLASSES = 200
# If the input image has spatial size [56, 56]
# then the spatial size before the global average pooling is [4, 4].


BATCH_NORM_MOMENTUM = 0.1
# This value is passed as `momentum` to tf.layers.batch_normalization.
# Sometimes the right momentum value is very important.
# NOTE(review): the TensorFlow documentation lists 0.99 as the default for
# that argument, and there `momentum` is the decay of the moving averages,
# so 0.1 makes them follow the latest batches closely - confirm it is intended.

N_SHUFFLE_UNITS = (1, 3, 1)
# Number of shuffle units of stride 1 in each stage,
# in the original paper: [3, 7, 3].

# Number of layers that the network will have:
# (sum(N_SHUFFLE_UNITS) + 3)*3 + 1 + 1

# Stride in the first convolution layer:
# the original paper uses stride=2,
# but because the images here are small (64x64) stride=1 is chosen.
FIRST_STRIDE = 1


# Optimizer settings.
MOMENTUM = 0.9
USE_NESTEROV = True
LR_REDUCE_FACTOR = 0.1


# Input pipeline settings.
# You need to tweak these numbers for your system,
# it can accelerate training.
SHUFFLE_BUFFER_SIZE = 10000
PREFETCH_BUFFER_SIZE = 1000
NUM_THREADS = 4

In [2]:
import tensorflow as tf
import math

def _channel_shuffle(X, groups):
    """Interleave the channels of `X` across `groups`.

    The channel axis is viewed as [groups, channels_per_group], the two
    axes are swapped, and the result is flattened back to one channel axis.

    Arguments:
        X: A rank-4 tensor whose height, width and channel
            dimensions are statically known.
        groups: An integer, number of channel groups.
    Returns:
        A tensor with the same static height, width and channel count as `X`.
    """
    _, rows, cols, channels = X.shape.as_list()
    per_group = int(channels/groups)

    # split the channel axis into [groups, per_group]
    grouped = tf.reshape(
        X, tf.stack([-1, rows, cols, groups, per_group])
    )

    # swap the group axis with the within-group axis
    swapped = tf.transpose(grouped, [0, 1, 2, 4, 3])

    # merge the two trailing axes back into a single channel axis
    return tf.reshape(
        swapped, tf.stack([-1, rows, cols, channels])
    )


def _mapping(
        X, is_training, num_classes=200,
        groups=3, dropout=0.5,
        complexity_scale_factor=0.75):
    """A ShuffleNet implementation.
    Arguments:
        X: A float tensor with shape [batch_size, image_height, image_width, 3].
        is_training: A boolean, whether the network is in the training mode.
        num_classes: An integer.
        groups: An integer, number of groups in group convolutions,
            only possible values are: 1, 2, 3, 4, 8.
        dropout: A floar number, dropout rate before the last linear layer.
        complexity_scale_factor: A floar number, to customize the network
            to a desired complexity you can apply a scale factor,
            in the original paper they are considering
            scale factor values: 0.25, 0.5, 1.0.
            It determines the width of the network.
    Returns:
        A float tensor with shape [batch_size, num_classes].
    """

    # 'out_channels' equals to second stage's number of output channels
    if groups == 1:
        out_channels = 144
    elif groups == 2:
        out_channels = 200
    elif groups == 3:
        out_channels = 240
    elif groups == 4:
        out_channels = 272
    elif groups == 8:
        out_channels = 384
    # all 'out_channels' are divisible by corresponding 'groups'

    # if you want you can decrease network's width
    out_channels = int(out_channels * complexity_scale_factor)

    with tf.variable_scope('features'):

        with tf.variable_scope('stage1'):

            with tf.variable_scope('conv1'):
                result = _conv(X, 24, kernel=3, stride=FIRST_STRIDE)

            result = _batch_norm(result, is_training)
            result = _nonlinearity(result)
            # in the original paper they are not using batch_norm and relu here

            result = _max_pooling(result)

        with tf.variable_scope('stage2'):

            with tf.variable_scope('unit1'):
                result = _first_shufflenet_unit(
                    result, is_training, groups, out_channels
                )

            for i in range(N_SHUFFLE_UNITS[0]):
                with tf.variable_scope('unit' + str(i + 2)):
                    result = _shufflenet_unit(result, is_training, groups)

            # number of channels in 'result' is out_channels

        with tf.variable_scope('stage3'):

            with tf.variable_scope('unit1'):
                result = _shufflenet_unit(result, is_training, groups, stride=2)

            for i in range(N_SHUFFLE_UNITS[1]):
                with tf.variable_scope('unit' + str(i + 2)):
                    result = _shufflenet_unit(result, is_training, groups)

            # number of channels in 'result' is 2*out_channels

        with tf.variable_scope('stage4'):

            with tf.variable_scope('unit1'):
                result = _shufflenet_unit(result, is_training, groups, stride=2)

            for i in range(N_SHUFFLE_UNITS[2]):
                with tf.variable_scope('unit' + str(i + 2)):
                    result = _shufflenet_unit(result, is_training, groups)

            # number of channels in 'result' is 4*out_channels

    with tf.variable_scope('classifier'):
        result = _global_average_pooling(result)

        result = _dropout(result, is_training, dropout)
        # in the original paper they are not using dropout here

        logits = _affine(result, num_classes)

    return logits


def _nonlinearity(X):
    """Apply the ReLU activation to `X` (the op is named 'ReLU')."""
    activated = tf.nn.relu(X, name='ReLU')
    return activated


def _dropout(X, is_training, rate=0.5):
    """Apply dropout to `X` only when `is_training` evaluates to true.

    Arguments:
        X: A tensor.
        is_training: A boolean tensor used as the predicate of `tf.cond`.
        rate: A float number, the keep probability used is `1.0 - rate`.
    Returns:
        A tensor: the dropped-out `X` in the training branch,
        an identity copy of `X` otherwise.
    """
    keep_prob = tf.constant(
        1.0 - rate, dtype=tf.float32,
        shape=[], name='keep_prob'
    )

    def _training_branch():
        return tf.nn.dropout(X, keep_prob)

    def _inference_branch():
        return tf.identity(X)

    return tf.cond(
        is_training, _training_branch, _inference_branch,
        name='dropout'
    )


def _batch_norm(X, is_training):
    """Batch-normalize `X` with a learned offset but without a learned scale.

    The moving-average momentum comes from BATCH_NORM_MOMENTUM and
    `is_training` is forwarded as the layer's `training` argument.
    """
    normalized = tf.layers.batch_normalization(
        X,
        momentum=BATCH_NORM_MOMENTUM,
        center=True, scale=False,
        training=is_training,
        fused=True
    )
    return normalized


def _global_average_pooling(X):
    """Average `X` over axes 1 and 2 (the spatial axes of an NHWC tensor)."""
    spatial_axes = [1, 2]
    pooled = tf.reduce_mean(
        X, axis=spatial_axes, name='global_average_pooling'
    )
    return pooled


def _max_pooling(X):
    """Max-pool `X` with a 3x3 window, stride 2 and 'SAME' padding."""
    window = [1, 3, 3, 1]
    step = [1, 2, 2, 1]
    return tf.nn.max_pool(X, window, step, 'SAME', name='max_pooling')


def _avg_pooling(X):
    """Average-pool `X` with a 3x3 window, stride 2 and 'SAME' padding."""
    window = [1, 3, 3, 1]
    step = [1, 2, 2, 1]
    return tf.nn.avg_pool(X, window, step, 'SAME', name='avg_pooling')


def _conv(X, filters, kernel=3, stride=1):
    """Standard 2D convolution with bias and 'SAME' padding.

    Creates the variables 'kernel' and 'bias' in the current variable scope.

    Arguments:
        X: A tensor whose last dimension (channels) is statically known.
        filters: An integer, number of output channels.
        kernel: An integer, the kernel is `kernel` x `kernel`.
        stride: An integer, stride along both spatial axes.
    Returns:
        A tensor, the convolution output with the bias added.
    """
    n_inputs = X.shape.as_list()[-1]

    # uniform initialization in [-bound, bound]; the bound is computed
    # from the number of input channels only (kaiming uniform style)
    bound = math.sqrt(6.0/n_inputs)

    weights = tf.get_variable(
        'kernel', [kernel, kernel, n_inputs, filters],
        tf.float32, tf.random_uniform_initializer(-bound, bound)
    )
    offset = tf.get_variable(
        'bias', [filters], tf.float32,
        tf.zeros_initializer()
    )

    strides = [1, stride, stride, 1]
    convolved = tf.nn.conv2d(X, weights, strides, 'SAME')
    return tf.nn.bias_add(convolved, offset)


def _group_conv(X, filters, groups, kernel=1, stride=1):

    in_channels = X.shape.as_list()[3]
    in_channels_per_group = int(in_channels/groups)
    filters_per_group = int(filters/groups)

    # kaiming uniform initialization
    maxval = math.sqrt(6.0/in_channels_per_group)

    K = tf.get_variable(
        'kernel', [kernel, kernel, in_channels_per_group, filters],
        tf.float32, tf.random_uniform_initializer(-maxval, maxval)
    )

    # split channels
    X_channel_splits = tf.split(X, [in_channels_per_group]*groups, axis=3)
    K_filter_splits = tf.split(K, [filters_per_group]*groups, axis=3)

    results = []

    # do convolution for each split
    for i in range(groups):
        X_split = X_channel_splits[i]
        K_split = K_filter_splits[i]
        results += [tf.nn.conv2d(X_split, K_split, [1, stride, stride, 1], 'SAME')]

    return tf.concat(results, 3)


def _depthwise_conv(X, kernel=3, stride=1):
    """Depthwise 2D convolution with channel multiplier 1 and 'SAME' padding.

    Creates the variable 'depthwise_kernel' in the current variable scope.
    No bias is added.

    Arguments:
        X: A rank-4 tensor whose channel dimension (axis 3) is statically known.
        kernel: An integer, the kernel is `kernel` x `kernel`.
        stride: An integer, stride along both spatial axes.
    Returns:
        A tensor, the result of `tf.nn.depthwise_conv2d`.
    """
    n_channels = X.shape.as_list()[3]

    # uniform initialization in [-bound, bound], the bound is
    # computed from the number of channels (kaiming uniform style)
    bound = math.sqrt(6.0/n_channels)
    initializer = tf.random_uniform_initializer(-bound, bound)

    weights = tf.get_variable(
        'depthwise_kernel', [kernel, kernel, n_channels, 1],
        tf.float32, initializer
    )

    strides = [1, stride, stride, 1]
    return tf.nn.depthwise_conv2d(X, weights, strides, 'SAME')


def _shufflenet_unit(X, is_training, groups=3, stride=1):
    """A ShuffleNet unit: group conv, channel shuffle, depthwise conv, group conv.

    With `stride < 2` the unit output is added to the input, so the number
    of channels is unchanged. Otherwise the input is average-pooled and
    concatenated with the unit output, which doubles the number of channels.

    Arguments:
        X: A rank-4 tensor whose channel dimension (axis 3) is statically known.
        is_training: Forwarded to the batch normalization layers.
        groups: An integer, number of groups in the group convolutions.
        stride: An integer, stride of the depthwise convolution.
    Returns:
        A tensor, the ReLU-activated output of the unit.
    """
    n_channels = X.shape.as_list()[3]
    branch = X

    with tf.variable_scope('g_conv_1'):
        branch = _group_conv(branch, n_channels, groups)
        branch = _batch_norm(branch, is_training)
        branch = _nonlinearity(branch)

    with tf.variable_scope('channel_shuffle_2'):
        branch = _channel_shuffle(branch, groups)

    with tf.variable_scope('dw_conv_3'):
        branch = _depthwise_conv(branch, stride=stride)
        branch = _batch_norm(branch, is_training)

    with tf.variable_scope('g_conv_4'):
        branch = _group_conv(branch, n_channels, groups)
        branch = _batch_norm(branch, is_training)

    if stride >= 2:
        # downsampling unit: shortcut is pooled and stacked along channels
        shortcut = _avg_pooling(X)
        merged = tf.concat([branch, shortcut], 3)
    else:
        # regular unit: residual connection
        merged = tf.add(branch, X)

    return _nonlinearity(merged)


def _first_shufflenet_unit(X, is_training, groups, out_channels):
    """The first unit of stage 2, it is different from the other units.

    Its first pointwise convolution uses a single group, there is no
    channel shuffle, and the depthwise convolution always has stride 2.
    The convolutional branch produces `out_channels - in_channels` channels
    so that after concatenation with the average-pooled input
    the result has `out_channels` channels.

    Arguments:
        X: A rank-4 tensor whose channel dimension (axis 3) is statically known.
        is_training: Forwarded to the batch normalization layers.
        groups: An integer, number of groups in the last group convolution.
        out_channels: An integer, number of channels of the returned tensor.
    Returns:
        A tensor, the ReLU-activated output of the unit.
    """
    n_inputs = X.shape.as_list()[3]
    branch_channels = out_channels - n_inputs
    branch = X

    with tf.variable_scope('g_conv_1'):
        branch = _group_conv(branch, branch_channels, groups=1)
        branch = _batch_norm(branch, is_training)
        branch = _nonlinearity(branch)

    with tf.variable_scope('dw_conv_2'):
        branch = _depthwise_conv(branch, stride=2)
        branch = _batch_norm(branch, is_training)

    with tf.variable_scope('g_conv_3'):
        branch = _group_conv(branch, branch_channels, groups)
        branch = _batch_norm(branch, is_training)

    shortcut = _avg_pooling(X)
    merged = tf.concat([branch, shortcut], 3)
    return _nonlinearity(merged)


def _affine(X, size):
    """Fully connected layer: `X` times 'kernel' plus 'bias'.

    Arguments:
        X: A rank-2 tensor whose second dimension is statically known.
        size: An integer, number of output units.
    Returns:
        A tensor with `size` columns.
    """
    n_inputs = X.shape.as_list()[1]

    # kaiming uniform initialization
    bound = math.sqrt(6.0/n_inputs)

    weights = tf.get_variable(
        'kernel', [n_inputs, size], tf.float32,
        tf.random_uniform_initializer(-bound, bound)
    )
    offset = tf.get_variable(
        'bias', [size], tf.float32,
        tf.zeros_initializer()
    )

    projected = tf.matmul(X, weights)
    return tf.nn.bias_add(projected, offset)

In [0]:
import tensorflow as tf

def _get_data(num_classes, image_size):
    """Get images and targets in batches.

    Training data is augmented with random crops,
    flips, and color manipulations.
    When evaluating, a center crop is made.
    Both datasets repeat indefinitely and share a single iterator,
    so running 'train_init' or 'val_init' selects which one is read.

    Arguments:
        num_classes: An integer.
        image_size: An integer, it is assumed that
            image_width = image_height = image_size.
    Returns:
        A dict with the following keys:
            'init_data': An op, initialize data sources and batch size.
                It must be run with values fed to the placeholders
                named 'batch_size', 'train_file' and 'val_file'.
            'train_init': An op, initialize train data iterator.
            'val_init': An op, initialize validation data iterator.
            'x_batch': A float tensor with shape [batch_size, image_size, image_size, 3],
                images have pixel values in range [0, 1].
            'y_batch': A float tensor with shape [batch_size, num_classes],
                targets are one-hot encoded.
    """

    # Each setting is a variable whose initial value is a placeholder:
    # the value is fed once, when 'init_data' is run.
    # 'collections=[]' keeps these variables out of all graph collections.
    batch_size = tf.Variable(
        tf.placeholder(tf.int64, [], 'batch_size'),
        trainable=False, collections=[]
    )
    train_file = tf.Variable(
        tf.placeholder(tf.string, [], 'train_file'),
        trainable=False, collections=[]
    )
    val_file = tf.Variable(
        tf.placeholder(tf.string, [], 'val_file'),
        trainable=False, collections=[]
    )
    init_data = tf.variables_initializer([batch_size, train_file, val_file])

    # training pipeline: parse + augment in parallel, prefetch,
    # shuffle single examples, batch, repeat forever
    train_dataset = tf.data.TFRecordDataset(train_file)
    train_dataset = train_dataset.map(
        lambda x: _parse_and_preprocess(x, image_size, augmentation=True),
        num_parallel_calls=NUM_THREADS
    ).prefetch(PREFETCH_BUFFER_SIZE)

    train_dataset = train_dataset.shuffle(buffer_size=SHUFFLE_BUFFER_SIZE)
    train_dataset = train_dataset.batch(batch_size)
    train_dataset = train_dataset.repeat()

    # validation pipeline: no augmentation, no shuffling
    val_dataset = tf.data.TFRecordDataset(val_file)
    val_dataset = val_dataset.map(
        lambda x: _parse_and_preprocess(x, image_size)
    )
    val_dataset = val_dataset.batch(batch_size)
    val_dataset = val_dataset.repeat()

    # one iterator defined by structure only,
    # it can be (re)initialized from either dataset
    iterator = tf.data.Iterator.from_structure(
        train_dataset.output_types,
        train_dataset.output_shapes
    )
    train_init = iterator.make_initializer(train_dataset)
    val_init = iterator.make_initializer(val_dataset)

    x_batch, y_batch = iterator.get_next()
    # integer class indices -> one-hot rows
    y_batch = tf.one_hot(y_batch, num_classes, axis=1, dtype=tf.float32)

    data = {
        'init_data': init_data,
        'train_init': train_init, 'val_init': val_init,
        'x_batch': x_batch, 'y_batch': y_batch
    }
    return data


def _parse_and_preprocess(example_proto, image_size, augmentation=False):
    """Decode one serialized example into an (image, target) pair.

    Arguments:
        example_proto: A serialized example with a JPEG-encoded 'image'
            bytes feature and an int64 'target' feature.
        image_size: An integer, height and width of the returned image.
        augmentation: A boolean. If true, a random flip, random color
            changes and a random crop are applied; otherwise the image is
            cropped or padded centrally to the requested size.
    Returns:
        image: A float tensor with three channels, clipped to [0, 1].
        target: An int64 scalar tensor.
    """
    schema = {
        'image': tf.FixedLenFeature([], tf.string),
        'target': tf.FixedLenFeature([], tf.int64)
    }
    parsed = tf.parse_single_example(example_proto, schema)
    target = parsed['target']

    pixels = tf.image.decode_jpeg(parsed['image'], channels=3)
    pixels = tf.image.convert_image_dtype(pixels, tf.float32)

    if not augmentation:
        pixels = tf.image.resize_image_with_crop_or_pad(
            pixels, image_size, image_size
        )
    else:
        pixels = tf.image.random_flip_left_right(pixels)
        pixels = tf.image.random_brightness(pixels, 0.15)
        pixels = tf.image.random_contrast(pixels, 0.8, 1.25)
        pixels = tf.image.random_hue(pixels, 0.1)
        pixels = tf.image.random_saturation(pixels, 0.8, 1.25)
        pixels = tf.random_crop(pixels, [image_size, image_size, 3])

    # color manipulations can push values out of the valid range
    pixels = tf.clip_by_value(pixels, 0.0, 1.0)
    return pixels, target

In [0]:
def get_shufflenet(
        initial_lr=1e-2, weight_decay=1e-4,
        groups=3, dropout=0.5, complexity_scale_factor=0.75):
    """Create a ShuffleNet computational graph.

    The graph contains the input pipeline, the network, the loss with
    L2 regularization, a momentum optimizer, summaries and a saver.
    The returned graph is finalized, so no ops can be added to it later.

    Arguments:
        initial_lr: A float number, initial learning rate.
        weight_decay: A float number, L2 regularization.
        groups: An integer, number of groups in group convolutions,
            only possible values are: 1, 2, 3, 4, 8.
        dropout: A float number, dropout rate before the last linear layer.
        complexity_scale_factor: A float number, to customize the network
            to a desired complexity you can apply a scale factor,
            in the original paper they are considering
            scale factor values: 0.25, 0.5, 1.0.
            It determines the width of the network.
    Returns:
        graph: A Tensorflow graph.
        ops: A dict with ops. Its keys are: 'init_variables', 'init_data',
            'train_init', 'val_init', 'optimize', 'drop_learning_rate',
            'predictions', 'log_loss', 'accuracy', 'summaries',
            'grad_summaries', 'saver'.
    """

    graph = tf.Graph()
    with graph.as_default():

        with tf.variable_scope('control'):
            # it controls dropout and batch_norm layers,
            # it is true unless another value is fed
            is_training = tf.placeholder_with_default(True, [], 'is_training')

        with tf.device('/cpu:0'), tf.variable_scope('input_pipeline'):
            data = _get_data(NUM_CLASSES, IMAGE_SIZE)

        with tf.variable_scope('inputs'):
            # by default the batches come from the input pipeline,
            # but other values can be fed to 'X' and 'Y'
            X = tf.placeholder_with_default(
                data['x_batch'], [None, IMAGE_SIZE, IMAGE_SIZE, 3], 'X'
            )
            Y = tf.placeholder_with_default(
                data['y_batch'], [None, NUM_CLASSES], 'Y'
            )

        with tf.variable_scope('preprocessing'):
            # per-channel normalization
            mean = tf.constant([0.485, 0.456, 0.406], tf.float32, [3])
            std = tf.constant([0.229, 0.224, 0.225], tf.float32, [3])
            # these values are taken from here:
            # http://pytorch.org/docs/master/torchvision/models.html,
            # but they are not very important, i think.
            X -= mean
            X /= std

        logits = _mapping(
            X, is_training, NUM_CLASSES,
            groups, dropout, complexity_scale_factor
        )

        with tf.variable_scope('softmax'):
            predictions = tf.nn.softmax(logits)

        with tf.variable_scope('log_loss'):
            log_loss = tf.losses.softmax_cross_entropy(Y, logits)

        with tf.variable_scope('weight_decay'):
            _add_weight_decay(weight_decay)

        with tf.variable_scope('total_loss'):
            # cross entropy plus the L2 terms registered above
            total_loss = tf.losses.get_total_loss()

        with tf.variable_scope('learning_rate'):
            learning_rate = tf.Variable(
                initial_lr, trainable=False,
                dtype=tf.float32, name='lr'
            )
            # you can reduce learning rate by some factor, usually 0.1
            drop_learning_rate = tf.assign(
                learning_rate, LR_REDUCE_FACTOR*learning_rate
            )

        # ops from the UPDATE_OPS collection (batch norm moving averages
        # are registered there) must run together with every training step
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops), tf.variable_scope('optimizer'):
            optimizer = tf.train.MomentumOptimizer(
                learning_rate, momentum=MOMENTUM, use_nesterov=USE_NESTEROV
            )
            grads_and_vars = optimizer.compute_gradients(total_loss)
            optimize = optimizer.apply_gradients(grads_and_vars)

        # add histograms of all gradients,
        # 'v.name[:-2]' strips the last two characters of the variable
        # name (presumably the ':0' output suffix)
        grad_summaries = tf.summary.merge(
            [tf.summary.histogram(v.name[:-2] + '_grad_hist', g)
             for g, v in grads_and_vars]
        )

        with tf.variable_scope('utilities'):
            init_variables = tf.global_variables_initializer()
            saver = tf.train.Saver()
            # fraction of samples whose predicted class equals the target class
            is_equal = tf.equal(tf.argmax(predictions, 1), tf.argmax(Y, 1))
            accuracy = tf.reduce_mean(tf.cast(is_equal, tf.float32))

        summaries = _add_summaries()

    graph.finalize()
    ops = {
        # initialization
        'init_variables': init_variables,
        'init_data': data['init_data'],
        'train_init': data['train_init'],
        'val_init': data['val_init'],

        # training
        'optimize': optimize, 'drop_learning_rate': drop_learning_rate,

        # evaluation
        'predictions': predictions,
        'log_loss': log_loss, 'accuracy': accuracy,
        'summaries': summaries, 'grad_summaries': grad_summaries,
        'saver': saver
    }
    return graph, ops


def _add_summaries():
    """Create one histogram summary per trainable variable and merge them.

    Returns:
        The merged summary op.
    """
    histograms = [
        tf.summary.histogram(variable.name[:-2] + '_hist', variable)
        for variable in tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)
    ]
    return tf.summary.merge(histograms)


def _add_weight_decay(weight_decay):
    """Add L2 regularization to all trainable kernel weights.

    A trainable variable counts as a kernel when its name contains 'kernel'.
    For each of them `weight_decay * l2_loss(variable)` is registered
    with `tf.losses.add_loss`.

    Arguments:
        weight_decay: A float number, the regularization strength.
    """
    strength = tf.constant(
        weight_decay, dtype=tf.float32,
        shape=[], name='weight_decay'
    )

    for variable in tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES):
        if 'kernel' not in variable.name:
            continue
        penalty = tf.multiply(strength, tf.nn.l2_loss(variable))
        tf.losses.add_loss(penalty)

In [5]:
!wget http://cs231n.stanford.edu/tiny-imagenet-200.zip
!unzip -qq 'tiny-imagenet-200.zip'

--2019-11-25 17:24:47--  http://cs231n.stanford.edu/tiny-imagenet-200.zip
Resolving cs231n.stanford.edu (cs231n.stanford.edu)... 171.64.68.10
Connecting to cs231n.stanford.edu (cs231n.stanford.edu)|171.64.68.10|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: 248100043 (237M) [application/zip]
Saving to: ‘tiny-imagenet-200.zip’


2019-11-25 17:25:03 (15.3 MB/s) - ‘tiny-imagenet-200.zip’ saved [248100043/248100043]



In [6]:
import pandas as pd
import os
import shutil
from tqdm import tqdm

# a folder from tiny-imagenet-200.zip file
data_dir = './tiny-imagenet-200/'

# load validation metadata:
# only the first two columns (image name, class) are needed,
# the remaining columns contain bounding boxes info
annotations_file = os.path.join(data_dir, 'val', 'val_annotations.txt')
val_data = pd.read_csv(
    annotations_file, sep='\t', header=None, usecols=[0, 1]
)
val_data.columns = ['img_name', 'img_class']
unique_classes = val_data.img_class.unique()


# note: the images are copied, the original folders are left untouched
print('moving validation data')

# validation images are regrouped into one folder per class
validation_dir = os.path.join(data_dir, 'validation')

# loop over all classes
for name in tqdm(unique_classes):
    # 'exist_ok' makes it possible to re-run this cell
    class_dir = os.path.join(validation_dir, name)
    os.makedirs(class_dir, exist_ok=True)
    # choose images only from a specific class
    class_images = val_data.loc[val_data.img_class == name, 'img_name'].values
    # copy these images to a new folder
    for img in class_images:
        shutil.copyfile(
            os.path.join(data_dir, 'val', 'images', img),
            os.path.join(class_dir, img)
        )


print('\nmoving training data')

# training images are copied from '<class>/images/' into '<class>/'
training_dir = os.path.join(data_dir, 'training')

# loop over all classes
for name in tqdm(unique_classes):
    class_dir = os.path.join(training_dir, name)
    os.makedirs(class_dir, exist_ok=True)
    # choose images only from a specific class
    source_dir = os.path.join(data_dir, 'train', name, 'images')
    # copy these images to a new folder
    for img in os.listdir(source_dir):
        shutil.copyfile(
            os.path.join(source_dir, img),
            os.path.join(class_dir, img)
        )

print('\nvalidation data is in', validation_dir)
print('training data is in', training_dir)

  8%|▊         | 16/200 [00:00<00:01, 159.85it/s]

moving validation data


100%|██████████| 200/200 [00:01<00:00, 164.54it/s]
  1%|          | 2/200 [00:00<00:10, 18.58it/s]


moving training data


100%|██████████| 200/200 [00:11<00:00, 17.24it/s]


validation data is in ./tiny-imagenet-200/validation
training data is in ./tiny-imagenet-200/training





In [7]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tqdm import tqdm
from PIL import Image
import os
import io

"""The purpose of this script is
to convert an image dataset that looks like:
    class1/image1.jpg
    class1/image44.jpg
    class1/image546.jpg
    ...
    class6/image55.jpg
    class6/image12.jpg
    class6/image76.jpg
    ...
to tfrecords format.
1. It assumes that each folder is a separate class and
that the number of classes equals the number of folders.
2. Also it assumes that validation and training folders
have the same subfolders (the same classes).
3. Additionally it outputs a 'class_encoder.npy' file
that contains a dictionary: folder_name -> class_index (integer).
"""

train_dir = './tiny-imagenet-200/training/'
val_dir = './tiny-imagenet-200/validation/'
save_dir = './save/'

# create the output folder, it is not an error if it already exists
os.makedirs(save_dir, exist_ok=True)

def main():
    """Build the class encoder and write the train/val tfrecords files.

    The encoder is created from the training folder, saved as
    'class_encoder.npy', and used for both conversions.
    """
    train_records = os.path.join(save_dir, 'train.tfrecords')
    val_records = os.path.join(save_dir, 'val.tfrecords')

    # folder name -> class index,
    # the saved copy lets you map class indices back to folder names
    encoder = create_encoder(train_dir)
    np.save(os.path.join(save_dir, 'class_encoder.npy'), encoder)

    for source, destination in ((train_dir, train_records), (val_dir, val_records)):
        convert(source, encoder, destination)

    print('\nCreated two tfrecords files:')
    print(train_records)
    print(val_records)


def _bytes_feature(value):
    """Wrap a single bytes value into a `tf.train.Feature`."""
    payload = tf.train.BytesList(value=[value])
    return tf.train.Feature(bytes_list=payload)


def _int64_feature(value):
    """Wrap a single integer value into a `tf.train.Feature`."""
    payload = tf.train.Int64List(value=[value])
    return tf.train.Feature(int64_list=payload)

def to_bytes(array):
    """Encode an image array as JPEG and return the encoded bytes."""
    with io.BytesIO() as buffer:
        Image.fromarray(array).save(buffer, format='jpeg')
        encoded = buffer.getvalue()
    return encoded


def convert(folder, encoder, tfrecords_filename):
    """Convert a folder with directories of images to tfrecords format.

    Every image is loaded, converted to three-channel RGB, re-encoded
    as JPEG and written together with its class index as one example
    with the features 'image' (bytes) and 'target' (int64).

    Arguments:
        folder: A path to a folder where directories with images are.
        encoder: A dict, folder_name -> integer.
        tfrecords_filename: A path where to save tfrecords file.
    """

    images_metadata = collect_metadata(folder, encoder)
    writer = tf.python_io.TFRecordWriter(tfrecords_filename)

    # the writer is closed even if an image fails to load,
    # so the file handle is never leaked
    try:
        progress = tqdm(images_metadata.iterrows(), total=len(images_metadata))
        for _, row in progress:

            file_path = os.path.join(folder, row.img_path)

            # read an image and convert it to an RGB array;
            # some images are grayscale, 'convert' repeats their single
            # channel three times and also handles other color modes
            with Image.open(file_path) as image:
                array = np.asarray(image.convert('RGB'), dtype='uint8')

            # get class of the image
            target = int(row.class_number)

            feature = {
                'image': _bytes_feature(to_bytes(array)),
                'target': _int64_feature(target),
            }

            example = tf.train.Example(features=tf.train.Features(feature=feature))
            writer.write(example.SerializeToString())
    finally:
        writer.close()


def create_encoder(folder):
    """Encode directories in the folder with integer values.

    Values are in the range 0..(n_directories - 1) and are assigned in
    the sorted order of the directory names, so the encoding is the same
    on every run and every machine. Regular files in `folder` are ignored.

    Arguments:
        folder: A path to a folder where directories with images are.
            Each directory - separate class.
    Returns:
        A dict, directory name -> integer.
    """
    classes = sorted(
        name for name in os.listdir(folder)
        if os.path.isdir(os.path.join(folder, name))
    )
    encoder = {n: i for i, n in enumerate(classes)}
    return encoder


def collect_metadata(folder, encoder):
    """Collect paths to images. Collect their classes.

    All paths are with respect to 'folder'. The rows of the
    returned dataframe are randomly shuffled.

    Arguments:
        folder: A path to a folder where directories with images are.
            Each directory - separate class.
        encoder: A dict, folder_name -> integer.
    Returns:
        A pandas dataframe with columns
        'class_name', 'img_path' and 'class_number'.
        It has no rows if no images are found.
    Raises:
        KeyError: If a directory name is not present in `encoder`.
    """

    columns = ['class_name', 'img_path', 'class_number']

    # the first item yielded by os.walk is 'folder' itself, skip it
    subdirs = list(os.walk(folder))[1:]
    metadata = []

    for dir_path, _, files in subdirs:
        # portable replacement for splitting the path on '/'
        dir_name = os.path.basename(dir_path)
        for file_name in files:
            # encode folder names by integers
            metadata.append([
                dir_name,
                os.path.join(dir_name, file_name),
                encoder[dir_name]
            ])

    # passing the column names here also works for an empty list
    M = pd.DataFrame(metadata, columns=columns)
    if not metadata:
        return M

    # shuffle the dataframe
    M = M.sample(frac=1).reset_index(drop=True)

    return M

# run the conversion when this cell is executed
main()

100000it [01:42, 975.19it/s]
10000it [00:10, 982.30it/s]


Created two tfrecords files:
./save/train.tfrecords
./save/val.tfrecords





In [8]:
import tensorflow as tf
import shutil
import os
import json
import time
import sys
from tqdm import tqdm

# index of the run, it is a part of the names of the log and save folders
run = 0
# whether to delete the folders of a previous run with the same index
reset = False

# data
train_tfrecords = './save/train.tfrecords'
val_tfrecords = './save/val.tfrecords'

# training length
num_epochs = 40
batch_size = 128
steps_per_epoch = 500
validation_steps = 50

# reducing the learning rate on plateau
lr_patience = 5
lr_threshold = 0.01

# early stopping
patience = 10
threshold = 0.001

# optimization and regularization
initial_lr = 0.1
weight_decay = 0.005
dropout = 0.2

# network architecture
groups = 4
complexity_scale_factor = 0.5


def train():
    """Train ShuffleNet using the module-level settings defined above.

    Logs and summaries go to 'logs/run<run>', the model is saved to
    'saved/run<run>'. If a training info file of a previous run exists
    (and `reset` is false), the saved model is restored and the epoch
    numbering continues. When training ends, normally, by early stopping
    or by an interruption, the per-epoch results, the model config
    and the model itself are written to disk.
    """

    # folders for logging and saving
    dir_to_log = os.path.join('logs', 'run' + str(run))
    dir_to_save = os.path.join('saved', 'run' + str(run))

    print('\nTraining logs and summaries will be in', dir_to_log)
    print('Saved model will be in', dir_to_save, '\n')

    # create these folders, start from empty ones if 'reset' is set
    for folder in (dir_to_log, dir_to_save):
        if reset and os.path.exists(folder):
            shutil.rmtree(folder)
        os.makedirs(folder, exist_ok=True)

    # files with losses and config
    training_info_file = os.path.join(dir_to_log, 'training_info.txt')
    model_config_file = os.path.join(dir_to_log, 'model_config.txt')
    print('Training/validation evaluations will be in', training_info_file)
    print('Model config will be in', model_config_file, '\n')

    # create the graph and start a session
    graph, ops = get_shufflenet(
        initial_lr, weight_decay,
        groups, dropout,
        complexity_scale_factor
    )
    sess = tf.Session(graph=graph)
    writer = tf.summary.FileWriter(dir_to_log, sess.graph)
    print('\nCreated the graph and started a session!')

    # the session and the summary writer are released on every exit path
    try:
        # check if to continue training or start from scratch
        warm = os.path.exists(training_info_file) and not reset  # warm start
        if warm:
            print('Restoring previously saved model and continuing training.\n')
            # the file has a header line followed by one line per epoch,
            # so its number of lines is the index of the next epoch
            with open(training_info_file) as info:
                initial_epoch = max(1, sum(1 for _ in info))
            try:
                ops['saver'].restore(sess, os.path.join(dir_to_save, 'model'))
            except Exception:
                print('\nCan\'t restore the saved model, '
                      'maybe architectures don\'t match.')
                sys.exit()
        else:
            print('Training model from scratch.\n')
            initial_epoch = 1
            sess.run(ops['init_variables'])

        # initialize data sources
        data_dict = {
            'input_pipeline/train_file:0': train_tfrecords,
            'input_pipeline/val_file:0': val_tfrecords,
            'input_pipeline/batch_size:0': batch_size
        }
        sess.run(ops['init_data'], data_dict)

        losses = []  # training info will be collected here
        training_epochs = range(
            initial_epoch,
            initial_epoch + num_epochs
        )

        # begin training,
        # but you can interrupt training by ctrl-c,
        # your model will be saved
        try:
            for epoch in training_epochs:

                start_time = time.time()
                running_loss, running_accuracy = 0.0, 0.0
                sess.run(ops['train_init'])

                # at zeroth step also collect metadata and summaries
                run_options = tf.RunOptions(
                    trace_level=tf.RunOptions.FULL_TRACE
                )
                run_metadata = tf.RunMetadata()

                # do epoch's zeroth step
                _, batch_loss, batch_accuracy, summary, grad_summary = sess.run([
                    ops['optimize'], ops['log_loss'], ops['accuracy'],
                    ops['summaries'], ops['grad_summaries']
                ], options=run_options, run_metadata=run_metadata)
                running_loss += batch_loss
                running_accuracy += batch_accuracy

                print('epoch', epoch)
                training_steps = tqdm(
                    range(1, steps_per_epoch),
                    initial=1, total=steps_per_epoch
                )

                # main training loop
                for step in training_steps:

                    _, batch_loss, batch_accuracy = sess.run([
                        ops['optimize'], ops['log_loss'], ops['accuracy']
                    ])
                    running_loss += batch_loss
                    running_accuracy += batch_accuracy

                # evaluate on the validation set
                val_loss, val_accuracy = _evaluate(
                    sess, ops, validation_steps
                )
                train_loss = running_loss/steps_per_epoch
                train_accuracy = running_accuracy/steps_per_epoch

                # collect all losses and accuracies
                losses += [(
                    epoch, train_loss, val_loss,
                    train_accuracy, val_accuracy, time.time() - start_time
                )]
                writer.add_run_metadata(run_metadata, str(epoch))
                writer.add_summary(summary, epoch)
                writer.add_summary(grad_summary, epoch)
                print('loss: {0:.3f}, val_loss: {1:.3f}, '
                      'acc: {2:.3f}, val_acc: {3:.3f}, time: {4:.3f}\n'.format(*losses[-1][1:]))

                # consider a possibility of early stopping
                if _is_early_stopping(losses, patience, threshold):
                    print('Early stopping!')
                    break

                # consider a possibility of reducing learning rate by some factor
                _reduce_lr_on_plateau(
                    sess, ops, losses,
                    lr_patience, lr_threshold
                )
        except (KeyboardInterrupt, SystemExit):
            print(' Interruption detected, exiting the program...')

        # persist everything the messages above promise and
        # that the warm start relies on
        print('Writing logs and saving the trained model.')
        _write_training_info(losses, warm, training_info_file)
        model_config = {
            'initial_lr': initial_lr, 'weight_decay': weight_decay,
            'groups': groups, 'dropout': dropout,
            'complexity_scale_factor': complexity_scale_factor,
            'batch_size': batch_size
        }
        with open(model_config_file, 'w') as config:
            json.dump(model_config, config)
        ops['saver'].save(sess, os.path.join(dir_to_save, 'model'))
    finally:
        writer.close()
        sess.close()


def _write_training_info(losses, warm, training_info_file):
    """Write per-epoch results as comma separated lines.

    On a warm start the lines are appended to the existing file,
    otherwise the file is created and starts with a header line.
    """
    with open(training_info_file, 'a' if warm else 'w') as info:
        if not warm:
            info.write(
                'epoch,train_logloss,val_logloss,'
                'train_accuracy,val_accuracy,time\n'
            )
        for row in losses:
            info.write(
                '{0},{1:.3f},{2:.3f},{3:.3f},{4:.3f},{5:.3f}\n'.format(*row)
            )


def _evaluate(sess, ops, validation_steps):

    val_loss, val_accuracy = 0.0, 0.0
    sess.run(ops['val_init'])

    for i in range(validation_steps):
        batch_loss, batch_accuracy = sess.run(
            [ops['log_loss'], ops['accuracy']],
            {'control/is_training:0': False}
        )
        val_loss += batch_loss
        val_accuracy += batch_accuracy

    val_loss /= validation_steps
    val_accuracy /= validation_steps
    return val_loss, val_accuracy


# it decides if training must stop
def _is_early_stopping(losses, patience=10, threshold=0.01):

    # get validation set accuracies
    accuracies = [x[4] for x in losses]

    if len(losses) > (patience + 4):
        # running average
        average = (accuracies[-(patience + 4)] +
                   accuracies[-(patience + 3)] +
                   accuracies[-(patience + 2)] +
                   accuracies[-(patience + 1)] +
                   accuracies[-patience])/5.0
        return accuracies[-1] < average + threshold
    else:
        return False


def _reduce_lr_on_plateau(
        sess, ops, losses,
        patience=10, threshold=0.01):

    # get validation set accuracies
    accuracies = [x[4] for x in losses]

    if len(losses) > (patience + 4):
        # running average
        average = (accuracies[-(patience + 4)] +
                   accuracies[-(patience + 3)] +
                   accuracies[-(patience + 2)] +
                   accuracies[-(patience + 1)] +
                   accuracies[-patience])/5.0
        if accuracies[-1] < (average + threshold):
            sess.run(ops['drop_learning_rate'])
            print('Learning rate is dropped!\n')

# Entry point: start training only when this code runs as the main module,
# not when it is imported from somewhere else.
if __name__ == '__main__':
    train()


Training logs and summaries will be in logs/run0
Saved model will be in saved/run0 

Training/validation evaluations will be in logs/run0/training_info.txt
Model config will be in logs/run0/model_config.txt 





Instructions for updating:
Use `tf.compat.v1.data.get_output_types(dataset)`.
Instructions for updating:
Use `tf.compat.v1.data.get_output_shapes(dataset)`.
Instructions for updating:
Use `tf.compat.v1.data.get_output_types(iterator)`.
Instructions for updating:
Use `tf.compat.v1.data.get_output_shapes(iterator)`.
Instructions for updating:
Use `tf.compat.v1.data.get_output_classes(iterator)`.
Instructions for updating:
Use keras.layers.BatchNormalization instead.  In particular, `tf.control_dependencies(tf.GraphKeys.UPDATE_OPS)` should not be used (consult the `tf.keras.layers.batch_normalization` documentation).
Instructions for updating:
Please use `layer.__call__` method instead.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `r

  0%|          | 1/500 [00:00<?, ?it/s]

epoch 1


100%|██████████| 500/500 [01:33<00:00,  5.48it/s]


loss: 4.897, val_loss: 4.325, acc: 0.049, val_acc: 0.090, time: 111.937



  0%|          | 2/500 [00:00<01:31,  5.44it/s]

epoch 2


100%|██████████| 500/500 [01:31<00:00,  5.46it/s]


loss: 4.332, val_loss: 4.210, acc: 0.087, val_acc: 0.103, time: 101.492



  0%|          | 2/500 [00:00<01:30,  5.50it/s]

epoch 3


100%|██████████| 500/500 [01:31<00:00,  5.47it/s]


loss: 4.191, val_loss: 4.128, acc: 0.103, val_acc: 0.113, time: 101.789



  0%|          | 2/500 [00:00<01:29,  5.57it/s]

epoch 4


100%|██████████| 500/500 [01:31<00:00,  5.46it/s]


loss: 4.148, val_loss: 4.233, acc: 0.110, val_acc: 0.103, time: 101.815



  0%|          | 2/500 [00:00<01:30,  5.48it/s]

epoch 5


100%|██████████| 500/500 [01:31<00:00,  5.43it/s]


loss: 4.101, val_loss: 4.120, acc: 0.115, val_acc: 0.115, time: 102.095



  0%|          | 2/500 [00:00<01:30,  5.49it/s]

epoch 6


100%|██████████| 500/500 [01:31<00:00,  5.41it/s]


loss: 4.083, val_loss: 4.006, acc: 0.119, val_acc: 0.133, time: 102.071



  0%|          | 2/500 [00:00<01:31,  5.43it/s]

epoch 7


100%|██████████| 500/500 [01:31<00:00,  5.45it/s]


loss: 4.058, val_loss: 4.082, acc: 0.123, val_acc: 0.120, time: 101.952



  0%|          | 2/500 [00:00<01:30,  5.53it/s]

epoch 8


100%|██████████| 500/500 [01:31<00:00,  5.51it/s]


loss: 4.049, val_loss: 3.910, acc: 0.121, val_acc: 0.148, time: 101.888



  0%|          | 2/500 [00:00<01:32,  5.36it/s]

epoch 9


100%|██████████| 500/500 [01:31<00:00,  5.47it/s]


loss: 4.047, val_loss: 4.001, acc: 0.126, val_acc: 0.137, time: 101.898



  0%|          | 2/500 [00:00<01:30,  5.48it/s]

epoch 10


100%|██████████| 500/500 [01:31<00:00,  5.48it/s]


loss: 4.032, val_loss: 4.114, acc: 0.125, val_acc: 0.107, time: 101.784

Learning rate is dropped!



  0%|          | 2/500 [00:00<01:31,  5.44it/s]

epoch 11


100%|██████████| 500/500 [01:31<00:00,  5.48it/s]


loss: 3.489, val_loss: 3.245, acc: 0.212, val_acc: 0.250, time: 101.503



  0%|          | 2/500 [00:00<01:32,  5.41it/s]

epoch 12


100%|██████████| 500/500 [01:31<00:00,  5.52it/s]


loss: 3.287, val_loss: 3.133, acc: 0.246, val_acc: 0.270, time: 101.586



  0%|          | 2/500 [00:00<01:30,  5.50it/s]

epoch 13


100%|██████████| 500/500 [01:31<00:00,  5.45it/s]


loss: 3.214, val_loss: 3.090, acc: 0.259, val_acc: 0.288, time: 101.879



  0%|          | 2/500 [00:00<01:30,  5.52it/s]

epoch 14


100%|██████████| 500/500 [01:31<00:00,  5.41it/s]


loss: 3.163, val_loss: 3.058, acc: 0.267, val_acc: 0.290, time: 102.000



  0%|          | 2/500 [00:00<01:32,  5.40it/s]

epoch 15


100%|██████████| 500/500 [01:31<00:00,  5.45it/s]


loss: 3.114, val_loss: 3.030, acc: 0.277, val_acc: 0.290, time: 102.106



  0%|          | 2/500 [00:00<01:31,  5.43it/s]

epoch 16


100%|██████████| 500/500 [01:31<00:00,  5.47it/s]


loss: 3.066, val_loss: 2.982, acc: 0.287, val_acc: 0.301, time: 102.036



  0%|          | 2/500 [00:00<01:31,  5.43it/s]

epoch 17


100%|██████████| 500/500 [01:31<00:00,  5.44it/s]


loss: 3.033, val_loss: 3.017, acc: 0.294, val_acc: 0.296, time: 102.081



  0%|          | 2/500 [00:00<01:31,  5.43it/s]

epoch 18


100%|██████████| 500/500 [01:31<00:00,  5.43it/s]


loss: 3.005, val_loss: 2.950, acc: 0.299, val_acc: 0.311, time: 102.128



  0%|          | 2/500 [00:00<01:31,  5.45it/s]

epoch 19


100%|██████████| 500/500 [01:31<00:00,  5.48it/s]


loss: 2.973, val_loss: 2.956, acc: 0.303, val_acc: 0.310, time: 101.987



  0%|          | 2/500 [00:00<01:31,  5.44it/s]

epoch 20


100%|██████████| 500/500 [01:31<00:00,  5.44it/s]


loss: 2.945, val_loss: 2.897, acc: 0.310, val_acc: 0.324, time: 101.946



  0%|          | 2/500 [00:00<01:30,  5.51it/s]

epoch 21


100%|██████████| 500/500 [01:31<00:00,  5.45it/s]


loss: 2.917, val_loss: 2.878, acc: 0.315, val_acc: 0.318, time: 101.940



  0%|          | 2/500 [00:00<01:33,  5.35it/s]

epoch 22


100%|██████████| 500/500 [01:31<00:00,  5.47it/s]


loss: 2.890, val_loss: 2.831, acc: 0.322, val_acc: 0.338, time: 101.892



  0%|          | 2/500 [00:00<01:32,  5.41it/s]

epoch 23


100%|██████████| 500/500 [01:31<00:00,  5.44it/s]


loss: 2.870, val_loss: 2.818, acc: 0.326, val_acc: 0.336, time: 102.045



  0%|          | 2/500 [00:00<01:31,  5.44it/s]

epoch 24


100%|██████████| 500/500 [01:31<00:00,  5.45it/s]


loss: 2.854, val_loss: 2.848, acc: 0.327, val_acc: 0.331, time: 102.115



  0%|          | 2/500 [00:00<01:30,  5.50it/s]

epoch 25


100%|██████████| 500/500 [01:31<00:00,  5.46it/s]


loss: 2.833, val_loss: 2.816, acc: 0.333, val_acc: 0.337, time: 102.016



  0%|          | 2/500 [00:00<01:31,  5.42it/s]

epoch 26


100%|██████████| 500/500 [01:31<00:00,  5.45it/s]


loss: 2.823, val_loss: 2.794, acc: 0.333, val_acc: 0.336, time: 102.140



  0%|          | 2/500 [00:00<01:31,  5.45it/s]

epoch 27


100%|██████████| 500/500 [01:31<00:00,  5.45it/s]


loss: 2.798, val_loss: 2.788, acc: 0.339, val_acc: 0.340, time: 102.133



  0%|          | 2/500 [00:00<01:31,  5.45it/s]

epoch 28


100%|██████████| 500/500 [01:31<00:00,  5.44it/s]


loss: 2.798, val_loss: 2.787, acc: 0.339, val_acc: 0.344, time: 102.058



  0%|          | 2/500 [00:00<01:32,  5.41it/s]

epoch 29


100%|██████████| 500/500 [01:31<00:00,  5.50it/s]


loss: 2.788, val_loss: 2.787, acc: 0.341, val_acc: 0.341, time: 101.546

Learning rate is dropped!



  0%|          | 2/500 [00:00<01:31,  5.45it/s]

epoch 30


100%|██████████| 500/500 [01:31<00:00,  5.46it/s]


loss: 2.471, val_loss: 2.448, acc: 0.412, val_acc: 0.416, time: 102.049



  0%|          | 2/500 [00:00<01:31,  5.43it/s]

epoch 31


100%|██████████| 500/500 [01:31<00:00,  5.45it/s]


loss: 2.367, val_loss: 2.409, acc: 0.432, val_acc: 0.422, time: 101.850



  0%|          | 2/500 [00:00<01:31,  5.45it/s]

epoch 32


100%|██████████| 500/500 [01:31<00:00,  5.47it/s]


loss: 2.316, val_loss: 2.402, acc: 0.443, val_acc: 0.430, time: 101.775



  0%|          | 2/500 [00:00<01:32,  5.37it/s]

epoch 33


100%|██████████| 500/500 [01:31<00:00,  5.43it/s]


loss: 2.278, val_loss: 2.410, acc: 0.451, val_acc: 0.422, time: 102.257



  0%|          | 2/500 [00:00<01:30,  5.50it/s]

epoch 34


100%|██████████| 500/500 [01:31<00:00,  5.42it/s]


loss: 2.266, val_loss: 2.371, acc: 0.453, val_acc: 0.429, time: 102.119



  0%|          | 2/500 [00:00<01:29,  5.59it/s]

epoch 35


100%|██████████| 500/500 [01:31<00:00,  5.45it/s]


loss: 2.241, val_loss: 2.384, acc: 0.460, val_acc: 0.432, time: 102.071



  0%|          | 2/500 [00:00<01:31,  5.42it/s]

epoch 36


100%|██████████| 500/500 [01:31<00:00,  5.42it/s]


loss: 2.227, val_loss: 2.356, acc: 0.463, val_acc: 0.437, time: 102.129



  0%|          | 2/500 [00:00<01:31,  5.44it/s]

epoch 37


100%|██████████| 500/500 [01:31<00:00,  5.49it/s]


loss: 2.210, val_loss: 2.340, acc: 0.465, val_acc: 0.437, time: 101.919



  0%|          | 2/500 [00:00<01:35,  5.22it/s]

epoch 38


100%|██████████| 500/500 [01:31<00:00,  5.44it/s]


loss: 2.199, val_loss: 2.342, acc: 0.468, val_acc: 0.439, time: 102.029



  0%|          | 2/500 [00:00<01:30,  5.51it/s]

epoch 39


100%|██████████| 500/500 [01:31<00:00,  5.51it/s]


loss: 2.191, val_loss: 2.336, acc: 0.470, val_acc: 0.439, time: 101.982



  0%|          | 2/500 [00:00<01:32,  5.40it/s]

epoch 40


100%|██████████| 500/500 [01:31<00:00,  5.45it/s]


loss: 2.178, val_loss: 2.357, acc: 0.472, val_acc: 0.438, time: 101.685

Learning rate is dropped!

