In [None]:
# GoogLeNet

GoogLeNet是Google研究院在ImageNet大规模视觉识别挑战赛2014（ILSVRC14）上提出的、代号为Inception的深度卷积神经网络结构的一个特例，最终在比赛中取得了6.67%的Top-5错误率，从而获得当年比赛冠军。GoogLeNet的设计基于以下动机：提升网络性能最直接的办法是增加网络的深度和宽度，但增加深度和宽度会带来巨大的参数量；一般的解决方法是将全连接或者一般卷积转化为稀疏连接，再利用密集矩阵的计算性能来计算稀疏矩阵，从而达到计算性能的提升。

## Inception架构

Inception架构的主要架构如图所示：
![](img/inception.PNG)
> 图片来源：[Going deeper with convolutions](https://arxiv.org/pdf/1409.4842v1.pdf)

上图（a）是一个简单版本，说明如下：
- 1、采用不同大小的卷积核表示不同的感受野，filter concatenation表示不同尺度特征的融合;
- 2、卷积核大小采用1/3/5是为了方便对齐：设定步长stride=1，并分别设定pad=0/1/2，卷积之后就可以得到相同维度的特征;
- 3、除了卷积核之外还加入了max pooling，因为它已被多种实现证实是有效的;
- 4、网络越到后面，特征的抽象程度越高，特征所涉及的感受野也越大，因此3×3和5×5卷积所占的比例需要增加。

考虑到5×5卷积核带来的巨大计算量，inception架构采用1×1卷积核来进行降维，得到的新架构如上图（b）所示。


## GoogLeNet架构

GoogLeNet整体架构如下图所示：
![](img/googlenet.jpg)
> 图片来源：[Going deeper with convolutions](https://arxiv.org/pdf/1409.4842v1.pdf)

说明如下：
- 1、GoogLeNet采用模块化设计，方便修改;
- 2、为了应对梯度消失问题，网络额外增加了两个辅助的softmax分类器，用来向网络前部的层传递梯度；这两个辅助分类器只用于训练，测试时需要去掉;
- 3、实验表明网络末尾采用average pooling替代全连接层可以提升top-1精度，网络内部依然采用了dropout。
- 4、所有的卷积都采用了修正线性激活（ReLU）。


## 代码实现


In [None]:

from __future__ import division, print_function, absolute_import

import os, tflearn, random, pickle, gzip
import numpy as np
from PIL import Image
from datetime import datetime
from collections import namedtuple

from tflearn.layers.core import input_data, dropout, fully_connected
from tflearn.layers.conv import conv_2d, max_pool_2d, avg_pool_2d
from tflearn.layers.normalization import local_response_normalization
from tflearn.layers.merge_ops import merge
from tflearn.layers.estimator import regression

# Small constant, presumably intended as a numerical-stability epsilon; it is
# not referenced anywhere in the reviewed portion of this file.
_EPSILON = 1e-8

# import tflearn.datasets.oxflower17 as oxflower17
# X, Y = oxflower17.load_data(one_hot=True, resize_pics=(227, 227))

# Hyper-parameter bundle consumed by GoogLeNet.__init__:
#   img_size            - side length used for both spatial dims of the input.
#   label_size          - number of units in the final softmax layer.
#   gpu_memory_fraction - declared here but not read by GoogLeNet.__init__.
#   model_name          - names the checkpoint directory and the training run.
#   gpu_usage           - forwarded to tflearn as `gpu_memory_fraction`.
HParams = namedtuple('HParams',
                     'img_size, label_size, gpu_memory_fraction, model_name, gpu_usage')


class GoogLeNet(object):

    def __init__(self, hps):
        """Build the GoogLeNet graph, wrap it in a tflearn DNN and restore a checkpoint.

        Arguments:
            hps: object exposing `img_size`, `label_size`, `model_name` and
                `gpu_usage` attributes (see `HParams`).

        Side effects:
            Creates the directory `models/<model_name>` when it does not exist
            and loads the checkpoint returned by `get_checkpoint`, if any.
        """
        # Control parameters copied from the hyper-parameter bundle.
        self.img_size = hps.img_size
        self.label_size = hps.label_size
        self.model_name = hps.model_name
        self.gpu_usage = hps.gpu_usage

        # Network definition.
        # NOTE(review): `gpu_usage` is what gets passed as the GPU memory
        # fraction; `hps.gpu_memory_fraction` is never read here - confirm.
        tflearn.config.init_graph(gpu_memory_fraction=self.gpu_usage)
        network = input_data(shape=[None, self.img_size, self.img_size, 3])

        # Stem: 7x7/2 conv -> 3x3/2 max-pool -> LRN -> 1x1 conv -> 3x3 conv -> LRN -> 3x3/2 max-pool.
        conv1_7_7 = conv_2d(network, 64, 7, strides=2, activation='relu', name = 'conv1_7_7_s2')
        pool1_3_3 = max_pool_2d(conv1_7_7, 3,strides=2)
        pool1_3_3 = local_response_normalization(pool1_3_3)
        conv2_3_3_reduce = conv_2d(pool1_3_3, 64,1, activation='relu',name = 'conv2_3_3_reduce')
        conv2_3_3 = conv_2d(conv2_3_3_reduce, 192,3, activation='relu', name='conv2_3_3')
        conv2_3_3 = local_response_normalization(conv2_3_3)
        pool2_3_3 = max_pool_2d(conv2_3_3, kernel_size=3, strides=2, name='pool2_3_3_s2')

        # Inception 3a. Every inception module below has the same four
        # branches: 1x1 conv, 1x1 reduce -> 3x3 conv, 1x1 reduce -> 5x5 conv,
        # and 3x3/1 max-pool -> 1x1 conv, concatenated along axis 3.
        inception_3a_1_1 = conv_2d(pool2_3_3, 64, 1, activation='relu', name='inception_3a_1_1')
        inception_3a_3_3_reduce = conv_2d(pool2_3_3, 96,1, activation='relu', name='inception_3a_3_3_reduce')
        inception_3a_3_3 = conv_2d(inception_3a_3_3_reduce, 128,filter_size=3,  activation='relu', name = 'inception_3a_3_3')
        inception_3a_5_5_reduce = conv_2d(pool2_3_3,16, filter_size=1,activation='relu', name ='inception_3a_5_5_reduce' )
        inception_3a_5_5 = conv_2d(inception_3a_5_5_reduce, 32, filter_size=5, activation='relu', name= 'inception_3a_5_5')
        inception_3a_pool = max_pool_2d(pool2_3_3, kernel_size=3, strides=1, )
        inception_3a_pool_1_1 = conv_2d(inception_3a_pool, 32, filter_size=1, activation='relu', name='inception_3a_pool_1_1')

        # Concatenate the four 3a branches.
        inception_3a_output = merge([inception_3a_1_1, inception_3a_3_3, inception_3a_5_5, inception_3a_pool_1_1], mode='concat', axis=3)

        # Inception 3b.
        inception_3b_1_1 = conv_2d(inception_3a_output, 128,filter_size=1,activation='relu', name= 'inception_3b_1_1' )
        inception_3b_3_3_reduce = conv_2d(inception_3a_output, 128, filter_size=1, activation='relu', name='inception_3b_3_3_reduce')
        inception_3b_3_3 = conv_2d(inception_3b_3_3_reduce, 192, filter_size=3,  activation='relu',name='inception_3b_3_3')
        inception_3b_5_5_reduce = conv_2d(inception_3a_output, 32, filter_size=1, activation='relu', name = 'inception_3b_5_5_reduce')
        # NOTE(review): this is the only conv in the network built without an
        # `activation` argument - presumably an omission; changing it would
        # alter models trained with this graph, so confirm before fixing.
        inception_3b_5_5 = conv_2d(inception_3b_5_5_reduce, 96, filter_size=5,  name = 'inception_3b_5_5')
        inception_3b_pool = max_pool_2d(inception_3a_output, kernel_size=3, strides=1,  name='inception_3b_pool')
        inception_3b_pool_1_1 = conv_2d(inception_3b_pool, 64, filter_size=1,activation='relu', name='inception_3b_pool_1_1')

        # Concatenate the four 3b branches.
        inception_3b_output = merge([inception_3b_1_1, inception_3b_3_3, inception_3b_5_5, inception_3b_pool_1_1], mode='concat',axis=3,name='inception_3b_output')

        # 3x3/2 max-pool, then inception 4a.
        pool3_3_3 = max_pool_2d(inception_3b_output, kernel_size=3, strides=2, name='pool3_3_3')
        inception_4a_1_1 = conv_2d(pool3_3_3, 192, filter_size=1, activation='relu', name='inception_4a_1_1')
        inception_4a_3_3_reduce = conv_2d(pool3_3_3, 96, filter_size=1, activation='relu', name='inception_4a_3_3_reduce')
        inception_4a_3_3 = conv_2d(inception_4a_3_3_reduce, 208, filter_size=3,  activation='relu', name='inception_4a_3_3')
        inception_4a_5_5_reduce = conv_2d(pool3_3_3, 16, filter_size=1, activation='relu', name='inception_4a_5_5_reduce')
        inception_4a_5_5 = conv_2d(inception_4a_5_5_reduce, 48, filter_size=5,  activation='relu', name='inception_4a_5_5')
        inception_4a_pool = max_pool_2d(pool3_3_3, kernel_size=3, strides=1,  name='inception_4a_pool')
        inception_4a_pool_1_1 = conv_2d(inception_4a_pool, 64, filter_size=1, activation='relu', name='inception_4a_pool_1_1')

        inception_4a_output = merge([inception_4a_1_1, inception_4a_3_3, inception_4a_5_5, inception_4a_pool_1_1], mode='concat', axis=3, name='inception_4a_output')


        # Inception 4b.
        # NOTE(review): the layer name below repeats 'inception_4a_1_1' from
        # module 4a - looks like a copy/paste slip; renaming would change the
        # variable names stored in existing checkpoints, so confirm first.
        inception_4b_1_1 = conv_2d(inception_4a_output, 160, filter_size=1, activation='relu', name='inception_4a_1_1')
        inception_4b_3_3_reduce = conv_2d(inception_4a_output, 112, filter_size=1, activation='relu', name='inception_4b_3_3_reduce')
        inception_4b_3_3 = conv_2d(inception_4b_3_3_reduce, 224, filter_size=3, activation='relu', name='inception_4b_3_3')
        inception_4b_5_5_reduce = conv_2d(inception_4a_output, 24, filter_size=1, activation='relu', name='inception_4b_5_5_reduce')
        inception_4b_5_5 = conv_2d(inception_4b_5_5_reduce, 64, filter_size=5,  activation='relu', name='inception_4b_5_5')

        inception_4b_pool = max_pool_2d(inception_4a_output, kernel_size=3, strides=1,  name='inception_4b_pool')
        inception_4b_pool_1_1 = conv_2d(inception_4b_pool, 64, filter_size=1, activation='relu', name='inception_4b_pool_1_1')

        inception_4b_output = merge([inception_4b_1_1, inception_4b_3_3, inception_4b_5_5, inception_4b_pool_1_1], mode='concat', axis=3, name='inception_4b_output')


        # Inception 4c.
        inception_4c_1_1 = conv_2d(inception_4b_output, 128, filter_size=1, activation='relu',name='inception_4c_1_1')
        inception_4c_3_3_reduce = conv_2d(inception_4b_output, 128, filter_size=1, activation='relu', name='inception_4c_3_3_reduce')
        inception_4c_3_3 = conv_2d(inception_4c_3_3_reduce, 256,  filter_size=3, activation='relu', name='inception_4c_3_3')
        inception_4c_5_5_reduce = conv_2d(inception_4b_output, 24, filter_size=1, activation='relu', name='inception_4c_5_5_reduce')
        inception_4c_5_5 = conv_2d(inception_4c_5_5_reduce, 64,  filter_size=5, activation='relu', name='inception_4c_5_5')

        inception_4c_pool = max_pool_2d(inception_4b_output, kernel_size=3, strides=1)
        inception_4c_pool_1_1 = conv_2d(inception_4c_pool, 64, filter_size=1, activation='relu', name='inception_4c_pool_1_1')

        inception_4c_output = merge([inception_4c_1_1, inception_4c_3_3, inception_4c_5_5, inception_4c_pool_1_1], mode='concat', axis=3,name='inception_4c_output')

        # Inception 4d.
        inception_4d_1_1 = conv_2d(inception_4c_output, 112, filter_size=1, activation='relu', name='inception_4d_1_1')
        inception_4d_3_3_reduce = conv_2d(inception_4c_output, 144, filter_size=1, activation='relu', name='inception_4d_3_3_reduce')
        inception_4d_3_3 = conv_2d(inception_4d_3_3_reduce, 288, filter_size=3, activation='relu', name='inception_4d_3_3')
        inception_4d_5_5_reduce = conv_2d(inception_4c_output, 32, filter_size=1, activation='relu', name='inception_4d_5_5_reduce')
        inception_4d_5_5 = conv_2d(inception_4d_5_5_reduce, 64, filter_size=5,  activation='relu', name='inception_4d_5_5')
        inception_4d_pool = max_pool_2d(inception_4c_output, kernel_size=3, strides=1,  name='inception_4d_pool')
        inception_4d_pool_1_1 = conv_2d(inception_4d_pool, 64, filter_size=1, activation='relu', name='inception_4d_pool_1_1')

        inception_4d_output = merge([inception_4d_1_1, inception_4d_3_3, inception_4d_5_5, inception_4d_pool_1_1], mode='concat', axis=3, name='inception_4d_output')

        # Inception 4e.
        inception_4e_1_1 = conv_2d(inception_4d_output, 256, filter_size=1, activation='relu', name='inception_4e_1_1')
        inception_4e_3_3_reduce = conv_2d(inception_4d_output, 160, filter_size=1, activation='relu', name='inception_4e_3_3_reduce')
        inception_4e_3_3 = conv_2d(inception_4e_3_3_reduce, 320, filter_size=3, activation='relu', name='inception_4e_3_3')
        inception_4e_5_5_reduce = conv_2d(inception_4d_output, 32, filter_size=1, activation='relu', name='inception_4e_5_5_reduce')
        inception_4e_5_5 = conv_2d(inception_4e_5_5_reduce, 128,  filter_size=5, activation='relu', name='inception_4e_5_5')
        inception_4e_pool = max_pool_2d(inception_4d_output, kernel_size=3, strides=1,  name='inception_4e_pool')
        inception_4e_pool_1_1 = conv_2d(inception_4e_pool, 128, filter_size=1, activation='relu', name='inception_4e_pool_1_1')


        inception_4e_output = merge([inception_4e_1_1, inception_4e_3_3, inception_4e_5_5,inception_4e_pool_1_1],axis=3, mode='concat')

        # 3x3/2 max-pool between stage 4 and stage 5 (layer is named 'pool_3_3').
        pool4_3_3 = max_pool_2d(inception_4e_output, kernel_size=3, strides=2, name='pool_3_3')


        # Inception 5a.
        inception_5a_1_1 = conv_2d(pool4_3_3, 256, filter_size=1, activation='relu', name='inception_5a_1_1')
        inception_5a_3_3_reduce = conv_2d(pool4_3_3, 160, filter_size=1, activation='relu', name='inception_5a_3_3_reduce')
        inception_5a_3_3 = conv_2d(inception_5a_3_3_reduce, 320, filter_size=3, activation='relu', name='inception_5a_3_3')
        inception_5a_5_5_reduce = conv_2d(pool4_3_3, 32, filter_size=1, activation='relu', name='inception_5a_5_5_reduce')
        inception_5a_5_5 = conv_2d(inception_5a_5_5_reduce, 128, filter_size=5,  activation='relu', name='inception_5a_5_5')
        inception_5a_pool = max_pool_2d(pool4_3_3, kernel_size=3, strides=1,  name='inception_5a_pool')
        inception_5a_pool_1_1 = conv_2d(inception_5a_pool, 128, filter_size=1,activation='relu', name='inception_5a_pool_1_1')

        inception_5a_output = merge([inception_5a_1_1, inception_5a_3_3, inception_5a_5_5, inception_5a_pool_1_1], axis=3,mode='concat')


        # Inception 5b.
        inception_5b_1_1 = conv_2d(inception_5a_output, 384, filter_size=1,activation='relu', name='inception_5b_1_1')
        inception_5b_3_3_reduce = conv_2d(inception_5a_output, 192, filter_size=1, activation='relu', name='inception_5b_3_3_reduce')
        inception_5b_3_3 = conv_2d(inception_5b_3_3_reduce, 384,  filter_size=3,activation='relu', name='inception_5b_3_3')
        inception_5b_5_5_reduce = conv_2d(inception_5a_output, 48, filter_size=1, activation='relu', name='inception_5b_5_5_reduce')
        inception_5b_5_5 = conv_2d(inception_5b_5_5_reduce,128, filter_size=5,  activation='relu', name='inception_5b_5_5' )
        inception_5b_pool = max_pool_2d(inception_5a_output, kernel_size=3, strides=1,  name='inception_5b_pool')
        inception_5b_pool_1_1 = conv_2d(inception_5b_pool, 128, filter_size=1, activation='relu', name='inception_5b_pool_1_1')
        inception_5b_output = merge([inception_5b_1_1, inception_5b_3_3, inception_5b_5_5, inception_5b_pool_1_1], axis=3, mode='concat')

        # 7x7 average pooling followed by dropout.
        # NOTE(review): tflearn's `dropout` second argument is presumably a
        # keep probability - confirm that 0.4 is the intended value.
        pool5_7_7 = avg_pool_2d(inception_5b_output, kernel_size=7, strides=1)
        pool5_7_7 = dropout(pool5_7_7, 0.4)

        # Output stage: softmax classifier trained with momentum and
        # categorical cross-entropy. Note that `loss` holds the softmax layer
        # output, not a loss value.
        loss = fully_connected(pool5_7_7, self.label_size ,activation='softmax')
        network = regression(loss, optimizer='momentum',
                             loss='categorical_crossentropy',
                             learning_rate=0.001)

        # Checkpoints are written under models/<model_name>/ with prefix 'model'.
        model_path = 'models/%s' % self.model_name
        if not os.path.exists(model_path): os.makedirs(model_path)
        self.model = tflearn.DNN(network, checkpoint_path=model_path+'/model',
                        max_checkpoints=3, tensorboard_verbose=2)
        # Resume from an existing checkpoint when one is found on disk.
        ckpt = self.get_checkpoint(model_path)
        if ckpt:    
            self.model.load(ckpt)
            print("load existing checkpoint from %s" % ckpt)



    def get_checkpoint(self, model_path):
        """Return the newest checkpoint listed in `<model_path>/checkpoint` that exists.

        The `checkpoint` index file is expected to hold one `key: "value"`
        entry per line. The first entry is skipped, and the remaining entries
        are probed from last to first.

        Arguments:
            model_path: `str`. Directory holding the `checkpoint` index file.

        Returns:
            `"<model_path>/<entry>"` for the first entry found on disk, or
            `None` when the index file is missing or no entry exists.
        """
        ckpt_path = '%s/checkpoint' % model_path
        if not os.path.exists(ckpt_path):
            return None

        entries = []
        with open(ckpt_path, 'r') as f:
            for line in f:
                # Split on the first ':' only, so values that themselves
                # contain a colon (e.g. Windows drive letters) stay intact;
                # lines without a separator (blank lines) are ignored.
                _, sep, value = line.partition(':')
                if not sep:
                    continue
                entries.append(value.replace('"', '').strip())

        # Drop the first entry and probe the rest newest-first.
        for entry in entries[1:][::-1]:
            path = "%s/%s" % (model_path, entry)
            # A checkpoint may be stored either as a single file named exactly
            # like the entry, or as a prefix with a companion '.index' file.
            if os.path.exists(path) or os.path.exists(path + '.index'):
                return path
        return None



    def fit(self, X, Y, n_epoch=1000):
        print("fit data dim: X=%s, Y=%s" % (np.shape(X), np.shape(Y)))
        self.model.fit(X, Y, n_epoch=n_epoch, validation_set=0.1, shuffle=True,
                  show_metric=True, batch_size=32, snapshot_step=200,
                  snapshot_epoch=True, run_id=self.model_name)
        


    def predict(self, X):
        return self.model.predict(X)



    def get_data(self, dirname='17flowers', resize_pics=(227, 227), down_sampling=None):
        """Return sample file names for the dataset under `images/<dirname>/`.

        Directory entries whose name contains "samples_" are returned as
        `images/<dirname>/<entry>`. When none are present, the result of
        `image_dirs_to_samples` (called with `self.label_size` and the given
        resize / down-sampling options) is returned instead.
        """
        cached = []
        for entry in os.listdir("images/%s/" % dirname):
            if "samples_" in entry:
                cached.append("images/%s/%s" % (dirname, entry))
        if cached:
            return cached

        # Nothing found on disk: delegate to the sample builder.
        return image_dirs_to_samples(
            dirname, self.label_size,
            resize=resize_pics, convert_gray=False,
            filetypes=['.jpg', '.jpeg'],
            down_sampling=down_sampling, categorical_Y=True,
            shuffle_data=True)



    def to_categorical(y, nb_classes):
        """ to_categorical.

        Turn a vector of integer class ids into a one-hot (binary) class
        matrix, for use with categorical_crossentropy.

        Arguments:
            y: `array`. Class vector to convert.
            nb_classes: `int`. Total number of classes. When falsy, it is
                taken to be the largest class id plus one.

        Returns:
            A float array with one row per label and `nb_classes` columns.
        """
        labels = np.asarray(y, dtype='int32')
        if not nb_classes:
            nb_classes = np.max(labels) + 1
        one_hot = np.zeros((len(labels), nb_classes))
        for row, label in enumerate(labels):
            one_hot[row, label] = 1.
        return one_hot


    # =====================
    #    SEQUENCES UTILS
    # =====================


    def pad_sequences(sequences, maxlen=None, dtype='int32', padding='post',
                      truncating='post', value=0.):
        """ pad_sequences.

        Pad each sequence to the same length: the length of the longest sequence.
        If maxlen is provided, any sequence longer than maxlen is truncated to
        maxlen. Truncation happens off either the beginning or the end (default)
        of the sequence. Supports pre-padding and post-padding (default).

        Arguments:
            sequences: list of lists where each element is a sequence.
            maxlen: int, maximum length. When None, the length of the longest
                sequence is used (0 for an empty list of sequences).
            dtype: type to cast the resulting sequence.
            padding: 'pre' or 'post', pad either before or after each sequence.
            truncating: 'pre' or 'post', remove values from sequences larger than
                maxlen either in the beginning or in the end of the sequence
            value: float, value to pad the sequences to the desired value.

        Returns:
            x: `numpy array` with dimensions (number_of_sequences, maxlen)

        Raises:
            ValueError: if `truncating` or `padding` is neither 'pre' nor
                'post' (only checked when a non-empty sequence is processed).

        Credits: From Keras `pad_sequences` function.
        """
        lengths = [len(s) for s in sequences]

        nb_samples = len(sequences)
        if maxlen is None:
            # np.max raises on an empty list; an empty batch has width 0.
            maxlen = np.max(lengths) if lengths else 0

        x = (np.ones((nb_samples, maxlen)) * value).astype(dtype)
        for idx, s in enumerate(sequences):
            if len(s) == 0:
                continue  # empty sequence: the row keeps the padding value
            if truncating == 'pre':
                trunc = s[-maxlen:]
            elif truncating == 'post':
                trunc = s[:maxlen]
            else:
                # Report the offending `truncating` value (not `padding`).
                raise ValueError("Truncating type '%s' not understood" % truncating)

            if padding == 'post':
                x[idx, :len(trunc)] = trunc
            elif padding == 'pre':
                x[idx, -len(trunc):] = trunc
            else:
                raise ValueError("Padding type '%s' not understood" % padding)
        return x


    def string_to_semi_redundant_sequences(string, seq_maxlen=25, redun_step=3, char_idx=None):
        """ string_to_semi_redundant_sequences.

        Vectorize a string and returns parsed sequences and targets, along with
        the associated dictionary.

        Windows of `seq_maxlen` characters are taken every `redun_step`
        characters; the character right after each window is its target.

        Arguments:
            string: `str`. Lower-case text from input text file.
            seq_maxlen: `int`. Maximum length of a sequence. Default: 25.
            redun_step: `int`. Redundancy step. Default: 3.
            char_idx: 'dict'. A dictionary to convert chars to positions. Will be automatically generated if None

        Returns:
            A tuple: (inputs, targets, dictionary). `inputs` is a boolean
            array of shape (n_sequences, seq_maxlen, n_chars) and `targets` a
            boolean array of shape (n_sequences, n_chars), both one-hot.
        """

        print("Vectorizing text...")

        if char_idx is None:
            char_idx = chars_to_dictionary(string)

        len_chars = len(char_idx)

        sequences = []
        next_chars = []
        for i in range(0, len(string) - seq_maxlen, redun_step):
            sequences.append(string[i: i + seq_maxlen])
            next_chars.append(string[i + seq_maxlen])

        # Use the builtin `bool`: the `np.bool` alias was removed from NumPy.
        X = np.zeros((len(sequences), seq_maxlen, len_chars), dtype=bool)
        Y = np.zeros((len(sequences), len_chars), dtype=bool)
        for i, seq in enumerate(sequences):
            for t, char in enumerate(seq):
                X[i, t, char_idx[char]] = 1
            Y[i, char_idx[next_chars[i]]] = 1

        print("Text total length: {:,}".format(len(string)))
        print("Distinct chars   : {:,}".format(len_chars))
        print("Total sequences  : {:,}".format(len(sequences)))

        return X, Y, char_idx


    def textfile_to_semi_redundant_sequences(path, seq_maxlen=25, redun_step=3,
                                             to_lower_case=False, pre_defined_char_idx=None):
        """ Vectorize Text file.

        Reads the whole file at `path`, optionally lower-cases it, and passes
        the text to `string_to_semi_redundant_sequences`.

        Arguments:
            path: `str`. Path of the text file to read.
            seq_maxlen: `int`. Maximum length of a sequence. Default: 25.
            redun_step: `int`. Redundancy step. Default: 3.
            to_lower_case: `bool`. If True, lower-case the text first.
            pre_defined_char_idx: `dict` or None. Forwarded as `char_idx`.

        Returns:
            Whatever `string_to_semi_redundant_sequences` returns.
        """
        # Context manager so the file handle is closed deterministically.
        with open(path) as f:
            text = f.read()
        if to_lower_case:
            text = text.lower()
        return string_to_semi_redundant_sequences(text, seq_maxlen, redun_step, pre_defined_char_idx)


    def chars_to_dictionary(string):
        """ Map every distinct character of `string` to an integer index.

        Indices follow the sorted order of the characters, so the same set of
        characters always yields the same dictionary.
        """
        ordered_chars = sorted(set(string))
        return dict(zip(ordered_chars, range(len(ordered_chars))))


    def random_sequence_from_string(string, seq_maxlen):
        """ Return a slice of `seq_maxlen` characters taken at a random offset.

        The offset is drawn with `random.randint` from
        `[0, len(string) - seq_maxlen - 1]`.
        """
        last_start = len(string) - seq_maxlen - 1
        start = random.randint(0, last_start)
        stop = start + seq_maxlen
        return string[start:stop]


    def random_sequence_from_textfile(path, seq_maxlen):
        """ Return a random `seq_maxlen`-character slice of the file at `path`.

        The whole file is read, then `random_sequence_from_string` picks the
        slice.
        """
        # Context manager so the file handle is closed deterministically.
        with open(path) as f:
            text = f.read()
        return random_sequence_from_string(text, seq_maxlen)

    # Optional dependency: use TensorFlow's contrib `VocabularyProcessor` as
    # the base class when it can be imported; on any failure fall back to
    # `object` so that the class statement below still executes.
    try:
        from tensorflow.contrib.learn.python.learn.preprocessing.text import \
            VocabularyProcessor as _VocabularyProcessor
    except Exception:
        _VocabularyProcessor = object


    # Mirroring TensorFLow `VocabularyProcessor`
    class VocabularyProcessor(_VocabularyProcessor):
        """ Vocabulary Processor.

        Maps documents to sequences of word ids. Every method is a direct
        pass-through to the `_VocabularyProcessor` base class.

        NOTE(review): when the TensorFlow import failed and the base class is
        `object`, the constructor call and every delegated method will fail,
        since `object` provides none of them - confirm this fallback is only
        meant to keep the module importable.

        Arguments:
            max_document_length: Maximum length of documents.
                if documents are longer, they will be trimmed, if shorter - padded.
            min_frequency: Minimum frequency of words in the vocabulary.
            vocabulary: CategoricalVocabulary object.
            tokenizer_fn: forwarded unchanged to the base class; presumably a
                custom tokenizer callable - verify against the base class.

        Attributes:
            vocabulary_: CategoricalVocabulary object.

        """

        def __init__(self,
                     max_document_length,
                     min_frequency=0,
                     vocabulary=None,
                     tokenizer_fn=None):
            # Forward all four arguments positionally to the base constructor.
            super(VocabularyProcessor, self).__init__(max_document_length,
                                                      min_frequency,
                                                      vocabulary,
                                                      tokenizer_fn)

        def fit(self, raw_documents, unused_y=None):
            """ fit.

            Learn a vocabulary dictionary of all tokens in the raw documents.

            Arguments:
                raw_documents: An iterable which yield either str or unicode.
                unused_y: to match fit format signature of estimators.

            Returns:
                self
            """
            return super(VocabularyProcessor, self).fit(raw_documents, unused_y)

        def fit_transform(self, raw_documents, unused_y=None):
            """ fit_transform.

            Learn the vocabulary dictionary and return indices of words.

            Arguments:
                raw_documents: An iterable which yield either str or unicode.
                unused_y: to match fit_transform signature of estimators.

            Returns:
                X: iterable, [n_samples, max_document_length] Word-id matrix.
            """
            return super(VocabularyProcessor, self).fit_transform(raw_documents,
                                                                  unused_y)

        def transform(self, raw_documents):
            """ transform.

            Transform documents to word-id matrix.

            Convert words to ids with vocabulary fitted with fit or the one
            provided in the constructor.

            Arguments:
                raw_documents: An iterable which yield either str or unicode.

            Yields:
                X: iterable, [n_samples, max_document_length] Word-id matrix.
            """
            return super(VocabularyProcessor, self).transform(raw_documents)

        def reverse(self, documents):
            """ reverse.

            Reverses output of vocabulary mapping to words.

            Arguments:
                documents: iterable, list of class ids.

            Returns:
                Iterator over mapped in words documents.
            """
            return super(VocabularyProcessor, self).reverse(documents)

        def save(self, filename):
            """ save.

            Saves vocabulary processor into given file.

            Arguments:
                filename: Path to output file.
            """
            super(VocabularyProcessor, self).save(filename)

        @classmethod
        def restore(cls, filename):
            """ restore.

            Restores vocabulary processor from given file.

            Arguments:
                filename: Path to file to load from.

            Returns:
                VocabularyProcessor object.
            """
            return super(VocabularyProcessor, cls).restore(filename)


    # ===================
    #    IMAGES UTILS
    # ===================

    def build_hdf5_image_dataset(target_path, image_shape, output_path='dataset.h5',
                                 mode='file', categorical_labels=True,
                                 normalize=True, grayscale=False,
                                 files_extension=None, chunks=True):
        """ Build HDF5 Image Dataset.

        Build an HDF5 dataset by providing either a root folder or a plain text
        file with images path and class id. The output file holds two
        datasets: 'X' (the images) and 'Y' (the labels).

        'folder' mode: Root folder should be arranged as follow:
        ```
        ROOT_FOLDER -> SUBFOLDER_0 (CLASS 0) -> CLASS0_IMG1.jpg
                                             -> CLASS0_IMG2.jpg
                                             -> ...
                    -> SUBFOLDER_1 (CLASS 1) -> CLASS1_IMG1.jpg
                                             -> ...
                    -> ...
        ```
        Note that if sub-folders are not integers from 0 to n_classes, an id will
        be assigned to each sub-folder following alphabetical order.

        'file' mode: Plain text file should be formatted as follow (blank
        lines are ignored):
        ```
        /path/to/img1 class_id
        /path/to/img2 class_id
        /path/to/img3 class_id
        ```

        Examples:
            ```
            # Load path/class_id image file:
            dataset_file = 'my_dataset.txt'

            # Build a HDF5 dataset (only required once)
            from tflearn.data_utils import build_hdf5_image_dataset
            build_hdf5_image_dataset(dataset_file, image_shape=(128, 128),
                                     mode='file', output_path='dataset.h5',
                                     categorical_labels=True, normalize=True)

            # Load HDF5 dataset (read-only: 'w' would truncate the file)
            import h5py
            h5f = h5py.File('dataset.h5', 'r')
            X = h5f['X']
            Y = h5f['Y']

            # Build neural network and train
            network = ...
            model = DNN(network, ...)
            model.fit(X, Y)
            ```

        Arguments:
            target_path: `str`. Path of root folder or images plain text file.
            image_shape: `tuple (height, width)`. The images shape. Images that
                doesn't match that shape will be resized.
            output_path: `str`. The output path for the hdf5 dataset. Default:
                'dataset.h5'
            mode: `str` in ['file', 'folder']. The data source mode. 'folder'
                accepts a root folder with each of his sub-folder representing a
                class containing the images to classify.
                'file' accepts a single plain text file that contains every
                image path with their class id.
                Default: 'file'.
            categorical_labels: `bool`. If True, labels are converted to binary
                vectors.
            normalize: `bool`. If True, normalize all pictures by dividing
                every image array by 255.
            grayscale: `bool`. If true, images are converted to grayscale.
            files_extension: `list of str`. A list of allowed image file
                extension, for example ['.jpg', '.jpeg', '.png']. If None,
                all files are allowed. Only applied in 'folder' mode.
            chunks: `bool` or `list of int`. Whether to chunks the dataset or not.
                Additionaly, a specific shape for each chunk can be provided.

        """
        import h5py

        assert image_shape, "Image shape must be defined."
        assert image_shape[0] and image_shape[1], \
            "Image shape error. It must be a tuple of int: ('height', 'width')."
        assert mode in ['folder', 'file'], "`mode` arg must be 'folder' or 'file'"

        if mode == 'folder':
            images, labels = directory_to_samples(target_path,
                                                  flags=files_extension)
        else:
            images, labels = [], []
            with open(target_path, 'r') as f:
                for line in f:
                    fields = line.split()
                    if not fields:
                        continue  # tolerate blank lines in the list file
                    images.append(fields[0])
                    labels.append(int(fields[1]))

        n_classes = np.max(labels) + 1

        # `image_shape` is (height, width), matching the row/column layout of
        # the arrays stored in the 'X' dataset.
        target_height, target_width = image_shape[0], image_shape[1]
        n_images = len(images)
        if grayscale:
            d_imgshape = (n_images, target_height, target_width)
        else:
            d_imgshape = (n_images, target_height, target_width, 3)
        d_labelshape = (n_images, n_classes) \
            if categorical_labels else (n_images, )

        # Context manager so the HDF5 file is flushed and closed even when an
        # image fails to load or convert.
        with h5py.File(output_path, 'w') as dataset:
            dataset.create_dataset('X', d_imgshape, chunks=chunks)
            dataset.create_dataset('Y', d_labelshape, chunks=chunks)

            for i, image_path in enumerate(images):
                img = load_image(image_path)
                # PIL reports size as (width, height); resize_image also takes
                # width first, so the (height, width) shape is swapped here.
                width, height = img.size
                if width != target_width or height != target_height:
                    img = resize_image(img, target_width, target_height)
                if grayscale:
                    img = convert_color(img, 'L')
                elif img.mode == 'L':
                    img = convert_color(img, 'RGB')

                img = pil_to_nparray(img)
                if normalize:
                    img /= 255.
                dataset['X'][i] = img
                if categorical_labels:
                    dataset['Y'][i] = to_categorical([labels[i]], n_classes)[0]
                else:
                    dataset['Y'][i] = labels[i]

    def get_img_channel(image_path):
        """
        Load an image and return its number of channels.

        The channel count is read from the third dimension of the image
        array. Any error while loading or inspecting the image (including an
        array that has no third dimension) yields 1.

        :param image_path: path of the image to inspect
        :return: the channel count of the image, or 1 on failure
        """
        try:
            img = load_image(image_path)
            img = pil_to_nparray(img)
            channel = img.shape[2]
        except Exception:
            # Best-effort fallback; unlike a bare `except:`, this lets
            # KeyboardInterrupt and SystemExit propagate.
            channel = 1
        return channel

    def image_preloader(target_path, image_shape, mode='file', normalize=True,
                        grayscale=False, categorical_labels=True,
                        files_extension=None, filter_channel=False):
        """ Image PreLoader.

        Create a python array (`Preloader`) that loads images on the fly (from
        disk or url). There is two ways to provide image samples 'folder' or
        'file', see the specifications below.

        'folder' mode: Load images from disk, given a root folder. This folder
        should be arranged as follow:
        ```
        ROOT_FOLDER -> SUBFOLDER_0 (CLASS 0) -> CLASS0_IMG1.jpg
                                             -> CLASS0_IMG2.jpg
                                             -> ...
                    -> SUBFOLDER_1 (CLASS 1) -> CLASS1_IMG1.jpg
                                             -> ...
                    -> ...
        ```
        Note that if sub-folders are not integers from 0 to n_classes, an id will
        be assigned to each sub-folder following alphabetical order.

        'file' mode: A plain text file listing every image path and class id.
        This file should be formatted as follow (blank lines are ignored):
        ```
        /path/to/img1 class_id
        /path/to/img2 class_id
        /path/to/img3 class_id
        ```

        Note that load images on the fly and convert is time inefficient,
        so you can instead use `build_hdf5_image_dataset` to build a HDF5 dataset
        that enable fast retrieval (this function takes similar arguments).

        Examples:
            ```
            # Load path/class_id image file:
            dataset_file = 'my_dataset.txt'

            # Build the preloader array, resize images to 128x128
            from tflearn.data_utils import image_preloader
            X, Y = image_preloader(dataset_file, image_shape=(128, 128),
                                   mode='file', categorical_labels=True,
                                   normalize=True)

            # Build neural network and train
            network = ...
            model = DNN(network, ...)
            model.fit(X, Y)
            ```

        Arguments:
            target_path: `str`. Path of root folder or images plain text file.
            image_shape: `tuple (height, width)`. The images shape. Images that
                doesn't match that shape will be resized.
            mode: `str` in ['file', 'folder']. The data source mode. 'folder'
                accepts a root folder with each of his sub-folder representing a
                class containing the images to classify.
                'file' accepts a single plain text file that contains every
                image path with their class id.
                Default: 'file'.
            categorical_labels: `bool`. If True, labels are converted to binary
                vectors.
            normalize: `bool`. If True, normalize all pictures by dividing
                every image array by 255.
            grayscale: `bool`. If true, images are converted to grayscale.
            files_extension: `list of str`. A list of allowed image file
                extension, for example ['.jpg', '.jpeg', '.png']. If None,
                all files are allowed.
            filter_channel: `bool`. If true, images which the channel is not 3 should
                be filter.

        Returns:
            (X, Y): with X the images array and Y the labels array.

        """
        assert mode in ['folder', 'file']
        if mode == 'folder':
            images, labels = directory_to_samples(target_path,
                                                  flags=files_extension, filter_channel=filter_channel)
        else:
            images, labels = [], []
            with open(target_path, 'r') as f:
                for line in f:
                    fields = line.split()
                    if not fields:
                        continue  # tolerate blank lines in the list file
                    image_path = fields[0]
                    # Index the path with [0]; calling the list raised a
                    # TypeError as soon as `files_extension` was given.
                    if files_extension and not any(
                            flag in image_path for flag in files_extension):
                        continue
                    if filter_channel and get_img_channel(image_path) != 3:
                        continue
                    images.append(image_path)
                    labels.append(int(fields[1]))

        n_classes = np.max(labels) + 1
        X = ImagePreloader(images, image_shape, normalize, grayscale)
        Y = LabelPreloader(labels, n_classes, categorical_labels)

        return X, Y


    def load_image(in_image):
        """ load_image.

        Open `in_image` with PIL and hand back the resulting image object.

        Arguments:
            in_image: Whatever `Image.open` accepts (the callers in this file
                pass file paths).

        Returns:
            The object returned by `Image.open`.

        """
        return Image.open(in_image)


    def resize_image(in_image, new_width, new_height, out_image=None,
                     resize_mode=None):
        """ Resize an image.

        Arguments:
            in_image: `PIL.Image`. The image to resize.
            new_width: `int`. The image new width.
            new_height: `int`. The image new height.
            out_image: `str`. If specified, save the image to the given path.
            resize_mode: PIL resampling filter. If None (default), the Lanczos
                filter is used (`Image.LANCZOS`, falling back to the legacy
                `Image.ANTIALIAS` name when `LANCZOS` is not available).

        Returns:
            `PIL.Image`. The resized image.

        """
        if resize_mode is None:
            # Resolve the filter lazily: `Image.ANTIALIAS` no longer exists in
            # recent Pillow releases, and referencing it in the signature made
            # the definition itself fail. `LANCZOS` names the same filter.
            resize_mode = getattr(Image, 'LANCZOS', None)
            if resize_mode is None:
                resize_mode = Image.ANTIALIAS
        img = in_image.resize((new_width, new_height), resize_mode)
        if out_image:
            img.save(out_image)
        return img


    def convert_color(in_image, mode):
        """ convert_color.

        Delegate to `in_image.convert(mode)` and return its result.

        Arguments:
            in_image: Object exposing a `convert` method (a `PIL.Image` in
                this file's callers).
            mode: Mode value forwarded unchanged to `convert`.

        Returns:
            Whatever `in_image.convert(mode)` returns.

        """
        converted = in_image.convert(mode)
        return converted


    def pil_to_nparray(pil_image):
        """ pil_to_nparray.

        Call `pil_image.load()` and then convert the image to a float32
        numpy array.

        Arguments:
            pil_image: Image object exposing `load()` and convertible by
                `np.asarray`.

        Returns:
            `np.ndarray` with dtype float32.

        """
        pil_image.load()
        as_array = np.asarray(pil_image, dtype=np.float32)
        return as_array


    def image_dirs_to_samples(dirname, label_size, resize=None, convert_gray=None, filetypes=None,
                            down_sampling=None, categorical_Y=True, shuffle_data=True):
        """ image_dirs_to_samples.

        Walk `images/<dirname>/jpg/`, load every listed image, normalize it to
        [0, 1] and write the results to gzip-compressed pickle caches named
        `images/<dirname>/samples_<k>.pkl.gz`, 500 images per cache file.

        Arguments:
            dirname: `str`. Dataset folder name under `images/`.
            label_size: `int`. Number of classes, forwarded to `to_categorical`
                when `categorical_Y` is set.
            resize: Optional pair; `resize[0]` and `resize[1]` are passed to
                `resize_image` as new width and new height.
            convert_gray: If truthy, images are converted with mode 'L'.
            filetypes: Optional extension string or iterable of extension
                strings used to filter file names.
            down_sampling: Optional per-class limit mapping, forwarded to
                `directory_to_samples`.
            categorical_Y: `bool`. If True, targets are converted with
                `to_categorical` before being dumped.
            shuffle_data: `bool`. If True, samples and targets are shuffled
                in unison before loading.

        Returns:
            `list of str`. Paths of the cache files that were written.

        """
        # Number of images accumulated before a cache file is written.
        batch_size = 500

        def dump_samples(X, Y, fptr):
            """ Write one (X, Y) batch to its cache file and return the path. """
            if categorical_Y:
                Y = to_categorical(Y, label_size)  # First class is '0'
            fname = "images/%s/samples_%i.pkl.gz" % (dirname, fptr)
            # Context manager guarantees the gzip stream is flushed and closed.
            with gzip.open(fname, 'wb') as out:
                pickle.dump((X, Y), out)
            print("dumped samples shape %s, %s in %s @ %s" % (np.shape(X), np.shape(Y), fname, datetime.now()))
            return fname

        print("Starting to parse images...")
        if filetypes:
            # A bare string must become a one-element list; list("...") would
            # split it into single characters and match almost any file name.
            if isinstance(filetypes, str):
                filetypes = [filetypes]
            else:
                filetypes = list(filetypes)
        directory = "images/%s/jpg/" % dirname
        raw_samples, raw_targets = directory_to_samples(directory, flags=filetypes, down_sampling=down_sampling)
        if shuffle_data:
            raw_samples, raw_targets = shuffle(raw_samples, raw_targets)

        raw_samples, raw_targets = list(raw_samples), list(raw_targets)
        # generate sample caches
        pkl_files = []
        print("%i samples to go @ %s" % (len(raw_samples), datetime.now()))
        samples, targets, i, fptr = [], [], 0, 0
        for s, t in zip(raw_samples, raw_targets):
            try:
                img = load_image(s)
            except Exception as e:
                # Best effort: report unreadable files and keep going.
                print(e)
                continue
            if resize:
                img = resize_image(img, resize[0], resize[1])
            if convert_gray:
                img = convert_color(img, 'L')
            img = pil_to_nparray(img)
            # Keep only arrays shaped (?, ?, 3).
            # NOTE(review): with convert_gray set, images presumably become 2-D
            # and are all dropped by this check -- confirm intended behavior.
            if (len(img.shape) != 3) or (img.shape[2] != 3):
                continue
            # normalize
            img /= 255.
            samples.append(img)
            targets.append(t)
            i += 1
            if i % batch_size == 0:
                print("%i img loaded @ %s" % (i, datetime.now()))
                fname = dump_samples(samples, targets, fptr)
                pkl_files.append(fname)
                samples, targets, fptr = [], [], (fptr + 1)
        if samples:
            # Flush the final, partially filled batch.
            fname = dump_samples(samples, targets, fptr)
            pkl_files.append(fname)
            samples, targets, fptr = [], [], (fptr + 1)
        print("Parsing Done, %i samples, %i pkl files @ %s" % (i, fptr, datetime.now()))
        return pkl_files



    # def build_image_dataset_from_dir(dirname,
    #                                  # directory,
    #                                  # dataset_fname,
    #                                  resize=None, convert_gray=None,
    #                                  filetypes=None, shuffle_data=False,
    #                                  categorical_Y=False,
    #                                  down_sampling=None,
    #                                  ):
        
    #     dataset_cache = [f for f in os.listdir("images/%s/" % dirname) if "samples_" in f]
    #     if not dataset_cache:
    #         X, Y = image_dirs_to_samples("images/%s/jpg/" % dirname, resize, convert_gray, filetypes, down_sampling=down_sampling, 
    #             dataset_fname=dataset_fname, categorical_Y=categorical_Y, shuffle_data=shuffle_data)
    #     return dataset_cache


    def random_flip_leftright(x):
        """ random_flip_leftright.

        Draw one random bit; when it is set return `np.fliplr(x)`, otherwise
        return `x` itself.

        Arguments:
            x: Array accepted by `np.fliplr`.

        Returns:
            The left/right flipped array or the untouched input.

        """
        do_flip = random.getrandbits(1) == 1
        return np.fliplr(x) if do_flip else x


    def random_flip_updown(x):
        """ random_flip_updown.

        Draw one random bit; when it is set return `np.flipud(x)`, otherwise
        return `x` itself.

        Arguments:
            x: Array accepted by `np.flipud`.

        Returns:
            The up/down flipped array or the untouched input.

        """
        do_flip = random.getrandbits(1) == 1
        return np.flipud(x) if do_flip else x


    # ==================
    #     DATA UTILS
    # ==================


    def shuffle(*arrs):
        """ shuffle.

        Apply one shared random permutation to every given array along its
        first axis, so corresponding entries stay aligned.

        Arguments:
            *arrs: Sequences of equal length; each is converted with
                `np.array` before being permuted.

        Returns:
            Tuple of permuted numpy arrays, one per input.

        """
        arrays = []
        for candidate in arrs:
            # Every input must be as long as the first one.
            assert len(arrs[0]) == len(candidate)
            arrays.append(np.array(candidate))
        order = np.random.permutation(len(arrays[0]))
        return tuple(a[order] for a in arrays)


    def samplewise_zero_center(X):
        """ samplewise_zero_center.

        For each sample `X[i]`, subtract in place the mean computed along
        axis 1 of that sample (dimensions kept for broadcasting).

        Arguments:
            X: `array`. The batch of samples to center; it is modified in
                place.

        Returns:
            The same `X` object, after centering.

        """
        for idx in range(len(X)):
            axis_mean = np.mean(X[idx], axis=1, keepdims=True)
            X[idx] -= axis_mean
        return X


    def samplewise_std_normalization(X):
        """ samplewise_std_normalization.

        For each sample `X[i]`, divide in place by the standard deviation
        computed along axis 1 of that sample, with `_EPSILON` added to the
        divisor.

        Arguments:
            X: `array`. The batch of samples to scale; it is modified in
                place.

        Returns:
            The same `X` object, after scaling.

        """
        for idx in range(len(X)):
            axis_std = np.std(X[idx], axis=1, keepdims=True)
            X[idx] /= (axis_std + _EPSILON)
        return X


    def featurewise_zero_center(X, mean=None):
        """ featurewise_zero_center.

        Subtract a mean from every sample. Without an explicit `mean`, the
        mean over axis 0 of `X` is computed and returned alongside the result.

        Arguments:
            X: `array`. The batch of samples to center.
            mean: The value to subtract. If None, it is computed from `X`.

        Returns:
            `X - mean` when `mean` was supplied, otherwise the tuple
            `(X - computed_mean, computed_mean)`.

        """
        if mean is not None:
            return X - mean
        computed = np.mean(X, axis=0)
        return X - computed, computed


    def featurewise_std_normalization(X, std=None):
        """ featurewise_std_normalization.

        Divide every sample by a standard deviation. Without an explicit
        `std`, the standard deviation over axis 0 of `X` is computed and
        returned alongside the result.

        Arguments:
            X: `array`. The batch of samples to scale.
            std: The divisor. If None, it is computed from `X`.

        Returns:
            `X / std` when `std` was supplied, otherwise the tuple
            `(X / computed_std, computed_std)`.

        """
        if std is not None:
            return X / std
        computed = np.std(X, axis=0)
        return X / computed, computed


    def directory_to_samples(directory, flags=None, filter_channel=False, down_sampling=None):
        """ directory_to_samples.

        Treat each immediate sub-folder of `directory` as one class and list
        the files directly inside it as samples of that class.

        Class ids follow the sorted sub-folder order: numeric order when every
        folder name is made of digits, alphabetical order otherwise.

        Arguments:
            directory: `str`. Root folder containing one sub-folder per class.
            flags: Optional iterable of substrings; a file is kept only if at
                least one of them occurs in its name. If falsy, all files are
                kept.
            filter_channel: `bool`. If True, files for which
                `get_img_channel` does not return 3 are skipped.
            down_sampling: Optional mapping from class folder name to a
                limit. A value greater than 1 is used as the number of files
                kept; otherwise it is a fraction of the folder's files, with
                a floor of 50.

        Returns:
            (samples, targets): list of file paths and list of class ids.

        """
        samples, targets = [], []
        print(directory)
        classes = next(os.walk(directory))[1]
        # keep category numerical order, if all categories are digits
        if all(name.isdigit() for name in classes):
            classes = sorted(classes, key=int)
        else:
            classes = sorted(classes)
        for label, c in enumerate(classes):
            c_dir = os.path.join(directory, c)
            cands = next(os.walk(c_dir))[2]
            # Down-sampling keeps only the leading files of the listing.
            if down_sampling and (c in down_sampling):
                quota = down_sampling[c]
                limit = quota if quota > 1 else max(50, int(len(cands) * quota))
                cands = cands[:limit]
            for sample in cands:
                if flags and not any(flag in sample for flag in flags):
                    continue
                sample_path = os.path.join(c_dir, sample)
                if filter_channel and get_img_channel(sample_path) != 3:
                    continue
                samples.append(sample_path)
                targets.append(label)
            print("samples dim %s, target dim %s" % (np.shape(samples), np.shape(targets)))
        return samples, targets



    def load_csv(filepath, target_column=-1, columns_to_ignore=None,
                 has_header=True, categorical_labels=False, n_classes=None):
        """ load_csv.

        Load data from a CSV file. By default the labels are considered to be the
        last column, but it can be changed by filling 'target_column' parameter.

        Arguments:
            filepath: `str`. The csv file path.
            target_column: The id of the column representing the labels.
                Negative values count from the end of each row.
                Default: -1 (The last column).
            columns_to_ignore: `list of int`. A list of columns index to ignore,
                expressed as positions in the original row (before the target
                column is removed). The list is not modified.
            has_header: `bool`. Whether the csv file has a header or not.
            categorical_labels: `bool`. If True, labels are returned as binary
                vectors (to be used with 'categorical_crossentropy').
            n_classes: `int`. Total number of class (needed if
                categorical_labels is True).

        Returns:
            A tuple (data, target). Values are the raw strings read from the
            file; blank rows are skipped.

        """
        # Local import so the function does not depend on the file's
        # top-level import list.
        import csv
        try:
            from tensorflow.python.platform import gfile
            open_csv = gfile.Open
        except (ImportError, AttributeError):
            # Fall back to the builtin so plain local files still load.
            open_csv = open

        # Work on a copy: the caller's list must not be altered.
        ignore = list(columns_to_ignore) if columns_to_ignore else []
        # Row width -> set of post-removal indexes to drop.
        skip_by_width = {}
        data, target = [], []
        with open_csv(filepath) as csv_file:
            data_file = csv.reader(csv_file)
            if has_header:
                next(data_file, None)  # tolerate an empty file
            for row in data_file:
                if not row:
                    continue
                width = len(row)
                skip = skip_by_width.get(width)
                if skip is None:
                    # Resolve a negative target index against this row width
                    # so only the columns located after it are shifted.
                    if target_column < 0:
                        target_idx = target_column + width
                    else:
                        target_idx = target_column
                    skip = set(c - 1 if c > target_idx else c
                               for c in ignore if c != target_idx)
                    skip_by_width[width] = skip
                target.append(row.pop(target_column))
                data.append([value for j, value in enumerate(row) if j not in skip])
        if categorical_labels:
            assert isinstance(n_classes, int), "n_classes not specified!"
            target = to_categorical(target, n_classes)
        return data, target


    class Preloader(object):
        """ Lazy view over `array` that applies `function` on item access.

        Indexing with a list or numpy array returns a list with `function`
        applied to each selected element; a slice does the same over the
        sliced range; any other index returns `function(array[index])`.

        Arguments:
            array: Indexable container of raw items.
            function: Callable applied to each item when it is accessed.

        """
        def __init__(self, array, function):
            self.array = array
            self.function = function

        def __getitem__(self, id):
            # isinstance (rather than an exact type match) lets subclasses of
            # list / ndarray take the batch-index path too.
            if isinstance(id, (list, np.ndarray)):
                return [self.function(self.array[i]) for i in id]
            if isinstance(id, slice):
                return [self.function(arr) for arr in self.array[id]]
            return self.function(self.array[id])

        def __len__(self):
            return len(self.array)


    class ImagePreloader(Preloader):
        """ Preloader whose items are image paths, loaded on access.

        Arguments:
            array: Collection of image paths.
            image_shape: Pair compared against PIL's `(width, height)` size
                and passed to `resize_image` in that order.
                NOTE(review): the `image_preloader` docstring describes this
                as (height, width) -- confirm which order is intended.
            normalize: `bool`. If True, pixel values are divided by 255.
            grayscale: `bool`. If True, images are converted with mode 'L'.

        """
        def __init__(self, array, image_shape, normalize=True, grayscale=False):
            def fetch(path):
                return self.preload(path, image_shape, normalize, grayscale)

            super(ImagePreloader, self).__init__(array, fetch)

        def preload(self, path, image_shape, normalize=True, grayscale=False):
            """ Load one image from `path` and return it as a float32 array. """
            img = load_image(path)
            current_w, current_h = img.size
            size_matches = (current_w == image_shape[0]
                            and current_h == image_shape[1])
            if not size_matches:
                img = resize_image(img, image_shape[0], image_shape[1])
            if grayscale:
                img = convert_color(img, 'L')
            pixels = pil_to_nparray(img)
            if normalize:
                pixels /= 255.
            return pixels


    class LabelPreloader(Preloader):
        """ Preloader whose items are class ids, optionally one-hot encoded.

        Arguments:
            array: Collection of labels.
            n_class: Number of classes forwarded to `to_categorical`.
            categorical_label: `bool`. If True, each label is converted with
                `to_categorical`; otherwise it is returned unchanged.

        """
        def __init__(self, array, n_class=None, categorical_label=True):
            def fetch(label):
                return self.preload(label, n_class, categorical_label)

            super(LabelPreloader, self).__init__(array, fetch)

        def preload(self, label, n_class, categorical_label):
            """ Return `label`, encoded with `to_categorical` when requested. """
            if not categorical_label:
                return label
            # TODO: an `isinstance(n_class, int)` assertion used to live here
            # and is disabled pending investigation of a failure it caused.
            return to_categorical([label], n_class)[0]


    def get_max(X):
        """ Return the maximum of `X` as computed by `np.max`. """
        largest = np.max(X)
        return largest


    def get_mean(X):
        """ Return the mean of `X` as computed by `np.mean`. """
        average = np.mean(X)
        return average


    def get_std(X):
        """ Return the standard deviation of `X` as computed by `np.std`. """
        deviation = np.std(X)
        return deviation


## 在flowers数据上进行训练

先将数据download到本地，数据地址如下：
http://www.robots.ox.ac.uk/~vgg/data/flowers/17/17flowers.tgz

In [None]:
from __future__ import division, print_function, absolute_import

from datetime import datetime

import pickle, gzip
import numpy as np
import tflearn.datasets.oxflower17 as oxflower17

# Hyper-parameters for the 17-flowers run.
hps = HParams(img_size=227,
              label_size=17,
              gpu_memory_fraction=0.4,
              model_name="17flowers",
              gpu_usage=0.5)
gnet = GoogLeNet(hps=hps)
# get_data returns the list of cached sample files to train on.
pkl_files = gnet.get_data(dirname="17flowers", down_sampling={str(n): 10000 for n in range(13)})

# Train over every cache file, then start over; the loop has no exit
# condition, so training runs until the process is interrupted.
epoch = 0
while True:
    for f in pkl_files:
        # Close each cache file as soon as it is read so handles do not
        # accumulate across passes. The pickles are caches produced by this
        # notebook; do not point this at untrusted files.
        with gzip.open(f, 'rb') as cache:
            X, Y = pickle.load(cache)
        gnet.fit(X, Y, n_epoch=10)
        print('[pkl_files] done with %s @ %s' % (f, datetime.now()))
    epoch += 1
    print("[Finish] all pkl_files been trained %i times." % epoch)
    


## 训练结果

训练过程如下图所示：
![](img/training_googlenet.PNG)

训练过程中precision平稳升高，loss平稳降低，在step为16k时收敛，precision达到0.97，loss降至0.01。

## 参考文献
<b>相关论文</b>

Going Deeper with Convolutions
https://arxiv.org/abs/1409.4842

<b>参考代码</b>
https://github.com/Marsan-Ma/imgrec

