# AlexNet
1. 2012 年ILSVRC(ImageNet Large Scale Visual Recognition Challenge) 冠军（top-5错误率：16.4%，使用额外数据可达到15.3%，8层神经网络）
2. Hinton的学生Alex Krizhevsky提出,可以算是LeNet的一种更深更宽的版本
3. 新技术点：

    1). 成功使用ReLU作为CNN的激活函数，并验证其效果在较深的网络超过了Sigmoid;
    
    2). 将Dropout技术实用化；
    
    3). 在CNN中使用重叠的最大池化（最大：tf.nn.max_pool；重叠：步长小于池化核尺寸）。此前CNN中普遍使用平均池化，而平均池化会带来模糊化效果，最大池化避免了这一问题；
    
    4). 提出LRN层；(http://yeephycho.github.io/2016/08/03/Normalizations-in-neural-networks/)
    
    5). 使用CUDA加速深度卷积网络的训练；
    
    6). 数据增强，大大减轻了过拟合，提高模型的泛化能力。

没有添加最后三层全连接层，完整代码参考下面链接：
http://www.cs.toronto.edu/~guerzhoy/tf_alexnet/

In [1]:
from datetime import datetime
import math
import time
import tensorflow as tf

batch_size为32,总共测试100个batch数据

In [2]:
# Number of images per step; used as the leading dimension of the dummy
# input tensor built in run_benchmark().
batch_size = 32

# Number of timed steps that time_tensorflow_run() averages over
# (its burn-in steps are run in addition to these).
num_batches = 100

显示网络每一层的结构，显示各层输出tensor的尺寸

In [3]:
def print_activations(t):
    """Print the op name of tensor `t` followed by its static shape as a list.

    The name and the shape are separated by three spaces, e.g.
    ``conv1   [32, 56, 56, 64]``.
    """
    print(t.op.name, t.get_shape().as_list(), sep='   ')

# 一、定义AlexNet的网络结构（inference）

In [4]:
def _conv_relu(inputs, name, kernel_shape, strides):
    """Build one SAME-padded convolution + bias + ReLU layer in its own name scope.

    Args:
        inputs: input tensor passed to tf.nn.conv2d.
        name: name of the tf.name_scope that wraps the layer's ops.
        kernel_shape: [height, width, in_channels, out_channels] of the filter.
        strides: strides list passed straight to tf.nn.conv2d.

    Returns:
        A tuple (activation, [kernel, biases]) where the list holds the two
        variables created for this layer.
    """
    with tf.name_scope(name) as scope:
        kernel = tf.Variable(tf.truncated_normal(kernel_shape, dtype=tf.float32,
                                                 stddev=1e-1), name='weights')
        conv = tf.nn.conv2d(inputs, kernel, strides, padding='SAME')
        # One bias per output channel, initialised to zero.
        biases = tf.Variable(tf.constant(0.0, shape=[kernel_shape[-1]],
                                         dtype=tf.float32),
                             trainable=True, name='biases')
        pre_activation = tf.nn.bias_add(conv, biases)
        # `scope` is passed as the op name, as in the original layer code.
        activation = tf.nn.relu(pre_activation, name=scope)
    print_activations(activation)
    return activation, [kernel, biases]


def _lrn(inputs, name):
    """Apply local response normalization with this network's fixed settings."""
    return tf.nn.lrn(inputs, 4, bias=1.0, alpha=0.001 / 9.0, beta=0.75,
                     name=name)


def _max_pool(inputs, name):
    """Apply 3x3 max pooling with stride 2 (overlapping windows), VALID padding."""
    pooled = tf.nn.max_pool(inputs,
                            ksize=[1, 3, 3, 1],
                            strides=[1, 2, 2, 1],
                            padding='VALID',
                            name=name)
    print_activations(pooled)
    return pooled


def inference(images):
    """Build the convolutional part of AlexNet (five conv layers, three pools).

    The fully connected layers are not built here; the function stops at
    pool5. The shape of every conv and pool output is printed while the
    graph is constructed.

    Args:
        images: input image batch. conv1 uses an 11x11x3 filter, so the last
            dimension is expected to be 3 (presumably NHWC float32 -- confirm
            against the caller).

    Returns:
        A tuple (pool5, parameters): the output tensor of the last pooling
        layer, and the list of created variables ordered
        [conv1 kernel, conv1 biases, conv2 kernel, conv2 biases, ...].
    """
    parameters = []

    # conv1: 11x11 filters, stride 4
    conv1, layer_vars = _conv_relu(images, 'conv1', [11, 11, 3, 64], [1, 4, 4, 1])
    parameters += layer_vars

    # lrn1 + pool1
    pool1 = _max_pool(_lrn(conv1, 'lrn1'), 'pool1')

    # conv2: 5x5 filters, stride 1
    conv2, layer_vars = _conv_relu(pool1, 'conv2', [5, 5, 64, 192], [1, 1, 1, 1])
    parameters += layer_vars

    # lrn2 + pool2
    pool2 = _max_pool(_lrn(conv2, 'lrn2'), 'pool2')

    # conv3 .. conv5: 3x3 filters, stride 1, no normalization or pooling between
    conv3, layer_vars = _conv_relu(pool2, 'conv3', [3, 3, 192, 384], [1, 1, 1, 1])
    parameters += layer_vars

    conv4, layer_vars = _conv_relu(conv3, 'conv4', [3, 3, 384, 256], [1, 1, 1, 1])
    parameters += layer_vars

    conv5, layer_vars = _conv_relu(conv4, 'conv5', [3, 3, 256, 256], [1, 1, 1, 1])
    parameters += layer_vars

    # pool5 (no LRN in front of it)
    pool5 = _max_pool(conv5, 'pool5')

    return pool5, parameters

# 二、计算Forward和Forward-backward执行时间的函数

In [7]:
def time_tensorflow_run(session, target, info_string, num_steps=None):
    """Run `target` repeatedly and print per-step timing statistics.

    The first 10 runs are burn-in and are excluded from the statistics. After
    that, every step whose overall index is a multiple of 10 prints its
    duration, and a final line prints the mean and standard deviation of the
    step duration in seconds.

    Args:
        session: object whose run(target) method executes the computation
            (a TensorFlow session in this notebook).
        target: the value passed to session.run() on every step.
        info_string: a string summarizing this run, printed with the stats.
        num_steps: number of timed steps. Defaults to the module-level
            `num_batches` when omitted.

    Returns:
        None
    """
    if num_steps is None:
        # Keep the original behaviour: use the notebook-wide setting.
        num_steps = num_batches
    num_steps_burn_in = 10
    total_duration = 0.0
    total_duration_squared = 0.0
    for i in range(num_steps + num_steps_burn_in):
        start_time = time.time()
        session.run(target)
        duration = time.time() - start_time
        if i < num_steps_burn_in:
            # Warm-up step: executed but not measured.
            continue
        if not i % 10:
            print ('%s: step %d, duration = %.3f' %
                   (datetime.now(), i - num_steps_burn_in, duration))
        total_duration += duration
        total_duration_squared += duration * duration
    mn = total_duration / num_steps
    vr = total_duration_squared / num_steps - mn * mn
    # E[x^2] - E[x]^2 can come out slightly negative through floating-point
    # cancellation when the durations are nearly identical; clamp it so that
    # math.sqrt does not raise ValueError.
    sd = math.sqrt(max(vr, 0.0))
    print ('%s: %s across %d steps, %.3f +/- %.3f sec / batch' %
           (datetime.now(), info_string, num_steps, mn, sd))

# 三、定义模拟网络执行过程

In [8]:
def run_benchmark():
    """Time the forward and forward-backward passes of the AlexNet conv stack.

    Builds a fresh graph with a random dummy image batch, runs `inference` on
    it, and reports timing for (1) evaluating pool5 and (2) evaluating the
    gradients of an L2 loss on pool5 with respect to all layer parameters.
    Results are printed by `time_tensorflow_run`; nothing is returned.
    """
    with tf.Graph().as_default():
        # Generate some dummy images: random values stand in for real data
        # because only execution time is measured.
        image_size = 224
        # conv1 in inference() uses SAME padding with stride 4, so the
        # 224x224 input is used as-is (no extra padding of image_size).
        images = tf.Variable(tf.random_normal([batch_size,
                                               image_size,
                                               image_size, 3],
                                              dtype=tf.float32,
                                              stddev=1e-1))

        # Build a Graph that computes the pool5 output of the inference model.
        pool5, parameters = inference(images)

        # Build an initialization operation.
        init = tf.global_variables_initializer()

        # Start running operations on the Graph.
        config = tf.ConfigProto()
        config.gpu_options.allocator_type = 'BFC'
        # Use the session as a context manager so it is closed (and its
        # resources released) even if a benchmark step raises.
        with tf.Session(config=config) as sess:
            sess.run(init)

            # Run the forward benchmark.
            time_tensorflow_run(sess, pool5, "Forward")

            # Add a simple objective so we can calculate the backward pass:
            # objective = sum(t ** 2) / 2
            objective = tf.nn.l2_loss(pool5)
            # Compute the gradient with respect to all the parameters.
            grad = tf.gradients(objective, parameters)
            # Run the backward benchmark.
            time_tensorflow_run(sess, grad, "Forward-backward")

# 四、执行

In [9]:
# Build the graph and run both timing passes (forward, then forward-backward).
run_benchmark()

conv1   [32, 56, 56, 64]
pool1   [32, 27, 27, 64]
conv2   [32, 27, 27, 192]
pool2   [32, 13, 13, 192]
conv3   [32, 13, 13, 384]
conv4   [32, 13, 13, 256]
conv5   [32, 13, 13, 256]
pool5   [32, 6, 6, 256]
2017-06-12 17:44:42.808367: step 0, duration = 0.090
2017-06-12 17:44:43.743112: step 10, duration = 0.117
2017-06-12 17:44:44.697359: step 20, duration = 0.091
2017-06-12 17:44:45.611486: step 30, duration = 0.090
2017-06-12 17:44:46.533292: step 40, duration = 0.095
2017-06-12 17:44:47.453792: step 50, duration = 0.091
2017-06-12 17:44:48.378840: step 60, duration = 0.091
2017-06-12 17:44:49.343948: step 70, duration = 0.103
2017-06-12 17:44:50.298895: step 80, duration = 0.099
2017-06-12 17:44:51.231422: step 90, duration = 0.091
2017-06-12 17:44:52.099861: Forward across 100 steps, 0.094 +/- 0.006 sec / batch
2017-06-12 17:44:56.465665: step 0, duration = 0.313
2017-06-12 17:44:59.651308: step 10, duration = 0.313
2017-06-12 17:45:02.765514: step 20, duration = 0.311
2017-06-12 17: