In [1]:
import os
import tensorflow as tf
import numpy as np
import math
import timeit
#import matplotlib.pyplot as plt

#%matplotlib inline

In [2]:
# Toggle between placing the graph on the first GPU or on the CPU.
USE_GPU = True

# TensorFlow device string handed to tf.device(...) when graphs are built.
device = '/device:GPU:0' if USE_GPU else '/cpu:0'

# Number of training iterations between progress reports.
print_every = 100

print('Using device: ', device)

Using device:  /device:GPU:0


In [3]:
#important block
def flatten(x):
    """
    Collapse every dimension after the first into a single dimension.

    Input:
    - x: TensorFlow Tensor of shape (N, D1, ..., DM)

    Output:
    - TensorFlow Tensor of shape (N, D1 * ... * DM)
    """
    # tf.shape yields the batch size as a graph tensor, so N does not have to
    # be known when the graph is constructed.
    batch_size = tf.shape(x)[0]
    return tf.reshape(x, shape=(batch_size, -1))
    
def fc(x, w):
    """
    Bias-free fully-connected layer followed by a ReLU.

    Inputs:
    - x: TensorFlow Tensor; it is flattened to shape (N, D) before the multiply
    - w: TensorFlow Tensor of weights, matmul-compatible with the flattened
      input, i.e. shape (D, H)

    Returns:
    - TensorFlow Tensor of shape (N, H) holding relu(flatten(x) @ w)
    """
    flat = flatten(x)
    return tf.nn.relu(tf.matmul(flat, w))

def convlayer(x, params):
    """
    2-D convolution (stride 1, "SAME" padding), plus bias, followed by a ReLU.

    Inputs:
    - x: input Tensor handed to tf.nn.conv2d
    - params: pair (conv_w, conv_b) holding the convolution filter and the
      bias that tf.nn.bias_add applies to the convolution output

    Returns:
    - TensorFlow Tensor with the ReLU activations of the layer
    """
    kernel, bias = params
    pre_activation = tf.nn.bias_add(
        tf.nn.conv2d(x, kernel, [1, 1, 1, 1], "SAME"),
        bias,
    )
    return tf.nn.relu(pre_activation)

def three_layer_convnet(x, params):
    """
    Two convlayer stages followed by a linear classifier on the flattened
    output of the second stage.

    Inputs:
    - x: input Tensor for the first convolution
    - params: sequence (conv_w1, conv_b1, conv_w2, conv_b2, fc_w, fc_b)

    Returns a tuple (ly1, ly2, scores):
    - ly1: activations of the first conv layer
    - ly2: activations of the second conv layer
    - scores: flatten(ly2) @ fc_w + fc_b
    """
    conv_w1, conv_b1, conv_w2, conv_b2, fc_w, fc_b = params
    hidden1 = convlayer(x, [conv_w1, conv_b1])
    hidden2 = convlayer(hidden1, [conv_w2, conv_b2])
    scores = tf.matmul(flatten(hidden2), fc_w) + fc_b
    return hidden1, hidden2, scores

def centralnet(h1, h2, params):
    """
    Fuse two hidden activations with one linear projection each.

    Inputs:
    - h1, h2: TensorFlow Tensors; each is flattened to shape (N, D) first
    - params: pair (w1, w2) of projection weights, matmul-compatible with the
      flattened h1 and h2 respectively

    Returns:
    - TensorFlow Tensor flatten(h1) @ w1 + flatten(h2) @ w2, of shape (N, H)
    """
    w1, w2 = params
    flat1, flat2 = flatten(h1), flatten(h2)
    return tf.matmul(flat1, w1) + tf.matmul(flat2, w2)

def conv_centralnet(x1, x2, params):
    """
    Two three_layer_convnet branches joined by a central fusion path.

    Inputs:
    - x1, x2: input Tensors for the first and second branch
    - params: sequence (feed1, feed2, feedc) where feed1 / feed2 are the
      parameter lists of the two branches and feedc is
      (c11, c12, c21, c22, cc1), the weights of the central path

    Returns:
    - scores: sum of both branch scores and the second central activation
    """
    branch1_params, branch2_params, central_params = params

    # Per-branch forward passes; the hidden activations feed the central path.
    b1_hidden1, b1_hidden2, b1_scores = three_layer_convnet(x1, branch1_params)
    b2_hidden1, b2_hidden2, b2_scores = three_layer_convnet(x2, branch2_params)

    # Central path: fuse the first-layer activations, then fuse the
    # second-layer activations and add the projected first central activation.
    c11, c12, c21, c22, cc1 = central_params
    central1 = centralnet(b1_hidden1, b2_hidden1, [c11, c12])
    central2 = centralnet(b1_hidden2, b2_hidden2, [c21, c22]) + tf.matmul(central1, cc1)

    return b1_scores + b2_scores + central2


In [4]:
def conv_centralnet_test():
    """
    Smoke test for conv_centralnet: build the model with all-zero parameters,
    feed two zero batches of shape (64, 32, 32, 3) and print the shape of the
    resulting scores.
    """
    tf.reset_default_graph()

    def zero_branch_params():
        # Same order in which three_layer_convnet unpacks its parameters.
        return [
            tf.zeros((5, 5, 3, 6)),        # conv_w1
            tf.zeros((6,)),                # conv_b1
            tf.zeros((3, 3, 6, 9)),        # conv_w2
            tf.zeros((9,)),                # conv_b2
            tf.zeros((32 * 32 * 9, 10)),   # fc_w
            tf.zeros((10,)),               # fc_b
        ]

    with tf.device(device):
        # First branch
        x1 = tf.placeholder(tf.float32)
        feed1 = zero_branch_params()

        # Second branch
        x2 = tf.placeholder(tf.float32)
        feed2 = zero_branch_params()

        # Central path
        feedc = [
            tf.zeros((32 * 32 * 6, 4)),    # c11
            tf.zeros((32 * 32 * 6, 4)),    # c12
            tf.zeros((32 * 32 * 9, 10)),   # c21
            tf.zeros((32 * 32 * 9, 10)),   # c22
            tf.zeros((4, 10)),             # cc1
        ]

        scores = conv_centralnet(x1, x2, [feed1, feed2, feedc])

    # Inputs to convolutional layers are 4-dimensional arrays with shape
    # [batch_size, height, width, channels]
    batch_shape = (64, 32, 32, 3)
    x1_np = np.zeros(batch_shape)
    x2_np = np.zeros(batch_shape)
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        scores_np = sess.run(scores, feed_dict={x1: x1_np, x2: x2_np})
        print('scores_np has shape: ', scores_np.shape)

# Run the shape smoke test.
# NOTE(review): conv_centralnet_test() calls tf.reset_default_graph() and then
# opens its own tf.device(device) scope, so this outer CPU scope may not apply
# to the ops it builds -- confirm whether the wrapper is still needed.
with tf.device('/cpu:0'):
    conv_centralnet_test()

scores_np has shape:  (64, 10)


In [5]:
# Prepare the data

def load_cifar10(num_training=49000, num_validation=1000, num_test=10000):
    """
    Load CIFAR-10 through tf.keras.datasets.cifar10.load_data() and return
    normalized train / validation / test splits.

    Inputs:
    - num_training: number of leading training images kept for training
    - num_validation: number of training images, taken right after the
      training slice, kept for validation
    - num_test: number of leading test images kept for testing

    Returns a tuple (X_train, y_train, X_val, y_val, X_test, y_test); images
    are float32 arrays, labels are flat int32 arrays. Every image split is
    normalized with the mean and std computed over axes (0, 1, 2) of the
    training split.
    """
    # Load the raw dataset and cast to the dtypes used downstream.
    (train_images, train_labels), (test_images, test_labels) = \
        tf.keras.datasets.cifar10.load_data()
    train_images = np.asarray(train_images, dtype=np.float32)
    train_labels = np.asarray(train_labels, dtype=np.int32).flatten()
    test_images = np.asarray(test_images, dtype=np.float32)
    test_labels = np.asarray(test_labels, dtype=np.int32).flatten()

    # Subsample: validation comes from the tail end of the requested range.
    val_idx = range(num_training, num_training + num_validation)
    X_val, y_val = train_images[val_idx], train_labels[val_idx]
    train_idx = range(num_training)
    X_train, y_train = train_images[train_idx], train_labels[train_idx]
    test_idx = range(num_test)
    X_test, y_test = test_images[test_idx], test_labels[test_idx]

    # Normalize every split with statistics taken from the training split only.
    reduce_axes = (0, 1, 2)
    mean_pixel = X_train.mean(axis=reduce_axes, keepdims=True)
    std_pixel = X_train.std(axis=reduce_axes, keepdims=True)
    X_train, X_val, X_test = (
        (split - mean_pixel) / std_pixel for split in (X_train, X_val, X_test)
    )

    return X_train, y_train, X_val, y_val, X_test, y_test


# Invoke the above function to get our data.
# Axis tuple (0, 1, 2); not referenced by the other visible cells.
NHW = (0, 1, 2)
X_train, y_train, X_val, y_val, X_test, y_test = load_cifar10()

# Report the shape of every split (plus the label dtype for the training set).
for caption, fields in (
    ('Train data shape: ', (X_train.shape,)),
    ('Train labels shape: ', (y_train.shape, y_train.dtype)),
    ('Validation data shape: ', (X_val.shape,)),
    ('Validation labels shape: ', (y_val.shape,)),
    ('Test data shape: ', (X_test.shape,)),
    ('Test labels shape: ', (y_test.shape,)),
):
    print(caption, *fields)
# Keep the notebook namespace free of the loop temporaries.
del caption, fields

class Dataset(object):
    """Iterable that yields (X, y) minibatches from two aligned NumPy arrays."""

    def __init__(self, X, y, batch_size, shuffle=False):
        """
        Construct a Dataset object to iterate over data X and labels y

        Inputs:
        - X: Numpy array of data, of any shape
        - y: Numpy array of labels, of any shape but with y.shape[0] == X.shape[0]
        - batch_size: Integer giving number of elements per minibatch
        - shuffle: (optional) Boolean, whether to shuffle the data on each epoch
        """
        assert X.shape[0] == y.shape[0], 'Got different numbers of data and labels'
        self.X, self.y = X, y
        self.batch_size, self.shuffle = batch_size, shuffle

    def __iter__(self):
        """
        Return an iterator over (X_batch, y_batch) tuples covering the data once.

        The last batch is smaller when batch_size does not divide the number of
        examples. With shuffle=True a fresh random permutation is drawn from
        NumPy's global RNG on every call and applied to X and y alike, so data
        and labels stay aligned.
        """
        N, B = self.X.shape[0], self.batch_size
        if not self.shuffle:
            # Contiguous slices: no copy is needed when the order is unchanged.
            return iter((self.X[i:i+B], self.y[i:i+B]) for i in range(0, N, B))

        idxs = np.arange(N)
        np.random.shuffle(idxs)
        # Index through the permutation; slicing X and y directly would ignore
        # the shuffled order entirely.
        return iter(
            (self.X[idxs[i:i+B]], self.y[idxs[i:i+B]]) for i in range(0, N, B)
        )


# Minibatch iterators for the three splits; only the training split asks for shuffling.
train_dset = Dataset(X_train, y_train, batch_size=64, shuffle=True)
val_dset = Dataset(X_val, y_val, batch_size=64, shuffle=False)
test_dset = Dataset(X_test, y_test, batch_size=64, shuffle=False)

# We can iterate through a dataset like this (only the first seven batches are shown):
for t, (x, y) in zip(range(7), train_dset):
    print(t, x.shape, y.shape)

Train data shape:  (49000, 32, 32, 3)
Train labels shape:  (49000,) int32
Validation data shape:  (1000, 32, 32, 3)
Validation labels shape:  (1000,)
Test data shape:  (10000, 32, 32, 3)
Test labels shape:  (10000,)
0 (64, 32, 32, 3) (64,)
1 (64, 32, 32, 3) (64,)
2 (64, 32, 32, 3) (64,)
3 (64, 32, 32, 3) (64,)
4 (64, 32, 32, 3) (64,)
5 (64, 32, 32, 3) (64,)
6 (64, 32, 32, 3) (64,)


In [6]:
def training_step(scores, y, params, learning_rate):
    """
    Add the loss and a plain gradient-descent update to the computational graph.

    Inputs:
    - scores: TensorFlow Tensor of shape (N, C) giving classification scores for
      the model.
    - y: TensorFlow Tensor of shape (N,) giving ground-truth labels for scores;
      y[i] == c means that c is the correct class for scores[i].
    - params: List of lists of TensorFlow Variables holding the weights of the
      model; the nesting is flattened by one level before use.
    - learning_rate: Python scalar giving the learning rate to use for the
      gradient descent step.

    Returns:
    - loss: A TensorFlow Tensor of shape () (scalar) giving the mean loss for
      this batch of data; evaluating it also performs a gradient descent step
      on every parameter.
    """
    # Flatten the nested parameter list by one level.
    flat_params = [weight for group in params for weight in group]

    # Per-example softmax cross-entropy, averaged across the minibatch.
    per_example = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=scores)
    loss = tf.reduce_mean(per_example)

    # tf.gradients walks the graph backward from loss to every parameter and
    # returns one gradient Tensor per entry of flat_params.
    grads = tf.gradients(loss, flat_params)

    # One in-place update op per parameter: w <- w - learning_rate * grad_w.
    updates = [
        tf.assign_sub(weight, learning_rate * grad)
        for weight, grad in zip(flat_params, grads)
    ]

    # The control dependency forces all updates to run whenever the returned
    # loss tensor is evaluated.
    with tf.control_dependencies(updates):
        return tf.identity(loss)

In [7]:
def train_part2(model_fn, init_fn, learning_rate):
    """
    Build the training graph for a two-input model and run one pass over
    train_dset, printing the loss and the validation accuracy every
    print_every iterations.

    Inputs:
    - model_fn: callable (x1, x2, params) -> scores implementing the forward pass
    - init_fn: callable taking no arguments and returning the model parameters
    - learning_rate: Python scalar used for the gradient descent step

    Returns: Nothing.
    """
    # Start from an empty default graph.
    tf.reset_default_graph()
    is_training = tf.placeholder(tf.bool, name='is_training')

    # Graph construction: inputs, parameters, forward pass and update step.
    with tf.device(device):
        image_shape = [None, 32, 32, 3]
        x1 = tf.placeholder(tf.float32, image_shape)
        x2 = tf.placeholder(tf.float32, image_shape)

        y = tf.placeholder(tf.int32, [None])
        params = init_fn()
        scores = model_fn(x1, x2, params)
        loss = training_step(scores, y, params, learning_rate)

    # Graph execution over the training data.
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for step, (x_np, y_np) in enumerate(train_dset):
            # The same image batch is fed to both inputs; evaluating loss also
            # applies the parameter update.
            loss_np = sess.run(loss, feed_dict={x1: x_np, x2: x_np, y: y_np})

            if step % print_every != 0:
                continue
            print('Iteration %d, loss = %.4f' % (step, loss_np))
            check_accuracy(sess, val_dset, x1, x2, scores, is_training)

In [8]:
def check_accuracy(sess, dset, x1, x2, scores, is_training=None):
    """
    Check accuracy on a two-input classification model.

    Inputs:
    - sess: A TensorFlow Session that will be used to run the graph
    - dset: An iterable of (x_batch, y_batch) pairs, e.g. a Dataset object
    - x1, x2: TensorFlow placeholder Tensors for the two model inputs; the same
      image batch is fed to both
    - scores: A TensorFlow Tensor representing the scores output from the
      model; this is the Tensor we will ask TensorFlow to evaluate.
    - is_training: (optional) placeholder that is fed a false value during
      evaluation; when None, nothing extra is fed.

    Returns: Nothing, but prints the accuracy of the model. An empty dset is
    reported as 0 / 0 correct (0.00%).
    """
    num_correct, num_samples = 0, 0
    for x_batch, y_batch in dset:
        feed_dict = {x1: x_batch, x2: x_batch}
        # Compare against None explicitly: a None key is not a feedable
        # tensor, and truth-testing a graph Tensor is not allowed.
        if is_training is not None:
            feed_dict[is_training] = False
        scores_np = sess.run(scores, feed_dict=feed_dict)
        y_pred = scores_np.argmax(axis=1)
        num_samples += x_batch.shape[0]
        num_correct += (y_pred == y_batch).sum()
    # Guard the division so an empty dataset does not raise ZeroDivisionError.
    acc = float(num_correct) / num_samples if num_samples else 0.0
    print('Got %d / %d correct (%.2f%%)' % (num_correct, num_samples, 100 * acc))

In [9]:
def kaiming_normal(shape):
    """
    Draw a random-normal tensor scaled by sqrt(2 / fan_in).

    Inputs:
    - shape: sequence of ints with at least two entries. The last entry is
      treated as the output dimension; fan_in is the product of all the
      others, which gives shape[0] for a 2-D weight matrix and
      prod(shape[:3]) for a 4-D convolution filter.

    Returns:
    - TensorFlow Tensor of the requested shape.

    Raises:
    - ValueError: if shape has fewer than two entries, since no fan-in can be
      derived from it.
    """
    if len(shape) < 2:
        raise ValueError(
            'kaiming_normal needs a shape with at least 2 dimensions, got %r' % (shape,))
    fan_in = np.prod(shape[:-1])
    return tf.random_normal(shape) * np.sqrt(2.0 / fan_in)

In [10]:
def conv_centralnet_init():
    """
    Create the trainable parameters of conv_centralnet.

    Inputs: None

    Returns a list [feed1, feed2, feedc]:
    - feed1, feed2: parameter lists of the two convolutional branches, each
      [conv_w1, conv_b1, conv_w2, conv_b2, fc_w, fc_b] in the order
      three_layer_convnet unpacks them:
        - conv_w1: (5, 5, 3, 32) filter of the first conv layer
        - conv_b1: (32,) bias of the first conv layer
        - conv_w2: (3, 3, 32, 16) filter of the second conv layer
        - conv_b2: (16,) bias of the second conv layer
        - fc_w: (16 * 32 * 32, 10) weights of the fully-connected layer
        - fc_b: (10,) bias of the fully-connected layer
    - feedc: central-path weights [c11, c12, c21, c22, cc1] with shapes
      (32 * 32 * 32, 4), (32 * 32 * 32, 4), (16 * 32 * 32, 10),
      (16 * 32 * 32, 10) and (4, 10).

    Weights are tf.Variables initialized with kaiming_normal; biases are
    tf.Variables initialized to zero.
    """
    def init_branch():
        # One three_layer_convnet branch; variables are created in unpack order.
        conv_w1 = tf.Variable(kaiming_normal([5, 5, 3, 32]))
        conv_b1 = tf.Variable(tf.zeros(32))
        conv_w2 = tf.Variable(kaiming_normal([3, 3, 32, 16]))
        conv_b2 = tf.Variable(tf.zeros(16))
        fc_w = tf.Variable(kaiming_normal((16 * 32 * 32, 10)))
        fc_b = tf.Variable(tf.zeros(10))
        return [conv_w1, conv_b1, conv_w2, conv_b2, fc_w, fc_b]

    feed1 = init_branch()
    feed2 = init_branch()

    # Central path: two projections per fusion stage plus the stage-to-stage link.
    c11 = tf.Variable(kaiming_normal((32 * 32 * 32, 4)))
    c12 = tf.Variable(kaiming_normal((32 * 32 * 32, 4)))
    c21 = tf.Variable(kaiming_normal((16 * 32 * 32, 10)))
    c22 = tf.Variable(kaiming_normal((16 * 32 * 32, 10)))
    cc1 = tf.Variable(kaiming_normal((4, 10)))
    feedc = [c11, c12, c21, c22, cc1]

    return [feed1, feed2, feedc]


In [11]:
# Build one set of parameters in the current default graph and display the
# nested list that conv_centralnet_init returns.
conv_centralnet_init()

[[<tf.Variable 'Variable:0' shape=(5, 5, 3, 32) dtype=float32_ref>,
  <tf.Variable 'Variable_1:0' shape=(32,) dtype=float32_ref>,
  <tf.Variable 'Variable_2:0' shape=(3, 3, 32, 16) dtype=float32_ref>,
  <tf.Variable 'Variable_3:0' shape=(16,) dtype=float32_ref>,
  <tf.Variable 'Variable_4:0' shape=(16384, 10) dtype=float32_ref>,
  <tf.Variable 'Variable_5:0' shape=(10,) dtype=float32_ref>],
 [<tf.Variable 'Variable_6:0' shape=(5, 5, 3, 32) dtype=float32_ref>,
  <tf.Variable 'Variable_7:0' shape=(32,) dtype=float32_ref>,
  <tf.Variable 'Variable_8:0' shape=(3, 3, 32, 16) dtype=float32_ref>,
  <tf.Variable 'Variable_9:0' shape=(16,) dtype=float32_ref>,
  <tf.Variable 'Variable_10:0' shape=(16384, 10) dtype=float32_ref>,
  <tf.Variable 'Variable_11:0' shape=(10,) dtype=float32_ref>],
 [<tf.Variable 'Variable_12:0' shape=(32768, 4) dtype=float32_ref>,
  <tf.Variable 'Variable_13:0' shape=(32768, 4) dtype=float32_ref>,
  <tf.Variable 'Variable_14:0' shape=(16384, 10) dtype=float32_ref>,
  <

In [12]:
# Step size for the gradient-descent update applied on every training iteration.
learning_rate = 3e-5
train_part2(
    model_fn=conv_centralnet,
    init_fn=conv_centralnet_init,
    learning_rate=learning_rate,
)

Iteration 0, loss = 5.1540
Got 103 / 1000 correct (10.30%)
Iteration 100, loss = 3.3509
Got 156 / 1000 correct (15.60%)
Iteration 200, loss = 3.1254
Got 171 / 1000 correct (17.10%)
Iteration 300, loss = 3.1145
Got 190 / 1000 correct (19.00%)
Iteration 400, loss = 2.7905
Got 204 / 1000 correct (20.40%)
Iteration 500, loss = 3.0036
Got 217 / 1000 correct (21.70%)
Iteration 600, loss = 3.0157
Got 234 / 1000 correct (23.40%)
Iteration 700, loss = 2.5265
Got 238 / 1000 correct (23.80%)
