In [1]:
import os
import tensorflow as tf
import numpy as np
import math
import timeit
#import matplotlib.pyplot as plt

#%matplotlib inline

In [2]:
# Flip to True to place graph operations on the first GPU instead of the CPU.
USE_GPU = False

# Device string later handed to tf.device() when graphs are constructed.
device = '/device:GPU:0' if USE_GPU else '/cpu:0'

# Number of training iterations between progress reports.
print_every = 100

print('Using device: ', device)

Using device:  /cpu:0


In [3]:
# Prepare the data

def load_cifar10(num_training=49000, num_validation=1000, num_test=10000):
    """
    Load CIFAR-10 via tf.keras.datasets.cifar10.load_data(), carve out
    train / validation / test splits and normalize the images.

    The validation split consists of the `num_validation` training images that
    directly follow the first `num_training` ones. Images are converted to
    float32, labels to flat (1-D) int32 arrays. Normalization statistics are
    computed over axes (0, 1, 2) of the training split only and then applied
    to all three splits.

    Inputs:
    - num_training: Number of leading training images kept for training (>= 1)
    - num_validation: Number of following training images used for validation
    - num_test: Number of leading test images to keep

    Returns: Tuple (X_train, y_train, X_val, y_val, X_test, y_test)

    Raises:
    - ValueError if a requested split size is negative, if num_training is
      zero, or if the requested sizes exceed the available data.
    """
    # Load the raw CIFAR-10 dataset and use appropriate data types and shapes
    (X_train, y_train), (X_test, y_test) = tf.keras.datasets.cifar10.load_data()
    X_train = np.asarray(X_train, dtype=np.float32)
    y_train = np.asarray(y_train, dtype=np.int32).flatten()
    X_test = np.asarray(X_test, dtype=np.float32)
    y_test = np.asarray(y_test, dtype=np.int32).flatten()

    # Validate the requested sizes up front: slicing would otherwise truncate
    # silently, and an empty training split would yield NaN statistics.
    if num_training < 1:
        raise ValueError('num_training must be at least 1, got %r' % (num_training,))
    if num_validation < 0 or num_test < 0:
        raise ValueError('num_validation and num_test must be non-negative')
    if num_training + num_validation > X_train.shape[0]:
        raise ValueError(
            'num_training + num_validation = %d exceeds the %d available training images'
            % (num_training + num_validation, X_train.shape[0]))
    if num_test > X_test.shape[0]:
        raise ValueError(
            'num_test = %d exceeds the %d available test images'
            % (num_test, X_test.shape[0]))

    # Subsample the data. Plain slices select the same elements as indexing
    # with range(...) but avoid an extra copy of the large float arrays.
    val_end = num_training + num_validation
    X_val = X_train[num_training:val_end]
    y_val = y_train[num_training:val_end]
    X_train = X_train[:num_training]
    y_train = y_train[:num_training]
    X_test = X_test[:num_test]
    y_test = y_test[:num_test]

    # Normalize the data: subtract the mean pixel and divide by std.
    # keepdims=True keeps the reduced axes so the statistics broadcast
    # against the image arrays.
    mean_pixel = X_train.mean(axis=(0, 1, 2), keepdims=True)
    std_pixel = X_train.std(axis=(0, 1, 2), keepdims=True)
    X_train = (X_train - mean_pixel) / std_pixel
    X_val = (X_val - mean_pixel) / std_pixel
    X_test = (X_test - mean_pixel) / std_pixel

    return X_train, y_train, X_val, y_val, X_test, y_test


# Invoke the above function to get our data.
# Axis triple (0, 1, 2); it is not referenced again within this cell.
NHW = (0, 1, 2)

# Invoke the loader defined above to obtain all six arrays.
X_train, y_train, X_val, y_val, X_test, y_test = load_cifar10()

# Report every split's shape (plus the dtype of the training labels).
for _report in (
    ('Train data shape: ', X_train.shape),
    ('Train labels shape: ', y_train.shape, y_train.dtype),
    ('Validation data shape: ', X_val.shape),
    ('Validation labels shape: ', y_val.shape),
    ('Test data shape: ', X_test.shape),
    ('Test labels shape: ', y_test.shape),
):
    print(*_report)
del _report  # keep the notebook namespace free of the loop helper

class Dataset(object):
    """Iterable yielding (data, labels) minibatches from in-memory arrays."""

    def __init__(self, X, y, batch_size, shuffle=False):
        """
        Construct a Dataset object to iterate over data X and labels y

        Inputs:
        - X: Numpy array of data, of any shape
        - y: Numpy array of labels, of any shape but with y.shape[0] == X.shape[0]
        - batch_size: Positive integer giving number of elements per minibatch
        - shuffle: (optional) Boolean, whether to shuffle the data on each epoch

        Raises:
        - AssertionError if X and y disagree on their first dimension
        - ValueError if batch_size is not positive
        """
        assert X.shape[0] == y.shape[0], 'Got different numbers of data and labels'
        if batch_size <= 0:
            raise ValueError('batch_size must be a positive integer, got %r' % (batch_size,))
        self.X, self.y = X, y
        self.batch_size, self.shuffle = batch_size, shuffle

    def __iter__(self):
        """
        Return an iterator over (X_batch, y_batch) pairs covering the data once.

        Every call starts a new pass; with shuffle=True a fresh permutation is
        drawn from NumPy's global random state for that pass. The final batch
        may hold fewer than batch_size elements.
        """
        N, B = self.X.shape[0], self.batch_size
        if not self.shuffle:
            # Sequential pass: contiguous slices, no index array required.
            return iter((self.X[i:i+B], self.y[i:i+B]) for i in range(0, N, B))
        idxs = np.arange(N)
        np.random.shuffle(idxs)
        # Index both arrays through the same permuted indices so that each
        # sample stays paired with its label.
        return iter((self.X[idxs[i:i+B]], self.y[idxs[i:i+B]]) for i in range(0, N, B))


# Minibatch iterators for the three splits; only the training one is
# constructed with shuffle=True.
train_dset = Dataset(X=X_train, y=y_train, batch_size=64, shuffle=True)
val_dset = Dataset(X=X_val, y=y_val, batch_size=64, shuffle=False)
test_dset = Dataset(X=X_test, y=y_test, batch_size=64, shuffle=False)

# Demonstrate iteration: show the shapes of the first seven minibatches.
for t, (x, y) in enumerate(train_dset):
    print(t, x.shape, y.shape)
    if t >= 6:
        break

Train data shape:  (49000, 32, 32, 3)
Train labels shape:  (49000,) int32
Validation data shape:  (1000, 32, 32, 3)
Validation labels shape:  (1000,)
Test data shape:  (10000, 32, 32, 3)
Test labels shape:  (10000,)
0 (64, 32, 32, 3) (64,)
1 (64, 32, 32, 3) (64,)
2 (64, 32, 32, 3) (64,)
3 (64, 32, 32, 3) (64,)
4 (64, 32, 32, 3) (64,)
5 (64, 32, 32, 3) (64,)
6 (64, 32, 32, 3) (64,)


In [4]:
def check_accuracy(sess, dset, x1,x2, scores, is_training=None):
    """
    Check accuracy on a classification model.

    Inputs:
    - sess: A TensorFlow Session that will be used to run the graph
    - dset: A Dataset object on which to check accuracy
    - x1, x2: The two TensorFlow placeholder Tensors where input images should
      be fed; every minibatch is fed to both of them
    - scores: A TensorFlow Tensor representing the scores output from the
      model; this is the Tensor we will ask TensorFlow to evaluate.
    - is_training: (optional) Placeholder selecting training/inference mode.
      When given it is fed False; when None nothing is fed for it.

    Returns: Nothing, but prints the accuracy of the model
    """
    num_correct, num_samples = 0, 0
    for x_batch, y_batch in dset:
        feed_dict = {x1: x_batch, x2: x_batch}
        # Only feed the mode flag when the caller supplied a placeholder for
        # it; otherwise the feed dict would contain a None key.
        if is_training is not None:
            feed_dict[is_training] = False
        scores_np = sess.run(scores, feed_dict=feed_dict)
        y_pred = scores_np.argmax(axis=1)
        num_samples += x_batch.shape[0]
        num_correct += (y_pred == y_batch).sum()
    # An empty dataset is reported as 0% instead of dividing by zero.
    acc = float(num_correct) / num_samples if num_samples else 0.0
    print('Got %d / %d correct (%.2f%%)' % (num_correct, num_samples, 100 * acc))

In [9]:
class CentralNet(tf.keras.Model):
    """
    Two-input classifier whose inputs share one stack of two convolutions.

    Both inputs go through the same conv1 and conv2 layers. The flattened
    conv1 activations of the two inputs are projected by fc4c1 and summed;
    the flattened conv2 activations are projected by fc4c2 and summed with
    fc4cc applied to that first sum. The final scores add fc4conv of each
    input's flattened conv2 activations to this fused term.
    """

    def __init__(self, channel_1, channel_2, num_classes,c1):
        """
        Create the layers used by call().

        Inputs:
        - channel_1: Number of filters of the first (5x5) convolution
        - channel_2: Number of filters of the second (3x3) convolution
        - num_classes: Number of output scores per example
        - c1: Number of units of the first fusion projection (fc4c1)
        """
        super().__init__()
        # One initializer object is shared by every kernel below.
        kernel_init = tf.variance_scaling_initializer(scale=2.0)

        def make_conv(filters, kernel_size):
            # Stride-1, SAME-padded, ReLU-activated convolution with bias.
            return tf.layers.Conv2D(filters, kernel_size=kernel_size,
                                    strides=(1,1), padding="SAME",
                                    activation=tf.nn.relu, use_bias=True,
                                    kernel_initializer=kernel_init,
                                    bias_initializer=tf.zeros_initializer())

        def make_dense(units, with_bias):
            # Linear (no activation) fully-connected layer.
            return tf.layers.Dense(units, activation=None, use_bias=with_bias,
                                   kernel_initializer=kernel_init,
                                   bias_initializer=tf.zeros_initializer())

        self.conv1 = make_conv(channel_1, (5,5))
        self.conv2 = make_conv(channel_2, (3,3))
        # Bias-free projections feeding the fused branch.
        self.fc4c1 = make_dense(c1, False)
        self.fc4c2 = make_dense(num_classes, False)
        self.fc4cc = make_dense(num_classes, False)
        # Per-input classifier on the conv2 features (the only Dense with bias).
        self.fc4conv = make_dense(num_classes, True)
        self.flatten = tf.keras.layers.Flatten()

    def call(self, x1,x2, training=None):
        """
        Forward pass.

        Inputs:
        - x1, x2: The two input tensors; both pass through the same layers
        - training: Accepted but not used by any layer of this model

        Returns: Tensor of class scores
        """
        # First convolution on each input, plus its flattened activations.
        first_a = self.conv1(x1)
        first_a_flat = self.flatten(first_a)
        first_b = self.conv1(x2)
        first_b_flat = self.flatten(first_b)
        fused_1 = self.fc4c1(first_a_flat) + self.fc4c1(first_b_flat)

        # Second convolution on each input.
        second_a = self.conv2(first_a)
        second_b = self.conv2(first_b)
        second_a_flat = self.flatten(second_a)
        second_b_flat = self.flatten(second_b)
        fused_2 = (self.fc4c2(second_a_flat) + self.fc4c2(second_b_flat)
                   + self.fc4cc(fused_1))

        # Final scores: per-input heads plus the fused term.
        return self.fc4conv(second_a_flat) + self.fc4conv(second_b_flat) + fused_2

In [10]:
def test_CentralNet():
    """
    Smoke test for CentralNet: build the model on two all-zero image batches
    and print the shape of the resulting scores (one row per example, one
    column per class).
    """
    tf.reset_default_graph()

    channel_1, channel_2, num_classes,c1 = 12, 8, 10, 4
    model = CentralNet(channel_1, channel_2, num_classes,c1)
    with tf.device(device):
        # Use the same channels-last (N, 32, 32, 3) layout as the placeholders
        # the training loop feeds; a (N, 3, 32, 32) tensor would be read by
        # the default channels-last convolutions as 3x32 images with 32
        # channels.
        x1 = tf.zeros((64, 32, 32, 3))
        x2 = tf.zeros((64, 32, 32, 3))
        scores = model(x1,x2)

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        scores_np = sess.run(scores)
        print(scores_np.shape)

test_CentralNet()

(64, 10)


In [7]:
def train_part34(model_init_fn, optimizer_init_fn, num_epochs=1):
    """
    Simple training loop for use with two-input models defined using tf.keras.
    It trains a model for `num_epochs` epochs on the training set (train_dset)
    and periodically checks accuracy on the validation set (val_dset). Every
    minibatch is fed to both model inputs.

    Inputs:
    - model_init_fn: A function called as model_init_fn(x1, x2, is_training)
      with the two image placeholders and the boolean mode placeholder; it
      constructs the model and returns the scores Tensor.
    - optimizer_init_fn: A function which takes no parameters; when called it
      constructs the Optimizer object we will use to optimize the model:
      optimizer = optimizer_init_fn()
    - num_epochs: The number of epochs to train for

    Returns: Nothing, but prints progress during training
    """
    tf.reset_default_graph()
    with tf.device(device):
        # Construct the computational graph we will use to train the model. We
        # use the model_init_fn to construct the model, declare placeholders for
        # the data and labels
        x1 = tf.placeholder(tf.float32, [None, 32, 32, 3])
        x2 = tf.placeholder(tf.float32, [None, 32, 32, 3])
        y = tf.placeholder(tf.int32, [None])

        # We need a place holder to explicitly specify if the model is in the training
        # phase or not. This is because a number of layers behaves differently in
        # training and in testing, e.g., dropout and batch normalization.
        # We pass this variable to the computation graph through feed_dict as shown below.
        is_training = tf.placeholder(tf.bool, name='is_training')

        # Use the model function to build the forward pass.
        scores = model_init_fn(x1,x2, is_training)

        # Mean softmax cross-entropy between the integer labels and the scores.
        loss = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=scores)
        loss = tf.reduce_mean(loss)

        # Use the optimizer_fn to construct an Optimizer, then use the optimizer
        # to set up the training step. Asking TensorFlow to evaluate the
        # train_op returned by optimizer.minimize(loss) will cause us to make a
        # single update step using the current minibatch of data.

        # Note that we use tf.control_dependencies to force the model to run
        # the tf.GraphKeys.UPDATE_OPS at each training step. tf.GraphKeys.UPDATE_OPS
        # holds the operators that update the states of the network.
        # For example, the tf.layers.batch_normalization function adds the running mean
        # and variance update operators to tf.GraphKeys.UPDATE_OPS.
        optimizer = optimizer_init_fn()
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):
            train_op = optimizer.minimize(loss)

    # Now we can run the computational graph many times to train the model.
    # When we call sess.run we ask it to evaluate train_op, which causes the
    # model to update.
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        t = 0  # global iteration counter, carried across epochs
        for epoch in range(num_epochs):
            print('Starting epoch %d' % epoch)
            for x_np, y_np in train_dset:
                # Feed a real boolean to the tf.bool placeholder.
                feed_dict = {x1: x_np, x2: x_np, y: y_np, is_training: True}
                loss_np, _ = sess.run([loss, train_op], feed_dict=feed_dict)
                if t % print_every == 0:
                    print('Iteration %d, loss = %.4f' % (t, loss_np))
                    check_accuracy(sess, val_dset, x1,x2, scores, is_training=is_training)
                    print()
                t += 1

In [8]:
# Step size used by the optimizer built in optimizer_init_fn.
learning_rate = 3e-5

# CentralNet constructor arguments used by model_init_fn.
channel_1 = 32
channel_2 = 16
num_classes = 10
c1 = 4

def model_init_fn(inputs1,inputs2, is_training):
    """
    Build a CentralNet from the module-level hyperparameters and apply it to
    the two input tensors.

    Inputs:
    - inputs1, inputs2: The two input tensors handed to the model
    - is_training: Accepted for the training loop's calling convention; it is
      not forwarded to the model

    Returns: The scores Tensor produced by the model
    """
    net = CentralNet(channel_1, channel_2, num_classes,c1)
    scores = net(inputs1,inputs2)
    return scores

def optimizer_init_fn():
    """Return a gradient-descent optimizer using the module-level learning_rate."""
    return tf.train.GradientDescentOptimizer(learning_rate)

# Train with the model and optimizer factories defined above (default: one epoch).
train_part34(model_init_fn=model_init_fn, optimizer_init_fn=optimizer_init_fn)

Starting epoch 0
Iteration 0, loss = 7.4852
Got 68 / 1000 correct (6.80%)

Iteration 100, loss = 3.6246
Got 157 / 1000 correct (15.70%)

Iteration 200, loss = 3.6126
Got 204 / 1000 correct (20.40%)

Iteration 300, loss = 3.3209
Got 207 / 1000 correct (20.70%)

Iteration 400, loss = 3.0245
Got 224 / 1000 correct (22.40%)

Iteration 500, loss = 3.5126
Got 231 / 1000 correct (23.10%)

Iteration 600, loss = 3.1422
Got 238 / 1000 correct (23.80%)

Iteration 700, loss = 3.1523
Got 252 / 1000 correct (25.20%)

