# 搭建CNN模型，训练cifar100

## cifar100数据集介绍

cifar100数据集有20个大类别，每一个大类里面包含5个小类别，共100个小类别。cifar100的数据有60000张图像，其中50000张是训练数据，10000张是测试数据，每一个小类有500张训练数据和100张测试数据。
<img src="./images/cifar100.png" style="height: 60%;width: 60%; position: relative;right:10%">

## 搭建CNN模型

- 加载数据集

In [1]:
import tensorflow as tf
import time
import os
import sys
import math

In [2]:
# Print the installed TensorFlow version.
print(tf.__version__)

2.1.0


In [3]:
# Fetch the CIFAR-100 train/test splits through the Keras dataset helper.
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.cifar100.load_data()

# Report the shape of every array that was just loaded.
for _caption, _array in (
    ("train data shape: ", x_train),
    ("train label shape: ", y_train),
    ("test data shape: ", x_test),
    ("test label shape: ", y_test),
):
    print(_caption, _array.shape)

train data shape:  (50000, 32, 32, 3)
train label shape:  (50000, 1)
test data shape:  (10000, 32, 32, 3)
test label shape:  (10000, 1)


- 数据预处理

In [4]:
import numpy as np
# Work in float32 so the images can be standardized.
x_train, x_test = (split.astype("float32") for split in (x_train, x_test))

# A single scalar mean / std is computed over every axis of the training set
# (samples, height, width and channels together).
_all_axes = (0, 1, 2, 3)
means = x_train.mean(axis=_all_axes)
stds = x_train.std(axis=_all_axes)
print("mean: ", means)
print("std: ", stds)

# Standardize both splits with the *training* statistics; the small epsilon
# guards against division by zero.
_scale = stds + 1e-5
x_train = (x_train - means) / _scale
x_test = (x_test - means) / _scale

mean:  121.93584
std:  68.38902


In [5]:
# One-hot encode the integer labels over 100 classes and hand them to
# TensorFlow as tensors.
y_train = tf.convert_to_tensor(tf.keras.utils.to_categorical(y_train, 100))
y_test = tf.convert_to_tensor(tf.keras.utils.to_categorical(y_test, 100))

- 搭建CNN模型
  - 卷积
  - batchNormalization
  - 正则

In [6]:
class CNNModel(object):
    """Builder for a 13-convolution Keras classifier over 32x32x3 inputs.

    The network is a ``tf.keras.Sequential`` stack of five convolutional
    stages, each closed by a 2x2 / stride-2 max-pool, followed by two dense
    layers and a softmax over ``num_classes`` outputs.

    Attributes:
        drop_rate: Dropout rate applied after each of the two hidden dense
            layers.
        learning_rate: Learning rate handed to the SGD optimizer.
        weights: L2 regularization factor applied to every convolution
            kernel.
        num_classes: Width of the final softmax layer.
    """

    def __init__(self, drop_rate, learning_rate):
        """Store the hyperparameters used later by ``build_model``."""
        self.drop_rate = drop_rate
        self.learning_rate = learning_rate
        self.weights = 0.05
        self.num_classes = 100

    def _add_conv(self, model, filters, kernel_size, name, **extra):
        """Append one stride-1, same-padded, ReLU, L2-regularized Conv2D."""
        model.add(tf.keras.layers.Conv2D(
            filters=filters, kernel_size=kernel_size, strides=(1, 1),
            padding="same", activation="relu",
            # Passed positionally everywhere so all convolutions build their
            # regularizer the same way.
            kernel_regularizer=tf.keras.regularizers.l2(self.weights),
            name=name, **extra))

    @staticmethod
    def _add_norm_dropout(model, rate):
        """Append a BatchNormalization layer followed by Dropout(rate)."""
        model.add(tf.keras.layers.BatchNormalization())
        model.add(tf.keras.layers.Dropout(rate))

    @staticmethod
    def _add_pool(model, name=None):
        """Append a 2x2, stride-2, valid-padded max-pool layer."""
        model.add(tf.keras.layers.MaxPool2D(
            pool_size=(2, 2), strides=(2, 2), padding="valid", name=name))

    def build_model(self):
        """Assemble and compile the network.

        Layers are added in a fixed order so that explicitly named layers
        (``conv1`` .. ``conv13``, ``pool1``, ``pool2``, ``fc1``, ``fc2``)
        keep their names and positions.

        Returns:
            A compiled ``tf.keras.Sequential`` model using SGD, categorical
            cross-entropy loss and the ``accuracy`` metric.
        """
        model = tf.keras.Sequential()

        # Stage 1: two 3x3 convolutions share a single BN/dropout pair.
        self._add_conv(model, 64, (3, 3), "conv1", input_shape=(32, 32, 3))
        self._add_conv(model, 64, (3, 3), "conv2")
        self._add_norm_dropout(model, 0.3)
        self._add_pool(model, "pool1")

        # Stage 2: two 3x3 convolutions, each with its own BN/dropout.
        for conv_name in ("conv3", "conv4"):
            self._add_conv(model, 128, (3, 3), conv_name)
            self._add_norm_dropout(model, 0.3)
        self._add_pool(model, "pool2")

        # Stages 3-5: three 2x2 convolutions each, then an unnamed pool.
        deep_stages = (
            (256, 0.3, ("conv5", "conv6", "conv7")),
            (512, 0.4, ("conv8", "conv9", "conv10")),
            (512, 0.4, ("conv11", "conv12", "conv13")),
        )
        for filters, rate, conv_names in deep_stages:
            for conv_name in conv_names:
                self._add_conv(model, filters, (2, 2), conv_name)
                self._add_norm_dropout(model, rate)
            self._add_pool(model)

        # Classifier head.
        model.add(tf.keras.layers.Flatten())
        model.add(tf.keras.layers.Dense(512, activation="relu", name="fc1"))
        model.add(tf.keras.layers.Dropout(self.drop_rate))
        model.add(tf.keras.layers.Dense(256, activation="relu", name="fc2"))
        model.add(tf.keras.layers.Dropout(self.drop_rate))
        model.add(tf.keras.layers.Dense(self.num_classes, activation="softmax"))

        model.compile(
            optimizer=tf.keras.optimizers.SGD(learning_rate=self.learning_rate),
            loss=tf.keras.losses.CategoricalCrossentropy(),
            metrics=['accuracy'])
        return model

- 训练模型
 - 用ImageDataGenerator对数据进行旋转、平移（shift）和水平翻转处理

In [None]:
# Augmentation pipeline: random rotation, horizontal/vertical shifts and
# horizontal flips. All dataset-level normalization options are disabled
# because the inputs are already standardized before this point.
imggen = tf.keras.preprocessing.image.ImageDataGenerator(
    featurewise_center=False,  # set input mean to 0 over the dataset
    samplewise_center=False,  # set each sample mean to 0
    featurewise_std_normalization=False,  # divide inputs by std of the dataset
    samplewise_std_normalization=False,  # divide each input by its std
    zca_whitening=False,  # apply ZCA whitening
    rotation_range=15,  # randomly rotate images in the range (degrees, 0 to 180)
    width_shift_range=0.1,  # randomly shift images horizontally (fraction of total width)
    height_shift_range=0.1,  # randomly shift images vertically (fraction of total height)
    horizontal_flip=True,  # randomly flip images
    vertical_flip=False,
)
# NOTE: imggen.fit(x_train) is intentionally not called. Per the Keras docs it
# is only required when featurewise_center, featurewise_std_normalization or
# zca_whitening is enabled; with all three off it would only copy the dataset.

# Hyperparameters
batch_size = 128
dropout = 0.3
epochs = 2
learning_rate = 0.01

# Resume from a saved model when one exists, otherwise build a fresh one.
# The directory is created first because listing a missing directory raises,
# which would make the very first run fail before reaching the build branch.
os.makedirs("./models/cnn_cifar100/", exist_ok=True)
# Sorted so that "last entry" is deterministic across filesystems.
checkpoint_path = sorted(tf.io.gfile.listdir("./models/cnn_cifar100/"))
if checkpoint_path:
    # Load the previously saved model.
    model_file = os.path.join("./models/cnn_cifar100", checkpoint_path[-1])
    model = tf.keras.models.load_model(model_file)
else:
    model = CNNModel(dropout, learning_rate).build_model()
model.summary()

def lr_decay(epoch, *, initial_lr=0.01, drop=0.5, epochs_drop=20):
    """Return a step-decayed learning rate for the given epoch index.

    The rate starts at ``initial_lr`` and is multiplied by ``drop`` once for
    every ``epochs_drop`` epochs, counting ``epoch + 1`` epochs as elapsed.

    Args:
        epoch: Epoch index passed in by the scheduler callback.
        initial_lr: Learning rate before any decay step.
        drop: Multiplicative factor applied at each decay step.
        epochs_drop: Number of epochs between decay steps.

    Returns:
        The learning rate as a float.

    The tuning parameters are keyword-only on purpose: a scheduler that calls
    ``schedule(epoch, lr)`` must not have its second positional argument
    silently bound to ``initial_lr``.
    """
    decay_steps = math.floor((1 + epoch) / epochs_drop)
    return initial_lr * math.pow(drop, decay_steps)

# Callbacks: TensorBoard logging into a timestamped run directory, early
# stopping on validation loss, and the step learning-rate schedule.
# makedirs(exist_ok=True) avoids the check-then-create race of exists()+mkdir().
os.makedirs("./logs/", exist_ok=True)
log_dir = "./logs/cnn_cifar100_event-{}".format(int(time.time()))
my_callbacks = [
    tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1),
    tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=3),
    tf.keras.callbacks.LearningRateScheduler(lr_decay),
]

# Model.fit accepts generators / Sequences directly; fit_generator is a
# deprecated alias for the same behavior.
model.fit(
    imggen.flow(x_train, y_train, batch_size=batch_size),
    steps_per_epoch=x_train.shape[0] // batch_size,
    epochs=epochs,
    validation_data=(x_test, y_test),
    callbacks=my_callbacks,
)

# Persist the trained model; make sure the target directory exists first so
# the save does not fail on a fresh checkout.
checkpoint = "./models/cnn_cifar100/cnn_cifar100.h5"
os.makedirs(os.path.dirname(checkpoint), exist_ok=True)
model.save(checkpoint)
sys.stdout.flush()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv1 (Conv2D)               (None, 32, 32, 64)        1792      
_________________________________________________________________
conv2 (Conv2D)               (None, 32, 32, 64)        36928     
_________________________________________________________________
batch_normalization_12 (Batc (None, 32, 32, 64)        256       
_________________________________________________________________
dropout_14 (Dropout)         (None, 32, 32, 64)        0         
_________________________________________________________________
pool1 (MaxPooling2D)         (None, 16, 16, 64)        0         
_________________________________________________________________
conv3 (Conv2D)               (None, 16, 16, 128)       73856     
_________________________________________________________________
batch_normalization_13 (Batc (None, 16, 16, 128)      

- 测试

In [None]:
# Predict class probabilities for the test images.
result = model.predict(x_test)

# Compare the most likely predicted class with the one-hot ground truth.
_predicted_labels = tf.argmax(result, axis=1)
_true_labels = tf.argmax(y_test, axis=1)
acc = tf.cast(tf.equal(_predicted_labels, _true_labels), tf.float32)

# The mean of the 0/1 hit indicators is the test accuracy.
accuracy = tf.reduce_mean(acc)
print("测试准确率为： ", accuracy.numpy())