# 用VGG13实现cifar数据集上的图像识别

## The CIFAR-10 dataset

**网址**：

http://www.cs.toronto.edu/~kriz/cifar.html?usg=alkjrhjqbhw2llxlo8emqns-tbk0at96jq

**介绍**：

The CIFAR-10 dataset consists of 60000 32x32 colour images in 10 classes, with 6000 images per class. There are 50000 training images and 10000 test images.

The dataset is divided into five training batches and one test batch, each with 10000 images. The test batch contains exactly 1000 randomly-selected images from each class. The training batches contain the remaining images in random order, but some training batches may contain more images from one class than another. Between them, the training batches contain exactly 5000 images from each class.

<img src="CIFAR-10.PNG" width="50%">

The classes are completely mutually exclusive. There is no overlap between automobiles and trucks. "Automobile" includes sedans, SUVs, things of that sort. "Truck" includes only big trucks. Neither includes pickup trucks.

在TensorFlow中，通过datasets.cifar10.load_data()函数就可以直接加载切割好的训练集和测试集。

TensorFlow会自动将数据集下载在 `C:\Users\用户名\.keras\datasets` 路径下，用户可以查看，也可手动删除不需要的数据集缓存。下面的代码运行后，得到训练集的**x**和**y**形状为(50000, 32, 32, 3)和(50000,)，测试集的**x**和**y**形状为(10000, 32, 32, 3)和(10000,)，分别代表图片大小为32 × 32、3通道彩色图片，训练集样本数为50000，测试集样本数为10000。

In [None]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers,Sequential,optimizers,losses,datasets
from matplotlib import pyplot as plt
import seaborn as sns
import numpy as np
from sklearn.datasets import make_moons 
from sklearn.model_selection import train_test_split

# Download (or read from the local Keras cache) the CIFAR-10 train/test split.
(x, y), (x_test, y_test) = datasets.cifar10.load_data()
# Drop the trailing length-1 label axis: [b, 1] => [b].
y, y_test = (tf.squeeze(labels, axis=1) for labels in (y, y_test))
# Print the shapes of the training and test sets.
print(x.shape, y.shape, x_test.shape, y_test.shape)

def preprocess(x, y):
    """Convert one (image, label) pair to the dtypes and range the network uses.

    Args:
        x: Image tensor. Pixel values are assumed to lie in [0, 255] so that
            the result falls in [-1, 1] -- TODO confirm for non-CIFAR inputs.
        y: Label tensor.

    Returns:
        A tuple ``(x, y)`` where ``x`` is float32 computed as
        ``2 * x / 255 - 1`` and ``y`` is cast to int32.
    """
    pixels = tf.cast(x, dtype=tf.float32)
    scaled = 2 * pixels / 255. - 1
    labels = tf.cast(y, dtype=tf.int32)
    return scaled, labels

# Training pipeline: slice into samples, shuffle, preprocess, then batch.
train_db = tf.data.Dataset.from_tensor_slices((x, y))
train_db = train_db.shuffle(1000)
train_db = train_db.map(preprocess)
train_db = train_db.batch(128)

# Test pipeline: same preprocessing and batching, but no shuffling.
test_db = tf.data.Dataset.from_tensor_slices((x_test, y_test))
test_db = test_db.map(preprocess)
test_db = test_db.batch(128)

# Pull a single batch from the training set and inspect its shapes and
# value range.
sample = next(iter(train_db))
print(
    'sample:',
    sample[0].shape,
    sample[1].shape,
    tf.reduce_min(sample[0]),
    tf.reduce_max(sample[0]),
)

## VGG13
我们将基于VGG13网络，根据我们的数据集特点修改部分网络结构，完成 CIFAR10 图片识别，修改如下：

1. 将网络输入调整为32×32。原网络输入为224×224，导致全连接层输入特征维度过大，网络参数量过大。

2. 3个全连接层的维度调整为[256,128,10]，满足10分类任务的设定。

<img src="VGG13.PNG">

我们将网络实现为 2个子网络：**卷积子网络**和**全连接子网络**。卷积子网络由5个子模块构成，每个子模块包含了**Conv-Conv-MaxPooling**单元结构。

In [None]:
# Convolutional sub-network.
#
# The layer list is made of five Conv-Conv-MaxPooling units. Within a unit
# both convolutions use 3x3 kernels with "same" padding (input and output
# have the same height/width), and the 2x2 stride-2 pooling halves the
# height and width. The output channel count per unit is 64, 128, 256, 512
# and 512.
#
# Question: why does the channel count grow with depth?
# Answer: image recognition is generally regarded as a representation
# learning process. Starting from raw pixel features, the network gradually
# extracts low-level features such as edges and corners, then mid-level
# features such as textures, then high-level features such as heads and
# object parts. So the early convolution layers have few channels and
# extract low-level features, while the later ones have many channels and
# extract high-level features.
#
# Question: why is pooling needed?
conv_layers = [
    layer
    for filters in (64, 128, 256, 512, 512)
    for layer in (
        layers.Conv2D(filters, kernel_size=[3, 3], padding="same", activation=tf.nn.relu),
        layers.Conv2D(filters, kernel_size=[3, 3], padding="same", activation=tf.nn.relu),
        layers.MaxPooling2D(pool_size=[2, 2], strides=2, padding="same"),
    )
]

# Wrap the layer list created above in a Sequential container.
conv_net = Sequential(conv_layers)

# Fully connected sub-network: two ReLU hidden layers followed by a
# 10-unit output layer without activation (it outputs raw logits).
fc_net = Sequential([
    layers.Dense(units, activation=activation)
    for units, activation in ((256, tf.nn.relu), (128, tf.nn.relu), (10, None))
])

# Build both sub-networks with their input shapes, then print their
# parameter summaries.
conv_net.build(input_shape=(None, 32, 32, 3))
fc_net.build(input_shape=(None, 512))
conv_net.summary()
fc_net.summary()

# Adam optimizer with a learning rate of 1e-4 (the default is 0.001).
# `learning_rate` is the supported keyword; the legacy `lr` alias is
# deprecated and is rejected by newer Keras releases.
optimizer = optimizers.Adam(learning_rate=1e-4)
# Parameters to update: the trainable variables of both sub-networks.
variables = conv_net.trainable_variables + fc_net.trainable_variables

## 训练并保存网络

In [None]:
# Model training and evaluation.
def _forward(images):
    """Run both sub-networks on a batch and return the [b, 10] logits."""
    # [b, 32, 32, 3] => [b, 1, 1, 512]
    features = conv_net(images)
    # flatten => [b, 512]
    features = tf.reshape(features, [-1, 512])
    # [b, 512] => [b, 10]
    return fc_net(features)


def _evaluate(dataset):
    """Return the fraction of samples in `dataset` that are classified correctly."""
    total_num = 0
    total_correct = 0
    for x, y in dataset:
        logits = _forward(x)
        # argmax over the logits equals argmax over softmax(logits), so the
        # softmax is skipped.
        pred = tf.cast(tf.argmax(logits, axis=1), dtype=tf.int32)
        correct = tf.reduce_sum(tf.cast(tf.equal(pred, y), dtype=tf.int32))
        total_num += x.shape[0]
        total_correct += int(correct)
    return total_correct / total_num


def main(epochs=10, log_every=100):
    """Train the two sub-networks on `train_db` and report accuracy on `test_db`.

    Args:
        epochs: Number of passes over the training set.
        log_every: Print the training loss every this many steps.

    Uses the module-level `train_db`, `test_db`, `conv_net`, `fc_net`,
    `optimizer` and `variables`. Prints the loss during training and the test
    accuracy after every epoch; returns nothing.
    """
    for epoch in range(epochs):
        for step, (x, y) in enumerate(train_db):
            with tf.GradientTape() as tape:
                logits = _forward(x)
                # [b] => [b, 10]
                y_onehot = tf.one_hot(y, depth=10)
                # Cross-entropy on raw logits, averaged over the batch.
                loss = tf.losses.categorical_crossentropy(y_onehot, logits, from_logits=True)
                loss = tf.reduce_mean(loss)

            grads = tape.gradient(loss, variables)
            optimizer.apply_gradients(zip(grads, variables))

            if step % log_every == 0:
                print(epoch, step, 'loss:', float(loss))

        acc = _evaluate(test_db)
        print(epoch, 'acc:', acc)


if __name__ == '__main__':
    main()

**Save and load the model**

High level keras `model.save` and `tf.keras.models.load_model`

```
keras_model_path = "/tmp/keras_save"
model.save(keras_model_path)  # save() should be called out of strategy scope
```

Low level `tf.saved_model.save` and `tf.saved_model.load`

即SavedModel方式，TensorFlow之所以能够被业界青睐，除了优秀的神经网络层API支持之外，还得益于它强大的生态系统，包括移动端和网页端等的支持。当需要将模型部署到其他平台时，采用 TensorFlow提出的SavedModel方式更具有平台无关性。

参考网址:

https://www.tensorflow.org/tutorials/distribute/save_and_load

In [None]:
# Write each sub-network to its own HDF5 file in the working directory.
conv_net.save(filepath='conv_net.h5')
print('saving conv-net')
# (Optionally `del conv_net` here to check that loading really restores it.)
fc_net.save(filepath='fc_net.h5')
print('saving fc_net.')
# (Optionally `del fc_net` here as well.)

## 加载网络
```
restored_keras_model = tf.keras.models.load_model(keras_model_path)
restored_keras_model.fit(train_dataset, epochs=2)
```

In [None]:
# Restore both sub-networks from the HDF5 files written by the save cell.
print('load conv_net from file.')
conv_net = keras.models.load_model(filepath='conv_net.h5')
print('load fc_net from file.')
fc_net = keras.models.load_model(filepath='fc_net.h5')

关于WARNING:tensorflow:No training configuration found in save file: the model was *not* compiled. Compile it manually.
            
见：[userwarning-no-training-configuration-found-in-save-file-the-model-was-not-c](https://stackoverflow.com/questions/53295570/userwarning-no-training-configuration-found-in-save-file-the-model-was-not-c)

## 剪裁并显示图片

In [None]:
# Read the image from disk and display it at its original size.
import imageio

image_value = imageio.imread('cat.jpg')
plt.imshow(image_value)
plt.show()

# Shrink it to 32x32 with antialiasing, cast the resized result to int32
# for display, and show it again.
image_value = tf.image.resize(image_value, size=[32, 32], antialias=True)
image_value = tf.cast(image_value, dtype=tf.int32)
plt.imshow(image_value)
plt.show()

## 输出图片识别结果

In [None]:
# Maps a predicted class index (0-9) to its CIFAR-10 class name.
transfer = dict(enumerate([
    "airplane",
    "automobile",
    "bird",
    "cat",
    "deer",
    "dog",
    "frog",
    "horse",
    "ship",
    "truck",
]))

In [None]:
def test(file_name):
    """Classify one image file and plot it with the predicted class as x-label.

    Args:
        file_name: Path of the image file to read with ``imageio.imread``.

    The image is drawn on the current matplotlib axes, resized to 32x32,
    scaled with ``2 * x / 255 - 1`` and passed through the module-level
    `conv_net` and `fc_net`. The predicted class name from `transfer` is set
    as the x-axis label. Returns nothing.
    """
    # Keep only the first three channels, because the file may be read as
    # four channels.
    # NOTE(review): a 2-D grayscale image would fail on this slice -- confirm
    # whether such inputs need support.
    image_value = imageio.imread(file_name)[::, ::, 0:3]
    plt.imshow(image_value)

    image_value = tf.image.resize(image_value, [32, 32], antialias=True)
    # Add a leading batch axis: [32, 32, 3] => [1, 32, 32, 3].
    image_value = tf.expand_dims(image_value, axis=0)
    image_value = 2 * tf.cast(image_value, dtype=tf.float32) / 255. - 1

    out = conv_net(image_value)
    out = tf.reshape(out, [-1, 512])
    logits = fc_net(out)
    res = tf.argmax(logits, axis=1)
    # `res` has shape [1]; index the single element explicitly instead of
    # relying on the deprecated implicit array-to-scalar conversion.
    plt.xlabel(transfer[int(res[0])])

In [None]:
# Classify the sample image; test() draws it with the predicted class as the x-axis label.
file_name="cat.jpg"
test(file_name)

若报错：Failed to get convolution algorithm. This is probably because cuDNN failed to initialize,...

可能是GPU显存不足造成的，可尝试重启内核以释放显存。