# 基于神经网络的花朵分类

## 一、搭建CNN模型

### Step1：获得图片数据

解压数据集到./datasets/flowers/


In [18]:
%matplotlib inline
import seaborn as sns
import os
import shutil
import random
# Switch the working directory so the relative dataset paths below resolve.
# NOTE(review): this absolute path is specific to one machine.
os.chdir('C:/Users/zhoumeng/Anaconda3/envs/tensorFlow/Lib/site-packages/keras')

# One source directory per flower class; the trailing slash lets file names
# be appended directly.
daisy1 = './datasets/flowers/daisy/'
dandelion1 = './datasets/flowers/dandelion/'
sunflowers1 = './datasets/flowers/sunflowers/'

# Relative path of every entry found in each class directory.
daisy = [daisy1 + file_name for file_name in os.listdir(daisy1)]
dandelion = [dandelion1 + file_name for file_name in os.listdir(dandelion1)]
sunflowers = [sunflowers1 + file_name for file_name in os.listdir(sunflowers1)]

# Report how many entries each class contains.
print('daisy count:', len(daisy), sep='')
print('dandelion count:', len(dandelion), sep='')
print('sunflowers count:', len(sunflowers), sep='')

daisy count:633
dandelion count:898
sunflowers count:699


In [22]:
print(daisy[1])  # Spot-check one entry to confirm the paths were built correctly

./datasets/flowers/daisy/2019064575_7656b9340f_m.jpg


### Step2：拆分训练集和验证集
注：为了加快速度，这里每类只选取200张图片作为训练集（共600张），50张图片作为验证集（共150张）。

In [24]:
# Root folder that receives the arranged train/validation images.
target = './datasets/flowers/arrange/'

# Shuffle each class's file list in place so that the split taken from its
# leading entries is random.
for class_files in (daisy, dandelion, sunflowers):
    random.shuffle(class_files)
def ensure_dir(dir_path):
    """Create ``dir_path`` together with any missing parent directories.

    Does nothing when the directory already exists.

    Args:
        dir_path: Path of the directory to create.

    Raises:
        OSError: If the directory cannot be created, for example because of
            missing permissions or because a regular file is in the way.
    """
    # exist_ok=True keeps the call idempotent without a check-then-create
    # race, while genuine failures surface here instead of being swallowed
    # and reappearing later as confusing copy errors.
    os.makedirs(dir_path, exist_ok=True)

# Build the train/validation folder tree and copy the selected images into it.
class_names = ('daisy', 'dandelion', 'sunflowers')

# Index ranges into the shuffled lists: the first 200 images of every class
# go to training (600 in total), the next 50 to validation (150 in total).
split_ranges = (('train', 0, 200), ('validation', 200, 250))

# zip() stops at the shortest class list, so every triple holds exactly one
# image per class.  Build the triples once and slice them for both splits.
file_triples = list(zip(daisy, dandelion, sunflowers))

for split_name, start, stop in split_ranges:
    split_dir = target + split_name

    # The file lists are reshuffled on every run, so leftovers from an
    # earlier run would pile up next to the new selection and could put the
    # same image in both train and validation.  Start from an empty folder.
    if os.path.isdir(split_dir):
        shutil.rmtree(split_dir)

    # One sub-folder per class.
    for class_name in class_names:
        ensure_dir(split_dir + '/' + class_name)

    # Copy each selected image into the folder of its class, keeping the
    # original file name.
    for triple in file_triples[start:stop]:
        for class_name, src_file in zip(class_names, triple):
            shutil.copyfile(
                src_file,
                split_dir + '/' + class_name + '/' + os.path.basename(src_file))

### Step3：处理图片数据

In [25]:
from keras.preprocessing.image import ImageDataGenerator
# Size, in pixels, that every image is resized to.
img_width = img_height = 128
# Shape of one network input: the two image dimensions followed by 3 channels.
input_shape = (img_width, img_height, 3)

# Folders holding the arranged training and validation images.
train_data_dir = target + 'train'
validation_data_dir = target + 'validation'

# Training generator: rescales the pixels and produces distorted variants of
# the images (rotation, shifts, shear, zoom, horizontal flip).
train_pic_gen = ImageDataGenerator(
    rescale=1. / 255,  # normalise the input pixels into the 0-1 range
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.5,
    horizontal_flip=True,  # horizontal flipping
    fill_mode='nearest')

# Validation images are not distorted; they only need the same normalisation.
validation_pic_gen = ImageDataGenerator(rescale=1. / 255)

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [26]:
# Settings shared by both directory iterators: the resize target, 32 images
# per batch (noted in the original as also being the default), and
# categorical labels for the three-class problem.
_flow_options = dict(
    target_size=(img_width, img_height),
    batch_size=32,
    class_mode='categorical')

# Training stream: images and labels are read from the class sub-folders.
train_flow = train_pic_gen.flow_from_directory(train_data_dir, **_flow_options)

# Validation stream, built the same way from the validation folder.
validation_flow = validation_pic_gen.flow_from_directory(
    validation_data_dir, **_flow_options)

Found 600 images belonging to 3 classes.
Found 150 images belonging to 3 classes.


### Step4:搭建神经网络

In [30]:
from keras.models import Sequential
from keras.layers import Convolution2D, MaxPooling2D
from keras.layers import Activation, Dropout, Flatten, Dense
# Training schedule.
#   steps_per_epoch:  an epoch ends once the generator has returned this many
#                     batches; the next epoch then starts.
#   validation_steps: number of batches drawn from the validation generator
#                     when validation_data is a generator.
# The values 2000 and 800 follow the example in the official documentation.
# NOTE(review): the validation folder was reported to hold 150 images and the
# batch size is 32, so 800 validation steps re-read the same images many
# times - confirm this is intended.
steps_per_epoch = 2000
validation_steps = 800
epochs = 1

# Two convolution + max-pooling stages (32, then 64 filters), followed by a
# dense layer with dropout 0.5 and a 3-way softmax output.
model = Sequential()
model.add(Convolution2D(32, (3, 3), input_shape=input_shape, activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Convolution2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Flatten())
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(3, activation='softmax'))

# Categorical cross-entropy loss, RMSprop optimizer, accuracy as the metric.
model.compile(
    optimizer='rmsprop',
    loss='categorical_crossentropy',
    metrics=['accuracy'])

注：由于epochs设置为1，为了保证准确率，这里将steps_per_epoch设置为2000，validation_steps设置为800。

### Step5：利用搭建好的神经网络进行识别

In [31]:
# Train on batches drawn from the augmented training stream: `epochs` epochs
# of `steps_per_epoch` batches each, with `validation_steps` batches taken
# from the validation stream for validation.
model.fit_generator(
        train_flow,
        steps_per_epoch=steps_per_epoch,
        epochs=epochs,
        validation_data=validation_flow,
        validation_steps=validation_steps)

Epoch 1/1


<keras.callbacks.History at 0x199e3149ac8>

#### 小结
    在这一部分中，利用Sequential方法搭建了包含两层卷积-池化的神经网络模型。最终训练集的准确率达到0.79，测试集的准确率达到0.84，效果不错。与老师展示的“猫狗识别问题”相比，这里的训练集和测试集虽然都更小，但由于是三分类问题，在参数相同的情况下，训练时间还是要长一些。

## 二、迁移学习——微调模型（fine-tuning）

In [32]:
from keras.applications.inception_v3 import InceptionV3
# Load InceptionV3 with ImageNet weights.
# NOTE(review): base_model is reassigned to a VGG16 model in a later cell
# before it is used, so this instance looks unused - consider removing it.
base_model = InceptionV3(weights='imagenet')

#### 迁移VGG16模型

In [33]:
from keras.models import Model
from keras.optimizers import SGD
from keras.applications.vgg16 import VGG16
# Image size used for the VGG16 input and for resizing the generator images.
img_width = img_height = 128
# Input shape: the two image dimensions followed by 3 channels.
input_shape = (img_width, img_height, 3)

In [34]:
# VGG16 with ImageNet weights, built without its top classifier layers
# (include_top=False) so that a new head can be attached for this task.
base_model = VGG16(weights='imagenet', include_top=False, input_shape=input_shape)

#### 利用Model的方法搭建模型

In [35]:
from keras.layers import Dropout, Flatten, Dense
# New classification head stacked on the output of the pretrained base.
x = base_model.output
x = Flatten()(x)
x = Dense(256, activation='relu')(x)
x = Dropout(0.5)(x)
# Three-way softmax output for the flower classes.
y = Dense(3, activation='softmax')(x)
# Complete model: from the base model's input to the new softmax output.
model = Model(inputs=base_model.input, outputs=y)

#### 处理图片数据

注:为了加快速度，这里的epochs设置为1，steps_per_epoch设置为1000，validation_steps设置为800

In [36]:
from keras.preprocessing.image import ImageDataGenerator
# Location of the arranged dataset and of its two splits.
target = './datasets/flowers/arrange/'
train_data_dir = target + 'train'
validation_data_dir = target + 'validation'

# Training schedule for this run.
steps_per_epoch = 1000
validation_steps = 800
epochs = 1

# Training generator: rescales the pixels by 1/255 and applies shear, zoom
# and horizontal flips.  The original note says this step can be skipped when
# the previously created generators are reused.
# NOTE(review): the original comments described this as mean subtraction, but
# the code only rescales by 1/255.  ImageNet-pretrained VGG16 is usually fed
# through keras.applications.vgg16.preprocess_input - confirm the rescaling
# is intended.
train_pic_gen = ImageDataGenerator(
    rescale=1. / 255,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True)

# Validation images are not distorted; they only get the same rescaling.
validation_pic_gen = ImageDataGenerator(rescale=1. / 255)

# Settings shared by both directory iterators.
_flow_options = dict(
    target_size=(img_width, img_height),
    batch_size=32,
    class_mode='categorical')

# Training stream: images and labels are read from the class sub-folders.
train_flow = train_pic_gen.flow_from_directory(train_data_dir, **_flow_options)

# Validation stream, built the same way from the validation folder.
validation_flow = validation_pic_gen.flow_from_directory(
    validation_data_dir, **_flow_options)

Found 600 images belonging to 3 classes.
Found 150 images belonging to 3 classes.


In [37]:
# Freeze the VGG16 layers so that the parameters trained on ImageNet are not
# changed during training.
for layer in base_model.layers:
    layer.trainable = False

#### 利用搭建好的模型进行图片识别

In [38]:
# Compile with RMSprop, categorical cross-entropy loss and accuracy as the
# metric.
model.compile(optimizer='rmsprop',
              loss='categorical_crossentropy',
              metrics=['accuracy'])
# Train on batches drawn from the training stream and validate on batches
# drawn from the validation stream.
model.fit_generator(
        train_flow,
        steps_per_epoch=steps_per_epoch,  # 1000
        epochs=epochs,
        validation_data=validation_flow,
        validation_steps=validation_steps)  # 800

Epoch 1/1


<keras.callbacks.History at 0x199888292e8>

#### 小结：
    在这一部分中，迁移已有的VGG16，利用Model的方法搭建神经网络。最终训练集的准确率为0.91，测试集的准确率为0.88。与上一部分自己搭建的两层神经网络相比，在每轮迭代步数（steps_per_epoch）减少一半的情况下，准确率依然有所提高，效果较好。这一部分运行了将近3个小时。