<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"><li><span><a href="#迁移学习" data-toc-modified-id="迁移学习-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>迁移学习</a></span></li><li><span><a href="#inception-v3模型相关参数" data-toc-modified-id="inception-v3模型相关参数-2"><span class="toc-item-num">2&nbsp;&nbsp;</span>inception-v3模型相关参数</a></span></li><li><span><a href="#新模型相关参数" data-toc-modified-id="新模型相关参数-3"><span class="toc-item-num">3&nbsp;&nbsp;</span>新模型相关参数</a></span></li><li><span><a href="#读取inception-v3模型" data-toc-modified-id="读取inception-v3模型-4"><span class="toc-item-num">4&nbsp;&nbsp;</span>读取inception-v3模型</a></span></li><li><span><a href="#产生特征向量" data-toc-modified-id="产生特征向量-5"><span class="toc-item-num">5&nbsp;&nbsp;</span>产生特征向量</a></span></li><li><span><a href="#把图片数据转化为特征向量并保存" data-toc-modified-id="把图片数据转化为特征向量并保存-6"><span class="toc-item-num">6&nbsp;&nbsp;</span>把图片数据转化为特征向量并保存</a></span></li></ul></div>

# 迁移学习

In [7]:
import glob
import os.path
import random
import numpy as np
import tensorflow as tf
from tensorflow.python.platform import gfile

# inception-v3模型相关参数

In [8]:
# Number of nodes in the bottleneck layer
BOTTLENECK_TENSOR_SIZE = 2048

# Name of the tensor that receives the input image data
JPEG_DATA_TENSOR_NAME = 'DecodeJpeg/contents:0'
# Name of the tensor holding the Inception-v3 bottleneck-layer result.
# In the Inception-v3 model this tensor is named 'pool_3/_reshape:0'.
BOTTLENECK_TENSOR_NAME = 'pool_3/_reshape:0'

# Directory holding the downloaded, Google-pretrained Inception-v3 model
MODEL_DIR = '../../../datasets/inception_dec_2015'
# File name of the downloaded, Google-pretrained Inception-v3 model
MODEL_FILE = 'tensorflow_inception_graph.pb'

# 新模型相关参数

In [9]:
# Directory containing the image data (one sub-directory per class)
FLOWER_DIR = "../../../datasets/flower_photos"

# Directory where the image feature files are written
FEATURE_DIR = "../../../datasets/flower_features"


# Dataset split percentages; whatever is left over goes to the training set
VALIDATION_PERCENTAGE = 20 # percentage of data for the (cross-)validation set
TEST_PERCENTAGE = 10       # percentage of data for the test set

# 读取inception-v3模型

In [10]:
def read_inception_v3():
    """Import the saved Inception-v3 graph and return the two tensors used here.

    The serialized GraphDef is read from MODEL_DIR/MODEL_FILE and imported
    with ``tf.import_graph_def``.

    Returns:
        A ``(bottleneck_tensor, jpeg_data_tensor)`` tuple: the elements
        named by BOTTLENECK_TENSOR_NAME and JPEG_DATA_TENSOR_NAME, in that
        order.
    """
    model_path = os.path.join(MODEL_DIR, MODEL_FILE)

    # Deserialize the GraphDef protobuf from disk.
    inception_graph_def = tf.GraphDef()
    with gfile.FastGFile(model_path, 'rb') as model_file:
        inception_graph_def.ParseFromString(model_file.read())

    # Import the graph and pull out only the tensors we need.
    wanted_elements = [BOTTLENECK_TENSOR_NAME, JPEG_DATA_TENSOR_NAME]
    bottleneck_tensor, jpeg_data_tensor = tf.import_graph_def(
        inception_graph_def,
        return_elements=wanted_elements,
    )

    return bottleneck_tensor, jpeg_data_tensor

# 产生特征向量

1. 取出文件夹中的每一张图片;
2. 把每一张图片转化为特征向量;
3. 把特征向量按比例存储到 training、validation 和 testing文件夹中。

In [11]:
def generate_features(sess, jpeg_data_tensor, bottleneck_tensor, num_classes=5, seed=None):
    """Convert every image under FLOWER_DIR to a feature record on disk.

    Each sub-directory of FLOWER_DIR that contains ``*.jpg`` / ``*.jpeg``
    files is treated as one class. Every image is fed through the network
    (``sess.run`` on ``bottleneck_tensor``), paired with a one-hot label and
    written as a single-example TFRecord file into the ``validation``,
    ``testing`` or ``training`` folder under FEATURE_DIR, chosen at random
    according to VALIDATION_PERCENTAGE and TEST_PERCENTAGE.

    Args:
        sess: Session used to evaluate ``bottleneck_tensor``.
        jpeg_data_tensor: Tensor that is fed the raw bytes of an image file.
        bottleneck_tensor: Tensor whose value is stored as the image feature.
        num_classes: Length of the one-hot label vector (default 5, the
            value that used to be hard-coded).
        seed: Optional seed for the train/validation/test split. ``None``
            (default) keeps using NumPy's global random state.

    Returns:
        None. Results are written to FEATURE_DIR.

    Raises:
        ValueError: If more class directories with images are found than
            ``num_classes`` can encode.
    """
    # os.walk yields FLOWER_DIR itself first, followed by its sub-directories.
    sub_dirs = [x[0] for x in os.walk(FLOWER_DIR)]
    print("sub_dirs: \n", sub_dirs)

    # Gather the image files of every class before doing any expensive work.
    # Sorting makes the class -> label mapping independent of the order in
    # which the file system happens to enumerate directories.
    extensions = ['jpg', 'jpeg']
    class_files = []
    for sub_dir in sorted(sub_dirs[1:]):
        file_list = []
        for extension in extensions:
            # Glob inside the walked directory itself, so a nested directory
            # can never be confused with a top-level one of the same name.
            file_list.extend(glob.glob(os.path.join(sub_dir, "*." + extension)))
        if file_list:
            class_files.append((os.path.basename(sub_dir), file_list))

    if not class_files:
        return

    # Fail early instead of crashing on np.eye indexing after several
    # classes have already been processed.
    if len(class_files) > num_classes:
        raise ValueError(
            "found %d class directories with images but num_classes=%d"
            % (len(class_files), num_classes))

    # The record writer does not create missing folders, so make sure the
    # three output folders exist.
    next_index = {"validation": 0, "testing": 0, "training": 0}
    for split in next_index:
        os.makedirs(FEATURE_DIR + "/" + split, exist_ok=True)

    # The np.random module and a RandomState instance both provide randint().
    rng = np.random if seed is None else np.random.RandomState(seed)

    for label_index, (dir_name, file_list) in enumerate(class_files):
        print("processing: ", dir_name)
        print("image num: ", len(file_list))

        # One-hot label shared by every image of this class.
        image_label = np.eye(num_classes)[label_index].tolist()

        for image_index, image_path in enumerate(file_list):
            # Read the raw image bytes; the context manager closes the handle.
            with tf.gfile.FastGFile(image_path, "rb") as image_file:
                image_raw_data = image_file.read()

            # Run the network on the raw bytes and drop singleton dimensions.
            # Assumes the squeezed result is a 1-D vector -- TODO confirm.
            image_feature = sess.run(bottleneck_tensor, feed_dict={jpeg_data_tensor: image_raw_data})
            image_feature = np.squeeze(image_feature).tolist()

            # Pack feature and label into an Example protocol buffer.
            float_feature = tf.train.Feature(float_list=tf.train.FloatList(value=image_feature))
            float_label = tf.train.Feature(float_list=tf.train.FloatList(value=image_label))
            feature = {"image_feature": float_feature, "image_label": float_label}
            example = tf.train.Example(features=tf.train.Features(feature=feature))
            serialized = example.SerializeToString()

            # Randomly assign the example to one of the three splits.
            chance = rng.randint(100)
            if chance < VALIDATION_PERCENTAGE:
                split = "validation"
            elif chance < VALIDATION_PERCENTAGE + TEST_PERCENTAGE:
                split = "testing"
            else:
                split = "training"

            record_name = "%s.tfrecords-%.5d" % (split, next_index[split])
            example_path = FEATURE_DIR + "/" + split + "/" + record_name
            next_index[split] += 1

            # One record file per image; closed even if the write fails.
            with tf.python_io.TFRecordWriter(example_path) as writer:
                writer.write(serialized)

            if image_index % 200 == 0:
                print(image_index, "images processed")

    return

# 把图片数据转化为特征向量并保存

In [12]:
# Session options: allow soft device placement and let GPU memory grow on demand.
config = tf.ConfigProto(
    allow_soft_placement=True,
    gpu_options=tf.GPUOptions(allow_growth=True),
)
with tf.Session(config=config) as sess:
    # Initialize global variables.
    # NOTE(review): this runs before the graph is imported below; confirm
    # that the imported graph has no variables that need initializing.
    sess.run(tf.global_variables_initializer())

    # Load the Inception-v3 graph and fetch the tensors we need.
    bottleneck_tensor, jpeg_data_tensor = read_inception_v3()

    # Compute the feature vectors for all images and save them to disk.
    generate_features(sess, jpeg_data_tensor, bottleneck_tensor)

sub_dirs: 
 ['../../../datasets/flower_photos', '../../../datasets/flower_photos\\daisy', '../../../datasets/flower_photos\\dandelion', '../../../datasets/flower_photos\\roses', '../../../datasets/flower_photos\\sunflowers', '../../../datasets/flower_photos\\tulips']
processing:  daisy
image num:  633
0 images processed
200 images processed
400 images processed
600 images processed
processing:  dandelion
image num:  898
0 images processed
200 images processed
400 images processed
600 images processed
800 images processed
processing:  roses
image num:  641
0 images processed
200 images processed
400 images processed
600 images processed
processing:  sunflowers
image num:  699
0 images processed
200 images processed
400 images processed
600 images processed
processing:  tulips
image num:  799
0 images processed
200 images processed
400 images processed
600 images processed
