In [1]:
import numpy as np
import tensorflow as tf

import cv2

from glob import glob

import multiprocessing


import os
import pandas as pd

import xml
from xml.etree import ElementTree as ET

  from ._conv import register_converters as _register_converters


In [2]:
# Class name -> integer class id. The position of a name in the tuple is its id.
LABEL2INDEX = dict(zip(
    ('frog', 'truck', 'deer', 'automobile', 'bird',
     'horse', 'ship', 'cat', 'dog', 'airplane'),
    range(10),
))
# Inverse lookup: integer class id -> class name.
INDEX2LABEL = dict((index, label) for label, index in LABEL2INDEX.items())

In [3]:
# Resize an image
def resize(img , resize_type , size=256):
    """Resize a single image tensor with ``tf.image.resize_images``.

    Args:
        img: image tensor; for the 'min'/'max' modes its first two *static*
            dimensions (``img.shape[0]``, ``img.shape[1]``) are read as
            height and width, so they must be known at graph-build time.
        resize_type: one of
            'reg' - resize to a ``size`` x ``size`` square, ignoring aspect ratio;
            'min' - scale so the shorter side becomes ``size``, keeping the
                    aspect ratio;
            'max' - scale so the longer side becomes ``size``, keeping the
                    aspect ratio.
        size: target edge length in pixels. Defaults to 256, the value that
            was previously hard-coded.

    Returns:
        The resized image tensor.

    Raises:
        ValueError: if ``resize_type`` is not one of the supported modes
            (previously the function silently returned ``None``).
    """
    if resize_type not in ('reg' , 'min' , 'max'):
        raise ValueError(
            "resize_type must be 'reg', 'min' or 'max', got %r" % (resize_type,))

    if resize_type == 'reg':
        return tf.image.resize_images(img , (size , size))

    height = tf.cast(img.shape[0] , dtype=tf.float32)
    width = tf.cast(img.shape[1] , dtype=tf.float32)
    ratio = tf.divide(height , width)
    edge = float(size)

    def _pin_height():
        # height := size, width scaled by width/height
        return (tf.constant(size , dtype=tf.int32) ,
                tf.cast(tf.floor(tf.divide(edge , ratio)) , dtype=tf.int32))

    def _pin_width():
        # width := size, height scaled by height/width
        return (tf.cast(tf.floor(tf.multiply(edge , ratio)) , dtype=tf.int32) ,
                tf.constant(size , dtype=tf.int32))

    height_is_shorter = tf.less(height , width)
    if resize_type == 'min':
        # Pin whichever side is shorter.
        new_shape = tf.cond(height_is_shorter , _pin_height , _pin_width)
    else:
        # 'max': pin whichever side is longer.
        new_shape = tf.cond(height_is_shorter , _pin_width , _pin_height)

    return tf.image.resize_images(img , new_shape)
    
# Random crop
def random_clip(img , crop_size):
    """Cut a randomly positioned window of shape ``crop_size`` out of ``img``.

    Thin wrapper around ``tf.random_crop``; ``crop_size`` is forwarded
    unchanged as the requested output shape.
    """
    cropped = tf.random_crop(img , crop_size)
    return cropped
    

def central_clip(img , crop_size):
    """Take a centered ``crop_size[0]`` x ``crop_size[1]`` window from ``img``.

    The window offset is half of the leftover height/width (floor division),
    read from the static shape of ``img``. Exactly three channels, starting
    at channel 0, are kept.
    """
    crop_height , crop_width = crop_size[0] , crop_size[1]

    # Leftover rows/columns are split evenly around the window.
    offset_top = (img.shape[0] - crop_height) // 2
    offset_left = (img.shape[1] - crop_width) // 2

    return tf.slice(img , [offset_top , offset_left , 0] , [crop_height , crop_width , 3])

In [4]:
def split_data(train_size = 0.8 , seed = None):
    """Randomly split the training images and labels into train/validation parts.

    File names are listed from the ``train/`` directory and sorted by the
    integer before the first '.', so that their order lines up with the rows
    of ``trainLabels.csv`` (assumes the CSV rows are ordered by that same
    numeric id - confirm against the data). Labels are mapped to integer ids
    through ``LABEL2INDEX``.

    Args:
        train_size: fraction of the samples that goes to the training part.
        seed: optional integer seed for a reproducible split. With the
            default ``None`` the global numpy RNG is used, as before.

    Returns:
        ``(train_filenames, train_labels, val_filenames, val_labels)`` as
        numpy arrays.
    """
    filenames = os.listdir(path='../../tensorflow2/dataset/CIFAR10/train/')
    # Sort numerically ('10.png' after '9.png') to match the label order.
    filenames = np.array(sorted(filenames , key=lambda filename: int(filename.split('.')[0])))

    # `.values` replaces `Series.get_values()`, which newer pandas no longer provides.
    labels = pd.read_csv('../../tensorflow2/dataset/CIFAR10/trainLabels.csv')['label'].map(LABEL2INDEX).values

    idx = list(range(len(filenames)))
    rng = np.random if seed is None else np.random.RandomState(seed)
    rng.shuffle(idx)

    split_at = int(len(idx) * train_size)
    train_idx , val_idx = idx[:split_at] , idx[split_at:]

    return filenames[train_idx] , labels[train_idx] , filenames[val_idx] , labels[val_idx]

In [5]:
# Model variant switch: exactly one of FAST / ACCURATE is True.
FAST = True
ACCURATE = not FAST

# Square input crop size: 231 px for the fast variant, 221 px for the accurate one.
HEIGHT = WIDTH = 231 if FAST else 221

In [61]:
def preprocess_train(filename , label):
    """Load and preprocess one training image.

    Reads ``filename`` from the training directory, decodes it, casts it to
    float32 and fixes its shape to 32x32x3, resizes it with ``resize`` in
    'max' mode, takes a random ``HEIGHT`` x ``WIDTH`` x 3 crop and rescales
    the values with ``x / 127.5 - 1.0``.

    Returns:
        ``(image, label)`` with ``label`` passed through unchanged.
    """
    image_path = '../../tensorflow2/dataset/CIFAR10/train/' + filename
    decoded = tf.image.decode_image(tf.read_file(image_path))
    pixels = tf.cast(decoded , tf.float32)
    # decode_image leaves the static shape unknown; pin it so that resize()
    # can read the height and width (marked as the key step by the author).
    pixels = tf.reshape(pixels , tf.stack([32 , 32 , 3]))

    enlarged = resize(pixels , 'max')
    patch = random_clip(enlarged , crop_size=(HEIGHT , WIDTH , 3))

    normalized = patch / 127.5 - 1.0
    return normalized , label
    

def preprocess_val(filename , label):
    """Load and preprocess one validation image.

    Applies the same pipeline as the training preprocessing: read from the
    training directory, decode, cast to float32, fix the shape to 32x32x3,
    resize in 'max' mode, random ``HEIGHT`` x ``WIDTH`` x 3 crop, then rescale
    with ``x / 127.5 - 1.0``.

    NOTE(review): validation crops are random, not centered, so validation
    results vary from run to run; ``central_clip`` exists in this notebook
    but is not used here - confirm this is intended.

    Returns:
        ``(image, label)`` with ``label`` passed through unchanged.
    """
    source_path = '../../tensorflow2/dataset/CIFAR10/train/' + filename
    encoded = tf.read_file(source_path)
    image = tf.cast(tf.image.decode_image(encoded) , tf.float32)
    # Pin the static shape so that resize() can read the height and width
    # (marked as the key step by the author).
    image = tf.reshape(image , tf.stack([32 , 32 , 3]))

    image = random_clip(resize(image , 'max') , crop_size=(HEIGHT , WIDTH , 3))

    return image / 127.5 - 1.0 , label

def build_dataset(filenames , labels , is_training = True , batch_size=128):
    """Build a one-pass, batched input pipeline and return its iterator.

    Args:
        filenames: sequence/array of image file names (must support ``len``).
        labels: sequence/array of labels aligned with ``filenames``.
        is_training: if True, elements are shuffled and mapped through
            ``preprocess_train``; otherwise they are mapped through
            ``preprocess_val`` in their given order.
        batch_size: number of elements per batch; the last batch of a pass
            may be smaller.

    Returns:
        An initializable iterator. Run its ``initializer`` to start a pass;
        ``get_next()`` raises ``tf.errors.OutOfRangeError`` when the pass is
        exhausted.
    """
    # from_tensor_slices is a static factory: call it on the class, not on an
    # instance of the Dataset base class, which is not meant to be instantiated.
    dataset = tf.data.Dataset.from_tensor_slices((filenames , labels)) # each element is (filename, label)

    if is_training:
        # Shuffle the cheap (filename, label) pairs *before* decoding, so the
        # buffer can cover the whole set without holding decoded images.
        dataset = dataset.shuffle(buffer_size=max(len(filenames) , 1))
        dataset = dataset.map(preprocess_train)
    else:
        dataset = dataset.map(preprocess_val)

    # Deliberately no repeat(): an endlessly repeating dataset never raises
    # OutOfRangeError, which consumers rely on to detect the end of an epoch.
    # Re-running iterator.initializer starts the next pass.
    dataset = dataset.batch(batch_size)

    return dataset.make_initializable_iterator()

In [62]:
# Split the labelled images: 90% for training, the remaining 10% for validation.
train_filenames , train_labels , val_filenames , val_labels = split_data(train_size=0.9)

In [63]:
# Training input iterator (is_training defaults to True).
iterator_train = build_dataset( train_filenames , train_labels ) # pass in the file names and labels


In [64]:
# Validation input iterator (uses the validation preprocessing).
iterator_val = build_dataset(val_filenames , val_labels , is_training=False) # pass in the file names and labels

In [65]:
class OverFeat(object):
    """OverFeat-style image classifier built as a TensorFlow 1.x graph.

    Two variants are available, chosen with ``model_type``: 'accurate'
    (``model_accurate``) and anything else, by default 'fast'
    (``model_fast``). The graph is built in the constructor; ``train`` runs
    the optimisation loop with a validation pass after every epoch.

    Input is read through a single feedable iterator: ``self.handle`` selects,
    per ``sess.run`` call, whether ``self.X`` / ``self.y`` come from the
    training or from the validation iterator.

    Attributes:
        X, y: image batch and label batch (always from the same ``get_next``).
        IS_TRAINING: bool placeholder; enables dropout (and batch-norm
            training mode, if ``batch_norm`` is used).
        handle: string placeholder holding the active iterator's handle.
        logits: un-activated class scores of the final layer.
    """

    def __init__(self , iterator_train , iterator_val , num_classes , model_type = 'fast'):
        """Store the iterators, create the inputs and build the network.

        Args:
            iterator_train: initializable iterator yielding ``(image, label)``
                training batches.
            iterator_val: initializable iterator yielding validation batches
                with the same types and shapes as ``iterator_train``.
            num_classes: size of the final (logits) layer.
            model_type: 'accurate' builds the accurate model, any other value
                builds the fast model.
        """
        # Both iterators must be initialised with sess.run(xxx.initializer).
        self.iterator_train = iterator_train
        self.iterator_val = iterator_val

        self.IS_TRAINING = tf.placeholder(dtype=tf.bool)

        # Feedable iterator: one get_next() op serves both data sources.
        # Calling get_next() once also guarantees that X and y belong to the
        # same batch (two separate get_next() calls pull two different batches).
        self.handle = tf.placeholder(dtype=tf.string , shape=[])
        iterator = tf.data.Iterator.from_string_handle(
            self.handle , iterator_train.output_types , iterator_train.output_shapes)
        self.X , self.y = iterator.get_next()

        self.NUM_CLASSES = num_classes
        self.TYPE = model_type

        self.EPOCH = 90 # from the paper (author's note)

        self.BATCH_SIZE = 128 # from the paper (author's note)

        self.KEEP_PROB = 0.5

        if model_type == 'accurate':
            self.model_accurate()
        else:
            self.model_fast()

    def model_fast(self):
        """Build the fast variant: 5 conv layers, 3 max-poolings, 3 dense layers."""
        conv1 = self.conv(self.X , 11 , 11 , 96 , 4 , 4 , name='conv1')
        max_pooling1 = self.max_pooling(conv1 , 2 , 2 , 2 , 2 , name='pooling1')

        conv2 = self.conv(max_pooling1 , 5 , 5 , 256 , 1 , 1 , name='conv2')
        max_pooling2 = self.max_pooling(conv2 , 2 , 2 , 2 , 2 , name='pooling2')

        conv3 = self.conv(max_pooling2 , 3 , 3 , 512 , 1 , 1 , name='conv3')

        conv4 = self.conv(conv3 , 3 , 3 , 1024 , 1 , 1 , name='conv4')

        conv5 = self.conv(conv4 , 3 , 3 , 1024 , 1 , 1 , name='conv5')
        max_pooling5 = self.max_pooling(conv5 , 2 , 2 , 2 , 2 , name='pooling5')

        # Dense head (self.fcn offers a convolutional form of the first dense layer).
        flat = tf.layers.flatten(max_pooling5)

        fc6 = self.fc(flat , 3072 , name='fc6')
        fc6 = self._dropout(fc6)

        fc7 = self.fc(fc6 , 4096 , name='fc7')
        fc7 = self._dropout(fc7)

        # The logits layer must stay linear: a ReLU here would clip every
        # negative class score before the softmax cross-entropy.
        fc8 = self.fc(fc7 , self.NUM_CLASSES , name='fc8' , relu=False)

        self.logits = fc8

    def model_accurate(self):
        """Build the accurate variant: 6 conv layers, 3 max-poolings, 3 dense layers."""
        conv1 = self.conv(self.X , 7 , 7 , 96 , 2 , 2 , name='conv1')
        max_pooling1 = self.max_pooling(conv1 , 3 , 3 , 3 , 3 , name='pooling1')

        conv2 = self.conv(max_pooling1 , 7 , 7 , 256 , 1 , 1 , name='conv2')
        max_pooling2 = self.max_pooling(conv2 , 2 , 2 , 2 , 2 , name='pooling2')

        conv3 = self.conv(max_pooling2 , 3 , 3 , 512 , 1 , 1 , name='conv3')

        conv4 = self.conv(conv3 , 3 , 3 , 512 , 1 , 1 , name='conv4')

        conv5 = self.conv(conv4 , 3 , 3 , 1024 , 1 , 1 , name='conv5')

        conv6 = self.conv(conv5 , 3 , 3 , 1024 , 1 , 1 , name='conv6')
        max_pooling6 = self.max_pooling(conv6 , 3 , 3 , 3 , 3 , name='pooling6')

        # Dense head (self.fcn offers a convolutional form of the first dense layer).
        flat = tf.layers.flatten(max_pooling6)

        fc7 = self.fc(flat , 4096 , name='fc7')
        fc7 = self._dropout(fc7)

        fc8 = self.fc(fc7 , 4096 , name='fc8')
        fc8 = self._dropout(fc8)

        # Linear logits layer, see model_fast.
        fc9 = self.fc(fc8 , self.NUM_CLASSES , name='fc9' , relu=False)

        self.logits = fc9

    def _dropout(self , x):
        """Apply dropout with keep probability KEEP_PROB, active only while IS_TRAINING is fed True."""
        # tf.layers.dropout is a no-op unless `training` is true, so the flag
        # has to be wired in explicitly.
        return tf.layers.dropout(x , rate=1. - self.KEEP_PROB , training=self.IS_TRAINING)

    def train(self):
        """Train for ``EPOCH`` epochs, validating after each one.

        Per epoch: re-initialise the training iterator and run the optimiser
        until the iterator is exhausted (printing each batch loss), then do
        the same over the validation iterator (printing each batch accuracy),
        then advance the epoch counter that drives the learning-rate schedule.

        An epoch ends when the iterator raises ``OutOfRangeError``, so both
        datasets must be finite (not repeated indefinitely).
        """
        # Validation metric. argmax over logits equals argmax over softmax.
        predictions = tf.argmax(self.logits , axis=-1)
        equal = tf.equal(predictions , tf.cast(self.y , predictions.dtype))
        # Mean instead of sum / BATCH_SIZE, so a smaller final batch is scored correctly.
        accu = tf.reduce_mean(tf.cast(equal , dtype=tf.float32))

        loss = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=self.y , logits=self.logits)
        loss = tf.reduce_mean(loss)
        # The L2 kernel regularizers on the layers only take effect once their
        # collected losses are added to the objective.
        loss = loss + tf.losses.get_regularization_loss()

        # Epoch counter, incremented once per epoch because the learning rate depends on it.
        epoch = tf.Variable(initial_value=0 , name='epoch' , trainable=False)
        epoch_add = tf.assign_add(epoch , value=1)

        # Learning rate is halved at each boundary epoch.
        learning_rate = tf.train.piecewise_constant(epoch , boundaries=[30,50,60,70,80] ,
                                                    values=[0.05,0.025,0.0125,0.00625,0.003125,0.0015625])
        optimizer = tf.train.MomentumOptimizer(learning_rate=learning_rate , momentum=0.6)

        train_op = optimizer.minimize(loss)

        with tf.Session() as sess:

            sess.run(tf.global_variables_initializer())

            # Handles that route self.X / self.y to one of the two iterators.
            train_handle = sess.run(self.iterator_train.string_handle())
            val_handle = sess.run(self.iterator_val.string_handle())

            for _ in range(self.EPOCH):
                # --- training pass ---
                sess.run(self.iterator_train.initializer)
                train_feed = {self.IS_TRAINING : True , self.handle : train_handle}

                while True:
                    try:
                        _ , _loss = sess.run((train_op , loss) , feed_dict=train_feed)
                    except tf.errors.OutOfRangeError:
                        # one training epoch finished
                        break

                    print(_loss)

                # --- validation pass ---
                sess.run(self.iterator_val.initializer)
                val_feed = {self.IS_TRAINING : False , self.handle : val_handle}

                while True:
                    try:
                        _accu = sess.run(accu , feed_dict=val_feed)
                    except tf.errors.OutOfRangeError:
                        # validation finished, continue with the next epoch
                        break

                    print(_accu)

                sess.run(epoch_add)

    def predict(self):
        """Not implemented yet; does nothing and returns None."""
        pass

    def conv(self , x , filter_height , filter_width , output_channel , stride_height , stride_width , name , padding='same'):
        """2-D convolution followed by ReLU.

        Kernels are initialised from a normal distribution (stddev 1e-2),
        biases with the default constant initialiser, and kernels carry an L2
        regularizer (scale 1e-5). Variables are shared by ``name``
        (``tf.AUTO_REUSE``).
        """
        return tf.layers.conv2d(x , output_channel , [filter_height , filter_width] , [stride_height , stride_width] , padding=padding ,
                             activation=tf.nn.relu , kernel_initializer = tf.initializers.random_normal(stddev=1e-2) ,
                             bias_initializer = tf.initializers.constant() , kernel_regularizer=tf.contrib.layers.l2_regularizer(scale=1e-5) ,
                             name=name , reuse=tf.AUTO_REUSE)

    def max_pooling(self , x , pooling_height , pooling_width , stride_height , stride_width  , name , padding='same'):
        """2-D max pooling with the given window and stride."""
        return tf.layers.max_pooling2d(x , [pooling_height , pooling_width] , [stride_height , stride_width] , padding=padding , name=name)

    def fc(self , x , output_size , name , relu=True):
        """Fully connected layer.

        Args:
            x: 2-D input (batch, features).
            output_size: number of output units.
            name: layer name; variables are shared by name (``tf.AUTO_REUSE``).
            relu: apply ReLU to the output (default, previous behaviour).
                Pass False for a linear layer such as the logits.
        """
        return tf.layers.dense(x , output_size , activation=tf.nn.relu if relu else None ,
                               kernel_initializer=tf.initializers.random_normal(stddev=1e-2) ,
                               bias_initializer = tf.initializers.constant() , kernel_regularizer=tf.contrib.layers.l2_regularizer(scale=1e-5),
                               name = name , reuse=tf.AUTO_REUSE)

    # Convolutional form of the dense layer that directly follows the conv stack.
    def fcn(self , x , output_channel , name , padding='same'):
        """Dense-as-convolution layer: 6x6 kernel for 'accurate', 5x5 otherwise, stride 1."""
        kernel = 6 if self.TYPE == 'accurate' else 5
        return self.conv(x , kernel , kernel , output_channel , 1 , 1 , name=name , padding=padding)

    def batch_norm(self , x , name):
        """Batch (re)normalisation over the last axis, switched by ``IS_TRAINING``."""
        return tf.layers.batch_normalization(x , axis=-1 , training=self.IS_TRAINING , renorm=True , fused=True , name=name)

In [66]:
# Build the fast variant (model_type defaults to 'fast') with 10 output classes.
'''model fast'''
overfeat = OverFeat(iterator_train , iterator_val , 10)


In [67]:
# Sanity check: static shape of the input image batch.
overfeat.X.get_shape()

TensorShape([Dimension(None), Dimension(231), Dimension(231), Dimension(3)])

In [None]:
# Run training with validation after each epoch. The iterators were handed to
# the constructor; train() initialises them itself via sess.run().
overfeat.train()