In [1]:
import numpy as np

import tensorflow as tf

import os

from keras.utils import to_categorical

from collections import defaultdict

import scipy.io as sio

from PIL import Image

import cv2

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
data_dir = list()

for img in os.listdir('../../tensorflow2/dataset/102flowers/jpg/'):
    data_dir.append(os.path.join('../../tensorflow2/dataset/102flowers/jpg/' , img))

data_dir.sort()

data_dir = np.array(data_dir)

In [3]:
labels = sio.loadmat('../../tensorflow2/dataset/imagelabels.mat')

setid = sio.loadmat('../../tensorflow2/dataset/setid.mat')

In [4]:
labels = labels['labels'][0]-1

In [5]:
trnid = np.array(setid['tstid'][0]) - 1 #train index
tstid = np.array(setid['trnid'][0]) - 1 #test index
valid = np.array(setid['valid'][0]) - 1 #val index

In [6]:
num_classes = 102 #102种花

In [7]:
train_num = len(trnid)
test_num = len(tstid)

In [8]:
def next_batch(batch_size = 128 , is_training = True):
    data = []
    _labels = []
    
    if is_training:
        #训练数据随机索引
        shuffle_idx = np.random.randint(low=0 , high=train_num , size=batch_size)
        
        for i in trnid[shuffle_idx]:
            img = cv2.imread(data_dir[i])
            img = cv2.resize(img , (224,224))
            img = img/127.5-1.0
            
            label = labels[i]
            
            data.append(img)
            _labels.append(label)
            
        return np.array(data) , to_categorical(np.array(_labels) , num_classes=num_classes)
            
    else:
        #验证数据随机索引
        shuffle_idx = np.random.randint(low=0 , high=test_num , size=batch_size)
        
        for i in tstid[shuffle_idx]:
            img = cv2.imread(data_dir[i])
            img = cv2.resize(img , (224,224))
            img = img/127.5-1.0
            
            label = labels[i]
            
            data.append(img)
            _labels.append(label)
            
        return np.array(data) , to_categorical(np.array(_labels) , num_classes=num_classes)
    

In [9]:
def spatial_pyramid_pool(conv5 , pyramid_bins):
    batch_size = conv5.get_shape().as_list()[0] #batch_size
    conv5_height = conv5.get_shape().as_list()[1] #feature map height
    conv5_width = conv5.get_shape().as_list()[2] #feature map width

    for i in range(len(pyramid_bins)):
        pooling_height = np.ceil(conv5_height / pyramid_bins[i])
        stride_height = np.ceil(conv5_height / pyramid_bins[i]) #floor
        
        pooling_width = np.ceil(conv5_width / pyramid_bins[i])
        stride_width = np.ceil(conv5_width / pyramid_bins[i]) #floor
        
        padding_height = int(pyramid_bins[i] * pooling_height - conv5_height)
        padding_width = int(pyramid_bins[i] * pooling_width - conv5_width)
        
        conv5_padding = tf.pad(conv5 , tf.constant([[0,0] , [0,padding_height] , [0,padding_width] ,[0,0]]))
        
        #max_pooling = tf.layers.max_pooling2d(conv5_padding , [pooling_height , pooling_width] , [stride_height , stride_width] , padding='same')
        max_pooling = tf.nn.max_pool(conv5_padding , ksize=[1,pooling_height,pooling_width,1] , strides=[1,stride_height,stride_width,1] , padding='SAME')
        
        if i==0:
            spp = tf.reshape(max_pooling , shape=(batch_size , -1))
        else:
            spp = tf.concat(values=[spp , tf.reshape(max_pooling , shape=(batch_size , -1)) ] , axis=-1)
            
    return spp

In [10]:
#获取预训练参数
net_data = np.load('bvlc_alexnet.npy' , encoding='bytes').item() #不加encoding='bytes' 死机
#爆内存 不使用
conv1w = tf.Variable(net_data["conv1"][0] , trainable=False) #11*11
conv1b = tf.Variable(net_data["conv1"][1] , trainable=False)

conv2w = tf.Variable(net_data["conv2"][0] , trainable=False) #5*5
conv2b = tf.Variable(net_data["conv2"][1] , trainable=False)

conv3w = tf.Variable(net_data["conv3"][0] , trainable=False) #3*3
conv3b = tf.Variable(net_data["conv3"][1] , trainable=False)

conv4w = tf.Variable(net_data["conv4"][0] , trainable=False) #3*3
conv4b = tf.Variable(net_data["conv4"][1] , trainable=False)

conv5w = tf.Variable(net_data["conv5"][0] , trainable=False) #3*3
conv5b = tf.Variable(net_data["conv5"][1] , trainable=False)

fc6w = tf.Variable(tf.truncated_normal(shape=((8**2+6**2+4**2)*256 , 1024) , stddev=1e-2))
fc6b = tf.Variable(tf.constant(0.1 , shape=[1024]))
#fc6b = tf.Variable(net_data['fc6'][1])

fc7w = tf.Variable(tf.truncated_normal(shape=(1024 , num_classes) , stddev=1e-2))
fc7b = tf.Variable(tf.constant(0.1 , shape=[num_classes]))

#下面不敢使用 爆显存
#fc7w = tf.Variable(net_data['fc7'][0])
#fc7b = tf.Variable(net_data['fc7'][1])

#fc8w = tf.Variable(tf.truncated_normal(shape=(4096 , num_classes) , stddev=1e-2))
#fc8b = tf.Variable(tf.constant(0.1 , shape=[num_classes]))




In [11]:
def group_conv(x , kernel , strides):
    group_x = tf.split(x , num_or_size_splits=2 , axis=3)
    group_kernel = tf.split(kernel , num_or_size_splits=2 , axis=3)
    
    group_conv0 = tf.nn.conv2d(group_x[0] , group_kernel[0] , strides=strides , padding='SAME')
    group_conv1 = tf.nn.conv2d(group_x[1] , group_kernel[1] , strides=strides , padding='SAME')
    
    group_conv = tf.concat((group_conv0 , group_conv1) , axis=3)
    
    return group_conv

In [9]:
keep_prob = tf.placeholder(dtype=tf.float32)
pyramid_bins = [8,6,4] #spp中使用3种格子

def model_finetune(x):
    conv1 = tf.nn.conv2d(x , conv1w , strides=(1,4,4,1) , padding='SAME')
    conv1 = tf.nn.bias_add(conv1 , conv1b)
    conv1 = tf.nn.relu(conv1)
    lrn1 = tf.nn.local_response_normalization(conv1 , depth_radius=5 , alpha=0.0001 , beta=0.75 , bias=1.0)
    maxpool1 = tf.nn.max_pool(lrn1 , ksize=(1,3,3,1) , strides=(1,2,2,1) , padding='VALID')
    
    conv2 = group_conv(maxpool1 , conv2w , strides=(1,1,1,1))
    conv2 = tf.nn.bias_add(conv2 , conv2b)
    conv2 = tf.nn.relu(conv2)
    lrn2 = tf.nn.local_response_normalization(conv2 , depth_radius=5 , alpha=0.0001 , beta=0.75 , bias=1.0)
    maxpool2 = tf.nn.max_pool(lrn2 , ksize=(1,3,3,1) , strides=(1,2,2,1) , padding='VALID')
    
    conv3 = tf.nn.conv2d(maxpool2 , conv3w , strides=(1,1,1,1) , padding='SAME')
    conv3 = tf.nn.bias_add(conv3 , conv3b)
    conv3 = tf.nn.relu(conv3)

    conv4 = group_conv(conv3 , conv4w , strides=(1,1,1,1))
    conv4 = tf.nn.bias_add(conv4 , conv4b)
    conv4 = tf.nn.relu(conv4)

    conv5 = group_conv(conv4 , conv5w , strides=(1,1,1,1))
    conv5 = tf.nn.bias_add(conv5 , conv5b)
    conv5 = tf.nn.relu(conv5)

    maxpool5 = spatial_pyramid_pool(conv5 , pyramid_bins)
    
    # print(maxpool5.get_shape().as_list()) #debug
    
    fc6 = tf.nn.relu_layer(maxpool5 , fc6w , fc6b)
    fc6 = tf.nn.dropout(fc6 , keep_prob)
    
    fc7 = tf.nn.xw_plus_b(fc6 , fc7w , fc7b)
    
    return fc7
    
    #下面的不敢使用 爆显存
    #fc7 = tf.nn.relu_layer(fc6 , fc7w , fc7b)
    #fc7 = tf.nn.dropout(fc7 , keep_prob)
    #
    #fc8 = tf.nn.xw_plus_b(fc7 , fc8w , fc8b) #需要softmax激活
    
    #return fc8

In [10]:
def model(x):
    conv1 = tf.layers.conv2d(x , filters=96 , kernel_size=(11,11) , strides=(4,4) , padding='same' , activation=tf.nn.relu,
                     kernel_initializer = tf.initializers.truncated_normal(stddev=1e-2),
                     bias_initializer = tf.initializers.constant(),
                            kernel_regularizer=tf.contrib.layers.l2_regularizer(scale=0.00005),
                            bias_regularizer=tf.contrib.layers.l2_regularizer(scale=0.00005))
    lrn1 = tf.nn.local_response_normalization(conv1 , depth_radius=5 , alpha=0.0001 , beta=0.75 , bias=1.0)
    maxpool1 = tf.layers.max_pooling2d(lrn1 , pool_size=(3,3) , strides=(2,2) , padding='valid')
    
    conv2 = tf.layers.conv2d(maxpool1 , filters=256 , kernel_size=(5,5) , strides=(1,1) , padding='same' , activation=tf.nn.relu,
                     kernel_initializer = tf.initializers.truncated_normal(stddev=1e-2),
                     bias_initializer = tf.initializers.constant(),
                            kernel_regularizer=tf.contrib.layers.l2_regularizer(scale=0.00005),
                            bias_regularizer=tf.contrib.layers.l2_regularizer(scale=0.00005))
    lrn2 = tf.nn.local_response_normalization(conv2 , depth_radius=5 , alpha=0.0001 , beta=0.75 , bias=1.0)
    maxpool2 = tf.layers.max_pooling2d(lrn2 , pool_size=(3,3) , strides=(2,2) , padding='valid')
    
    conv3 = tf.layers.conv2d(maxpool2 , filters=384 , kernel_size=(3,3) , strides=(1,1) , padding='same' , activation=tf.nn.relu,
                     kernel_initializer = tf.initializers.truncated_normal(stddev=1e-2),
                     bias_initializer = tf.initializers.constant(),
                            kernel_regularizer=tf.contrib.layers.l2_regularizer(scale=0.00005),
                            bias_regularizer=tf.contrib.layers.l2_regularizer(scale=0.00005))
    
    conv4 = tf.layers.conv2d(conv3 , filters=384 , kernel_size=(3,3) , strides=(1,1) , padding='same' , activation=tf.nn.relu,
                     kernel_initializer = tf.initializers.truncated_normal(stddev=1e-2),
                     bias_initializer = tf.initializers.constant(),
                            kernel_regularizer=tf.contrib.layers.l2_regularizer(scale=0.00005),
                            bias_regularizer=tf.contrib.layers.l2_regularizer(scale=0.00005))
    
    conv5 = tf.layers.conv2d(conv4 , filters=256 , kernel_size=(3,3) , strides=(1,1) , padding='same' , activation=tf.nn.relu,
                     kernel_initializer = tf.initializers.truncated_normal(stddev=1e-2),
                     bias_initializer = tf.initializers.constant(),
                            kernel_regularizer=tf.contrib.layers.l2_regularizer(scale=0.00005),
                            bias_regularizer=tf.contrib.layers.l2_regularizer(scale=0.00005))
    
    maxpool5 = spatial_pyramid_pool(conv5 , pyramid_bins)
    
    fc6 = tf.layers.dense(maxpool5 , units=1024 , activation=tf.nn.relu,
                          kernel_initializer=tf.initializers.random_normal(stddev=1e-2),
                          bias_initializer=tf.initializers.constant(),
                          kernel_regularizer=tf.contrib.layers.l2_regularizer(scale=0.00005),
                          bias_regularizer=tf.contrib.layers.l2_regularizer(scale=0.00005))
    fc6 = tf.layers.dropout(fc6 , keep_prob)
    
    fc7 = tf.layers.dense(fc6 , units=1024 , activation=tf.nn.relu,
                          kernel_initializer=tf.initializers.random_normal(stddev=1e-2),
                          bias_initializer=tf.initializers.constant(),
                          kernel_regularizer=tf.contrib.layers.l2_regularizer(scale=0.00005),
                          bias_regularizer=tf.contrib.layers.l2_regularizer(scale=0.00005))
    fc7 = tf.layers.dropout(fc7 , keep_prob)
    
    fc8 = tf.layers.dense(fc7 , units=num_classes,
                          kernel_initializer=tf.initializers.random_normal(stddev=1e-2),
                          bias_initializer=tf.initializers.constant(),
                          kernel_regularizer=tf.contrib.layers.l2_regularizer(scale=0.00005),
                          bias_regularizer=tf.contrib.layers.l2_regularizer(scale=0.00005))
    
    return fc8

In [15]:
batch_size = 128

In [22]:
#构建计算图
x = tf.placeholder(dtype=tf.float32 , shape=[batch_size , 224,224,3])
y_ = tf.placeholder(dtype=tf.float32 , shape=[batch_size , num_classes])

#logits = model(x)

logits = model_finetune(x)

loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(labels=y_ , logits=logits))

global_step = tf.Variable(0 , trainable=False)
learning_rate = tf.train.exponential_decay(0.1 , global_step , decay_steps=1000 , decay_rate=0.9 , staircase=True)

#train_step = tf.train.AdagradOptimizer(learning_rate).minimize(loss)
train_step = tf.train.AdamOptimizer().minimize(loss)

#验证
#softmax_logits = tf.nn.softmax(logits)
#correct_count = tf.equal(tf.argmax(softmax_logits , axis=1) , tf.argmax(y_ , 1))

correct_count = tf.equal(tf.argmax(logits , axis=1) , tf.argmax(y_ , 1))

accu = tf.reduce_mean(tf.cast(correct_count , tf.float32))

In [24]:
#能收敛了 accu上升
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    
    for it in range(10): #epoch=80

        cnt_tmp = 0

        for i in range(10):
            it=it+1
            
            xtrain , ytrain = next_batch(batch_size=batch_size)

            _ , train_accu , cost = sess.run([train_step , accu , loss] , feed_dict={x : xtrain , y_ : ytrain , keep_prob:1.0})

            print('%d loss:%f accu:%f' % (it , cost , train_accu))

            if train_accu>0.95:
                cnt_tmp = cnt_tmp+1

            if cnt_tmp>10:
                break


1 loss:4.627480 accu:0.000000
2 loss:6.562114 accu:0.078125
3 loss:5.553918 accu:0.046875
4 loss:5.020390 accu:0.148438
5 loss:5.026891 accu:0.039062
6 loss:4.769906 accu:0.093750
7 loss:4.351510 accu:0.085938
8 loss:4.173891 accu:0.125000
9 loss:4.181258 accu:0.171875
10 loss:4.125919 accu:0.132812
2 loss:3.993456 accu:0.179688
3 loss:3.896168 accu:0.187500
4 loss:4.020677 accu:0.125000
5 loss:3.696506 accu:0.125000
6 loss:3.691297 accu:0.195312
7 loss:3.560419 accu:0.265625
8 loss:3.554865 accu:0.320312
9 loss:3.402827 accu:0.296875
10 loss:3.438694 accu:0.242188
11 loss:3.341337 accu:0.281250
3 loss:3.136966 accu:0.390625
4 loss:3.224842 accu:0.375000
5 loss:2.925241 accu:0.312500
6 loss:2.844578 accu:0.312500
7 loss:3.116645 accu:0.296875
8 loss:2.655392 accu:0.406250
9 loss:2.758189 accu:0.484375
10 loss:2.810175 accu:0.382812
11 loss:2.527923 accu:0.453125
12 loss:2.572539 accu:0.445312
4 loss:2.431806 accu:0.460938
5 loss:2.255583 accu:0.507812
6 loss:2.216383 accu:0.445312
7 lo