# 定义网络模型，从npy读取vgg参数，进行预测。
对应相对路径vgg_mooc
模型参数npy去根目录找，避免重复。


In [None]:
#utils.py
# 读取图片的工具
from skimage import io,transform
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from pylab import mpl

# help(mpl.rcParams)  # a validating dict: the validation functions are already defined and bound to the rc parameters
mpl.rcParams['font.sans-serif'] = ['SimHei']  # so that Chinese labels are displayed
mpl.rcParams['axes.unicode_minus'] = False  # so that plus/minus signs are displayed

#读取图片，居中裁剪并缩放，打印显示三者
def load_image(path):
    """Read an image, centre-crop it to a square and resize it to 224x224.

    The original, cropped and resized pictures are drawn as three panels of
    a single matplotlib figure named 'Centre and Resize'. The figure is not
    shown by this function; the caller has to call ``plt.show()``.

    Args:
        path: location handed to ``io.imread``.

    Returns:
        The resized picture reshaped to ``(1, 224, 224, 3)`` so it can be fed
        to the network as a batch of one image.
    """
    figure = plt.figure('Centre and Resize')

    def draw_panel(position, caption, picture):
        # One cell of the 1x3 grid; the caption is placed on the x axis.
        axes = figure.add_subplot(position)
        axes.set_xlabel(caption)
        axes.imshow(picture)

    original = io.imread(path)
    print(original.shape)
    draw_panel(131, u'Original Picture', original)

    # Cut the largest centred square: the side is the shorter edge, and the
    # start offset on each axis is half of whatever that axis has in excess
    # (zero on the shorter axis).
    side = min(original.shape[:2])
    rows, cols = original.shape[0], original.shape[1]
    top = int((rows - side) / 2)
    left = int((cols - side) / 2)
    square = original[top:top + side, left:left + side]
    draw_panel(132, u'Centre Picture', square)

    resized = transform.resize(square, (224, 224))
    draw_panel(133, u'Resize Picture', resized)

    # Add the leading batch dimension expected by the TensorFlow graph.
    return resized.reshape((1, 224, 224, 3))
def percent(value):
    """Format a fraction as a percentage string with two decimals.

    Args:
        value: number to format; it is multiplied by 100 before formatting.

    Returns:
        A string such as ``'12.34%'``.
    """
    scaled = value * 100
    return '%.2f' % (scaled,) + '%'
    
# Smoke test: load the sample picture and draw the three panels.
load_image('pic/0.jpg')
plt.show()  # load_image does not show the figure itself, so show it manually.

In [None]:
#vgg16.py
#重建网络结构并读取网络参数
import inspect
import os
import numpy as np
import tensorflow as tf
import time
import matplotlib.pyplot as plt

VGG_MEAN = [103.939, 116.779, 123.68]#手动设的平均值，这顺序已经不是RGB了。

class Vgg16():
    """VGG-16 inference graph rebuilt from parameters stored in a ``.npy`` file.

    The file holds a pickled dict that maps each layer name (for example
    ``'conv1_1'`` or ``'fc6'``) to a two-element sequence
    ``[weights, biases]``. All parameters are embedded in the graph as
    constants, so the network can only be used for prediction.

    Typical use: create the object, call :meth:`net` once with the input
    tensor, then evaluate ``self.prob`` in a session.
    """

    def __init__(self, vgg16_path=None):
        """Load the parameter dictionary.

        Args:
            vgg16_path: path of the ``.npy`` parameter file. When ``None``,
                ``../../model_saved/vgg.npy`` relative to the current
                working directory is used.
        """
        if vgg16_path is None:
            vgg16_path = os.path.join(os.getcwd(), '../../model_saved/vgg.npy')
        # The load must happen for explicit paths too, not only for the
        # default one. .item() unwraps the 0-d object array into the dict.
        # NOTE(security): allow_pickle=True unpickles the file contents;
        # only load parameter files from a trusted source.
        self.data_dict = np.load(
            vgg16_path, encoding='latin1', allow_pickle=True).item()

    def net(self, images):
        """Build the forward graph; the result is exposed as ``self.prob``.

        Only graph construction happens here (and only that is timed); no
        session is run. Every intermediate layer is kept as an attribute.
        The parameter dict is released at the end, so this method can be
        called only once per instance.

        Args:
            images: tensor whose axis 3 holds the three colour channels in
                RGB order. Presumably values are in [0, 1], since they are
                multiplied by 255 here -- confirm against the caller.

        Raises:
            RuntimeError: if the parameters were already released by an
                earlier call.
        """
        if self.data_dict is None:
            raise RuntimeError(
                'Vgg16 parameters were released by a previous net() call; '
                'create a new Vgg16 instance to build the graph again')

        print('buid model started')
        start_time = time.time()

        # Pre-processing: rescale, reorder RGB -> BGR and subtract the
        # per-channel means (VGG_MEAN is listed in B, G, R order).
        rgb_scaled = images * 255.0
        red, green, blue = tf.split(rgb_scaled, 3, 3)
        bgr = tf.concat([blue - VGG_MEAN[0], green - VGG_MEAN[1], red - VGG_MEAN[2]], 3)

        self.conv1_1 = self.conv_layer(bgr, 'conv1_1')
        self.conv1_2 = self.conv_layer(self.conv1_1, 'conv1_2')
        self.pool1 = self.max_pool_2x2(self.conv1_2, 'pool1')

        self.conv2_1 = self.conv_layer(self.pool1, 'conv2_1')
        self.conv2_2 = self.conv_layer(self.conv2_1, 'conv2_2')
        self.pool2 = self.max_pool_2x2(self.conv2_2, 'pool2')

        self.conv3_1 = self.conv_layer(self.pool2, "conv3_1")
        self.conv3_2 = self.conv_layer(self.conv3_1, "conv3_2")
        self.conv3_3 = self.conv_layer(self.conv3_2, "conv3_3")
        self.pool3 = self.max_pool_2x2(self.conv3_3, "pool3")

        self.conv4_1 = self.conv_layer(self.pool3, "conv4_1")
        self.conv4_2 = self.conv_layer(self.conv4_1, "conv4_2")
        self.conv4_3 = self.conv_layer(self.conv4_2, "conv4_3")
        self.pool4 = self.max_pool_2x2(self.conv4_3, "pool4")

        self.conv5_1 = self.conv_layer(self.pool4, "conv5_1")
        self.conv5_2 = self.conv_layer(self.conv5_1, "conv5_2")
        self.conv5_3 = self.conv_layer(self.conv5_2, "conv5_3")
        self.pool5 = self.max_pool_2x2(self.conv5_3, "pool5")

        # The ReLUs live here rather than inside fc_layer because the last
        # fully connected layer (fc8) must not be followed by one.
        self.fc6 = self.fc_layer(self.pool5, 'fc6')
        self.relu6 = tf.nn.relu(self.fc6)
        self.fc7 = self.fc_layer(self.relu6, 'fc7')
        self.relu7 = tf.nn.relu(self.fc7)
        self.fc8 = self.fc_layer(self.relu7, 'fc8')
        # Not returned: callers read the attribute directly.
        self.prob = tf.nn.softmax(self.fc8, name='prob')

        end_time = time.time()
        print('time sonsuming:', end_time - start_time)
        # The values now live in the graph as constants; drop the dict.
        self.data_dict = None

    def conv_layer(self, x, name):
        """Return relu(conv2d(x, w) + b) using the parameters stored under ``name``.

        The convolution uses stride 1 in every dimension and SAME padding.
        """
        w = self.get_weights(name)
        b = self.get_biases(name)
        # The filter shape comes from the stored weights themselves
        # (e.g. 3x3x512x512), so only the strides are spelled out here.
        conv = tf.nn.conv2d(x, w, [1, 1, 1, 1], padding='SAME')
        conv = tf.nn.bias_add(conv, b)
        relu = tf.nn.relu(conv)
        return relu

    def get_weights(self, name):
        """Return the convolution filter of layer ``name`` as a constant named 'filter'."""
        return tf.constant(self.data_dict[name][0], name='filter')

    def get_biases(self, name):
        """Return the biases of layer ``name`` as a constant named 'biases'."""
        return tf.constant(self.data_dict[name][1], name='biases')

    def max_pool_2x2(self, x, name):
        """Apply 2x2 max pooling with stride 2 and SAME padding; the op is named ``name``."""
        return tf.nn.max_pool(x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding="SAME", name=name)

    def fc_layer(self, x, name):
        """Return ``x @ w + b`` for the fully connected layer ``name`` (no activation).

        The input is flattened to ``[-1, dim]`` first, so the same code
        handles both the conv -> fc transition and fc -> fc connections.
        """
        with tf.variable_scope(name):
            dims = x.get_shape().as_list()
            dim = 1
            for i in dims[1:]:  # product of every dimension except the batch one
                dim *= i
            x = tf.reshape(x, [-1, dim])

            w = self.get_fc_weights(name)
            b = self.get_biases(name)
            wx_plus_b = tf.nn.bias_add(tf.matmul(x, w), b)
            return wx_plus_b

    def get_fc_weights(self, name):
        """Return the weight matrix of layer ``name`` as a constant named 'weights'.

        Same dict lookup as get_weights; only the name given to the
        resulting constant differs.
        """
        return tf.constant(self.data_dict[name][0], name='weights')
   

In [None]:
#app.py
# 应用网络结构做出预测
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
from Nclasses import labels
# Load the sample picture as a (1, 224, 224, 3) batch.
img_ready = load_image('pic/0.jpg')

fig = plt.figure(u"Top-5 预测结果")

with tf.Session() as sess:
    images = tf.placeholder(tf.float32, [1, 224, 224, 3])
    vgg = Vgg16()
    # net() clears vgg.data_dict when it finishes, so it must be called only
    # once per Vgg16 instance; a second call on the same instance fails.
    vgg.net(images)
    probability = sess.run(vgg.prob, feed_dict={images: img_ready})
    print(type(probability))
    print(len(probability))
    # probability[0] is the row for the first (and only) image of the batch.
    # argsort is ascending, so walk it backwards to get the five best classes.
    top5 = np.argsort(probability[0])[-1:-6:-1]
    print("top5:", top5)  # indices of the five most probable classes
    values = []
    bar_label = []
    for n, i in enumerate(top5):  # n is the rank, i is the class index
        print("n:", n)
        print("i:", i)
        values.append(probability[0][i])  # probability of that class
        bar_label.append(labels[i])  # human-readable label of that class
        print(i, ":", labels[i], "----", percent(probability[0][i]))

    ax = fig.add_subplot(111)
    ax.bar(range(len(values)), values, tick_label=bar_label, width=0.5, fc='g')
    ax.set_ylabel(u'probabilityit')
    ax.set_title(u'Top-5')
    # Write the percentage just above each bar.
    for a, b in zip(range(len(values)), values):
        ax.text(a, b + 0.0005, percent(b), ha='center', va='bottom', fontsize=7)
    plt.show()


(611, 711, 3)


  warn("The default mode, 'constant', will be changed to 'reflect' in "


buid model started
time sonsuming: 1.0472784042358398


In [1]:
# Show the notebook's current working directory; the relative paths used in this notebook are resolved against it.
pwd

'/home/qw/Documents/tf_demo'

# 测试区，测试代码

In [15]:
# An ndarray has no .mode attribute, so open the file with PIL instead
# to verify that the picture's mode is RGB.
from PIL import Image
# The context manager closes the underlying file once the mode is printed.
with Image.open('pic/0.jpg') as img:
    print(img.mode)

RGB


In [33]:
# List the directory that is expected to contain the vgg.npy parameter file.
ls '../../model_saved'

[0m[01;32mvgg.npy[0m*


In [56]:
import os
vgg16_path = os.path.join(os.getcwd(), '../../model_saved/vgg.npy')  # relative path
# Without .item() the loaded object is a 0-d ndarray wrapping the dict:
# data = np.load(vgg16_path, encoding='latin1')
# print(type(data))#<class 'numpy.ndarray'>
# print(data.shape)#()
# print(data.size)#1

# .item() unwraps that 0-d array and returns the dict stored inside it.
# Each layer name maps to a two-element list [weights, biases]; this holds
# for the conv layers and the fc layers alike.
# allow_pickle=True is required because the file stores a pickled dict and
# NumPy >= 1.16.3 refuses to unpickle by default. Only load trusted files.
data_dict = np.load(vgg16_path, encoding='latin1', allow_pickle=True).item()
print(type(data_dict))  # <class 'dict'>
print(len(data_dict))  # 16
print(type(data_dict['conv5_1']))  # <class 'list'>
print(len(data_dict['conv5_1']))  # 2: one weights array and one biases array per key
print(type(data_dict['conv5_1'][0]))
# print(len(data_dict['conv5_1'][0]))  # len is 3 (the first axis); the shape below is clearer.
print(data_dict['conv5_1'][0].shape)  # (3, 3, 512, 512): 3x3 kernel, 512 input and 512 output channels
print(data_dict['conv5_1'][1].shape)  # (512,)
print('biases:', type(data_dict['conv5_1'][1]))
# print('biases:',len(data_dict['conv5_1'][1]))#512
print('biases:', data_dict['conv5_1'][1].shape)  # (512,)
print(data_dict['conv5_1'][0][0].shape)  # (3, 512, 512)
print(data_dict['conv5_1'][0][1].shape)  # (3, 512, 512)
print(data_dict['fc6'][0].shape)  # (25088, 4096)
print(data_dict['fc6'][1].shape)  # (4096,)

<class 'dict'>
16
<class 'list'>
2
<class 'numpy.ndarray'>
(3, 3, 512, 512)
(512,)
biases: <class 'numpy.ndarray'>
biases: (512,)
(3, 512, 512)
(3, 512, 512)
(25088, 4096)
(4096,)


In [51]:
# Print the built-in documentation of the ndarray.shape attribute.
help(np.ndarray.shape)

Help on getset descriptor numpy.ndarray.shape:

shape
    Tuple of array dimensions.
    
    Notes
    -----
    May be used to "reshape" the array, as long as this would not
    require a change in the total number of elements
    
    Examples
    --------
    >>> x = np.array([1, 2, 3, 4])
    >>> x.shape
    (4,)
    >>> y = np.zeros((2, 3, 4))
    >>> y.shape
    (2, 3, 4)
    >>> y.shape = (3, 8)
    >>> y
    array([[ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
           [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
           [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.]])
    >>> y.shape = (3, 6)
    Traceback (most recent call last):
      File "<stdin>", line 1, in <module>
    ValueError: total size of new array must be unchanged



In [13]:
# Question being checked: is the softmax output not 2-D? Which output is it that has to be indexed with [0]?
# In this example softmax output is indexed with [0] because the batch takes up one dimension:
# even with a single image that dimension is still present.
import tensorflow as tf
# help(tf.nn.softmax)
a = tf.Variable([[1.,2.,2.],[2.,3.,4.]])
soft = tf.nn.softmax(a)
with tf.Session() as sess:
    tf.global_variables_initializer().run()
    print(sess.run(a))
    print(sess.run(soft))
    print(sess.run(soft)[0])  # first row only, i.e. the result for the first sample

[[ 1.  2.  2.]
 [ 2.  3.  4.]]
[[ 0.15536241  0.42231882  0.42231882]
 [ 0.09003057  0.24472848  0.66524094]]
[ 0.15536241  0.42231882  0.42231882]


# 附件