# Cat and Dog image classification

In [1]:
import numpy as np
from os import listdir
from random import shuffle
from PIL import Image


# Class directories to index. The position of a directory in this list is the
# integer label written next to each of its files (0 = cat, 1 = dog).
image_path = ["./dataset/cat_dog/cat", "./dataset/cat_dog/dog"]
#image_path = ["./data/A", "./data/B"]

def preprocess_to_data_file(data_dir_list, ratio=0.8, seed=None):
    """Index the images of every class directory and split them into train/test lists.

    Three text files are written to the current working directory, each with
    one ``<image path> <label>`` entry per line:

    * ``total_data.txt`` -- every entry, grouped by class directory,
    * ``train_data.txt`` -- the first ``ratio`` share of the shuffled entries,
    * ``test_data.txt``  -- the remaining entries.

    Args:
        data_dir_list: directories holding the images; the position of a
            directory in this list is used as the integer label of its files.
        ratio: fraction of the entries assigned to the training list.
        seed: optional seed for the shuffle. ``None`` (the default) shuffles
            with the module-level random state; any other value makes the
            train/test split reproducible across runs.

    Note:
        Spaces are removed from file names because each entry is later parsed
        by splitting on a space. A file whose real name contains a space is
        therefore recorded under a path that does not exist on disk.
    """
    total_list = []
    for index, data_dir in enumerate(data_dir_list):
        # listdir() returns names in arbitrary order; sorting keeps
        # total_data.txt and a seeded split independent of the file system.
        for filename in sorted(listdir(data_dir)):
            total_list.append('{}/{} {}'.format(data_dir, filename.replace(' ', ''), index))

    # total_data.txt keeps the unshuffled, per-class ordering.
    with open('total_data.txt', 'w') as f:
        for entry in total_list:
            f.write(entry + '\n')

    if seed is None:
        shuffle(total_list)
    else:
        # A private generator leaves the global random state untouched.
        from random import Random
        Random(seed).shuffle(total_list)

    split_at = int(ratio * len(total_list))
    train_list = total_list[:split_at]
    test_list = total_list[split_at:]

    with open('train_data.txt', 'w') as f:
        for entry in train_list:
            f.write(entry + '\n')

    with open('test_data.txt', 'w') as f:
        for entry in test_list:
            f.write(entry + '\n')



# Write total_data.txt / train_data.txt / test_data.txt, sending 80% of the
# shuffled entries to the training list and the rest to the test list.
preprocess_to_data_file(image_path, 0.8)


In [2]:
import numpy as np
from random import shuffle
from PIL import Image
import tensorflow as tf
import os
# Expose only GPU 0 to CUDA so the training below runs on that single device.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

def one_hot_encoding(label, n_class=None):
    """Convert integer class ids into one-hot rows.

    Args:
        label: sequence (or array) of integer class ids.
        n_class: width of the encoding. When omitted it is inferred as
            ``max(label) + 1``, which is only right if the highest class id
            actually occurs in ``label``. Pass the real number of classes
            when encoding batches so that every batch has the same width.

    Returns:
        Array of shape ``label.shape + (n_class,)`` holding a single 1.0 per
        row at the position of the class id. Empty input yields an empty
        array instead of raising.

    Raises:
        ValueError: if ``n_class`` is given and a label is not smaller than it.
    """
    values = np.asarray(label)

    if values.size == 0:
        # np.max() rejects empty input; return an empty encoding instead.
        width = 0 if n_class is None else n_class
        return np.zeros(values.shape + (width,))

    highest = int(np.max(values))
    if n_class is None:
        n_class = highest + 1
    elif highest >= n_class:
        raise ValueError(
            'label {} does not fit into {} classes'.format(highest, n_class))

    # Row i of the identity matrix is the one-hot vector of class i.
    return np.eye(n_class)[values]



def load_data(file_path):
    """Read a ``<image path> <label>`` list file and return it in random order.

    Args:
        file_path: text file with one entry per line, the image path and the
            integer label separated by whitespace.

    Returns:
        ``(data_path, data_label)``: two lists of equal length, where
        ``data_label[i]`` is the ``int`` label of the image at
        ``data_path[i]``. The entries are shuffled on every call.

    Raises:
        ValueError: if a non-blank line has no label or the label is not an
            integer.
    """
    with open(file_path, "r") as lines:
        # Blank lines (for instance a stray one at the end of the file)
        # carry no entry and are skipped instead of crashing the parser.
        data_list = [line.strip() for line in lines if line.strip()]

    shuffle(data_list)

    data_path = []
    data_label = []
    for data in data_list:
        # The label is the last whitespace-separated field; splitting from
        # the right parses each line once.
        fields = data.rsplit(None, 1)
        if len(fields) != 2:
            raise ValueError('malformed line in {}: {!r}'.format(file_path, data))
        data_path.append(fields[0])
        data_label.append(int(fields[1]))

    return data_path, data_label


def load_batch_data(data_path, labels, image_size=(200, 100)):
    """Load a batch of images from disk and pair them with one-hot labels.

    Args:
        data_path: sequence of image file paths.
        labels: integer class ids, aligned with ``data_path``.
        image_size: ``(width, height)`` every image is resized to. The
            default keeps the 200x100 size this function has always used.

    Returns:
        ``(batch_data, batch_label)`` as ``float32`` arrays. ``batch_data``
        has shape ``(len(data_path), height, width, 3)`` with pixel values
        scaled to ``[0, 1]``; ``batch_label`` is ``one_hot_encoding(labels)``.
    """
    batch_data = []
    for im in data_path:
        # The context manager releases the file handle once the pixels are read.
        with Image.open(im) as raw_image:
            # Force three channels: grayscale, palette or RGBA files would
            # otherwise yield arrays whose shape differs from the rest.
            rgb_image = raw_image.convert('RGB')
            resize_image = rgb_image.resize(image_size)
            # 8-bit channel values -> floats in [0, 1].
            normalized_image = np.asarray(resize_image) / 255.0
        batch_data.append(normalized_image)

    batch_label = one_hot_encoding(labels)

    batch_data = np.asarray(batch_data, np.float32)
    batch_label = np.asarray(batch_label, np.float32)
    return batch_data, batch_label



# ---------------------------------------------------------------------------
# Read the (path, label) lists of the training and the test split.
# ---------------------------------------------------------------------------
print('loading image path......')
train_data, train_label = load_data("train_data.txt")
test_data, test_label = load_data("test_data.txt")

print('number of train image is {}'.format(len(train_data)))
print('number of test image is {}'.format(len(test_data)))

# ---------------------------------------------------------------------------
# Network and training parameters.
# ---------------------------------------------------------------------------
# NOTE(review): the original note here read "162*212"; presumably the size of
# the raw images -- confirm.
image_size_width, image_size_height = 200, 100
num_labels = 2       # cat and dog
num_channels = 3     # RGB
batch_size = 256
kernel_size = 3      # side length of the convolution kernels
num_steps = 8001     # number of training iterations

# ---------------------------------------------------------------------------
# CNN model: graph inputs and trainable variables.
# ---------------------------------------------------------------------------

# Image batch (batch, height, width, channels) and its one-hot labels; the
# leading None leaves the batch size open.
x = tf.placeholder(tf.float32, [None, image_size_height, image_size_width, num_channels])
y = tf.placeholder(tf.float32, [None, num_labels])

# model() applies four stride-2 stages with SAME padding (conv1, pool1,
# conv2, pool2); each one halves a spatial dimension, rounding up. Deriving
# the flattened size keeps the first dense layer in sync with the image size:
# 100x200 -> 7x13, times 64 channels = 5824.
flat_height, flat_width = image_size_height, image_size_width
for _ in range(4):
    flat_height = (flat_height + 1) // 2
    flat_width = (flat_width + 1) // 2
flat_features = flat_height * flat_width * 64

# initial variables
layer1_weights = tf.Variable(tf.truncated_normal([kernel_size, kernel_size, num_channels, 32], stddev=0.1))
layer1_biases = tf.Variable(tf.zeros([32]))
layer2_weights = tf.Variable(tf.truncated_normal([kernel_size, kernel_size, 32, 64], stddev=0.1))
layer2_biases = tf.Variable(tf.constant(1.0, shape=[64]))
# A third convolution layer (64 -> 128 channels) is currently disabled:
#layer3_weights = tf.Variable(tf.truncated_normal([kernel_size, kernel_size, 64, 128], stddev=0.1))
#layer3_biases = tf.Variable(tf.constant(1.0, shape=[128]))

# Dense layers: flattened features -> 1024 hidden units -> class scores.
layer4_weights = tf.Variable(tf.truncated_normal([flat_features, 1024], stddev=0.1))
layer4_biases = tf.Variable(tf.constant(1.0, shape=[1024]))
layer5_weights = tf.Variable(tf.truncated_normal([1024, num_labels], stddev=0.1))
layer5_biases = tf.Variable(tf.constant(1.0, shape=[num_labels]))

# CNN model detail
def model(input_image):
    """Build the forward pass of the CNN.

    Two stages of stride-2 convolution, ReLU and 2x2 max-pooling are followed
    by a ReLU dense layer and a linear output layer. The weights and biases
    are the module-level ``layer*_weights`` / ``layer*_biases`` variables.
    (A third stride-2 convolution was tried here at some point and is
    currently disabled.)

    Args:
        input_image: image batch tensor fed to the first convolution.

    Returns:
        Tensor of unnormalized class scores (logits), one row per image.
    """
    stride_two = [1, 2, 2, 1]
    window_two = [1, 2, 2, 1]

    # Stage 1: convolution -> ReLU -> max-pool.
    net = tf.nn.conv2d(input_image, layer1_weights, stride_two, padding='SAME')
    net = tf.nn.relu(net + layer1_biases)
    net = tf.nn.max_pool(net, ksize=window_two, strides=stride_two, padding='SAME')

    # Stage 2: same structure on the pooled feature maps.
    net = tf.nn.conv2d(net, layer2_weights, stride_two, padding='SAME')
    net = tf.nn.relu(net + layer2_biases)
    net = tf.nn.max_pool(net, ksize=window_two, strides=stride_two, padding='SAME')

    # Flatten every feature map stack into one row per image.
    _, rows, cols, depth = net.get_shape().as_list()
    flat = tf.reshape(net, [-1, rows * cols * depth])

    dense = tf.nn.relu(tf.matmul(flat, layer4_weights) + layer4_biases)
    return tf.matmul(dense, layer5_weights) + layer5_biases

# build model
logits = model(x)

# define cost: mean softmax cross-entropy between the one-hot labels and the logits
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(labels=y, logits=logits))

# optimization
optimizer = tf.train.AdamOptimizer(1e-4).minimize(loss)
# show prediction result: fraction of rows whose arg-max class matches the label
prediction = tf.equal(tf.argmax(logits, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(prediction, tf.float32))

saver = tf.train.Saver()

# Saving may fail when the parent directory of the checkpoint is missing,
# so create it up front.
checkpoint_path = "train_model/model.ckpt"
checkpoint_dir = os.path.dirname(checkpoint_path)
if not os.path.isdir(checkpoint_dir):
    os.makedirs(checkpoint_dir)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    print('start training......')

    # The evaluation subset never changes, so decode it once instead of
    # re-reading 2000 images from disk at every evaluation step.
    test_batch_data, test_batch_labels = load_batch_data(test_data[:2000], test_label[:2000])
    test_feed_dict = {x: test_batch_data, y: test_batch_labels}

    # Number of valid batch start offsets. When the training set is not
    # larger than one batch, the modulo below would divide by zero or go
    # negative, so fall back to offset 0.
    offset_range = len(train_data) - batch_size

    for step in range(num_steps):
        offset = (step * batch_size) % offset_range if offset_range > 0 else 0
        batch_data_path = train_data[offset:(offset + batch_size)]
        batch_label_path = train_label[offset:(offset + batch_size)]

        train_batch_data, train_batch_labels = load_batch_data(batch_data_path, batch_label_path)

        feed_dict = {x: train_batch_data, y: train_batch_labels}
        _, loss_value, train_accuracy_ = sess.run([optimizer, loss, accuracy], feed_dict=feed_dict)

        # Every 100 steps: checkpoint, report the batch metrics, evaluate.
        if step % 100 == 0:
            saver.save(sess, checkpoint_path)
            print('step={}, loss={}, accuracy={}'.format(step, loss_value, train_accuracy_))
            test_accuracy_ = sess.run(accuracy, feed_dict=test_feed_dict)
            print('test accuracy = {}'.format(test_accuracy_))
    
    #print('start testing......')


  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


loading image path......
number of train image is 20000
number of test image is 5000
start training......
step=0, loss=3.7011141777038574, accuracy=0.51953125
test accuracy = 0.492000013589859
step=100, loss=0.7636332511901855, accuracy=0.5859375
test accuracy = 0.5440000295639038
step=200, loss=0.7217737436294556, accuracy=0.56640625
test accuracy = 0.5404999852180481
step=300, loss=0.6706728339195251, accuracy=0.64453125
test accuracy = 0.6144999861717224
step=400, loss=0.6264533400535583, accuracy=0.66015625
test accuracy = 0.621999979019165
step=500, loss=0.7437740564346313, accuracy=0.6171875
test accuracy = 0.5740000009536743
step=600, loss=0.7252986431121826, accuracy=0.6015625
test accuracy = 0.6225000023841858


KeyboardInterrupt: 

# Use a .pb file to store the model

In [5]:
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
from tensorflow.python.framework import graph_util

# Start from an empty default graph. Without this, re-running the cell (or
# running it after another model was built in the same kernel) adds a second
# copy of every node, and the names "input" and "out" used for the export
# would keep pointing at the first, untrained copy.
tf.reset_default_graph()

mnist = input_data.read_data_sets('MNIST_data', one_hot=True)


# Input: flattened MNIST images, 28*28*1 = 784 values each; None is the batch size.
x = tf.placeholder(dtype=tf.float32,shape=[None,28*28],name="input")
# Labels: one-hot rows, MNIST has 10 classes.
y_ = tf.placeholder(dtype=tf.float32,shape=[None,10],name="y_")
# Restore the 2-D image layout, 28*28*1; -1 stands for the number of images.
image = tf.reshape(x,shape=[-1,28,28,1])

# Layer 1: 5x5 convolution, 1 -> 32 channels, followed by 2x2 max-pooling with stride 2.
# Note: name="w1" is passed to the random initializer op, not to the Variable.
w1 = tf.Variable(initial_value=tf.random_normal(shape=[5,5,1,32],stddev=0.1,dtype=tf.float32,name="w1"))
b1= tf.Variable(initial_value=tf.zeros(shape=[32]))
conv1 = tf.nn.conv2d(input=image,filter=w1,strides=[1,1,1,1],padding="SAME",name="conv1")
relu1 = tf.nn.relu(tf.nn.bias_add(conv1,b1),name="relu1")
pool1 = tf.nn.max_pool(value=relu1,ksize=[1,2,2,1],strides=[1,2,2,1],padding="SAME")
# shape = {None, 14, 14, 32}
# Layer 2: 5x5 convolution, 32 -> 64 channels, followed by 2x2 max-pooling with stride 2.
w2 = tf.Variable(initial_value=tf.random_normal(shape=[5,5,32,64],stddev=0.1,dtype=tf.float32,name="w2"))
b2 = tf.Variable(initial_value=tf.zeros(shape=[64]))
conv2 = tf.nn.conv2d(input=pool1,filter=w2,strides=[1,1,1,1],padding="SAME")
relu2 = tf.nn.relu(tf.nn.bias_add(conv2,b2),name="relu2")
pool2 = tf.nn.max_pool(value=relu2,ksize=[1,2,2,1],strides=[1,2,2,1],padding="SAME",name="pool2")
# shape = {None, 7, 7, 64}
# FC1
w3 = tf.Variable(initial_value=tf.random_normal(shape=[7*7*64,1024],stddev=0.1,dtype=tf.float32,name="w3"))
b3 = tf.Variable(initial_value=tf.zeros(shape=[1024]))
# Key step: flatten the feature maps before the dense layer.
input3 = tf.reshape(pool2,shape=[-1,7*7*64],name="input3")
fc1 = tf.nn.relu(tf.nn.bias_add(value=tf.matmul(input3,w3),bias=b3),name="fc1")
# shape = {None, 1024}
# FC2
w4 = tf.Variable(initial_value=tf.random_normal(shape=[1024,10],stddev=0.1,dtype=tf.float32,name="w4"))
b4 = tf.Variable(initial_value=tf.zeros(shape=[10]))
fc2 = tf.nn.bias_add(value=tf.matmul(fc1,w4),bias=b4)
# shape = {None, 10}
# Softmax turns the network output into class probabilities; "out" is the
# node name used when exporting and re-loading the model.
y = tf.nn.softmax(fc2,name="out")

# Cross-entropy loss, computed from the raw logits. The _v2 op replaces the
# deprecated softmax_cross_entropy_with_logits; the labels come from a
# placeholder, so letting gradients flow into them changes nothing here.
cross_entropy = tf.nn.softmax_cross_entropy_with_logits_v2(logits=fc2,labels=y_)
loss = tf.reduce_mean(cross_entropy)
# Solver
train = tf.train.AdamOptimizer(learning_rate=0.0001).minimize(loss=loss)

# A prediction is correct when the arg-max index of the output equals that of the label.
correct_predict = tf.equal(tf.argmax(y,1),tf.argmax(y_,1))
# Accuracy is the mean of the correctness flags.
accuracy = tf.reduce_mean(tf.cast(correct_predict,dtype=tf.float32),name="accuracy")


# Train the network.
with tf.Session() as session:
    session.run(tf.global_variables_initializer())
    for i in range(0,1000):
        xs, ys = mnist.train.next_batch(100)
        session.run(fetches=train,feed_dict={x:xs,y_:ys})
        if i%100 == 0:
            # Accuracy on the batch that was just used for the update.
            train_accuracy = session.run(fetches=accuracy,feed_dict={x:xs,y_:ys})
            print(i,"accuracy=",train_accuracy)
    # After training, turn the variables into constants to obtain a
    # self-contained ("frozen") graph that ends at the "out" node.
    constant_graph = graph_util.convert_variables_to_constants(sess=session,
                                                            input_graph_def=session.graph_def,
                                                            output_node_names=['out'])
    # Serialize the frozen graph, weights included, into a pb file.
    # tf.gfile.GFile replaces the deprecated tf.gfile.FastGFile.
    with tf.gfile.GFile("lenet.pb", mode='wb') as f:
        f.write(constant_graph.SerializeToString())

Instructions for updating:
Please use alternatives such as official/mnist/dataset.py from tensorflow/models.
Instructions for updating:
Please write your own downloading logic.
Instructions for updating:
Please use tf.data to implement this functionality.
Extracting MNIST_data/train-images-idx3-ubyte.gz
Instructions for updating:
Please use tf.data to implement this functionality.
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Instructions for updating:
Please use tf.one_hot on tensors.
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz
Instructions for updating:
Please use alternatives such as official/mnist/dataset.py from tensorflow/models.
Instructions for updating:

Future major versions of TensorFlow will allow gradients to flow
into the labels input on backprop by default.

See `tf.nn.softmax_cross_entropy_with_logits_v2`.

0 accuracy= 0.05
100 accuracy= 0.88
200 accuracy= 0.92
300 accuracy= 0.96
400 accuracy= 0.99
500 accuracy= 0.9

In [9]:
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
import numpy as np

mnist = input_data.read_data_sets('MNIST_data',one_hot=True)
pb_path = "lenet.pb"

# Read the serialized (frozen) graph definition from the pb file.
# tf.gfile.GFile replaces the deprecated tf.gfile.FastGFile.
with tf.gfile.GFile(pb_path,'rb') as f:
    graph_def = tf.GraphDef()
    graph_def.ParseFromString(f.read())

# Import into a dedicated graph rather than the shared default graph, which
# may already hold nodes named "input" / "out" from a model built earlier in
# the same kernel.
inference_graph = tf.Graph()
with inference_graph.as_default():
    tf.import_graph_def(graph_def, name='')

with tf.Session(graph=inference_graph) as session:
    # No variable initialization is needed: the weights were stored as
    # constants when the model was exported.
    # Input tensor
    input_x = inference_graph.get_tensor_by_name("input:0")
    # Prediction tensor
    output = inference_graph.get_tensor_by_name("out:0")
    # Test image at index 100, shaped as a batch of one.
    one_image = np.reshape(mnist.test.images[100], [-1, 784])
    # Run inference on the test image.
    out = session.run(output,feed_dict={input_x:one_image})
    pre_label = np.argmax(out,1)
    print("pre_label=",pre_label)
    print('true label:', np.argmax(mnist.test.labels[100],0))

Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz
pre_label= [6]
true label: 6
