In [1]:
import tensorflow as tf
import os
import re
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm
from matplotlib.image import imread
from PIL import Image
from random import shuffle
%matplotlib inline

  from ._conv import register_converters as _register_converters


In [2]:
cat = [1, 0]
dog = [0, 1]
SIZE = 50

keepRate = 0.8
LR = 0.05
epochs = 20
batchSize = 10

TRAIN_DIR = os.path.join(os.getcwd(), 'train')
TEST_DIR = os.path.join(os.getcwd(), 'test')

In [3]:
def label_img(img):
    word_label = img.split('.')[-3]
    if word_label == 'cat': return cat
    elif word_label == 'dog': return dog
    
def resize_data():
    for img in os.listdir(TRAIN_DIR):
        path = os.path.join(TRAIN_DIR, img)
        img = Image.open(path)
        img = img.resize((SIZE, SIZE), PIL.Image.ANTIALIAS)
        img.save(path)
        
    for img in os.listdir(TEST_DIR):
        path = os.path.join(TEST_DIR, img)
        img = Image.open(path)
        img = img.resize((SIZE, SIZE), PIL.Image.ANTIALIAS)
        img.save(path)

def prep_data():
    train_data_imgs = []
    train_data_lbls = []
    
    for img in os.listdir(TRAIN_DIR):
        label = label_img(img)
        path = os.path.join(TRAIN_DIR, img)
        img = imread(path)
        train_data_imgs.append(np.array(img))
        train_data_lbls.append(np.array(label))
        
    test_data = []
    for img in os.listdir(TEST_DIR):
        path = os.path.join(TEST_DIR, img)
        img = imread(path)
        test_data.append(img)
        
    return train_data_imgs, train_data_lbls, test_data

In [4]:
# resize_data()

In [5]:
# train_data_imgs, train_data_lbls, test_data = prep_data()

In [6]:
# print(np.array(train_data_imgs).shape)
# print(np.array(train_data_lbls).shape)
# print(np.array(test_data).shape)

In [7]:
# np.save('train_data_imgs.npy', train_data_imgs)
# np.save('train_data_lbls', train_data_lbls)
# np.save('test_data.npy', test_data)

train_data_imgs = np.load('train_data_imgs.npy')
train_data_lbls = np.load('train_data_lbls.npy')
test_data = np.load('test_data.npy')

In [8]:
print(np.array(train_data_imgs).shape)
print(np.array(train_data_lbls).shape)
print(np.array(test_data).shape)

(25000, 50, 50, 3)
(25000, 2)
(12500, 50, 50, 3)


In [9]:
def conv2d(x, W):
    return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')

def maxpool2d(x):
    return tf.nn.max_pool(x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')

In [10]:
x = tf.placeholder(tf.float32, shape=[None, SIZE, SIZE, 3])
y = tf.placeholder(tf.float32, shape=[None, 2])

red = 13 # reduced: reduced img size
tout = 64 # temp_out: number of output filters of last convolution layer

weights = {
    'Wconv1': tf.Variable(tf.random_normal([5, 5, 3, 32])),
    'Wconv2': tf.Variable(tf.random_normal([5, 5, 32, 64])),
#     'Wconv3': tf.Variable(tf.random_normal([5, 5, 64, 128])),
#     'Wconv4': tf.Variable(tf.random_normal([5, 5, 128, 64])),
#     'Wconv5': tf.Variable(tf.random_normal([5, 5, 64, 32])),
    'Wfc': tf.Variable(tf.random_normal([red*red*tout, 1024])),
    'Wout': tf.Variable(tf.random_normal([1024, 2]))
}

biases = {
    'bconv1': tf.Variable(tf.zeros([32])),
    'bconv2': tf.Variable(tf.zeros([64])),
#     'bconv3': tf.Variable(tf.zeros([128])),
#     'bconv4': tf.Variable(tf.zeros([64])),
#     'bconv5': tf.Variable(tf.zeros([32])),
    'bfc': tf.Variable(tf.zeros([1024])),
    'bout': tf.Variable(tf.zeros([2]))
}

convnet = tf.nn.relu(conv2d(x, weights['Wconv1']) + biases['bconv1'])
convnet = maxpool2d(convnet)

convnet = tf.nn.relu(conv2d(convnet, weights['Wconv2']) + biases['bconv2'])
convnet = maxpool2d(convnet)

# convnet = tf.nn.relu(conv2d(convnet, weights['Wconv3']) + biases['bconv3'])
# convnet = maxpool2d(convnet)

# convnet = tf.nn.relu(conv2d(convnet, weights['Wconv4']) + biases['bconv4'])
# convnet = maxpool2d(convnet)

# convnet = tf.nn.relu(conv2d(convnet, weights['Wconv5']) + biases['bconv5'])
# convnet = maxpool2d(convnet)

convnet = tf.reshape(convnet, [-1, red*red*tout])
convnet = tf.nn.relu(tf.matmul(convnet, weights['Wfc']) + biases['bfc'])
convnet = tf.nn.dropout(convnet, keepRate)

convnet = tf.matmul(convnet, weights['Wout']) + biases['bout']
# convnet = tf.nn.sigmoid(tf.matmul(convnet, weights['Wout']) + biases['bout'])

In [11]:
prediction = convnet
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(labels=y, logits=prediction))
optimizer = tf.train.AdagradOptimizer(LR).minimize(loss)
correct = tf.equal(tf.argmax(prediction, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))

print('Total Trainable Parameters: ', np.sum([np.prod(v.get_shape().as_list()) for v in tf.trainable_variables()]))

Total Trainable Parameters:  11132354


In [12]:
# trainX = np.array([i for i in train_data[:, 0]]).reshape([-1, 50, 50, 3])
# trainY = np.array([[i[0], i[1]] for i in train_data[:, 1]])
# t_trainX = trainX[:500]
# t_trainY = trainY[:500]

trainX = train_data_imgs
trainY = train_data_lbls
t_trainX = trainX[:500]
t_trainY = trainY[:500]

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    
    for epoch in range(epochs):
        currentBatch = 0
        while currentBatch < len(train_data_imgs):
            batchX = trainX[currentBatch:currentBatch+batchSize]
            batchY = trainY[currentBatch:currentBatch+batchSize]
            currentBatch += batchSize
            sess.run([optimizer], feed_dict={x: batchX, y: batchY})
            
        l, a = sess.run([loss, accuracy], feed_dict={x: t_trainX, y: t_trainY})
        print('Epoch: ', epoch+1, '\nLoss: ', l, '\nAccuracy: ', a)
        print('\n'*2)

Epoch:  1 
Loss:  6070.5234 
Accuracy:  0.538



Epoch:  2 
Loss:  3034.4565 
Accuracy:  0.57



Epoch:  3 
Loss:  2393.024 
Accuracy:  0.514



Epoch:  4 
Loss:  875.74133 
Accuracy:  0.562



Epoch:  5 
Loss:  850.58673 
Accuracy:  0.542



Epoch:  6 
Loss:  899.8996 
Accuracy:  0.54



Epoch:  7 
Loss:  534.61475 
Accuracy:  0.536



Epoch:  8 
Loss:  477.21677 
Accuracy:  0.528



Epoch:  9 
Loss:  265.7188 
Accuracy:  0.536



Epoch:  10 
Loss:  294.0919 
Accuracy:  0.546



Epoch:  11 
Loss:  237.00366 
Accuracy:  0.526



Epoch:  12 
Loss:  381.629 
Accuracy:  0.528



Epoch:  13 
Loss:  805.1571 
Accuracy:  0.56



Epoch:  14 
Loss:  368.5914 
Accuracy:  0.516



Epoch:  15 
Loss:  218.85745 
Accuracy:  0.546



Epoch:  16 
Loss:  360.42358 
Accuracy:  0.53



Epoch:  17 
Loss:  374.85315 
Accuracy:  0.544



Epoch:  18 
Loss:  532.6025 
Accuracy:  0.502



Epoch:  19 
Loss:  367.7456 
Accuracy:  0.522



Epoch:  20 
Loss:  63.024696 
Accuracy:  0.552





In [13]:
# managed to increase accuracy just by decreasing the number of convolutional layers
# a bit too many were there when considering the dataset size and training time

# also consider checking out the sigmoid activation for output layer combined with mean log-loss loss function instead
# of no activation and softmax cross entropy loss

# further optimisations can be made by changing the model hyperparams and lr and batch_size of minibatch gradient descent
# furthermore maybe training can be optimised using the Adadelta optimizer