In [1]:
from PIL import ImageFilter, ImageStat, Image, ImageDraw
from multiprocessing import Pool, cpu_count
import pandas as pd
import numpy as np
import cv2
import glob
import os
import time
import tensorflow as tf
from keras.utils import np_utils

import matplotlib.pyplot as plt
%matplotlib inline

Using TensorFlow backend.


In [2]:
# resize the image to 64 x 64 x 3
def get_im_cv2(filepath):
    im = cv2.imread(filepath)
    im = cv2.resize(im, (64, 64), cv2.INTER_LINEAR)
    return im

# load trainin data

In [3]:
data_dir = 'C://Users//p0ng5//Desktop//data//cervical_cancer'

def load_train():
    start = time.time()
    X_train = []
    X_id = []
    y_train = []

    train_folders = ['Type_1', 'Type_2', 'Type_3']

    for fld in train_folders:
        path = os.path.join(data_dir, 'train', fld, '*.jpg')
        for file in glob.glob(path):
            img = get_im_cv2(file)
            X_train.append(img)
            X_id.append(os.path.basename(file))
            y_train.append(train_folders.index(fld))
    print('Read train data time: {} seconds'.format(round(time.time() - start, 2)))
    return X_train, X_id, y_train


def load_test():
    X_test = []
    X_test_id = []
    start = time.time()
    path = os.path.join(data_dir, 'test', '*.jpg')
    for file in sorted(glob.glob(path)):
        img = get_im_cv2(file)
        X_test.append(img)
        X_test_id.append(os.path.basename(file))
    print("Read test data time: {} seconds".format(round(time.time() - start, 2)))
    return X_test, X_test_id

In [18]:
# normalize data
def read_and_normalize_train_data():
    
    train_data, train_id, train_target = load_train()
    train_data = np.array(train_data, dtype=np.float32)
    train_target = np.array(train_target, dtype=np.float32)
    
    train_data /= 255
    train_target = np_utils.to_categorical(train_target, 3)
    print("Train shape: {}".format(train_data.shape))
    print("Target shape: []".format(train_target.shape))
    return train_data, train_target, train_id

def read_and_normalize_test_data():
    
    test_data, test_id = load_test()
    test_data = np.array(test_data, dtype=np.float32)
    
    test_data /= 255
    print("Test shape: {}".format(test_data.shape))
    
    return test_data, test_id


def get_batch(X, batch_size, random_state=None):
    n_samples = len(X)
    n_batches = int(np.ceil(n_samples / batch_size))
    
    np.random.seed(random_state)
    rand_idx = np.random.permutation(range(n_samples))
    for i in range(n_batches):
        yield rand_idx[i*batch_size: min((i+1)*batch_size, n_samples)]
        
        
# helper function for layer creation
def conv2d(X, W, b, strides=1):
    '''relu(X*W + b)'''
    X = tf.nn.conv2d(X, W, strides=[1, strides, strides, 1], padding='SAME')
    X = tf.nn.bias_add(X, b)
    return tf.nn.relu(X)
    
def maxpool2d(X, stride=2):
    return tf.nn.max_pool(X, ksize=(1, stride, stride, 1), padding='VALID',
                          strides=(1, stride, stride, 1))

# init parameters
learning_rate = 0.01
epochs = 200
batch_size = 64
in_H = 64
in_W = 64
in_channel = 3
filter_1 = 64
filter_2 = 128
filter_3 = 256
filter_H = 3
filter_W = 3

fc_flat = (in_H//8) * (in_W//8) * filter_3
hidden_1 = 512
hidden_2 = 512
n_classes = 3

p_dropout = .5



def make_model(X, weights, biases, dropout):
    
    # conv1
    X = conv2d(X, weights['wc1'], biases['bc1'])
    X = maxpool2d(X)
    
    # conv2
    X = conv2d(X, weights['wc2'], biases['bc2'])
    X = maxpool2d(X)
    
    # conv3
    X = conv2d(X, weights['wc3'], biases['bc3'])
    X = maxpool2d(X)
    
    # FC1
    X = tf.reshape(X, shape=[-1, fc_flat])
    X = tf.add(tf.matmul(X, weights['wd1']), biases['bd1'])
    X = tf.nn.relu(X)
    
    # add dropout
    X = tf.nn.dropout(X, dropout)
    
    # FC2
    X = tf.add(tf.matmul(X, weights['wd2']), biases['bd2'])
    X = tf.nn.relu(X)
    X = tf.nn.dropout(X, dropout)
    
    # output
    out = tf.add(tf.matmul(X, weights['out']), biases['out'])
    
    return out

In [5]:
X_train, Y_train, train_id = read_and_normalize_train_data()
X_test, test_id = read_and_normalize_test_data()

Read train data time: 303.56 seconds
Train shape: (1481, 64, 64, 3)
Target shape: []
Read test data time: 106.89 seconds
Test shape: (512, 64, 64, 3)


In [22]:
tf.reset_default_graph()

# init variable
X = tf.placeholder(dtype=tf.float32, shape=(None, in_H, in_W, in_channel))
Y = tf.placeholder(dtype=tf.float32, shape=(None, n_classes))
dropout = tf.placeholder(tf.float32)

# init weights & biases
weights = {
    'wc1': tf.Variable(tf.random_normal([filter_H, filter_W, in_channel, filter_1])),
    'wc2': tf.Variable(tf.random_normal([filter_H, filter_W, filter_1, filter_2])),
    'wc3': tf.Variable(tf.random_normal([filter_H, filter_W, filter_2, filter_3])),
    'wd1': tf.Variable(tf.random_normal([fc_flat, hidden_1])),
    'wd2': tf.Variable(tf.random_normal([hidden_1, hidden_2])),
    'out': tf.Variable(tf.random_normal([hidden_2, n_classes]))
}

biases = {
    'bc1': tf.Variable(tf.zeros([filter_1])),
    'bc2': tf.Variable(tf.zeros([filter_2])),
    'bc3': tf.Variable(tf.zeros([filter_3])),
    'bd1': tf.Variable(tf.zeros([hidden_1])),
    'bd2': tf.Variable(tf.zeros([hidden_2])),
    'out': tf.Variable(tf.zeros([n_classes]))
}

pred = make_model(X, weights, biases, dropout)

# cost & optimizer
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=Y))
optimizer = tf.train.AdamOptimizer(learning_rate).minimize(cost)

# accuracy
correct = tf.equal(tf.argmax(pred, axis=1), tf.argmax(Y, axis=1))
accuracy = tf.reduce_mean(tf.cast(correct, dtype=tf.float32))

init = tf.global_variables_initializer()
with tf.Session() as sess:
    sess.run(init)
    
    for i in range(epochs):
        
        for idx in get_batch(X_train, batch_size, random_state=i):
            X_batch, Y_batch = X_train[idx], Y_train[idx]
            sess.run(optimizer, feed_dict={X:X_batch, Y:Y_batch, dropout:p_dropout})
            
        if (i%10==0) or (i+1 == epochs):
            acc, loss = sess.run([accuracy, cost], feed_dict={X:X_train, Y:Y_train, dropout:1.0})
            print("Epoch {}: cost: {:.4f} accuracy: {:.4f}".format(i+1, loss, acc))

Epoch 1: cost: 513346.6250 accuracy: 0.3862
Epoch 11: cost: 50.7139 accuracy: 0.5348
Epoch 21: cost: 0.9943 accuracy: 0.5307
Epoch 31: cost: 0.9965 accuracy: 0.5294
Epoch 41: cost: 2.5273 accuracy: 0.5287
Epoch 51: cost: 1.4421 accuracy: 0.5280
Epoch 61: cost: 0.9977 accuracy: 0.5287
Epoch 71: cost: 0.9978 accuracy: 0.5287
Epoch 81: cost: 0.9977 accuracy: 0.5287
Epoch 91: cost: 0.9986 accuracy: 0.5280
Epoch 101: cost: 0.9994 accuracy: 0.5280
Epoch 111: cost: 0.9985 accuracy: 0.5280
Epoch 121: cost: 0.9998 accuracy: 0.5273
Epoch 131: cost: 0.9998 accuracy: 0.5273
Epoch 141: cost: 0.9998 accuracy: 0.5273
Epoch 151: cost: 0.9998 accuracy: 0.5273
Epoch 161: cost: 0.9997 accuracy: 0.5273
Epoch 171: cost: 0.9998 accuracy: 0.5273
Epoch 181: cost: 0.9998 accuracy: 0.5273
Epoch 191: cost: 0.9998 accuracy: 0.5273
Epoch 200: cost: 0.9998 accuracy: 0.5273


In [23]:
from tensorflow.contrib.slim import nets