In [1]:
import pandas as pd
import numpy as np
import cv2
import random, os, sys, math, json
from datetime import datetime, timedelta
import time
from sklearn import metrics
# transfer learning using tf.slim, easy to load the original model structure and checkpoint
# https://github.com/tensorflow/models/tree/master/research/slim
import tensorflow as tf
import tensorflow.contrib.slim as slim
import tensorflow.contrib.slim.nets as nets

In [2]:
notebookname = 'Assignment_4'

# Generate 7 random char as instance running id
run_id = random.sample('0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ', 7)
run_id = "".join(x for x in run_id)
# attampt_name is the name of the model
attampt_name = 'Assignment_4_1_' + run_id

# Path and Dir configration
rootDir=''
csvDataPath = os.path.join(rootDir, 'data_csv/')
imageDataPath = os.path.join(rootDir,'data_images/')

# Paramaters for model
BATCH_SIZE = 20
IMG_WIDTH = 224
IMG_HEIGHT = 224
NUM_CLASS = 7
lesion_type_list = ['nv', 'mel', 'bkl', 'bcc', 'akiec', 'vasc', 'df']

In [3]:
# Load csv data
df_train_ori = pd.read_csv(os.path.join(csvDataPath, 'train.csv'))
df_test_all = pd.read_csv(os.path.join(csvDataPath, 'test.csv'))

# The number of last 3 types of images is still less (even after data augmentation), 
# replicate last 3 types of images to make training dataset more balanced
df_lastTwo = df_train_ori.loc[df_train_ori['dx'].isin(['df', 'vasc'])].reset_index(drop=True)
df_lastThree = df_train_ori.loc[df_train_ori['dx'].isin(['akiec'])].reset_index(drop=True)

In [4]:
df_train_ori = df_train_ori.sample(frac=1).reset_index(drop=True).loc[:3000]
df_test_all = df_test_all.sample(frac=1).reset_index(drop=True)

In [5]:
# Helper functions for input data
def get_Labels(labels):
    """
    ARG:
        labels: list of actual label for a batch input
    RETURN:
        labelList: list of one-hot encoding labels
    """
    labelList = np.zeros((len(labels),7))
    for i in range(len(labels)):
        labIdx = lesion_type_list.index(labels[i])
        labelList[i][labIdx] = 1
    return labelList

# Get Next Batch
def next_batch(indx1, indx2, csv_data):
    """
    ARG:
        indx1: start index for batch
        indx2: end index for batch
        csv_data: the dataframe of the all image information
        ifTrain: True or False for if doing training process
    RETURN:
        imgPaths: list of path for batch images
        imgLabels: list of labels for batch images
        augFlags: list of augFlag for batch images
    DESCRIPTION:
        Get all information for one batch
    """
    imgPaths = np.array(csv_data.path[indx1:indx2])
    imgLabels = get_Labels(list(csv_data.dx[indx1:indx2]))
    return imgPaths, imgLabels

In [6]:
# Model Input Creation
tf.reset_default_graph()

x_in = tf.placeholder(tf.string, shape=(None,), name='img_paths')
y_label = tf.placeholder(tf.int32, shape=(None, NUM_CLASS), name='labels')


def load_image(input_elems):
    """
    ARG:
        input_elems: one image path
    RETURN:
        image_result: the real image that is going to be passed into madel
    """
    
    image_file = input_elems
    
    image = tf.read_file(image_file)
    image = tf.image.decode_jpeg(image, channels=3)
    input_image = tf.cast(image, tf.float32)
    # resizing to 224 x 224 x 3
    image_resized = tf.image.resize_images(input_image, [IMG_HEIGHT, IMG_WIDTH], align_corners=True, 
                                           method=tf.image.ResizeMethod.NEAREST_NEIGHBOR)
    return image_resized

train_dataset = tf.map_fn(load_image, x_in, dtype=(tf.float32))
image_inputs = tf.identity(train_dataset, name='new_inputs')

In [7]:
# Helper Function for creating layers
def create_weights(shape):
    return tf.Variable(tf.truncated_normal(shape, stddev=0.05))
 
def create_biases(size):
    return tf.Variable(tf.constant(0.05, shape=[size]))

def create_convolutional_layer(input, num_input_channels, conv_filter_size, num_filters):  
    
    ## We shall define the weights that will be trained using create_weights function.
    weights = create_weights(shape=[conv_filter_size, conv_filter_size, num_input_channels, num_filters])
    ## We create biases using the create_biases function. These are also trained.
    biases = create_biases(num_filters)
    ## Creating the convolutional layer
    layer = tf.nn.conv2d(input=input, filter=weights, strides=[1, 1, 1, 1], padding='SAME')
    layer += biases
    return layer

def create_flatten_layer(layer):
    layer_shape = layer.get_shape()
    num_features = layer_shape[1:4].num_elements()
    layer = tf.reshape(layer, [-1, num_features])
    return layer

def create_fc_layer(input, num_inputs, num_outputs, use_relu=True):
    
    #Let's define trainable weights and biases.
    weights = create_weights(shape=[num_inputs, num_outputs])
    biases = create_biases(num_outputs)
    layer = tf.matmul(input, weights) + biases
    if use_relu:
        layer = tf.nn.relu(layer)
    return layer

### Change the activation function from tanh to relu

In [8]:
# Create CNN Model
layer_conv1 = create_convolutional_layer(input=image_inputs, num_input_channels=3, conv_filter_size=7, num_filters=32)
layer_conv1 = tf.nn.max_pool(value=layer_conv1, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
layer_conv1 = tf.nn.relu(layer_conv1)

layer_conv2 = create_convolutional_layer(input=layer_conv1, num_input_channels=32, conv_filter_size=9, num_filters=64)
layer_conv2 = tf.nn.max_pool(value=layer_conv2, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
layer_conv2 = tf.nn.relu(layer_conv2)

layer_conv3 = create_convolutional_layer(input=layer_conv2, num_input_channels=64, conv_filter_size=11, num_filters=128)
layer_conv3 = tf.nn.max_pool(value=layer_conv3, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
layer_conv3 = tf.nn.relu(layer_conv3)
          
layer_flat = create_flatten_layer(layer_conv3)
 
layer_fc = create_fc_layer(input=layer_flat, num_inputs=layer_flat.get_shape()[1:4].num_elements(), num_outputs=128)
 
y_pred_logit = create_fc_layer(input=layer_fc, num_inputs=128, num_outputs=NUM_CLASS, use_relu=False)


In [9]:
# output layer
with tf.variable_scope('MyPrediction'):
    y_pred_softmax = tf.nn.softmax(y_pred_logit, name='pred_softmax')
    y_pred_cls = tf.argmax(y_pred_softmax, axis=1, name='pred_class')

# Performance Measures 
y_label_cls = tf.cast(tf.argmax(y_label, axis=1), tf.int32, name='y_label_cls')
correct_prediction = tf.equal(tf.cast(y_pred_cls, tf.int32), y_label_cls)
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32), name='batch_accuracy')

In [10]:
# Cost-function
mse_value = tf.losses.mean_squared_error(predictions=y_pred_logit, labels=y_label)
cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits=y_pred_logit, labels=y_label)
loss_1 = tf.reduce_mean(cross_entropy, name='loss_1')
loss_2 = tf.reduce_mean(mse_value, name='loss_2')

# Optimization method
myOptimizer_1 = tf.train.MomentumOptimizer(learning_rate=1e-4, momentum=0.9)
myOptimizer_2 = tf.train.AdamOptimizer(learning_rate=1e-4)
train_op_1 = myOptimizer_1.minimize(loss_1, name='train_op_1')
train_op_2 = myOptimizer_2.minimize(loss_2, name='train_op_2')

Instructions for updating:

Future major versions of TensorFlow will allow gradients to flow
into the labels input on backprop by default.

See `tf.nn.softmax_cross_entropy_with_logits_v2`.



In [11]:
# Session creation and variables initialization
sess = tf.Session()
sess.run(tf.global_variables_initializer())

In [12]:
def train_result():
    """
    RETURN:
        loss_over_all: the loss_1 for entire train data in model prediction
        acc: the accuracy for entire train data in model prediction
        cm: the confusion matrix for entire train data in model prediction
    """
    num_train = len(df_train_ori)
    train_label_cls = np.zeros(shape=num_train, dtype=np.int)
    train_pred_cls = np.zeros(shape=num_train, dtype=np.int)
    train_pred_prob = np.zeros(shape=[num_train,7])
    
    idx = 0
    num_of_steps = 0
    loss_value_all = 0.00
    train_accuracy_all = 0.00
    
    while idx < num_train:
        kidx = min(idx + BATCH_SIZE, num_train)
        x_batch, y_true_batch = next_batch(idx, kidx, df_train_ori)
        
        loss_batch = sess.run(loss_1, feed_dict = {x_in:x_batch, y_label:y_true_batch})
        acc_batch = sess.run(accuracy, feed_dict = {x_in:x_batch, y_label:y_true_batch})
        
        train_label_cls[idx:kidx] = sess.run(y_label_cls, feed_dict={y_label:y_true_batch})
        train_pred_cls[idx:kidx] = sess.run(y_pred_cls, feed_dict={x_in:x_batch})
        train_pred_prob[idx:kidx] = sess.run(y_pred_softmax, feed_dict={x_in:x_batch})
        
        loss_value_all += loss_batch
        train_accuracy_all += acc_batch
        idx = kidx
        num_of_steps += 1
        
    loss_over_all = loss_value_all/num_of_steps
    acc = train_accuracy_all/num_of_steps
    cm = metrics.confusion_matrix(y_true=train_label_cls, y_pred=train_pred_cls)
    
    return loss_over_all, acc, cm

In [13]:
# Test the model, Showing the performance
def print_test_result():
    """
    RETURN:
        loss_over_all: the loss_1 for entire test data in model prediction
        acc: the accuracy for entire test data in model prediction
        cm: the confusion matrix for entire test data in model prediction
    """
    num_test = len(df_test_all)
    test_label_cls = np.zeros(shape=num_test, dtype=np.int)
    test_pred_cls = np.zeros(shape=num_test, dtype=np.int)
    test_pred_prob = np.zeros(shape=[num_test,7])
    
    idx = 0
    num_of_steps = 0
    loss_value_all = 0.00
    
    while idx < num_test:
        
        kidx = min(idx + BATCH_SIZE, num_test)
        x_batch, y_true_batch = next_batch(idx, kidx, df_test_all)
        
        loss_batch = sess.run(loss_1, feed_dict = {x_in:x_batch, y_label:y_true_batch})
        acc_batch = sess.run(accuracy, feed_dict = {x_in:x_batch, y_label:y_true_batch})
        
        test_label_cls[idx:kidx] = sess.run(y_label_cls, feed_dict={y_label:y_true_batch})
        test_pred_cls[idx:kidx] = sess.run(y_pred_cls, feed_dict={x_in:x_batch})
        test_pred_prob[idx:kidx] = sess.run(y_pred_softmax, feed_dict={x_in:x_batch})
        
        loss_value_all += loss_batch
        idx = kidx
        num_of_steps += 1
        
    loss_over_all = loss_value_all/num_of_steps
    correct = (test_label_cls == np.array(test_pred_cls))
    correct_sum = np.array(correct).sum()
    acc = float(correct_sum) / num_test

    cm = metrics.confusion_matrix(y_true=test_label_cls, y_pred=test_pred_cls)
    
    # Print the accuracy.
    msg = "Accuracy on Test-Set: {0:.1%} ({1} / {2}),  loss: {3:>5.5}"
    msg_format = msg.format(acc, correct_sum, num_test, loss_over_all)
    message_all = msg_format + '\n' + str(cm)

    print(message_all)
    print('-------------------------------------------------------------------------')


In [14]:
# write to file python3
def print_to_write(file_name, msg_content, do):
    """
    ARG:
        file_name: name of report file
        msg_content: message that is about to write into report file
        do: 'a' or 'w+', appending or writing
    DESCRIPTION:
        Write message into report file
    """
    logFile = open('report/assignment4/' + file_name + '.log', do)
    print(msg_content, file=logFile)
    print('-------------------------------------------------------------------------', file=logFile)
    logFile.close()

In [15]:
def myOptimizing():
    """
    DESCRIPTION:
        Entire training and testing optimization, print and save the result including model and performance
    """
    start_time = time.time()
    ############################################## Train for 10 Epochs ##############################################
    for e in range(1,11):
        start_time_e = time.time()

        #shuffle order of data
        train_csv_shuffle = df_train_ori.sample(frac=1).reset_index(drop=True)
        num_train = len(train_csv_shuffle)
        idx = 0
        
        while idx < num_train:
            kidx = min(idx + BATCH_SIZE, num_train)
            x_batch, y_true_batch = next_batch(idx, kidx, train_csv_shuffle)
            sess.run(train_op_1, feed_dict = {x_in:x_batch, y_label:y_true_batch})
            idx = kidx
        
        loss_over_all, acc, cm = train_result()
        msg = 'Epochs:{0:>3},  Training Accuracy: {1:>6.1%},  loss: {2:>5.5}'
        msg_format = msg.format(e, acc, loss_over_all)
        message_all = msg_format + '\n' + str(cm)

        # print_to_write(attampt_name, message_all, 'a')
        print(message_all)
        print('-------------------------------------------------------------------------')
        print_test_result()
        
        end_time_e = time.time()
        time_dif_e = end_time_e - start_time_e
        e_time = "Time usage: " + str(timedelta(seconds=int(round(time_dif_e))))
        # print_to_write(attampt_name, e_time, 'a')
        print(e_time)
        print('-------------------------------------------------------------------------')
    
    # Total Time Usage
    end_time = time.time()
    time_dif = end_time - start_time
    total_time = "Total Trainning Time usage: " + str(timedelta(seconds=int(round(time_dif))))
    
    # print_to_write(attampt_name, total_time, 'a')
    print(total_time)

In [16]:
if __name__ == "__main__":
    
    # print_to_write(attampt_name, attampt_name, 'w+')
    # print_to_write(attampt_name, '--------------------------- Starting Training ---------------------------', 'a')
    print(attampt_name)
    print('--------------------------- Starting Training ---------------------------')
    print_test_result()
    myOptimizing()
    print('------------------------------- Complete --------------------------------')
    sess.close()

Assignment_4_1_npBuz0D
--------------------------- Starting Training ---------------------------
Accuracy on Test-Set: 12.9% (9 / 70),  loss: 866.88
[[0 1 0 2 7 0 0]
 [0 5 0 2 3 0 0]
 [0 5 0 0 5 0 0]
 [0 7 0 0 3 0 0]
 [0 6 0 0 4 0 0]
 [0 3 0 0 7 0 0]
 [0 7 0 0 3 0 0]]
-------------------------------------------------------------------------
Epochs:  1,  Training Accuracy:  14.3%,  loss:   nan
[[40  0  0  0  0  0  0]
 [40  0  0  0  0  0  0]
 [40  0  0  0  0  0  0]
 [40  0  0  0  0  0  0]
 [40  0  0  0  0  0  0]
 [40  0  0  0  0  0  0]
 [40  0  0  0  0  0  0]]
-------------------------------------------------------------------------
Accuracy on Test-Set: 14.3% (10 / 70),  loss:   nan
[[10  0  0  0  0  0  0]
 [10  0  0  0  0  0  0]
 [10  0  0  0  0  0  0]
 [10  0  0  0  0  0  0]
 [10  0  0  0  0  0  0]
 [10  0  0  0  0  0  0]
 [10  0  0  0  0  0  0]]
-------------------------------------------------------------------------
Time usage: 0:06:18
----------------------------------------------

#### Change the activation function. How does it effect the accuracy?
The accuracy was not effected so much by changing the activation function, however, the loss changed to a larger number by changing the activation function.

#### How does it effect how quickly the network plateaus?
The totoal training time of the previous one is 1:02:41 for 10 epochs, the new one is 1:05:40. It is not effected so much by changing the activation function.