In [1]:
import tensorflow as tf
from tensorflow.python.ops.rnn_cell_impl import BasicLSTMCell
from tensorflow.contrib.legacy_seq2seq.python.ops.seq2seq import rnn_decoder
from tensorflow.python.ops.distributions.normal import Normal


def _weight_variable(shape):
    initial = tf.truncated_normal(shape=shape, stddev=0.01)
    return tf.Variable(initial)


def _bias_variable(shape):
    initial = tf.constant(0.0, shape=shape)
    return tf.Variable(initial)

def conv2d(x, w):
    # stride [1, x_movement, y_movement, 1]
    # Must have strides[0] = strides[3] = 1
    return tf.nn.conv2d(x, w, strides=[1, 1, 1, 1], padding='SAME')


def max_pool_3x3(x):
    # stride [1, x_movement, y_movement, 1]
    return tf.nn.max_pool(x, ksize=[1, 3, 3, 1], strides=[1, 3, 3, 1], padding='SAME')


def max_pool_2x3(x):
    # stride [1, x_movement, y_movement, 1]
    return tf.nn.max_pool(x, ksize=[1, 2, 3, 1], strides=[1, 2, 3, 1], padding='SAME')


def max_pool_1x1(x):
    # stride [1, x_movement, y_movement, 1]
    return tf.nn.max_pool(x, ksize=[1, 1, 1, 1], strides=[1, 1, 1, 1], padding='SAME')

def _log_likelihood(loc_means, locs, variance):
    loc_means = tf.stack(loc_means)  # [timesteps, batch_sz, loc_dim]
    locs = tf.stack(locs)
    gaussian = Normal(loc_means, variance)
    logll = gaussian._log_prob(x=locs)  # [timesteps, batch_sz, loc_dim]
    logll = tf.reduce_sum(logll, 2)
    return tf.transpose(logll)  # [batch_sz, timesteps]


class RetinaSensor(object):
    # one scale
    def __init__(self, img_size_width, img_size_height, patch_window_width, patch_window_height):
        self.img_size_width = img_size_width
        self.img_size_height = img_size_height
        self.patch_window_width = patch_window_width
        self.patch_window_height = patch_window_height

    def __call__(self, img_ph, loc):
        img = tf.reshape(img_ph, [
            tf.shape(img_ph)[0],
            self.img_size_width,
            self.img_size_height,
            1
        ])
        '''
        tf.image.extract_glimpse:
        If the windows only partially
        overlaps the inputs, the non overlapping areas will be filled with
        random noise.
        '''
        pth = tf.image.extract_glimpse(

            img, # input
            [self.patch_window_width, self.patch_window_height], # size
            loc) # offset
        # pth: [tf.shape(img_ph)[0], patch_window_width, patch_window_height, 1]

        return tf.reshape(pth, [tf.shape(loc)[0],
                                self.patch_window_width * self.patch_window_height])


class GlimpseNetwork(object):
    def __init__(self, img_size_width, img_size_height,
                 patch_window_width, patch_window_height,
                 loc_dim, g_size, l_size, output_size):
        self.retina_sensor = RetinaSensor(img_size_width, img_size_height,
                                          patch_window_width, patch_window_height)

        # layer 1
        self.g1_w = _weight_variable((patch_window_width * patch_window_height, g_size))
        self.g1_b = _bias_variable((g_size,))
        self.l1_w = _weight_variable((loc_dim, l_size))
        self.l1_b = _bias_variable((l_size,))
        # layer 2
        self.g2_w = _weight_variable((g_size, output_size))
        self.g2_b = _bias_variable((output_size,))
        self.l2_w = _weight_variable((l_size, output_size))
        self.l2_b = _bias_variable((output_size,))

    def __call__(self, imgs_ph, locs):
        pths = self.retina_sensor(imgs_ph, locs)

        g = tf.nn.xw_plus_b(tf.nn.relu(tf.nn.xw_plus_b(pths, self.g1_w, self.g1_b)),
                            self.g2_w, self.g2_b)
        l = tf.nn.xw_plus_b(tf.nn.relu(tf.nn.xw_plus_b(locs, self.l1_w, self.l1_b)),
                            self.l2_w, self.l2_b)

        return tf.nn.relu(g + l)


class LocationNetwork(object):
    def __init__(self, loc_dim, rnn_output_size, variance=0.22, is_sampling=False):
        self.loc_dim = loc_dim  # 2, (x,y)
        self.variance = variance
        self.w = _weight_variable((rnn_output_size, loc_dim))
        self.b = _bias_variable((loc_dim,))

        self.is_sampling = is_sampling

    def __call__(self, cell_output):
        mean = tf.nn.xw_plus_b(cell_output, self.w, self.b)
        mean = tf.clip_by_value(mean, -1., 1.)
        mean = tf.stop_gradient(mean)

        if self.is_sampling:
            loc = mean + tf.random_normal(
                (tf.shape(cell_output)[0], self.loc_dim),
                stddev=self.variance)
            loc = tf.clip_by_value(loc, -1., 1.)
        else:
            loc = mean
        loc = tf.stop_gradient(loc)
        return loc, mean

class CNN(object):
    def __init__(self, img_size_width, img_size_height,
                 CNN_patch_width, CNN_patch_height, CNN_patch_number):
        self.img_size_width = img_size_width
        self.img_size_height = img_size_height
        self.CNN_patch_width = CNN_patch_width
        self.CNN_patch_height = CNN_patch_height
        self.CNN_patch_number = CNN_patch_number

    def __call__(self, imgs_ph):
        imgs_ph = tf.reshape(imgs_ph, [-1, self.img_size_height, self.img_size_width, 1])
        W_conv1 = _weight_variable(
            [self.CNN_patch_width, self.CNN_patch_height, 1, self.CNN_patch_number]
        )
        # patch width x height, in size 1, out size patch_nb
        b_conv1 = _bias_variable([self.CNN_patch_number])
        h_conv1 = tf.nn.relu(conv2d(imgs_ph, W_conv1) + b_conv1)

        W_fc1 = _weight_variable([self.img_size_height * self.img_size_width
                                  * self.CNN_patch_number,
                                  self.img_size_height * self.img_size_width])
        b_fc1 = _bias_variable([self.img_size_height * self.img_size_width])
        h_pool2_flat = tf.reshape(h_conv1,
                                  [-1,
                                   self.img_size_height * self.img_size_width
                                   * self.CNN_patch_number]
                                  )
        h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)
        # return tf.reshape(h_fc1, [-1, self.img_size_height, self.img_size_width, 1])
        return h_fc1

class RecurrentAttentionModel(object):
    def __init__(self, img_size_width, img_size_height,
                 CNN_patch_width, CNN_patch_height, CNN_patch_number,
                 patch_window_width, patch_window_height, g_size, l_size, glimpse_output_size,
                 loc_dim, variance,
                 cell_size, num_glimpses, num_classes,
                 learning_rate, learning_rate_decay_factor, min_learning_rate, training_batch_num,
                 max_gradient_norm, last_lstm_size, n_time_window,
                 is_training=False):

        self.img_ph = tf.placeholder(tf.float32, [None, img_size_width * img_size_height])
        self.lbl_ph = tf.placeholder(tf.int64, [None])

        self.global_step = tf.Variable(0, trainable=False)
        # decayed_learning_rate = learning_rate * decay_rate ^ (global_step / training_batch_num)
        self.learning_rate = tf.maximum(tf.train.exponential_decay(
            learning_rate, self.global_step,
            training_batch_num, # batch number
            learning_rate_decay_factor,
            # If the argument staircase is True,
            # then global_step / decay_steps is an integer division
            # and the decayed learning rate follows a staircase function.
            staircase=True),
            min_learning_rate)

        cell = BasicLSTMCell(cell_size)

        with tf.variable_scope('CNN'):
            cnn_network = CNN(img_size_width, img_size_height,
                              CNN_patch_width, CNN_patch_height, CNN_patch_number)

        with tf.variable_scope('GlimpseNetwork'):
            glimpse_network = GlimpseNetwork(img_size_width,
                                             img_size_height,
                                             patch_window_width,
                                             patch_window_height,
                                             loc_dim,
                                             g_size,
                                             l_size,
                                             glimpse_output_size)
        with tf.variable_scope('LocationNetwork'):
            location_network = LocationNetwork(loc_dim=loc_dim,
                                               rnn_output_size=cell.output_size, # cell_size
                                               variance=variance,
                                               is_sampling=is_training)

        # Core Network
        self.img_ph = cnn_network(self.img_ph)
        batch_size = tf.shape(self.img_ph)[0]  # training_batch_size * M
        init_loc = tf.random_uniform((batch_size, loc_dim), minval=-1, maxval=1)
        # shape: (batch_size, loc_dim), range: [-1,1)
        init_state = cell.zero_state(batch_size, tf.float32)

        init_glimpse = glimpse_network(self.img_ph, init_loc)
        rnn_inputs = [init_glimpse]
        rnn_inputs.extend([0] * num_glimpses)

        self.locs, loc_means = [], []

        def loop_function(prev, _):
            loc, loc_mean = location_network(prev)
            self.locs.append(loc)
            loc_means.append(loc_mean)
            glimpse = glimpse_network(self.img_ph, loc)
            return glimpse

        rnn_outputs, _ = rnn_decoder(rnn_inputs, init_state, cell, loop_function=loop_function)

        # Time independent baselines
        with tf.variable_scope('Baseline'):
            baseline_w = _weight_variable((cell.output_size, 1))
            baseline_b = _bias_variable((1,))
        baselines = []
        for output in rnn_outputs[1:]:
            baseline = tf.nn.xw_plus_b(output, baseline_w, baseline_b)
            baseline = tf.squeeze(baseline)
            baselines.append(baseline)
        baselines = tf.stack(baselines)  # [timesteps, batch_sz]
        baselines = tf.transpose(baselines)  # [batch_sz, timesteps]

        # Classification. Take the last step only.
        rnn_last_output = rnn_outputs[-1]
        with tf.variable_scope('Classification'):
            logit_w = _weight_variable((cell.output_size, num_classes))
            logit_b = _bias_variable((num_classes,))
        logits = tf.nn.xw_plus_b(rnn_last_output, logit_w, logit_b)
        self.prediction = tf.argmax(logits, 1)
        self.softmax = tf.nn.softmax(logits)

        with tf.variable_scope('LSTM_Classification'):
            last_lstm_w_in = _weight_variable((cell.output_size, last_lstm_size))
            last_lstm_b_in = _bias_variable((last_lstm_size,))
            print('rnn_last_output',np.shape(rnn_last_output),tf.shape(rnn_last_output))
            print('last_lstm_w_in',np.shape(last_lstm_w_in),tf.shape(last_lstm_w_in))
            last_lstm_in = tf.matmul(rnn_last_output, last_lstm_w_in) + last_lstm_b_in
            print('last_lstm_w_in_1',np.shape(last_lstm_w_in),tf.shape(last_lstm_w_in))
            last_lstm_in = tf.reshape(last_lstm_in, [-1, n_time_window, last_lstm_size])
            print('last_lstm_w_in_2',np.shape(last_lstm_w_in),tf.shape(last_lstm_w_in))

            if int((tf.__version__).split('.')[1]) < 12 and int((tf.__version__).split('.')[0]) < 1:
                cell = tf.nn.rnn_cell.BasicLSTMCell(last_lstm_size, forget_bias=1.0, state_is_tuple=True)
            else:
                cell = tf.contrib.rnn.BasicLSTMCell(last_lstm_size)
            # lstm cell is divided into two parts (c_state, h_state)
            init_state_last_lstm = cell.zero_state(batch_size // n_time_window, dtype=tf.float32)
            lstm_outputs, final_state = tf.nn.dynamic_rnn(cell, last_lstm_in,
                                                     initial_state=init_state_last_lstm, time_major=False)
            last_lstm_w_out = _weight_variable((cell.output_size, num_classes))
            last_lstm_b_out = _bias_variable((num_classes,))

            if int((tf.__version__).split('.')[1]) < 12 and int((tf.__version__).split('.')[0]) < 1:
                lstm_outputs = tf.unpack(tf.transpose(lstm_outputs, [1, 0, 2]))  # states is the last outputs
            else:
                lstm_outputs = tf.unstack(tf.transpose(lstm_outputs, [1, 0, 2]))
            lstm_logits = tf.matmul(lstm_outputs[-1], last_lstm_w_out) + last_lstm_b_out
            lstm_logits = tf.reshape(tf.tile(lstm_logits, (1, n_time_window)), [-1, num_classes])
            self.lstm_prediction = tf.argmax(lstm_logits, 1)
            self.lstm_softmax = tf.nn.softmax(lstm_logits)



        if is_training:
            # classification loss
            self.cross_entropy = tf.reduce_mean(
                tf.nn.sparse_softmax_cross_entropy_with_logits(labels=self.lbl_ph, logits=logits))
            self.lstm_cross_entropy = tf.reduce_mean(
                tf.nn.sparse_softmax_cross_entropy_with_logits(labels=self.lbl_ph, logits=lstm_logits))
            # RL reward
            reward = tf.cast(tf.equal(self.prediction, self.lbl_ph), tf.float32)
            rewards = tf.expand_dims(reward, 1)  # [batch_sz, 1]
            rewards = tf.tile(rewards, (1, num_glimpses))  # [batch_sz, timesteps]
            advantages = rewards - tf.stop_gradient(baselines)
            self.advantage = tf.reduce_mean(advantages)
            logll = _log_likelihood(loc_means, self.locs, variance)
            logllratio = tf.reduce_mean(logll * advantages)
            self.reward = tf.reduce_mean(reward)
            # baseline loss
            self.baselines_mse = tf.reduce_mean(tf.square((rewards - baselines)))
            # hybrid loss
            self.loss = -logllratio + self.cross_entropy + self.baselines_mse + self.lstm_cross_entropy
            params = tf.trainable_variables()
            gradients = tf.gradients(self.loss, params)
            clipped_gradients, norm = tf.clip_by_global_norm(gradients, max_gradient_norm)
            self.train_op = tf.train.AdamOptimizer(self.learning_rate).apply_gradients(
                zip(clipped_gradients, params), global_step=self.global_step)

        self.saver = tf.train.Saver(tf.global_variables())

In [2]:
def losses(logits, labels, name):
    with tf.variable_scope('loss') as scope:
        cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits \
            (logits=logits, labels=labels, name='xentropy_per_example')
        loss = tf.reduce_mean(cross_entropy, name='loss')
        if name=='train':
            tf.summary.scalar(scope.name + '/train_loss', loss)
        if name=='valid':
            tf.summary.scalar(scope.name + '/valid_loss', loss)
    return loss
 
def trainning(loss, learning_rate):
    with tf.name_scope('optimizer'):
        optimizer = tf.train.AdamOptimizer(learning_rate= learning_rate)
        global_step = tf.Variable(0, name='global_step', trainable=False)
        train_op = optimizer.minimize(loss, global_step= global_step)
    return train_op
 
def evaluation(logits, labels, name):
    with tf.variable_scope('accuracy') as scope:
        correct = tf.nn.in_top_k(logits, labels, 1)
        correct = tf.cast(correct, tf.float16)
        accuracy = tf.reduce_mean(correct)
        if name=='train':
            tf.summary.scalar(scope.name + '/train_accuracy', accuracy)
        if name=='valid':
            tf.summary.scalar(scope.name + '/valid_accuracy', accuracy)
    return accuracy

def recall_precision(logits, labels, name):
    logits = tf.cast(logits, tf.int64)
    labels = tf.cast(labels, tf.int64)
    predict = tf.arg_max(logits,1)
    with tf.variable_scope('recall_precision') as scope:
        TP = tf.count_nonzero(predict * labels)
        TN = tf.count_nonzero((predict - 1) * (labels - 1))
        FN = tf.count_nonzero(predict * (labels - 1))
        FP = tf.count_nonzero((predict - 1) * labels)
        precision = tf.divide(TP, TP + FP)
        recall = tf.divide(TP, TP + FN)
        precision = tf.cast(precision, dtype=tf.float64)
        recall = tf.cast(recall, dtype=tf.float64)
        #f1 = 2 * precision * recall / (precision + recall)
        #f1 = tf.cast(f1, dtype=tf.float32)
        if name=='train':
            tf.summary.scalar(scope.name + '/train_precision', precision)
            tf.summary.scalar(scope.name + '/train_recall', recall)
        if name=='valid':
            tf.summary.scalar(scope.name + '/valid_precision', precision)
            tf.summary.scalar(scope.name + '/valid_recall', recall)
    return precision, recall

In [3]:
import os
 
def get_files(path_pos,path_neg,label_pos,label_neg):
    TC = []
    label_TC = []
    nonTC = []
    label_nonTC = []
    # data loader
    file_dir_TC=path_pos
    file_dir_nonTC=path_neg
    TC_list = os.listdir(file_dir_TC)
    nonTC_list = os.listdir(file_dir_nonTC)
    for file in TC_list[:len(nonTC_list)]:
        name = file.split('_')
        if name[0] == label_pos:
            TC.append(file_dir_TC + file)
            label_TC.append(1)
    for file in nonTC_list:
        name = file.split('_')
        if name[0] == label_neg:
            nonTC.append(file_dir_nonTC + file)
            label_nonTC.append(0)
    print("There are %d TC\nThere are %d nonTC" % (len(TC), len(nonTC)))
 
    # shuffle
    image_list = np.hstack((TC, nonTC))
    label_list = np.hstack((label_TC, label_nonTC))
    temp = np.array([image_list, label_list])
    temp = temp.transpose()    
    np.random.shuffle(temp)
 
    image_list = list(temp[:, 0])
    label_list = list(temp[:, 1])
    label_list = [int(i) for i in label_list]
 
    return image_list, label_list
 
# img_list,label_list = get_files(file_dir)
 
# batch
def get_batch(image, label, image_W, image_H, batch_size, capacity):   
    image = tf.cast(image, tf.string)
    label = tf.cast(label, tf.int32)
 
    # queue
    input_queue = tf.train.slice_input_producer([image, label])
 
    image_contents = tf.read_file(input_queue[0])
    label = input_queue[1]
    image = tf.image.decode_jpeg(image_contents, channels=1)
 
    # resize
    image = tf.image.resize_images(image, [image_H, image_W], method=tf.image.ResizeMethod.NEAREST_NEIGHBOR)
    image = tf.cast(image, tf.float32)
    # image = tf.image.per_image_standardization(image)  
    image_batch, label_batch = tf.train.batch([image, label],
                                              batch_size=batch_size,
                                              num_threads=64,  
                                              capacity=capacity)
  
    return tf.reshape(image_batch,[batch_size,image_W,image_H]), label_batch

In [None]:
##validation
import os
import numpy as np
import tensorflow as tf
tf.reset_default_graph()
learning_rate = 1e-3
learning_rate_decay_factor = 0.97
min_learning_rate = 1e-6
max_gradient_norm = 5.0
num_steps = 100000

CNN_patch_width = 5
CNN_patch_height = 5
CNN_patch_number = 32

patch_window_width = 32 # size of glimpse window size
patch_window_height = 32
g_size = 128 # Size of theta_g^0
l_size = 128 # Size of theta_g^1
glimpse_output_size = 220 # Output size of Glimpse Network
cell_size = 100 # Size of LSTM cell
num_glimpses = 30 # Number of glimpses
variance = 0.22 # Gaussian variance for Location Network
M = 20 # Monte Carlo sampling, see Eq(2)

n_time_window = 128
last_lstm_size = 32

batch_size = 128
class_num = 2

N_CLASSES = 2 
IMG_W = 64 # resize
IMG_H = 64
BATCH_SIZE = 128
CAPACITY = 20000
MAX_STEP = 10000000
training_batch_num = 10000000

train_dir = './train_attention_resnet_RAM/'
logs_train_dir = './train_attention_resnet_RAM/'

file_dir_TC='/home/ubuntu/data/TC/'
file_dir_nonTC='/home/ubuntu/data/nonTC/'
file_dir_valTC='/home/ubuntu/data/valTC_cp/'
file_dir_valnonTC='/home/ubuntu/data/valnonTC/'

train, train_label = get_files(file_dir_TC,file_dir_nonTC,'TC','nonTC')
valid, valid_label = get_files(file_dir_valTC,file_dir_valnonTC,'valTC','valnonTC')
train_batch_op,train_label_batch_op=get_batch(train,
                                train_label,
                                IMG_W,
                                IMG_H,
                                BATCH_SIZE,
                                CAPACITY)

valid_batch_op,valid_label_batch_op=get_batch(valid,
                                valid_label,
                                IMG_W,
                                IMG_H,
                                BATCH_SIZE,
                                CAPACITY)
x = tf.placeholder(tf.float32, [BATCH_SIZE,IMG_W,IMG_H,1])
y = tf.placeholder(tf.float32, [BATCH_SIZE])
y = tf.cast(y,tf.int64)

ram = RecurrentAttentionModel(img_size_width=64,
                              img_size_height = 64,
                              CNN_patch_width = CNN_patch_width,
                              CNN_patch_height = CNN_patch_height,
                              CNN_patch_number = CNN_patch_number,
                              patch_window_width = patch_window_width,
                              patch_window_height=patch_window_height,
                              g_size=g_size,
                              l_size=l_size,
                              glimpse_output_size=glimpse_output_size,
                              loc_dim=2,   # (x,y)
                              variance=variance,
                              cell_size=cell_size,
                              num_glimpses=num_glimpses,
                              num_classes=class_num,
                              learning_rate=learning_rate,
                              learning_rate_decay_factor=learning_rate_decay_factor,
                              min_learning_rate=min_learning_rate,
                              training_batch_num=training_batch_num,
                              max_gradient_norm=max_gradient_norm,
                              last_lstm_size=last_lstm_size,
                              n_time_window=n_time_window,
                              is_training=True)
output_feed = [ram.train_op, ram.loss,
                             ram.cross_entropy, ram.reward,
                             ram.advantage, ram.baselines_mse,
                             ram.learning_rate]
train_logits = ram.lstm_softmax 
train_loss = ram.loss
train_op = ram.train_op
train_acc = evaluation(ram.lstm_softmax , y, 'train')
train_recall, train_precision = recall_precision(ram.lstm_softmax , y, 'train')

valid_loss = losses(ram.lstm_softmax , y, 'valid')
valid_acc = evaluation(ram.lstm_softmax , y, 'valid')
valid_recall, valid_precision = recall_precision(ram.lstm_softmax , y, 'valid')

summary_op = tf.summary.merge_all() 
 
sess = tf.Session()

train_writer = tf.summary.FileWriter(logs_train_dir, sess.graph)

saver = tf.train.Saver()

sess.run(tf.global_variables_initializer())
 

coord = tf.train.Coordinator()
threads = tf.train.start_queue_runners(sess=sess, coord=coord)
 
try:
    print('----------training start---------')
    train_result=[]
    valid_result=[]
    for step in np.arange(MAX_STEP):
        train_batch,train_label_batch = sess.run([train_batch_op,train_label_batch_op])
        train_batch = np.reshape(train_batch,[BATCH_SIZE,-1])
        if coord.should_stop():
                break
        _, tra_loss, cross_entropy, reward, advantage, baselines_mse, learning_rate = sess.run(output_feed,
                                                                                 feed_dict={
                                                                                      ram.img_ph: train_batch,
                                                                                      ram.lbl_ph: train_label_batch
                                                                                    })
       
        tra_acc, tra_recall, tra_precision= sess.run([train_acc, train_recall, train_precision],feed_dict={ram.img_ph: train_batch,
                                                                                                           ram.lbl_ph: train_label_batch, 
                                                                                                           y:train_label_batch})
        summary_str = sess.run(summary_op,feed_dict={ram.img_ph: train_batch, 
                                                     ram.lbl_ph: train_label_batch,
                                                     y:train_label_batch})
        summary = tf.Summary()
        summary.value.add(tag='tra_loss', simple_value=tra_loss)
        summary.value.add(tag='tra_acc', simple_value=tra_acc)
        summary.value.add(tag='tra_recall', simple_value=tra_recall)
        summary.value.add(tag='tra_precision', simple_value=tra_precision)
        train_writer.add_summary(summary, step)
        train_result.append([step, tra_loss, tra_acc*100.0, tra_recall*100.0, tra_precision*100.0])
        if step % 50 == 0:
            print('Step %d, train loss = %.2f, train accuracy = %.2f%%, train recall = %.2f%%, train precision = %.2f%%' %(step, tra_loss, tra_acc*100.0, tra_recall*100.0, tra_precision*100.0))
            
            
        if step % 500 == 0:
            ckpt=tf.train.get_checkpoint_state('./train_attention_resnet_RAM/')
#             print(ckpt)
            if ckpt and ckpt.all_model_checkpoint_paths:
                valid_batch,valid_label_batch = sess.run([valid_batch_op,valid_label_batch_op])
                valid_batch = np.reshape(valid_batch,[BATCH_SIZE,-1])
                saver.restore(sess,ckpt.model_checkpoint_path)
                val_loss, val_acc, val_recall, val_precision = sess.run([valid_loss, valid_acc, valid_recall, valid_precision], feed_dict={ram.img_ph: valid_batch, 
                                                                                                                                           ram.lbl_ph: valid_label_batch,
                                                                                                                                           y:valid_label_batch})
                valid_result.append([step, val_loss, val_acc*100.0, val_recall*100.0, val_precision*100.0])
                print('*********************')
                print('Step %d, valid loss = %.2f, valid accuracy = %.2f%%, valid recall = %.2f%%, valid precision = %.2f%%' %(step, val_loss, val_acc*100.0, val_recall*100.0, val_precision*100.0))
                print('*********************')
                summary = tf.Summary()
                summary.value.add(tag='val_loss', simple_value=val_loss)
                summary.value.add(tag='val_acc', simple_value=val_acc)
                summary.value.add(tag='val_recall', simple_value=val_recall)
                summary.value.add(tag='val_precision', simple_value=val_precision)
                train_writer.add_summary(summary, step)
#                 summary_str = sess.run(summary_op,feed_dict={x:valid_batch,y:valid_label_batch})
#                 train_writer.add_summary(summary_str, step)
            
      
        if step % 500 == 0 or (step + 1) == MAX_STEP:
            checkpoint_path = os.path.join(logs_train_dir, 'model.ckpt')
            saver.save(sess, checkpoint_path, global_step=step)

except tf.errors.OutOfRangeError:
    print('Done training -- epoch limit reached')

finally:
    coord.request_stop()

There are 1737956 TC
There are 1737956 nonTC
There are 57420 TC
There are 434488 nonTC
Instructions for updating:
Queue-based input pipelines have been replaced by `tf.data`. Use `tf.data.Dataset.from_tensor_slices(tuple(tensor_list)).shuffle(tf.shape(input_tensor, out_type=tf.int64)[0]).repeat(num_epochs)`. If `shuffle=False`, omit the `.shuffle(...)`.
Instructions for updating:
Queue-based input pipelines have been replaced by `tf.data`. Use `tf.data.Dataset.range(limit).shuffle(limit).repeat(num_epochs)`. If `shuffle=False`, omit the `.shuffle(...)`.
Instructions for updating:
Queue-based input pipelines have been replaced by `tf.data`. Use `tf.data.Dataset.from_tensor_slices(input_tensor).shuffle(tf.shape(input_tensor, out_type=tf.int64)[0]).repeat(num_epochs)`. If `shuffle=False`, omit the `.shuffle(...)`.
Instructions for updating:
Queue-based input pipelines have been replaced by `tf.data`. Use `tf.data.Dataset.from_tensors(tensor).repeat(num_epochs)`.
Instructions for updating:

Step 1700, train loss = 1.63, train accuracy = 53.12%, train recall = 0.00%, train precision = nan%
Step 1750, train loss = 1.64, train accuracy = 52.34%, train recall = 0.00%, train precision = nan%
Step 1800, train loss = 1.65, train accuracy = 52.34%, train recall = 0.00%, train precision = nan%
Step 1850, train loss = 1.63, train accuracy = 50.00%, train recall = 0.00%, train precision = nan%
Step 1900, train loss = 1.62, train accuracy = 54.69%, train recall = 0.00%, train precision = nan%
Step 1950, train loss = 1.65, train accuracy = 44.53%, train recall = 0.00%, train precision = nan%
Step 2000, train loss = 1.63, train accuracy = 52.34%, train recall = 0.00%, train precision = nan%
INFO:tensorflow:Restoring parameters from ./train_attention_resnet_RAM/model.ckpt-1500
*********************
Step 2000, valid loss = 0.69, valid accuracy = 10.94%, valid recall = 0.00%, valid precision = nan%
*********************
Step 2050, train loss = 1.63, train accuracy = 50.00%, train recall =

Step 5050, train loss = 1.64, train accuracy = 46.88%, train recall = 0.00%, train precision = nan%
Step 5100, train loss = 1.63, train accuracy = 53.91%, train recall = 0.00%, train precision = nan%
Step 5150, train loss = 1.67, train accuracy = 56.25%, train recall = 0.00%, train precision = nan%
Step 5200, train loss = 1.62, train accuracy = 46.09%, train recall = 0.00%, train precision = nan%
Step 5250, train loss = 1.66, train accuracy = 63.28%, train recall = 0.00%, train precision = nan%
Step 5300, train loss = 1.65, train accuracy = 49.22%, train recall = 0.00%, train precision = nan%
Step 5350, train loss = 1.66, train accuracy = 46.09%, train recall = 0.00%, train precision = nan%
Step 5400, train loss = 1.62, train accuracy = 48.44%, train recall = 0.00%, train precision = nan%
Step 5450, train loss = 1.64, train accuracy = 45.31%, train recall = 0.00%, train precision = nan%
Step 5500, train loss = 1.65, train accuracy = 49.22%, train recall = 0.00%, train precision = nan%


Step 8500, train loss = 1.63, train accuracy = 53.12%, train recall = 0.00%, train precision = nan%
INFO:tensorflow:Restoring parameters from ./train_attention_resnet_RAM/model.ckpt-8000
*********************
Step 8500, valid loss = 0.69, valid accuracy = 13.28%, valid recall = 0.00%, valid precision = nan%
*********************
Step 8550, train loss = 1.64, train accuracy = 53.91%, train recall = 0.00%, train precision = nan%
Step 8600, train loss = 1.64, train accuracy = 50.78%, train recall = 0.00%, train precision = nan%
Step 8650, train loss = 1.65, train accuracy = 55.47%, train recall = 0.00%, train precision = nan%
Step 8700, train loss = 1.62, train accuracy = 46.09%, train recall = 0.00%, train precision = nan%
Step 8750, train loss = 1.62, train accuracy = 46.09%, train recall = 0.00%, train precision = nan%
