# Soft Attention PlacePulse

# TENSORFLOW 1

In [1]:
import tensorflow

ImportError: No module named 'tensorflow'

In [None]:
# Import modules
import tensorflow as tf
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy.io
import os
import random
import skimage.transform
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.utils import to_categorical
#from vis.visualization import visualize_cam_with_losses

In [None]:
# Report the installed TensorFlow version. The cells below use TF 1.x graph-mode
# APIs such as tf.placeholder and tf.contrib.
print(tf.version.VERSION)

In [None]:
# Print whether TensorFlow reports a usable GPU.
print(tf.test.is_gpu_available())

In [None]:
# Configuration
# Side length used when loading images (passed as img_dims=[IMG_SIZE, IMG_SIZE]).
IMG_SIZE = 224
# Not referenced anywhere in the visible part of this notebook.
MAX_LENGTH = 9
# Candidate fractions of the label table to mark as class 1 (head) / class 0 (tail);
# see load_labels, which computes round(delta_i * number_of_rows).
delta = [0.05, 0.10, 0.15, 0.20, 0.25, 0.30, 0.35, 0.40, 0.45, 0.5]
# The fraction actually used: the last entry of `delta` (0.5).
delta_i = delta[-1]

# Dataset locations, resolved relative to the current working directory.
PATH = os.path.abspath('.') + "/dataset/PlacePulse"
# Image files are addressed as IMG_PATH + "<ID>.jpg".
IMG_PATH = PATH + "/images/2011/"
# load_labels reads LABEL_PATH + "features.csv" by default.
LABEL_PATH = PATH + "/labels/"

In [None]:
def load_image(image_path, img_dims=(224, 224)):
    """Load one image from disk, resized to ``img_dims``, as an array.

    No pixel scaling or ImageNet-style preprocessing is applied here; the raw
    values produced by ``img_to_array`` are returned unchanged.

    Args:
        image_path: Path of the image file to read.
        img_dims: Target size forwarded to ``load_img`` as ``target_size``.
            The default is an immutable tuple so it cannot be mutated across
            calls; lists passed by existing callers keep working.

    Returns:
        A ``(img, image_path)`` tuple: the image array and the path that was
        passed in.
    """
    orig = load_img(image_path, target_size=img_dims)
    img = img_to_array(orig)
    return img, image_path

In [None]:
def load_labels(delta_i, labels_path=LABEL_PATH+"features.csv"):
    """Read the label table and derive a binary ``class`` column.

    The CSV is reduced to its ``ID`` and ``y`` columns. ``class`` starts as a
    copy of ``y``; the first ``val + 1`` rows are then set to 1 and the last
    ``val`` rows to 0, where ``val = round(delta_i * number_of_rows)``. Rows in
    between keep their ``y`` value. The tail assignment runs second, so it wins
    wherever the two ranges overlap.

    Args:
        delta_i: Fraction of the table used for each of the head/tail ranges.
        labels_path: CSV file to read.

    Returns:
        DataFrame with the columns ``ID``, ``y`` and ``class``.
    """
    # NOTE(review): presumably the CSV is already sorted by score so that the
    # head holds one class and the tail the other - confirm against the data.
    data = pd.read_csv(labels_path)
    data = data.loc[:, ["ID", "y"]].copy()
    slen = len(data)
    val = round(delta_i * slen)
    data["class"] = data["y"]

    # Assign through a single .iloc call on the frame. The previous chained form
    # (data['class'].iloc[...] = ...) writes to an intermediate Series, which
    # triggers SettingWithCopyWarning and does nothing under Copy-on-Write.
    class_col = data.columns.get_loc("class")
    data.iloc[:val + 1, class_col] = 1
    data.iloc[slen - val:, class_col] = 0
    return data

In [None]:
# Build the labelled table, then show the row for image ID 4340 as a spot check.
data = load_labels(delta_i)
print(data.loc[data["ID"] == 4340])

In [None]:
# One image path per row of the label table, in table order.
all_img = np.asarray([IMG_PATH + str(img_id) + ".jpg" for img_id in data["ID"].values])
all_labels = data.loc[:, "class"].copy()
# Fixed: a stray trailing comma used to turn this into a 1-element tuple
# wrapping the array instead of the array itself.
all_scores = data.loc[:, "y"].values.copy()

# Load every image at IMG_SIZE x IMG_SIZE; load_image also returns the path,
# which is not needed here.
all_features = np.array(
    [load_image(path, img_dims=[IMG_SIZE, IMG_SIZE])[0] for path in all_img]
)
# One-hot encode the class column into two columns.
all_labels = tf.keras.utils.to_categorical(all_labels, num_classes=2)

In [None]:
# Carve out the head ("positive") and tail ("negative") of the table using the
# same boundaries that load_labels applies to the class column.
# NOTE(review): the head holds val + 1 rows and the tail val rows, so the two
# ranges can share rows when delta_i is 0.5 - confirm this is intended.
slen = len(data)
val = round(delta_i * slen)
pos_rows = slice(None, val + 1)
neg_rows = slice(slen - val, None)

X_pos, X_neg = all_features[pos_rows], all_features[neg_rows]
y_pos, y_neg = all_labels[pos_rows], all_labels[neg_rows]
img_pos, img_neg = all_img[pos_rows], all_img[neg_rows]

In [None]:
from sklearn.model_selection import train_test_split

# Split each class separately (75% train / 25% test) so both classes appear in
# both sets. Pixels, labels and file paths go through ONE call per class, so
# they share the same permutation by construction. Previously the paths were
# split by a second call that only lined up because random_state was repeated.
split_kwargs = dict(shuffle=True, test_size=0.25, random_state=35)

(xtrain_pos, xtest_pos,
 ytrain_pos, ytest_pos,
 imgtrain_pos, imgtest_pos) = train_test_split(X_pos, y_pos, img_pos, **split_kwargs)
(xtrain_neg, xtest_neg,
 ytrain_neg, ytest_neg,
 imgtrain_neg, imgtest_neg) = train_test_split(X_neg, y_neg, img_neg, **split_kwargs)

# Positives first, negatives second; pixel values are divided by 255.
train_x = np.concatenate([xtrain_pos, xtrain_neg])/255.
test_x = np.concatenate([xtest_pos, xtest_neg])/255.

imgtrain_val = np.concatenate([imgtrain_pos, imgtrain_neg])
imgtest = np.concatenate([imgtest_pos, imgtest_neg])

train_y = np.float32(np.concatenate([ytrain_pos, ytrain_neg]))
test_y = np.float32(np.concatenate([ytest_pos, ytest_neg]))

# Sanity output: array shapes and the value range of the training pixels.
print(train_x.shape, test_x.shape)
print(train_y.shape, test_y.shape)
print(imgtrain_val.shape, imgtest.shape)
print(np.max(train_x), np.min(train_x))

## Parameters

In [None]:
# Parameters
# Image side length, taken from axis 1 of the training array.
img_size = train_x.shape[1]
# Not referenced elsewhere in the visible part of this notebook.
img_flat_size = img_size * img_size

# If you want to train the model -> True, otherwise -> False
Is_train = True

# If you want to load a saved model -> True, otherwise -> False
Load_model = False

# Base directory for checkpoints (the training cell joins it with 'soft_net')
SAVE_PATH = 'saved_model'

# Number of test samples run through the visualisation cells
num_test_sample = 50

# Number of classes (labels are one-hot encoded with num_classes=2)
num_label = 2

# Parameters for training
num_epoch = 60

# learning_rate and epsilon are both passed to the Adam optimizer
learning_rate = 5e-4
epsilon = 1e-8

batch_size = 256

# Parameters for the LSTM
lstm_size = 256
# Number of attention + LSTM iterations unrolled in the network cell
step_size = 4
# Not referenced elsewhere in the visible part of this notebook.
flatten_size = img_size

# Used as per_process_gpu_memory_fraction when the session is created
gpu_fraction = 0.3

## Plotting Sample Image (PlacePulse)

In [None]:
# Inspect the first training example: its array shape, one-hot label and size.
img = train_x[0]
print(img.shape)

#plt.imshow(img)#, cmap = 'gray')
#plt.show()
print(f"Label: {train_y[0]!s}")
print(f"Shape: {img_size!s}x{img_size!s}")

## Functions for Convolutional Network

In [None]:
# Initialize weights and bias 
def conv2d(x, w, stride):
    """Convolve ``x`` with ``w`` using 'SAME' padding.

    ``stride`` is applied to the two middle entries of the 4-element strides
    list; the outer two entries are fixed to 1.
    """
    strides = [1, stride, stride, 1]
    return tf.nn.conv2d(x, w, strides=strides, padding='SAME')

def max_pool_2x2(x):
    """Max-pool ``x`` with a [1, 2, 2, 1] window and stride, 'SAME' padding."""
    window = [1, 2, 2, 1]
    return tf.nn.max_pool(x, ksize=window, strides=list(window), padding='SAME')

# Get Variables
def weight_variable(name, shape):
    """Return ``tf.get_variable(name, shape)`` with a Xavier initializer."""
    xavier = tf.contrib.layers.xavier_initializer()
    return tf.get_variable(name, shape=shape, initializer=xavier)

def bias_variable(name, shape):
    """Return ``tf.get_variable(name, shape)`` with a Xavier initializer.

    Same construction as ``weight_variable``; biases are Xavier-initialised too.
    """
    xavier = tf.contrib.layers.xavier_initializer()
    return tf.get_variable(name, shape=shape, initializer=xavier)

## LSTM and Attention function

In [None]:
# Reset the default graph so that re-running the cells below starts from an
# empty graph (the network cell creates named variables via tf.get_variable).
tf.reset_default_graph()

# LSTM function
def LSTM_cell(C_prev, h_prev, x_lstm, Wf, Wi, Wc, Wo, bf, bi, bc, bo):
    """Run one hand-written LSTM step.

    Args:
        C_prev: Cell state from the previous step.
        h_prev: LSTM output from the previous step.
        x_lstm: Input for this step; concatenated with ``h_prev`` along axis 1.
        Wf, Wi, Wc, Wo: Weights of the forget, input, candidate and output
            transforms, applied to the concatenated input.
        bf, bi, bc, bo: Matching biases.

    Returns:
        ``(C_t, h_t)``: the new cell state and the new output.
    """
    gate_input = tf.concat([x_lstm, h_prev], 1)

    def _affine(weight, bias):
        # Shared pre-activation: gate_input @ weight + bias
        return tf.matmul(gate_input, weight) + bias

    forget_gate = tf.sigmoid(_affine(Wf, bf))
    input_gate = tf.sigmoid(_affine(Wi, bi))
    candidate = tf.tanh(_affine(Wc, bc))
    output_gate = tf.sigmoid(_affine(Wo, bo))

    # Keep part of the old cell state and add the gated candidate.
    C_t = tf.multiply(forget_gate, C_prev) + tf.multiply(input_gate, candidate)
    h_t = tf.multiply(output_gate, tf.tanh(C_t))

    return C_t, h_t

# Soft Attention function
def soft_attention(h_prev, a, Wa, Wh):
    """Compute soft-attention weights over locations and the weighted context.

    Args:
        h_prev: LSTM output from the previous step.
        a: Sequence of per-location feature tensors (one entry per location).
            The caller passes the result of ``tf.unstack`` on the flattened
            conv output - presumably each entry is [batch, channels]; confirm.
        Wa: Projection applied to every entry of ``a``.
        Wh: Projection applied to ``h_prev``.

    Returns:
        ``(alpha, z)``: the softmax over the concatenated per-location scores,
        and the sum over locations of ``a[i]`` scaled by column ``i`` of
        ``alpha``.
    """
    # The hidden-state term is identical for every location, so build it once
    # instead of adding one matmul op per location to the graph.
    hidden_score = tf.matmul(h_prev, Wh)

    m_list = [tf.tanh(tf.matmul(a_i, Wa) + hidden_score) for a_i in a]
    print("list", m_list[0].get_shape())
    m_concat = tf.concat(m_list, axis=1)
    print("concat", m_concat.get_shape())
    alpha = tf.nn.softmax(m_concat)

    # Scale each location's features by its own attention column, then sum.
    z_list = [
        tf.multiply(a_i, tf.slice(alpha, (0, i), (-1, 1)))
        for i, a_i in enumerate(a)
    ]
    z_stack = tf.stack(z_list, axis=2)
    z = tf.reduce_sum(z_stack, axis=2)

    return alpha, z
    

In [None]:
class SoftAttentionLayer(tf.keras.Model):
    """Additive soft attention built from three Dense layers.

    ``W1`` projects the features, ``W2`` projects the hidden state, and ``V``
    maps the tanh of their sum to one score per location.
    """

    def __init__(self, units):
        super().__init__()

        def xavier_dense(width):
            # Kernel and bias both use a Xavier initializer.
            return tf.keras.layers.Dense(
                width,
                kernel_initializer=tf.contrib.layers.xavier_initializer(),
                bias_initializer=tf.contrib.layers.xavier_initializer(),
            )

        self.W1 = xavier_dense(units)
        self.W2 = xavier_dense(units)
        self.V = xavier_dense(1)

    def call(self, features, hidden):
        """Return ``(attention_weights, context_vector)``.

        Shapes below assume features is (batch, locations, feature_dim) and
        hidden is (batch, hidden_size) - TODO confirm against the caller.
        """
        # (batch, hidden_size) -> (batch, 1, hidden_size), broadcast over locations
        hidden_with_time_axis = tf.expand_dims(hidden, 1)

        # score: (batch, locations, units)
        projected_features = self.W1(features)
        projected_hidden = self.W2(hidden_with_time_axis)
        score = tf.nn.tanh(projected_features + projected_hidden)

        # attention_weights: (batch, locations, 1); softmax runs over axis 1
        attention_weights = tf.nn.softmax(self.V(score), axis=1)

        # Weighted sum of the features over axis 1 -> (batch, feature_dim)
        context_vector = tf.reduce_sum(attention_weights * features, axis=1)

        return attention_weights, context_vector

In [None]:
def reset_state(batch_size, units):
    """Return a float32 zero tensor of shape ``(batch_size, units)``."""
    state_shape = (batch_size, units)
    return tf.zeros(state_shape, tf.float32)

## Network

In [None]:
# Network

# Input 
# x_image  = tf.placeholder(tf.float32, shape = [None, img_size, img_size, 3])
# y_target = tf.placeholder(tf.float32, shape=[None, num_label])

# conv_1 = tf.keras.layers.Conv2D(filters=64, kernel_size=(3,3), strides=(2,2),padding='same',use_bias=True, activation=tf.nn.relu, name="conv_1")
# conv_2 = tf.keras.layers.Conv2D(filters=256, kernel_size=(3,3), strides=(2,2),padding='same',use_bias=True, activation=tf.nn.relu, name="conv_2")
# conv_3 = tf.keras.layers.Conv2D(filters=512, kernel_size=(3,3), strides=(2,2),padding='same',use_bias=True, activation=tf.nn.relu, name="conv_3")

# x = conv_1(x_image)
# x = conv_2(x)
# x = conv_3(x)

# conv_size = x.get_shape()[1]
# filters_size = x.get_shape()[3]
# conv_flat = tf.reshape(x, [-1, conv_size*conv_size, filters_size])
# conv_unstack = tf.unstack(conv_flat, axis = 1) #1
# x_unstack = tf.stack(conv_unstack)

# attention = SoftAttentionLayer(lstm_size)
# LSTM_Cell = tf.keras.layers.LSTMCell(lstm_size)
# activation = tf.keras.layers.Dense(self._output_shape, activation="linear", name="activation")

# h = reset_state(batch_size, lstm_size)
# c = tf.zeros([rnn_batch_size, lstm_size], tf.float32)
# print("h", h.get_shape())
# print("c", h.get_shape())
# for i in range(step_size):
#     alpha, z = attention(x, c)
#     lstm_input = tf.concat([z, c], axis=-1)
#     h, [h, c] = LSTM_Cell(lstm_input, [h, c])

# output_conv = activation(h)

In [None]:
# Network

# Input 
# Image batch: [batch, img_size, img_size, 3]; the batch dimension is left open.
x_image  = tf.placeholder(tf.float32, shape = [None, img_size, img_size, 3])
# Target batch: one row of num_label values per image.
y_target = tf.placeholder(tf.float32, shape=[None, num_label])

# Convolution variables
# w_conv1 = weight_variable('W_conv1', [3, 3, 3, 64])
# b_conv1 = bias_variable('b_conv1', [64])
# w_conv2 = weight_variable('W_conv2', [3, 3, 64, 256])
# b_conv2 = bias_variable('b_conv2', [256])
# w_conv3 = weight_variable('W_conv3', [3, 3, 256, 512])
# b_conv3 = bias_variable('b_conv3', [512])

# conv1 = tf.nn.relu(conv2d(x_image, w_conv1, 2) + b_conv1)
# conv2 = tf.nn.relu(conv2d(conv1, w_conv2, 2) + b_conv2)
# conv3 = tf.nn.relu(conv2d(conv2, w_conv3, 2) + b_conv3)

# conv_size = conv3.get_shape()[1]
# conv_flat = tf.reshape(conv3, [-1, conv_size*conv_size, 512])
# conv_unstack = tf.unstack(conv_flat, axis = 1)

# Three 3x3 convolutions with stride 2 and 'same' padding (64 -> 256 -> 512 filters).
# The layers are linear; ReLU is applied explicitly below. Kernels and biases all
# use Xavier initializers.
conv_1 = tf.keras.layers.Conv2D(filters=64, kernel_size=(3,3), strides=(2,2),padding='same',use_bias=True, activation="linear", name="conv_1", kernel_initializer=tf.contrib.layers.xavier_initializer(), bias_initializer=tf.contrib.layers.xavier_initializer())
conv_2 = tf.keras.layers.Conv2D(filters=256, kernel_size=(3,3), strides=(2,2),padding='same',use_bias=True, activation="linear", name="conv_2", kernel_initializer=tf.contrib.layers.xavier_initializer(), bias_initializer=tf.contrib.layers.xavier_initializer())
conv_3 = tf.keras.layers.Conv2D(filters=512, kernel_size=(3,3), strides=(2,2),padding='same',use_bias=True, activation="linear", name="conv_3", kernel_initializer=tf.contrib.layers.xavier_initializer(), bias_initializer=tf.contrib.layers.xavier_initializer())

# After this, `x` is the ReLU output of the last convolution.
x = tf.nn.relu(conv_1(x_image))
x = tf.nn.relu(conv_2(x))
x = tf.nn.relu(conv_3(x))

# Flatten the spatial grid into a list of locations: conv_flat is
# [batch, conv_size*conv_size, filters_size]; conv_unstack is a Python list with
# one [batch, filters_size] tensor per location.
conv_size = x.get_shape()[1]
filters_size = x.get_shape()[3]
conv_flat = tf.reshape(x, [-1, conv_size*conv_size, filters_size])
conv_unstack = tf.unstack(conv_flat, axis = 1)

# NOTE(review): `attention` and `LSTM_Cell` are created here but not referenced again
# in the visible cells; the unrolled loop below uses the soft_attention / LSTM_cell
# functions instead. `activation` is the final Dense layer producing num_label logits.
attention = SoftAttentionLayer(lstm_size)
LSTM_Cell = tf.keras.layers.LSTMCell(lstm_size)
activation = tf.keras.layers.Dense(num_label, activation="linear", name="activation", kernel_initializer=tf.contrib.layers.xavier_initializer(), bias_initializer=tf.contrib.layers.xavier_initializer())

# LSTM variables
# Each gate sees the attended feature vector concatenated with the previous LSTM
# output. The feature width is derived from the conv output instead of repeating
# the literal 512, so it follows any change to conv_3's filter count.
feature_dim = int(filters_size)

Wf = weight_variable('Wf', [feature_dim + lstm_size, lstm_size])
Wi = weight_variable('Wi', [feature_dim + lstm_size, lstm_size])
Wc = weight_variable('Wc', [feature_dim + lstm_size, lstm_size])
Wo = weight_variable('Wo', [feature_dim + lstm_size, lstm_size])

bf = bias_variable('bf', [lstm_size])
bi = bias_variable('bi', [lstm_size])
bc = bias_variable('bc', [lstm_size])
bo = bias_variable('bo', [lstm_size])

# Attention variables: project a location's features and the previous LSTM
# output to one scalar each.
Wa = weight_variable('Wa', [feature_dim, 1])
Wh = weight_variable('Wh', [lstm_size, 1])

# Dynamic batch size, so the zero states below match whatever batch is fed.
rnn_batch_size = tf.shape(x_image)[0]

# Initial lstm cell state and output
# rnn_state is passed to LSTM_cell as C_prev, rnn_out as h_prev.
rnn_state = tf.zeros([rnn_batch_size, lstm_size], tf.float32)
rnn_out = tf.zeros([rnn_batch_size, lstm_size], tf.float32)

#################################### Attention!!! ####################################
# Unroll step_size attention + LSTM steps. Each step attends over the conv locations
# using the previous output, then feeds the attended vector z into the LSTM.
# After the loop, `alpha` and `z` refer to the tensors of the final step.
for i in range(step_size):
    alpha, z = soft_attention(rnn_out, conv_unstack, Wa, Wh)
    rnn_state, rnn_out = LSTM_cell(rnn_state, rnn_out, z, Wf, Wi, Wc, Wo, bf, bi, bc, bo)

######################################################################################

# Densely connect layer variables
# NOTE(review): w_fc1 / b_fc1 are still created as graph variables, but the output
# below comes from the `activation` Dense layer, so they are unused by the model.
w_fc1 = weight_variable('w_fc1', [lstm_size, num_label])
b_fc1 = bias_variable('b_fc1', [num_label])

#output = tf.matmul(rnn_out, w_fc1)+b_fc1
# Logits: final LSTM output passed through the linear Dense layer.
output = activation(rnn_out)

In [None]:
# Training objective: element-wise sigmoid cross-entropy on the logits,
# averaged over all entries.
Loss = tf.nn.sigmoid_cross_entropy_with_logits(labels=y_target, logits=output)
Cost = tf.reduce_mean(Loss)

# `optimizer` is the op returned by minimize(); running it applies one update.
adam = tf.train.AdamOptimizer(learning_rate=learning_rate, epsilon=epsilon)
optimizer = adam.minimize(Cost)

# Accuracy: fraction of rows whose arg-max logit matches the arg-max target.
true_class = tf.argmax(y_target, 1)
predicted_class = tf.argmax(output, 1)
correct_prediction = tf.equal(true_class, predicted_class)
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

In [None]:
# Debug output: static shapes of the main tensors in the graph.
#print("x", conv_3.output)
print("conv", conv_flat.shape)
print("unstack", len(conv_unstack), conv_unstack[2].get_shape())
# Fixed the garbled label ("ünstack"). Note that tf.stack adds an op to the graph
# just to report the stacked shape.
print("unstack tensor", tf.stack(conv_unstack).get_shape())
print("state", rnn_state.get_shape())
print("Memory", rnn_out.get_shape())
print("alpha", alpha.get_shape())
print("z", z.get_shape())
print("output", output.get_shape())
#print("output", output_conv.get_shape())
#alpha, z = soft_attention(rnn_out, conv_unstack, Wa, Wh)

In [None]:
#from vis.visualization import visualize_cam_with_losses, visualize_cam
# NOTE(review): every import of visualize_cam_with_losses in the visible cells is
# commented out, so this call raises NameError unless the name is defined elsewhere.
# The arguments are the first training image, the Cost tensor, 1 and the conv_3 layer.
visualize_cam_with_losses(train_x[0,:,:,:], Cost, 1,conv_3)

## Create Session

In [None]:
# Create the session, capping this process at `gpu_fraction` of GPU memory,
# and initialise every variable in the graph.
gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=gpu_fraction)
config = tf.ConfigProto(gpu_options=gpu_options)

sess = tf.InteractiveSession(config=config)
init_op = tf.global_variables_initializer()
sess.run(init_op)

## Save and Restore

In [None]:
# Restore the latest checkpoint if one exists and loading is enabled.
saver = tf.train.Saver()
if Load_model:
    # Fixed: SAVE_PATH + "soft_net/" produced "saved_modelsoft_net/", which is not
    # the <SAVE_PATH>/soft_net directory that the training cell builds.
    checkpoint_dir = os.path.join(SAVE_PATH, 'soft_net')
    checkpoint = tf.train.get_checkpoint_state(checkpoint_dir)
    if checkpoint and checkpoint.model_checkpoint_path:
        saver.restore(sess, checkpoint.model_checkpoint_path)
        print("Successfully loaded:", checkpoint.model_checkpoint_path)
    else:
        print("Could not find old network weights")

## Training

In [None]:
# Training
#
# Runs num_epoch epochs of mini-batch training. After every 10th epoch the whole
# test set is evaluated one sample at a time and the accuracy is printed.

if Is_train:
    train_data_num = train_x.shape[0]
    test_data_num = test_x.shape[0]

    # Loop-invariant: bring the test set into [N, H, W, 3] layout once, not per sample.
    test_x_reshape = np.reshape(test_x, (test_data_num, img_size, img_size, 3))

    for i in range(num_epoch):
        # New random sample order every epoch
        random_idx = np.arange(train_data_num)
        np.random.shuffle(random_idx)

        for j in range(0, train_data_num, batch_size):
            # Slicing clamps at the end of the array, so the final (short) batch
            # needs no special case. The old end index, j + train_data_num - 1,
            # dropped the last sample whenever all data fit into a single batch.
            batch_ids = random_idx[j:j + batch_size]
            batch_x_train = train_x[batch_ids]
            batch_y_train = train_y[batch_ids]

            train_batch = np.reshape(batch_x_train, (batch_x_train.shape[0], img_size, img_size, 3))
            feed = {x_image: train_batch, y_target: batch_y_train}

            # One update step, then a single forward pass for the post-update
            # cost and accuracy (previously two separate passes).
            sess.run(optimizer, feed_dict=feed)
            cost, acc = sess.run([Cost, accuracy], feed_dict=feed)

            # Print Progress
            print("Epoch: " + str(i+1) + ' / ' +
                  "Batch: " + str(j) + '/' + str(train_data_num) + ' / ' +
                  "Cost: " + str(cost) + ' / ' +
                  "Training Accuracy: " + str(acc))

        if (i+1) % 10 == 0:
            num_correct = 0
            save_path = os.path.join(SAVE_PATH, 'soft_net')
            os.makedirs(save_path, exist_ok=True)
            file_name = os.path.join(save_path, 'soft_net_' + str(i))

            # NOTE(review): saving is disabled, yet the message below still claims
            # the model was saved - re-enable the save or adjust the message.
            #saver.save(sess, file_name)
            print('Model is saved!!!')

            print('Testing ...')
            # Accuracy over the full test set does not depend on visiting order,
            # so the samples are evaluated in order. `output` does not depend on
            # y_target, so only the image is fed.
            for idx_sample in range(test_data_num):
                test_x_in = test_x_reshape[idx_sample]
                output_ = sess.run(output, feed_dict={x_image: [test_x_in]})

                y_test_pred = np.argmax(output_)
                y_test_true = np.argmax(test_y[idx_sample])
                if y_test_pred == y_test_true:
                    num_correct += 1.

            # Print Progress
            print("Testing Accuracy: " + str(num_correct/test_data_num))

## Testing

In [None]:
# Sampling test indexes
# NOTE(review): `idx` is drawn here but the loop below walks the first
# num_test_sample test rows in order (the idx-based lines were disabled).
idx = random.sample(range(test_x.shape[0]), num_test_sample)

# Initialize fraction of test images and heatmap
test_fraction = np.zeros([10, img_size, img_size, 3])
heat_map = np.zeros([num_test_sample, img_size, img_size])

num_correct = 0.
#import matplotlib.image as mpimg
import cv2

# For each sample: fetch the final-step attention weights and the logits, upsample
# the attention map, and plot it next to the input image.
for idx_sample in range(num_test_sample):
    test_x_reshape = np.reshape(test_x, ([test_x.shape[0],img_size,img_size,3]))
    test_x_in = test_x_reshape[idx_sample]
    true_onehot = test_y[idx_sample]

    alpha_, output_ = sess.run(
        [alpha, output],
        feed_dict={x_image: [test_x_in], y_target: [true_onehot]},
    )

    # alpha_ holds one weight per location; fold it back into a square grid,
    # then expand and smooth it for display.
    alpha_size = int(np.sqrt(alpha_.shape[1]))
    alpha_reshape = np.reshape(alpha_, (alpha_size, alpha_size))
    alpha_resize = skimage.transform.pyramid_expand(alpha_reshape, upscale = 16, sigma=20)

    # Predicted vs. true class index
    y_test_pred = np.argmax(output_)
    y_test_true = np.argmax(true_onehot)

    # One figure per sample: heatmap on the left, input image on the right
    f1, ax = plt.subplots(1,2)
    heat_ax, image_ax = ax[0], ax[1]
    heat_ax.imshow(alpha_resize, cmap='gray')
    heat_ax.axis("off")
    heat_ax.set_title('Attention Heatmap')
    image_ax.imshow(test_x_in)
    image_ax.axis("off")
    image_ax.set_title(f"Prediction: {y_test_pred!s} / Label: {y_test_true!s}")

    # Count correct
    if y_test_pred == y_test_true:
        num_correct += 1.

# Show results
plt.show()
print(f"Sample Accuracy: {num_correct / num_test_sample!s}")

In [None]:
# Sampling test indexes
# NOTE(review): `idx` is drawn but the loop below walks the first
# num_test_sample test rows in order.
idx = random.sample(range(test_x.shape[0]), num_test_sample)

# Initialize fraction of test images and heatmap
test_fraction = np.zeros([10, img_size, img_size, 3])
heat_map = np.zeros([num_test_sample, img_size, img_size])

num_correct = 0.
#import matplotlib.image as mpimg
import cv2

# Loop-invariant: bring the test set into [N, H, W, 3] layout once.
test_x_reshape = np.reshape(test_x, ([test_x.shape[0],img_size,img_size,3]))

# Fetch the last convolutional feature map for each sample.
for idx_sample in range(num_test_sample):
    test_x_in = test_x_reshape[idx_sample,:,:,:]
    # Fixed: `conv3` only exists in commented-out code, so fetching it raised
    # NameError. `x` is the ReLU output of conv_3 built in the network cell and is
    # the live equivalent of the old conv3 tensor.
    # conv_output is overwritten each iteration; only the last sample's map remains.
    conv_output = sess.run([x],feed_dict = {x_image: [test_x_in]})
#     alpha_size = int(np.sqrt(alpha_.shape[1]))
#     alpha_reshape = np.reshape(alpha_, (alpha_size, alpha_size))
#     alpha_resize = skimage.transform.pyramid_expand(alpha_reshape, upscale = 16, sigma=20)

In [None]:
# Gradient for partial linearization. We only care about the target visualization class.
#y_c = tf.reduce_sum(tf.multiply(vgg.fc8, labels), axis=1)
#print('y_c:', y_c)
# Get last convolutional layer gradient for generating gradCAM visualization
# NOTE(review): conv_3 is the Keras layer object, not its output tensor - confirm
# which of the two the later gradient code expects.
target_conv_layer = conv_3
#target_conv_layer_grad = tf.gradients(y_c, target_conv_layer)[0]
images = tf.placeholder("float", [batch_size, img_size, img_size, 3])
# Guided backpropagation back to input layer
# NOTE(review): Cost is built from x_image, not from this new `images` placeholder,
# so this gradient is presumably None; x_image looks like the intended target - confirm.
gb_grad = tf.gradients(Cost, images)[0]