In [1]:
import tensorflow as tf
import tensorflow.contrib.layers as layers
from tensorflow.examples.tutorials.mnist import input_data
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import tensorflow.contrib.slim as slim
import os
from sklearn.preprocessing import scale

In [2]:
# ---- Optimisation schedule ----
num_epochs = 25            # passes over the training set
lr = 1e-3                  # learning rate handed to the Adam optimizers
d_steps = 3

# ---- Batching (two names for the same quantity are used in the notebook) ----
mb_size = 48
batch_size = mb_size

# ---- Data / model dimensions ----
X_dim = [224, 224, 3]      # image height, width, channels
n_class = 2                # number of discriminator output classes
h_dim = 128
z_dim = 300                # length of the generator noise vector
nz = z_dim                 # alias of z_dim

# ---- Loss weighting ----
cross_entropy_term = 0.5   # weight of the supervised cross-entropy in D_loss

In [3]:
from dataset import load_ddsm_data

# Location of the DDSM split and of its label file.
# NOTE(review): this is an absolute, machine-specific path; consider making it
# configurable before sharing the notebook.
ddsm_path = '/dfs/scratch0/annhe/tanda_750_90_10_split/'
labels_fl = 'mass_to_label.json'
labels_path = os.path.join(ddsm_path, labels_fl)

# Load train / validation / test splits as float images with 3 channels.
# The label file is passed via labels_path instead of rebuilding the same
# path by string concatenation (which also produced a doubled '/').
X_train, Y_train, X_valid, Y_valid, X_test, Y_test = load_ddsm_data(
    data_dir=ddsm_path, label_json=labels_path, validation_set=True,
    segmentations=False, as_float=True, channels=3)

# Keep only as many training examples as fill whole mini-batches, because the
# graph placeholders use a fixed batch dimension. For the 1108 loaded images
# and batch_size 48 this is the same 1104 that was previously hard-coded.
n_train_used = (X_train.shape[0] // batch_size) * batch_size
X_train = X_train[:n_train_used]
Y_train = Y_train[:n_train_used]
print(X_train.shape)

IN FUNCTION LOAD DDSM DATA
YO YO YO
LEN OF MASK DIR 1592
loading in 1108 images...
loading in 123 images...
loading in 123 images...
(1104, 224, 224, 3)


In [4]:
# mnist = input_data.read_data_sets('../../MNIST_data', one_hot=True)
# x_train = mnist.train.images[:50,:]
# x_train = x_train.reshape([50,28,28,1])
# #randomNum = random.randint(0,25)
# image = x_train[0]
# plt.imshow(image[:,:,0], cmap=plt.get_cmap('gray_r'))
# plt.show()

In [5]:
def plot(samples, grid=(4, 4), figsize=(200, 200)):
    """Draw the first channel of each sample on a grid of axes.

    Args:
        samples: iterable of image arrays indexable as ``sample[:, :, 0]``
            (height x width x channels). Only channel 0 is shown. Supplying
            more samples than the grid has cells is not supported.
        grid: ``(rows, cols)`` of the subplot grid; defaults to the 4 x 4
            layout this helper always used.
        figsize: figure size forwarded to ``plt.figure``.
            NOTE(review): matplotlib interprets figsize in inches, so the
            default of 200 x 200 yields a very large canvas -- confirm that
            this is intended rather than a size in pixels.

    Returns:
        The matplotlib figure holding the grid. It is created with
        ``plt.figure`` and therefore is also pyplot's current figure.
    """
    n_rows, n_cols = grid
    fig = plt.figure(figsize=figsize)
    gs = gridspec.GridSpec(n_rows, n_cols)
    gs.update(wspace=0.05, hspace=0.05)

    for i, sample in enumerate(samples):
        # Work on the Axes object directly instead of pyplot's implicit
        # "current axes" state.
        ax = fig.add_subplot(gs[i])
        ax.axis('off')
        ax.set_xticklabels([])
        ax.set_yticklabels([])
        ax.set_aspect('equal')
        # The channel slice is already 2-D, so no fixed-size reshape is
        # needed; this lets the helper handle any image height / width.
        ax.imshow(sample[:, :, 0], cmap='Greys_r')

    return fig

In [6]:
def xavier_init(size):
    """Sample a tensor of shape ``size`` from a normal distribution.

    The standard deviation is ``1 / sqrt(size[0] / 2)``, i.e. it is derived
    from the first entry of ``size`` only.
    """
    fan_in = size[0]
    stddev = 1. / tf.sqrt(fan_in / 2.)
    return tf.random_normal(shape=size, stddev=stddev)

In [7]:
def log(x):
    """Return ``tf.log(x + 1e-8)``; the offset keeps the argument away from 0."""
    shifted = x + 1e-8
    return tf.log(shifted)
    

In [8]:
# Graph inputs. The batch dimension of X and y is fixed to batch_size.

# Real images fed to the discriminator.
X = tf.placeholder(tf.float32, shape=[batch_size, 224, 224, 3], name='image')

# Noise input with a free batch dimension.
# NOTE: the name `z` is re-assigned to a tf.random_normal tensor when the
# graph is built further down, so this placeholder is not what G receives.
z = tf.placeholder(dtype=tf.float32, shape=[None, nz])

# Labels for the real images, one row of n_class values per image; used as
# `labels` of the softmax cross-entropy term in the discriminator loss.
y = tf.placeholder(tf.float32, shape=[batch_size, n_class], name='label')

In [9]:
# NOTE(review): looks like (height, width, n_class, channels) -- confirm.
data_info = np.array((224, 224, 2, 3))

# Number of output channels of the three discriminator convolutions.
conv_info = np.array((64, 128, 256))

# One row per generator layer, read by deconv2d as
# [output channels, kernel size, stride].
deconv_info = np.array([
    (300, 3, 1),
    (100, 7, 2),
    (50, 5, 2),
    (25, 5, 2),
    (12, 6, 2),
    (3, 6, 2),
])

In [10]:
def lrelu(x, leak=0.2, name="lrelu"):
    """Leaky ReLU expressed as a weighted sum of ``x`` and ``abs(x)``.

    Computes ``0.5 * (1 + leak) * x + 0.5 * (1 - leak) * abs(x)``, which is
    ``x`` for positive inputs and ``leak * x`` for negative ones. The ops are
    created inside a variable scope called ``name``.
    """
    with tf.variable_scope(name):
        identity_weight = 0.5 * (1 + leak)
        magnitude_weight = 0.5 * (1 - leak)
        identity_term = identity_weight * x
        magnitude_term = magnitude_weight * abs(x)
        return identity_term + magnitude_term

def huber_loss(labels, predictions, delta=1.0):
    """Element-wise Huber loss between ``predictions`` and ``labels``.

    Where the absolute error is below ``delta`` the result is
    ``0.5 * err**2``; elsewhere it is ``delta * err - 0.5 * delta**2``.
    No reduction is applied.
    """
    abs_err = tf.abs(predictions - labels)
    within_delta = tf.less(abs_err, delta)
    quadratic_branch = 0.5 * tf.square(abs_err)
    linear_branch = delta * abs_err - 0.5 * tf.square(delta)
    return tf.where(within_delta, x=quadratic_branch, y=linear_branch)

def conv2d(input, output_shape, is_train, k_h=5, k_w=5, stddev=0.02, name="conv2d"):
    """Stride-2 'SAME' convolution -> bias -> leaky ReLU -> batch norm.

    Args:
        input: input tensor; its last dimension is used as the number of
            input channels.
        output_shape: number of output channels (despite the name).
        is_train: forwarded to batch norm as ``is_training``.
        k_h, k_w: kernel height and width.
        stddev: stddev of the truncated-normal kernel initializer.
        name: variable scope holding the 'w' and 'biases' variables.

    Returns:
        The batch-normalised activation tensor.
    """
    with tf.variable_scope(name):
        in_channels = input.get_shape()[-1]
        kernel = tf.get_variable(
            'w', [k_h, k_w, in_channels, output_shape],
            initializer=tf.truncated_normal_initializer(stddev=stddev))
        conv = tf.nn.conv2d(input, kernel, strides=[1, 2, 2, 1], padding='SAME')

        biases = tf.get_variable(
            'biases', [output_shape],
            initializer=tf.constant_initializer(0.0))
        # Reshape back to the convolution's static shape after adding the bias.
        biased = tf.reshape(tf.nn.bias_add(conv, biases), conv.get_shape())
        activated = lrelu(biased)
        # updates_collections=None makes the moving-average updates run in place.
        return layers.batch_norm(
            activated, center=True, scale=True, decay=0.9,
            is_training=is_train, updates_collections=None)


def deconv2d(input, deconv_info, is_train, name="deconv2d", stddev=0.02, activation_fn=None):
    """Transposed convolution ('VALID' padding) followed by an activation.

    Args:
        input: input tensor.
        deconv_info: indexable triple ``[output channels, kernel size, stride]``;
            kernel and stride are used for both spatial dimensions.
        is_train: forwarded to batch norm as ``is_training``.
        name: variable scope for the layer.
        stddev: stddev of the truncated-normal weight initializer.
        activation_fn: if truthy, it is applied to the transposed-convolution
            output and no batch norm is used; otherwise ReLU followed by
            batch norm is applied.

    Returns:
        The activated output tensor.

    NOTE(review): ``layers.conv2d_transpose`` is called without an explicit
    ``activation_fn``, so the library's own default activation runs before the
    one chosen here -- confirm that this is intended.
    """
    with tf.variable_scope(name):
        num_outputs, kernel, stride = deconv_info[0], deconv_info[1], deconv_info[2]
        upsampled = layers.conv2d_transpose(
            input,
            num_outputs=num_outputs,
            kernel_size=[kernel, kernel],
            stride=[stride, stride],
            padding='VALID',
            weights_initializer=tf.truncated_normal_initializer(stddev=stddev),
            biases_initializer=tf.zeros_initializer(),
        )
        if activation_fn:
            return activation_fn(upsampled)

        rectified = tf.nn.relu(upsampled)
        return layers.batch_norm(
            rectified, center=True, scale=True, decay=0.9,
            is_training=is_train, updates_collections=None)

In [11]:
# Module-level switches read by G and D.
is_train = True
image_shape = [batch_size, 224, 224, 3]


def G(z, scope='Generator'):
    """Generator: map a batch of noise vectors to images.

    ``z`` is reshaped to ``[batch_size, 1, 1, -1]`` and passed through six
    ``deconv2d`` layers configured by rows 0-5 of ``deconv_info``. Layers 1-3
    use deconv2d's default activation path (ReLU + batch norm); layers 4-6
    use ``tf.tanh``. The result must have static shape ``image_shape``.
    """
    with tf.variable_scope(scope):
        net = tf.reshape(z, [batch_size, 1, 1, -1])
        for layer_no in range(1, 7):
            layer_name = 'g_{}_deconv'.format(layer_no)
            layer_spec = deconv_info[layer_no - 1]
            if layer_no <= 3:
                net = deconv2d(net, layer_spec, is_train, name=layer_name)
            else:
                net = deconv2d(net, layer_spec, is_train, name=layer_name,
                               activation_fn=tf.tanh)
        output = net
        # Fail fast if the layer configuration does not produce full-size images.
        assert output.get_shape().as_list() == image_shape, output.get_shape().as_list()
        return output

In [12]:
def D(img, scope='Discriminator', reuse=True):
    """Discriminator / classifier over a batch of images.

    Three ``conv2d`` blocks (channel counts from ``conv_info``), each followed
    by dropout with keep probability 0.5, then a linear fully connected layer
    producing ``n_class`` logits per image.

    Args:
        img: batch of images; the batch dimension must equal ``batch_size``.
        scope: variable scope name.
        reuse: passed to ``tf.variable_scope``; note that the default is True,
            so the first call has to pass ``reuse=False`` to create variables.

    Returns:
        ``(probabilities, logits)`` where probabilities is the softmax of logits.
    """
    with tf.variable_scope(scope, reuse=reuse):
        features = img
        for layer_no in (1, 2, 3):
            conv_name = 'd_{}_conv'.format(layer_no)
            features = conv2d(features, conv_info[layer_no - 1], is_train,
                              name=conv_name)
            features = slim.dropout(features, keep_prob=0.5,
                                    is_training=is_train, scope=conv_name + '/')

        flattened = tf.reshape(features, [batch_size, -1])
        logits = slim.fully_connected(
            flattened, n_class, scope='d_4_fc', activation_fn=None)
        assert logits.get_shape().as_list() == [batch_size, n_class]
        probabilities = tf.nn.softmax(logits)
        return probabilities, logits

In [13]:
# Small constant added inside the logarithms of the entropy helpers.
epsilon = 1e-6
# NOTE(review): not referenced in the visible cells; the name looks like a
# misspelling of LAMBDA -- confirm before renaming.
LAMBA = 1
# y has dim batch_size x num_classes (original author's note)
def marginal_entropy(y):
    """Entropy of the batch-averaged distribution.

    Averages ``y`` over axis 0 (1/N * sum_i y_i) and returns
    ``sum(-p * log(p + epsilon))`` of that average as a scalar tensor.
    """
    mean_probs = tf.reduce_mean(y, axis=0)
    return tf.reduce_sum(-mean_probs * tf.log(mean_probs + epsilon))

def entropy(y):
    """Mean per-row entropy of ``y``.

    For each row computes ``sum(-y * log(y + epsilon))`` over axis 1, then
    averages those values over axis 0 and returns the scalar tensor.
    """
    return tf.reduce_mean(
        tf.reduce_sum(-y * tf.log(y + epsilon), axis=1),
        axis=0,
    )

In [14]:
# Quick look at the first generator layer spec.
print(deconv_info[0])

[300   3   1]


In [15]:

# Input: list of np arrays (original author's note).
def average_grads(grads):
    """Return one ``tf.reduce_mean`` tensor per entry of ``grads``.

    For each entry, only its first element (``entry[0]``) is averaged. The
    results are TensorFlow tensors, so they still have to be evaluated in a
    session to obtain numbers, and every call creates new reduce_mean ops.
    """
    return [tf.reduce_mean(entry[0]) for entry in grads]

In [16]:
# Build the training graph: generator samples, discriminator outputs on real
# and generated images, the two losses, and one Adam train op per network.
#from util import log
#z = tf.random_uniform([batch_size, nz], minval=-1, maxval=1, dtype=tf.float32)
# NOTE: this rebinds `z` (previously a placeholder) to a random-normal tensor,
# so fresh noise is drawn inside the graph and nothing has to be fed for it.
z = tf.random_normal([batch_size, nz], dtype=tf.float32)
G_sample = G(z)

# The discriminator is instantiated twice over the same 'Discriminator' scope:
# the first call creates the variables, the second reuses them.
D_real, D_real_logits = D(X, scope='Discriminator', reuse=False)
D_fake, D_fake_logits = D(G_sample, scope='Discriminator', reuse=True)

# Only referenced by the commented-out loss formulation below.
D_target = 1./mb_size
G_target = 1./(mb_size*2)

#Z = tf.reduce_sum(tf.exp(-D_real)) + tf.reduce_sum(tf.exp(-D_fake))

#D_loss = tf.reduce_sum(D_target * D_real) + log(Z)
#G_loss = tf.reduce_sum(G_target * D_real) + tf.reduce_sum(G_target * D_fake) + log(Z)

# Entropy-based objectives (presumably CatGAN-style, cf. the 'catgan' name of
# the saved loss plot -- confirm):
#   D: maximise marginal entropy on real data, minimise per-example entropy on
#      real data, maximise per-example entropy on generated data, plus the
#      supervised cross-entropy on real logits weighted by cross_entropy_term.
#   G: maximise marginal entropy and minimise per-example entropy of the
#      discriminator's predictions on generated data.
D_loss = -marginal_entropy(D_real) + entropy(D_real) - entropy(D_fake) + cross_entropy_term *tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=D_real_logits))
G_loss = -marginal_entropy(D_fake) + entropy(D_fake)

all_vars = tf.trainable_variables()

# Split the trainable variables by the scope-name prefix of each network.
theta_D = [v for v in all_vars if v.name.startswith('Discriminator')]
#log.warn("********* d_var ********** "); slim.model_analyzer.analyze_vars(d_var, print_info=True)

theta_G = [v for v in all_vars if v.name.startswith(('Generator'))]
#log.warn("********* g_var ********** "); slim.model_analyzer.analyze_vars(g_var, print_info=True)

# *_grad_overall: (gradient, variable) pairs kept for inspection only. They
# come from their own AdamOptimizer instance, separate from the optimizer that
# builds the corresponding *_solver train op.
D_grad_overall = (tf.train.AdamOptimizer(learning_rate=lr)
            .compute_gradients(D_loss, var_list=theta_D, aggregation_method=tf.AggregationMethod.EXPERIMENTAL_ACCUMULATE_N))
#D_grad = average_grads(D_grad_overall)
# Train op updating only the discriminator variables.
D_solver = (tf.train.AdamOptimizer(learning_rate=lr)
            .minimize(D_loss, var_list=theta_D))
G_grad_overall = (tf.train.AdamOptimizer(learning_rate=lr)
             .compute_gradients(G_loss, var_list=theta_G, aggregation_method=tf.AggregationMethod.EXPERIMENTAL_ACCUMULATE_N))
#G_grad = average_grads(G_grad_overall)
# Train op updating only the generator variables.
G_solver = (tf.train.AdamOptimizer(learning_rate=lr)
            .minimize(G_loss, var_list=theta_G))


In [None]:
# Running index used to number the sample images saved during training.
counter = 0

# Directory that receives the sample grids and the loss plot; created if absent.
dir_name = 'out_15/'
if not os.path.exists(dir_name):
    os.makedirs(dir_name)

In [None]:
#100,000 is good
#print every 1000
# Training loop: for every mini-batch run one discriminator update and one
# generator update. Every 10th mini-batch of an epoch, print the losses and the
# mean of each discriminator gradient, record the losses, and save a 4x4 grid
# of generated samples to dir_name.
g_loss_overall = []
d_loss_overall = []
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    N_train = X_train.shape[0]
    n_batches = int(np.ceil(N_train / float(batch_size)))
    for it in range(num_epochs):
        for i, b in enumerate(range(0, N_train, batch_size)):
            X_batch = X_train[b : b + batch_size, :]
            Y_batch = Y_train[b : b + batch_size, :]
            # The placeholders have a fixed batch dimension, so a trailing
            # partial batch cannot be fed and is skipped.
            if X_batch.shape[0] != batch_size:
                continue
            feed = {X: X_batch, y: Y_batch}

            _, D_loss_curr = sess.run([D_solver, D_loss], feed_dict=feed)
            _, G_loss_curr = sess.run([G_solver, G_loss], feed_dict=feed)

            if i % 10 == 0:
                # `i` is the mini-batch index within the current epoch.
                print('Iter: {}; D_loss: {:.4}; G_loss: {:.4}'
                  .format(i, D_loss_curr, G_loss_curr))
                D_grad_cur_overall = sess.run(D_grad_overall, feed_dict=feed)
                # sess.run already returned numpy (gradient, variable) pairs.
                # Average them with numpy rather than building new TensorFlow
                # ops here: creating ops inside the loop would embed every
                # gradient array as a constant and grow the graph on each
                # logging step.
                D_grad_eval = [np.mean(grad) for grad, _ in D_grad_cur_overall]
                print(D_grad_eval)
                g_loss_overall.append(G_loss_curr)
                d_loss_overall.append(D_loss_curr)
                samples = sess.run(G_sample)
                truncated_samples = samples[0:16,:]  # 16 images fill the 4x4 grid
                fig = plot(truncated_samples)
                plt.savefig(dir_name+'{}.png'
                    .format(str(counter).zfill(3)), bbox_inches='tight')
                counter += 1
                plt.close(fig)  # release the figure; many are created over a run
        # Reshuffle images and labels with the same permutation. A permutation
        # array is used because shuffling a range object in place only works on
        # Python 2. Note that this rebinds the notebook-level X_train / Y_train.
        idxs = np.random.permutation(N_train)
        X_train = X_train[idxs]
        Y_train = Y_train[idxs]

Iter: 0; D_loss: 0.2627; G_loss: -0.1212
[0.028256232, -0.23190486, 0.045735359, 0.00014140853, -8.4835032e-05, 0.027549952, -0.0046366984, 4.8778951e-05, 0.00071822293, 0.0013668451, -0.034829512, 0.015196411, 6.0821065e-10, -1.4901161e-08]
Iter: 10; D_loss: 1.991; G_loss: -0.5053
[0.0028884388, -0.040382765, 0.0065481514, 0.00015451864, -0.00048534453, -0.001294991, -0.0049155233, 8.6356886e-06, -0.00040806844, 0.0011629176, 0.01345334, 0.0046254108, -3.8013168e-10, -3.7252903e-09]
Iter: 20; D_loss: 1.006; G_loss: -0.2817
[0.012913963, -0.0063682273, 0.040414196, 0.00032309024, -0.00044842408, -0.0053592334, 0.0037295159, 1.0071322e-05, 0.00088900817, 0.00042993523, 0.0019047812, 0.0094862264, 8.3628965e-10, 1.0244548e-08]
Iter: 0; D_loss: 0.6205; G_loss: -0.4489
[-0.0079223355, -0.26771981, 0.031059703, -2.8874259e-05, -0.00062578177, -0.0078999875, 0.0001919957, 1.30306e-05, 0.00046275667, -0.00037026231, 0.0011047984, 0.0073662717, 0.0, 1.8626451e-09]
Iter: 10; D_loss: 1.703; G_lo

In [None]:
# Plot the generator and discriminator losses recorded during training (one
# point per logging step) and save the figure into the output directory.
loss_fig, loss_ax = plt.subplots()
g_loss_plot = loss_ax.plot(g_loss_overall, label='G Loss')[0]
d_loss_plot = loss_ax.plot(d_loss_overall, label='D Loss')[0]

loss_ax.legend([g_loss_plot, d_loss_plot], ['G Loss', 'D Loss'])
loss_fig.savefig(dir_name+'plot_for_catgan_.png')