In [1]:
import numpy as np
import tensorflow as tf
from tensorflow.contrib.slim import fully_connected as fc  # pylint: disable=E0611
import os
import errno
import scedar as sce
import argparse
import pandas as pd
from timeit import default_timer as timer
import utils

data  =  pd.read_csv('/Users/dawnstear/desktop/chop_cellpred/data.csv')
sclabels = data['Labels']
scdata = data.drop(['Labels','TYPE'],axis=1)

DataObj = utils.Data(scdata,sclabels,drop_remainder=True)  

assert not np.any(np.isnan(scdata))
assert not np.any(np.isnan(sclabels))

maxval = np.amax(scdata)
scdata = (scdata+1e-7)/maxval


  return f(*args, **kwds)
  return f(*args, **kwds)


In [2]:

class VariantionalAutoencoder(object):
    '''VAE implementation from https://github.com/shaohua0116/VAE-Tensorflow
    '''
    def __init__(self, input_dim, nelfirst, nelsecond, ndlfirst, ndlsecond,
                 n_z=2, learning_rate=1e-3, batch_size=100):
        self.input_dim = input_dim
        self.nelfirst = nelfirst
        self.nelsecond = nelsecond
        self.ndlfirst = ndlfirst
        self.ndlsecond = ndlsecond

        self.learning_rate = learning_rate
        self.batch_size = batch_size
        self.n_z = n_z

        self.build()

        config = tf.ConfigProto()
        config.gpu_options.allow_growth=True # pylint: disable=E1101
        self.sess = tf.InteractiveSession(config=config)
        self.sess.run(tf.global_variables_initializer())

    # Build the netowrk and the loss functions
    def build(self):
        self.x = tf.placeholder(name='x', dtype=tf.float32,
                                shape=[None, self.input_dim])
        # Encode
        # x -> z_mean, z_sigma -> z
        f1 = fc(self.x, self.nelfirst, scope='enc_fc1',
                activation_fn=tf.nn.elu)
        f2 = fc(f1, self.nelsecond, scope='enc_fc2', activation_fn=tf.nn.elu)

        self.z_mu = fc(f2, self.n_z, scope='enc_fc3_mu', activation_fn=None)
        self.z_log_sigma_sq = fc(f2, self.n_z, scope='enc_fc3_sigma',
                                 activation_fn=None)
        eps = tf.random_normal(shape=tf.shape(self.z_log_sigma_sq),
                               mean=0, stddev=1, dtype=tf.float32)
        self.z = self.z_mu + tf.sqrt(tf.exp(self.z_log_sigma_sq)) * eps

        # Decode
        # z -> x_hat
        g1 = fc(self.z, self.ndlfirst, scope='dec_fc1',
                activation_fn=tf.nn.elu)
        g2 = fc(g1, self.ndlsecond, scope='dec_fc2', activation_fn=tf.nn.elu)
        self.x_hat = fc(g2, self.input_dim, scope='dec_fc3',
                        activation_fn=tf.sigmoid)

        # Loss
        # Reconstruction loss
        # Minimize the cross-entropy loss
        # H(x, x_hat) = -\Sigma x*log(x_hat) + (1-x)*log(1-x_hat)
        epsilon = 1e-6
        recon_loss = -tf.reduce_sum(
            self.x * tf.log(epsilon+self.x_hat)
            + (1-self.x) * tf.log(epsilon+1-self.x_hat),
            axis=1
        )
        self.recon_loss = tf.reduce_mean(recon_loss)

        # Latent loss
        # Kullback Leibler divergence: measure the difference between
        # two distributions Here we measure the divergence between the latent
        # distribution and N(0, 1)
        latent_loss = -0.5 * tf.reduce_sum(
            1 + self.z_log_sigma_sq - tf.square(self.z_mu)
            - tf.exp(self.z_log_sigma_sq), axis=1)
        self.latent_loss = tf.reduce_mean(latent_loss)

        self.total_loss = tf.reduce_mean(recon_loss + latent_loss)
        self.train_op = tf.train.AdamOptimizer(
            learning_rate=self.learning_rate).minimize(self.total_loss)
        return

    # Execute the forward and the backward pass
    def run_single_step(self, x):
        _, loss, recon_loss, latent_loss = self.sess.run(
            [self.train_op, self.total_loss, self.recon_loss, self.latent_loss],
            feed_dict={self.x: x}
        )
        return loss, recon_loss, latent_loss

    # x -> x_hat
    def reconstruct(self, x):
        x_hat = self.sess.run(self.x_hat, feed_dict={self.x: x})
        return x_hat

    # z -> x
    def generate(self, z):
        x_hat = self.sess.run(self.x_hat, feed_dict={self.z: z})
        return x_hat

    # x -> z
    def transform(self, x):
        z = self.sess.run(self.z, feed_dict={self.x: x})
        return z


def train(train_data, nelfirst, nelsecond, ndlfirst, ndlsecond,
          learning_rate=1e-5, batch_size=100, num_epoch=75):
    input_dim = train_data.shape[1]
    n_samples = train_data.shape[0]
    model = VariantionalAutoencoder(
        input_dim, nelfirst, nelsecond, ndlfirst, ndlsecond, n_z=2,
        learning_rate=learning_rate, batch_size=batch_size)

    for epoch in range(num_epoch):
        for i in range(n_samples // batch_size):
            # Obtina a batch
            batch = train_data[i*batch_size:(i+1)*batch_size]
            # Execute the forward and the backward pass and report computed losses
            loss, recon_loss, latent_loss = model.run_single_step(batch)

        if epoch % 5 == 0:
            print('[Epoch {}] Loss: {}, Recon loss: {}, Latent loss: {}'.format(
                epoch, loss, recon_loss, latent_loss))

    print('Done!')
    return model



In [3]:
train_start = timer()
vae = train(scdata, 20, 20,
            20, 20,
            1e-11,
            batch_size=100,
            num_epoch=11)
train_end = timer()
train_time = train_end - train_start

trans_start = timer()
z = vae.transform(scdata)
print(np.shape(z))
trans_end = timer()

trans_time = trans_end - trans_start

fig = sce.eda.cluster_scatter(z, labels=labels, s=50, figsize=(15, 7),
                              n_txt_per_cluster=0, alpha=0.6)




[Epoch 0] Loss: nan, Recon loss: nan, Latent loss: nan
[Epoch 5] Loss: nan, Recon loss: nan, Latent loss: nan
[Epoch 10] Loss: nan, Recon loss: nan, Latent loss: nan
Done!
(1078, 2)


AttributeError: module 'scedar' has no attribute 'eda'

In [4]:
z

array([[nan, nan],
       [nan, nan],
       [nan, nan],
       ...,
       [nan, nan],
       [nan, nan],
       [nan, nan]], dtype=float32)