###### using http://www.easy-tensorflow.com/autoencoders/noise-removal

In [1]:
import numpy as np
import scipy.sparse as ss
import tensorflow as tf
import matplotlib.pyplot as plt
import pickle

  from ._conv import register_converters as _register_converters


In [2]:
# data = ss.random(1000, 23000000, density=0.003, 
#                  format='csr', dtype=np.float32, random_state=10)
data = ss.random(2000, 1000, density=0.003, 
                 format='csr', dtype=np.float32, random_state=10)

In [3]:
# test = ss.random(300, 23000000, density=0.003, 
#                  format='csr', dtype=np.float32, random_state=20)
test = ss.random(300, 1000, density=0.003, 
                 format='csr', dtype=np.float32, random_state=20)

In [4]:
# hyper-parameters
logs_path = "./logs/noiseRemoval_test_csr_matrix"  # path to the folder that we want to save the logs for Tensorboard
learning_rate = 0.001  # The optimization learning rate
epochs = 10  # Total number of training epochs
batch_size = 100  # Training batch size
display_freq = 100  # Frequency of displaying the training results

# Network Parameters
# We know that MNIST images are 28 pixels in each dimension.
# img_h = img_w = 100
num_features = data.shape[1]

# Images are stored in one-dimensional arrays of this length.
# img_size_flat = img_h * img_w

# number of units in the hidden layer
# h1 = 1000
h1 = 5

# level of the noise in noisy data
# noise_level = 0.6


In [5]:
# weight and bais wrappers
def weight_variable(name, shape):
    """
    Create a weight variable with appropriate initialization
    :param name: weight name
    :param shape: weight shape
    :return: initialized weight variable
    """
    initer = tf.truncated_normal_initializer(stddev=0.01)
#     return tf.get_variable('W_' + name,
#                            dtype=tf.float32,
#                            shape=shape,
#                            initializer=initer)
    return tf.get_variable('W_' + name,
                           dtype=tf.float32,
                           shape=shape,
                           initializer=initer)

def bias_variable(name, shape):
    """
    Create a bias variable with appropriate initialization
    :param name: bias variable name
    :param shape: bias variable shape
    :return: initialized bias variable
    """
#     initial = tf.constant(0., shape=shape, dtype=tf.float32)
#     return tf.get_variable('b_' + name,
#                            dtype=tf.float32,
#                            initializer=initial)
    initial = tf.constant(0., shape=shape, dtype=tf.float32)
    return tf.get_variable('b_' + name,
                           dtype=tf.float32,
                           initializer=initial)

def fc_layer(x, input_dim, num_units, name, use_relu=True):
    """
    Create a fully-connected layer
    :param x: input from previous layer
    :param num_units: number of hidden units in the fully-connected layer
    :param name: layer name
    :param use_relu: boolean to add ReLU non-linearity (or not)
    :return: The output array
    """
    with tf.variable_scope(name):
#         in_dim = x.get_shape()[1]
        in_dim = input_dim
        W = weight_variable(name, shape=[in_dim, num_units])
        tf.summary.histogram('W', W)
        b = bias_variable(name, [num_units])
        tf.summary.histogram('b', b)
#         layer = tf.matmul(x, W)
        layer = tf.sparse_tensor_dense_matmul(x, W)
        layer += b
        if use_relu:
            layer = tf.nn.relu(layer)
#         return tf.contrib.layers.dense_to_sparse(layer)
    zero = tf.constant(0, dtype=tf.float32)
    where = tf.not_equal(layer, zero)
    indices = tf.where(where)
    values = tf.gather_nd(layer, indices)
    return tf.SparseTensor(indices, values, layer.shape)


In [32]:
tf.reset_default_graph()

In [34]:
# Create graph
# Placeholders for inputs (x), outputs(y)
with tf.variable_scope('Input'):
    x_original = tf.sparse_placeholder(tf.float32\
                           , shape=[batch_size, num_features]\
                           , name='X_original')
    neg_eye = tf.placeholder(tf.float32\
                           , shape=[num_features, num_features]\
                           , name='neg_eye')

fc1 = fc_layer(x_original, num_features, h1, 'Hidden_layer', use_relu=True)
out = fc_layer(fc1, h1, num_features, 'Output_layer', use_relu=False)

In [26]:
# #make_negative sparseTensor

# # https://stackoverflow.com/a/40666375/2674061
# # out.get_shape().as_list()[0]
# neg_eye = tf.scalar_mul(-1, tf.eye(out.get_shape().as_list()[1]))
# negative_val = tf.sparse_tensor_dense_matmul(out, neg_eye)     

# diff_sparsetensor = tf.sparse_add(x_original , negative_val)

# square_diff = tf.square(diff_sparsetensor)

# type(square_diff)

In [35]:
# Define the loss function, optimizer, and accuracy
with tf.variable_scope('Train'):
    with tf.variable_scope('Loss'):
#         loss = tf.sparse_reduce_sum_sparse(tf.losses.sparse_softmax_cross_entropy(x_original, out), name='loss')
        neg_eye = tf.scalar_mul(-1, tf.eye(out.get_shape().as_list()[1]))
        negative_val = tf.sparse_tensor_dense_matmul(out, neg_eye)     
        square_diff = tf.square(tf.sparse_add(x_original , negative_val))

        loss = tf.reduce_mean(square_diff, name='loss')
        tf.summary.scalar('loss', loss)
    with tf.variable_scope('Optimizer'):
        optimizer = tf.train.AdamOptimizer(\
            learning_rate=learning_rate, 
            name='Adam-op').minimize(loss)

In [11]:
# Define the loss function, optimizer, and accuracy
# with tf.variable_scope('Train'):
#     with tf.variable_scope('Loss'):
#         loss = tf.reduce_mean(tf.losses.mean_squared_error(x_original, out), name='loss')
#         tf.summary.scalar('loss', loss)
#     with tf.variable_scope('Optimizer'):
#         optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate, name='Adam-op').minimize(loss)

# # Initializing the variables
# init = tf.global_variables_initializer()

In [12]:
def next_batch_data(batch_size):
    '''
    slice small number of data points from the large csr_matrix
    make a SparseTensor and return it 
    '''
    idx = np.arange(0 , data.shape[0])
    np.random.shuffle(idx)
    idx = idx[:batch_size]

    coo_matrix = data[idx].tocoo()
    tf_coo_matrix = tf.SparseTensorValue(
        indices=np.array([coo_matrix.row, coo_matrix.col]).T,
        values=coo_matrix.data,
        dense_shape=coo_matrix.shape)
    
    return tf.SparseTensor.from_value(tf_coo_matrix)

In [13]:
%%time
C = next_batch_data(5)

CPU times: user 1.83 ms, sys: 0 ns, total: 1.83 ms
Wall time: 1.39 ms


In [14]:
type(C)

tensorflow.python.framework.sparse_tensor.SparseTensor

In [15]:
C.get_shape()

TensorShape([Dimension(5), Dimension(1000)])

#### prepare the test tensor

In [16]:
%%time
np.random.seed(10)
idx = np.arange(0 , test.shape[0])
np.random.shuffle(idx)
idx = idx[:10]

test_coo_matrix = test[idx].tocoo()
test_tf_coo_matrix = tf.SparseTensorValue(
    indices=np.array([test_coo_matrix.row, test_coo_matrix.col]).T,
    values=test_coo_matrix.data,
    dense_shape=test_coo_matrix.shape)

test_sp_tensor = tf.SparseTensor.from_value(test_tf_coo_matrix)

CPU times: user 1.02 ms, sys: 25 µs, total: 1.05 ms
Wall time: 1.01 ms


In [17]:
test_sp_tensor.get_shape()

TensorShape([Dimension(10), Dimension(1000)])

In [18]:
type(test_sp_tensor)

tensorflow.python.framework.sparse_tensor.SparseTensor

In [19]:
# config = tf.ConfigProto()
# config.gpu_options.allocator_type ='BFC'
# config.gpu_options.per_process_gpu_memory_fraction = 0.90

In [20]:
data.shape[0]

2000

In [40]:
# Launch the graph (session)
sess = tf.InteractiveSession() # using InteractiveSession instead of Session to test network in separate cell
# sess = tf.Session() 
init = tf.global_variables_initializer()
sess.run(init)
train_writer = tf.summary.FileWriter(logs_path, sess.graph)
num_tr_iter = int(data.shape[0] / batch_size)
global_step = 0
for epoch in range(epochs):
    print('Training epoch: {}'.format(epoch + 1))
    for iteration in range(num_tr_iter):
        batch_x = next_batch_data(batch_size)
        print(batch_x.get_shape(), flush=True)
#         batch_x_noisy = batch_x + noise_level * np.random.normal(loc=0.0, scale=1.0, size=batch_x.shape)

        global_step += 1
        print(global_step, flush=True)
        # Run optimization op (backprop)
#         feed_dict_batch = {x_original: batch_x, x_noisy: batch_x_noisy}
        feed_dict_batch = {x_original: batch_x} #problem 1
        _, summary_tr = sess.run([optimizer, loss], feed_dict=feed_dict_batch)
        
        train_writer.add_summary(summary_tr, global_step)

        if iteration % display_freq == 0:
            # Calculate and display the batch loss and accuracy
            loss_batch = sess.run(loss,
                                  feed_dict=feed_dict_batch)
            print("iter {0:3d}:\t Reconstruction loss={1:.3f}".
                  format(iteration, loss_batch))

    # Run validation after every epoch
#     x_valid_original  = mnist.validation.images
    x_valid_original  = test_sp_tensor
#     x_valid_noisy = x_valid_original + noise_level * np.random.normal(loc=0.0, scale=1.0, size=x_valid_original.shape)

    feed_dict_valid = {x_original: x_valid_original}
    loss_valid = sess.run(loss, feed_dict=feed_dict_valid)
    print('---------------------------------------------------------')
    print("Epoch: {0}, validation loss: {1:.3f}".
          format(epoch + 1, loss_valid))
    print('---------------------------------------------------------')
 

Training epoch: 1
(100, 1000)
1


InvalidArgumentError: You must feed a value for placeholder tensor 'Input/X_original/values' with dtype float and shape [?]
	 [[Node: Input/X_original/values = Placeholder[dtype=DT_FLOAT, shape=[?], _device="/job:localhost/replica:0/task:0/device:GPU:0"]()]]
	 [[Node: Input/X_original/shape/_11 = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/device:CPU:0", send_device="/job:localhost/replica:0/task:0/device:GPU:0", send_device_incarnation=1, tensor_name="edge_86_Input/X_original/shape", tensor_type=DT_INT64, _device="/job:localhost/replica:0/task:0/device:CPU:0"]()]]

Caused by op 'Input/X_original/values', defined at:
  File "/home/esadrfa/libs/anaconda3/lib/python3.6/runpy.py", line 193, in _run_module_as_main
    "__main__", mod_spec)
  File "/home/esadrfa/libs/anaconda3/lib/python3.6/runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "/home/esadrfa/libs/anaconda3/lib/python3.6/site-packages/ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "/home/esadrfa/libs/anaconda3/lib/python3.6/site-packages/traitlets/config/application.py", line 658, in launch_instance
    app.start()
  File "/home/esadrfa/libs/anaconda3/lib/python3.6/site-packages/ipykernel/kernelapp.py", line 478, in start
    self.io_loop.start()
  File "/home/esadrfa/libs/anaconda3/lib/python3.6/site-packages/zmq/eventloop/ioloop.py", line 177, in start
    super(ZMQIOLoop, self).start()
  File "/home/esadrfa/libs/anaconda3/lib/python3.6/site-packages/tornado/ioloop.py", line 888, in start
    handler_func(fd_obj, events)
  File "/home/esadrfa/libs/anaconda3/lib/python3.6/site-packages/tornado/stack_context.py", line 277, in null_wrapper
    return fn(*args, **kwargs)
  File "/home/esadrfa/libs/anaconda3/lib/python3.6/site-packages/zmq/eventloop/zmqstream.py", line 440, in _handle_events
    self._handle_recv()
  File "/home/esadrfa/libs/anaconda3/lib/python3.6/site-packages/zmq/eventloop/zmqstream.py", line 472, in _handle_recv
    self._run_callback(callback, msg)
  File "/home/esadrfa/libs/anaconda3/lib/python3.6/site-packages/zmq/eventloop/zmqstream.py", line 414, in _run_callback
    callback(*args, **kwargs)
  File "/home/esadrfa/libs/anaconda3/lib/python3.6/site-packages/tornado/stack_context.py", line 277, in null_wrapper
    return fn(*args, **kwargs)
  File "/home/esadrfa/libs/anaconda3/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 283, in dispatcher
    return self.dispatch_shell(stream, msg)
  File "/home/esadrfa/libs/anaconda3/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 233, in dispatch_shell
    handler(stream, idents, msg)
  File "/home/esadrfa/libs/anaconda3/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 399, in execute_request
    user_expressions, allow_stdin)
  File "/home/esadrfa/libs/anaconda3/lib/python3.6/site-packages/ipykernel/ipkernel.py", line 208, in do_execute
    res = shell.run_cell(code, store_history=store_history, silent=silent)
  File "/home/esadrfa/libs/anaconda3/lib/python3.6/site-packages/ipykernel/zmqshell.py", line 537, in run_cell
    return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
  File "/home/esadrfa/libs/anaconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2728, in run_cell
    interactivity=interactivity, compiler=compiler, result=result)
  File "/home/esadrfa/libs/anaconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2850, in run_ast_nodes
    if self.run_code(code, result):
  File "/home/esadrfa/libs/anaconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2910, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-34-98ca892f08ef>", line 4, in <module>
    x_original = tf.sparse_placeholder(tf.float32                           , shape=[batch_size, num_features]                           , name='X_original')
  File "/home/esadrfa/libs/anaconda3/lib/python3.6/site-packages/tensorflow/python/ops/array_ops.py", line 1848, in sparse_placeholder
    name=(name + "/values") if name is not None else None),
  File "/home/esadrfa/libs/anaconda3/lib/python3.6/site-packages/tensorflow/python/ops/array_ops.py", line 1777, in placeholder
    return gen_array_ops.placeholder(dtype=dtype, shape=shape, name=name)
  File "/home/esadrfa/libs/anaconda3/lib/python3.6/site-packages/tensorflow/python/ops/gen_array_ops.py", line 4521, in placeholder
    "Placeholder", dtype=dtype, shape=shape, name=name)
  File "/home/esadrfa/libs/anaconda3/lib/python3.6/site-packages/tensorflow/python/framework/op_def_library.py", line 787, in _apply_op_helper
    op_def=op_def)
  File "/home/esadrfa/libs/anaconda3/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 3290, in create_op
    op_def=op_def)
  File "/home/esadrfa/libs/anaconda3/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 1654, in __init__
    self._traceback = self._graph._extract_stack()  # pylint: disable=protected-access

InvalidArgumentError (see above for traceback): You must feed a value for placeholder tensor 'Input/X_original/values' with dtype float and shape [?]
	 [[Node: Input/X_original/values = Placeholder[dtype=DT_FLOAT, shape=[?], _device="/job:localhost/replica:0/task:0/device:GPU:0"]()]]
	 [[Node: Input/X_original/shape/_11 = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/device:CPU:0", send_device="/job:localhost/replica:0/task:0/device:GPU:0", send_device_incarnation=1, tensor_name="edge_86_Input/X_original/shape", tensor_type=DT_INT64, _device="/job:localhost/replica:0/task:0/device:CPU:0"]()]]


In [None]:
sess.close()