In [1]:
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import pandas as pd

In [2]:
class Sampling(layers.Layer):
    """Uses (z_mean, z_log_var) to sample z, the vector encoding a digit."""

    def call(self, inputs):
        z_mean, z_log_var = inputs
        batch = tf.shape(z_mean)[0]
        dim = tf.shape(z_mean)[1]
        epsilon = tf.keras.backend.random_normal(shape=(batch, dim))
        return z_mean + tf.exp(0.5 * z_log_var) * epsilon

In [3]:
latent_dim = 2

encoder_inputs = keras.Input(shape=(17, 1))
x = layers.Conv1D(8,3, activation='relu', padding='same',dilation_rate=2)(encoder_inputs)
# x = ZeroPadding1D((2,1))(x)
x1 = layers.MaxPooling1D(1)(x)
x2 = layers.Conv1D(4,3, activation='relu', padding='same',dilation_rate=2)(x1)
x3 = layers.MaxPooling1D(1)(x2)
x4 = layers.AveragePooling1D()(x3)
flat = layers.Flatten()(x4)
encoder = layers.Dense(2)(flat)
x = encoder
z_mean = layers.Dense(latent_dim, name="z_mean")(x)
z_log_var = layers.Dense(latent_dim, name="z_log_var")(x)
z = Sampling()([z_mean, z_log_var])
encoder = keras.Model(encoder_inputs, [z_mean, z_log_var, z], name="encoder")
encoder.summary()


Model: "encoder"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 17, 1)]      0                                            
__________________________________________________________________________________________________
conv1d (Conv1D)                 (None, 17, 8)        32          input_1[0][0]                    
__________________________________________________________________________________________________
max_pooling1d (MaxPooling1D)    (None, 17, 8)        0           conv1d[0][0]                     
__________________________________________________________________________________________________
conv1d_1 (Conv1D)               (None, 17, 4)        100         max_pooling1d[0][0]              
____________________________________________________________________________________________

In [4]:
latent_inputs = keras.Input(shape=(latent_dim,))
#d = layers.Flatten()(latent_inputs)
d1 = layers.Dense(9)(latent_inputs)
#d2 = layers.Reshape((117,4))(d1)
d2 = layers.Reshape((9,1))(d1)
d3 = layers.Conv1D(4,1,strides=1, activation='relu', padding='same')(d2)
d4 = layers.UpSampling1D()(d3)
d5 = layers.Conv1D(8,1,strides=1, activation='relu', padding='same')(d4)
#d6 = layers.UpSampling1D(2)(d5)
#d7 = layers.UpSampling1D(2)(d6)
d8 = layers.Conv1D(1,2, strides=1, activation='sigmoid', padding='same')(d5)
decoded = layers.Cropping1D(cropping=(1,0))(d8) # this is the added step
#decoded = d8

decoder = keras.Model(latent_inputs, decoded, name="decoder")
decoder.summary()

Model: "decoder"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_2 (InputLayer)         [(None, 2)]               0         
_________________________________________________________________
dense_1 (Dense)              (None, 9)                 27        
_________________________________________________________________
reshape (Reshape)            (None, 9, 1)              0         
_________________________________________________________________
conv1d_2 (Conv1D)            (None, 9, 4)              8         
_________________________________________________________________
up_sampling1d (UpSampling1D) (None, 18, 4)             0         
_________________________________________________________________
conv1d_3 (Conv1D)            (None, 18, 8)             40        
_________________________________________________________________
conv1d_4 (Conv1D)            (None, 18, 1)             17  

In [5]:
class VAE(keras.Model):
    def __init__(self, encoder, decoder, **kwargs):
        super(VAE, self).__init__(**kwargs)
        self.encoder = encoder
        self.decoder = decoder
        self.total_loss_tracker = keras.metrics.Mean(name="total_loss")
        self.reconstruction_loss_tracker = keras.metrics.Mean(
            name="reconstruction_loss"
        )
        self.kl_loss_tracker = keras.metrics.Mean(name="kl_loss")

    @property
    def metrics(self):
        return [
            self.total_loss_tracker,
            self.reconstruction_loss_tracker,
            self.kl_loss_tracker,
        ]

    def train_step(self, data):
        with tf.GradientTape() as tape:
            z_mean, z_log_var, z = self.encoder(data)
            reconstruction = self.decoder(z)
            reconstruction_loss = tf.reduce_mean(
                tf.reduce_sum(
                    keras.losses.binary_crossentropy(data, reconstruction), axis=(1, 2)
                )
            )
            kl_loss = -0.5 * (1 + z_log_var - tf.square(z_mean) - tf.exp(z_log_var))
            kl_loss = tf.reduce_mean(tf.reduce_sum(kl_loss, axis=1))
            total_loss = reconstruction_loss + kl_loss
        grads = tape.gradient(total_loss, self.trainable_weights)
        self.optimizer.apply_gradients(zip(grads, self.trainable_weights))
        self.total_loss_tracker.update_state(total_loss)
        self.reconstruction_loss_tracker.update_state(reconstruction_loss)
        self.kl_loss_tracker.update_state(kl_loss)
        return {
            "loss": self.total_loss_tracker.result(),
            "reconstruction_loss": self.reconstruction_loss_tracker.result(),
            "kl_loss": self.kl_loss_tracker.result(),
        }

In [6]:
(x_train) = pd.read_csv('train_dataset.csv')
(x_test) = pd.read_csv('test_dataset.csv')

print(x_train)
print(x_test)

     Length       MW  Charge  ChargeDensity        pI  InstabilityInd  \
0        34  3578.00  -3.009      -0.000841  4.095459       12.291176   
1        34  3695.24  -0.967      -0.000262  5.475586        0.961765   
2        34  3576.07  -2.009      -0.000562  4.322754       12.291176   
3        34  3755.32  -0.009      -0.000002  7.062500       30.879412   
4        34  3591.05  -2.009      -0.000559  4.322754       12.291176   
..      ...      ...     ...            ...       ...             ...   
844      12  1476.77   2.680       0.001815  9.776367        4.841667   
845      14  1692.97   1.680       0.000992  8.888672       19.335714   
846      20  2100.32   0.836       0.000398  8.759766       -1.775000   
847      10  1306.52   1.680       0.001286  8.888672       23.070000   
848      13  1613.87   1.722       0.001067  8.889648       20.053846   

     Aromaticity  AliphaticInd  BomanInd  HydrophRatio  CrossCorr  AutoCorr  \
0       0.000000    120.588235  1.438235    

In [9]:
peptides = np.concatenate([x_train, x_test], axis=0)
#mnist_digits = np.expand_dims(mnist_digits, -1).astype("float32") / 255
peptides = np.expand_dims(peptides, -1)

vae = VAE(encoder, decoder)
vae.compile(optimizer=keras.optimizers.Adam())
vae.fit(peptides, epochs=3)

Epoch 1/3


ValueError: in user code:

    C:\Anaconda\envs\autoencoders\lib\site-packages\keras\engine\training.py:853 train_function  *
        return step_function(self, iterator)
    C:\Anaconda\envs\autoencoders\lib\site-packages\keras\engine\training.py:842 step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    C:\Anaconda\envs\autoencoders\lib\site-packages\tensorflow\python\distribute\distribute_lib.py:1286 run
        return self._extended.call_for_each_replica(fn, args=args, kwargs=kwargs)
    C:\Anaconda\envs\autoencoders\lib\site-packages\tensorflow\python\distribute\distribute_lib.py:2849 call_for_each_replica
        return self._call_for_each_replica(fn, args, kwargs)
    C:\Anaconda\envs\autoencoders\lib\site-packages\tensorflow\python\distribute\distribute_lib.py:3632 _call_for_each_replica
        return fn(*args, **kwargs)
    C:\Anaconda\envs\autoencoders\lib\site-packages\keras\engine\training.py:835 run_step  **
        outputs = model.train_step(data)
    C:\Users\PC\AppData\Local\Temp\ipykernel_5148\3236315084.py:25 train_step
        tf.reduce_sum(
    C:\Anaconda\envs\autoencoders\lib\site-packages\tensorflow\python\util\dispatch.py:206 wrapper
        return target(*args, **kwargs)
    C:\Anaconda\envs\autoencoders\lib\site-packages\tensorflow\python\ops\math_ops.py:2253 reduce_sum
        return reduce_sum_with_dims(input_tensor, axis, keepdims, name,
    C:\Anaconda\envs\autoencoders\lib\site-packages\tensorflow\python\ops\math_ops.py:2265 reduce_sum_with_dims
        gen_math_ops._sum(input_tensor, dims, keepdims, name=name))
    C:\Anaconda\envs\autoencoders\lib\site-packages\tensorflow\python\ops\gen_math_ops.py:10721 _sum
        _, _, _op, _outputs = _op_def_library._apply_op_helper(
    C:\Anaconda\envs\autoencoders\lib\site-packages\tensorflow\python\framework\op_def_library.py:748 _apply_op_helper
        op = g._create_op_internal(op_type_name, inputs, dtypes=None,
    C:\Anaconda\envs\autoencoders\lib\site-packages\tensorflow\python\framework\func_graph.py:599 _create_op_internal
        return super(FuncGraph, self)._create_op_internal(  # pylint: disable=protected-access
    C:\Anaconda\envs\autoencoders\lib\site-packages\tensorflow\python\framework\ops.py:3561 _create_op_internal
        ret = Operation(
    C:\Anaconda\envs\autoencoders\lib\site-packages\tensorflow\python\framework\ops.py:2041 __init__
        self._c_op = _create_c_op(self._graph, node_def, inputs,
    C:\Anaconda\envs\autoencoders\lib\site-packages\tensorflow\python\framework\ops.py:1883 _create_c_op
        raise ValueError(str(e))

    ValueError: Invalid reduction dimension 2 for input with 2 dimensions. for '{{node Sum}} = Sum[T=DT_FLOAT, Tidx=DT_INT32, keep_dims=false](Mean, Sum/reduction_indices)' with input shapes: [?,17], [2] and with computed input tensors: input[1] = <1 2>.
