# ReLU Layers

We can write a ReLU layer $z = \max(Wx+b, 0)$ as the
convex optimization problem
\begin{equation}
\begin{array}{ll}
\mbox{minimize} & \|z-\tilde Wx - b\|_2^2 \\[.2cm]
\mbox{subject to} & z \geq 0, \\
& \tilde W = W,
\end{array}
\label{eq:prob}
\end{equation}
with variables $z$ and $\tilde W$,
and parameters $W$, $b$, and $x$.
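(Note that we have added an extra variable $\tilde W$ so
that the problem satisfies the disciplined parametrized programming (DPP)
rules: the product $Wx$ of two parameters is not DPP-compliant, whereas
$\tilde W x$ multiplies a variable by a parameter.)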

We can embed this problem into a TensorFlow Keras `Layer` and use it
as a layer in a sequential neural network.
We note that this example is purely illustrative;
one can implement a ReLU layer much more efficiently
by directly performing the matrix multiplication, vector addition,
and then taking the positive part.

In [1]:
from cvxpylayers.tensorflow import CvxpyLayer
import cvxpy as cp
import tensorflow as tf
from tensorflow.keras import layers

# Make float64 the default Keras float type; the layer weights and the data
# used in this notebook are all created with dtype=tf.float64.
tf.keras.backend.set_floatx('float64')

In [2]:
class ReluLayer(layers.Layer):
    """Keras layer computing ``z = max(W x + b, 0)`` via a convex problem.

    The layer wraps the problem

        minimize    ||z - Wtilde x - b||_2^2
        subject to  z >= 0,  Wtilde == W

    in a ``CvxpyLayer`` whose parameters are ``W``, ``b`` and ``x`` and whose
    output variable is ``z``.  ``self.W`` and ``self.b`` are the
    ``tf.Variable`` weights fed to the problem as parameter values.
    """

    def __init__(self, input_dim, output_dim):
        """Create the weights and the underlying CVXPY problem.

        Args:
            input_dim: Length of the input vector ``x``.
            output_dim: Length of the output vector ``z``.
        """
        super().__init__()
        # Weights are drawn from a normal distribution scaled by 1e-3.
        self.W = tf.Variable(
            1e-3 * tf.random.normal((output_dim, input_dim), dtype=tf.float64))
        self.b = tf.Variable(
            1e-3 * tf.random.normal((output_dim,), dtype=tf.float64))

        # CVXPY variables: the output z and a copy of W that is constrained
        # to equal the parameter W (see the constraint list below).
        z = cp.Variable(output_dim)
        Wtilde = cp.Variable((output_dim, input_dim))
        # CVXPY parameters: values are supplied on every forward call.
        W = cp.Parameter((output_dim, input_dim))
        b = cp.Parameter(output_dim)
        x = cp.Parameter(input_dim)

        objective = cp.Minimize(cp.sum_squares(z - Wtilde @ x - b))
        constraints = [z >= 0, Wtilde == W]
        problem = cp.Problem(objective, constraints)
        # Parameter order here fixes the argument order used in `call`.
        self.cvxpy_layer = CvxpyLayer(problem, [W, b, x], [z])

    def call(self, x):
        """Solve the problem for ``x`` and return the optimal ``z``.

        Args:
            x: Either a single input (rank 1) or a batch of inputs (rank 2,
                batch along the first axis).

        Returns:
            The first (and only) output of the wrapped ``CvxpyLayer``, i.e.
            the value of ``z``.
        """
        if x.shape.rank == 2:
            # When x is batched, repeat W and b once per batch element so
            # they are passed with the same leading batch dimension as x.
            batch_size = x.shape[0]
            W_batched = tf.stack([self.W] * batch_size)
            b_batched = tf.stack([self.b] * batch_size)
            return self.cvxpy_layer(W_batched, b_batched, x)[0]
        # Unbatched input: pass the weights as they are.  (This branch
        # previously referenced the undefined attribute `self.layer`.)
        return self.cvxpy_layer(self.W, self.b, x)[0]

We generate synthetic data and create a network of two `ReluLayer`s followed by a linear layer.

In [3]:
# Seed TensorFlow's global random generator before anything random is drawn
# (the ReluLayer weights and the synthetic data below both use
# tf.random.normal).
tf.random.set_seed(0)
# Two ReluLayers mapping 20 -> 20, followed by a Dense layer with one output.
model = tf.keras.Sequential([
    ReluLayer(20, 20),
    ReluLayer(20, 20),
    tf.keras.layers.Dense(1, input_shape=(20,), dtype=tf.float64)
])
# Synthetic data: 300 samples of 20 features each, with one target per sample;
# inputs and targets are independent standard-normal draws.
X = tf.random.normal((300, 20), dtype=tf.float64)
Y = tf.random.normal((300, 1), dtype=tf.float64)

Now we can optimize the parameters inside the network using, for example, the Adam optimizer.
The code below solves 15000 convex optimization problems (25 iterations × 300 samples × 2 `ReluLayer`s) and calls backward 15000 times.

In [4]:
# Adam optimizer with learning rate 1e-2, run for 25 full-batch steps.
opt = tf.keras.optimizers.Adam(1e-2)
for _ in range(25):
    with tf.GradientTape() as tape:
        # Sum of squared residuals divided by the number of samples.
        loss = (1 / X.shape[0]) * tf.math.reduce_sum((Y - model(X))**2)
    # Printing does not need to be recorded, so it happens outside the tape.
    print(loss)
    # Differentiate with respect to trainable variables only, so that a
    # non-trainable variable can never yield a `None` gradient.
    trainable_vars = model.trainable_variables
    grads = tape.gradient(loss, trainable_vars)
    opt.apply_gradients(zip(grads, trainable_vars))

tf.Tensor(1.1182059444179566, shape=(), dtype=float64)
tf.Tensor(1.1135983945033263, shape=(), dtype=float64)
tf.Tensor(1.1057307058385368, shape=(), dtype=float64)
tf.Tensor(1.096769654269908, shape=(), dtype=float64)
tf.Tensor(1.084459023932493, shape=(), dtype=float64)
tf.Tensor(1.0690279563783676, shape=(), dtype=float64)
tf.Tensor(1.0512619022795688, shape=(), dtype=float64)
tf.Tensor(1.0322302881090637, shape=(), dtype=float64)
tf.Tensor(1.0125819721396856, shape=(), dtype=float64)
tf.Tensor(0.9913663215521406, shape=(), dtype=float64)
tf.Tensor(0.9653676807090706, shape=(), dtype=float64)
tf.Tensor(0.9346387377331861, shape=(), dtype=float64)
tf.Tensor(0.901583417900799, shape=(), dtype=float64)
tf.Tensor(0.8696330587760338, shape=(), dtype=float64)
tf.Tensor(0.8358727832552605, shape=(), dtype=float64)
tf.Tensor(0.8012632687553815, shape=(), dtype=float64)
tf.Tensor(0.7656949960892767, shape=(), dtype=float64)
tf.Tensor(0.732059435220954, shape=(), dtype=float64)
tf.Tensor(0.70