## Lipschitz constant estimation (TensorFlow 2)

In [3]:
# for a single probability
import numpy as np
import tensorflow as tf
print(tf.__version__)

embedding_dim = 2
hidden_dim = 2

# Fixed seed and a dedicated generator, so that "Restart Kernel -> Run All"
# always produces the same random parameters (and therefore the same outputs).
SEED = 0
rng = np.random.RandomState(SEED)


def _normal_constant(shape):
    """Return a float32 constant tensor of `shape` with standard-normal entries drawn from `rng`."""
    return tf.constant(rng.normal(size=shape), dtype=tf.float32)


# constant tensors
# Each `*_hat` tensor has the same shape as its un-hatted counterpart; the
# hatted tensors are the ones that get scaled by `beta` further down.
delta_e = _normal_constant((2 * embedding_dim, 1))
A = _normal_constant((hidden_dim, 2 * embedding_dim))
A_hat = _normal_constant(A.shape)
b = _normal_constant((hidden_dim, 1))
b_hat = _normal_constant(b.shape)
C = _normal_constant((1, hidden_dim))
C_hat = _normal_constant(C.shape)
d = _normal_constant((1, 1))
d_hat = _normal_constant(d.shape)

2.1.0


In [83]:
@tf.function
def compute_q_derivative(beta):
    """Return the derivative of the output `x` with respect to `beta`.

    `x` is currently the simplified expression `beta * d_hat`; the full
    two-layer expression is kept below, commented out.

    Args:
        beta: tensor the output is differentiated against.

    Returns:
        The result of `tf.GradientTape.gradient(x, beta)`.
    """
    with tf.GradientTape() as tape:
        # `beta` arrives as a plain tensor (callers pass `tf.constant`), so the
        # tape has to be told explicitly to track it.
        tape.watch(beta)
        output = beta * d_hat
        # Full expression, currently disabled:
        # output = (C + beta * C_hat) @ tf.nn.relu((A + beta * A_hat) @ delta_e + b + beta * b_hat) + d + beta * d_hat
    return tape.gradient(output, beta)

# Trace the function once to get a concrete execution graph. The example input
# seems to matter only through its type, not its value.
# NOTE: this rebinds `compute_q_derivative` to the traced concrete function.
compute_q_derivative = compute_q_derivative.get_concrete_function(tf.constant(0.))

# Evaluate the derivative on both sides of zero (one result per line).
print(*(compute_q_derivative(tf.constant(probe)) for probe in (1.0, -1.0)), sep="\n")

# Keep handles on the traced graph and its GraphDef for inspection.
# Printing `graphdef` itself is overly verbose, so only its type is shown.
graph = compute_q_derivative.graph
graphdef = graph.as_graph_def()
print(type(graphdef))

tf.Tensor(0.07519977, shape=(), dtype=float32)
tf.Tensor(0.07519977, shape=(), dtype=float32)
<class 'tensorflow.core.framework.graph_pb2.GraphDef'>


In [37]:
# Let's try with sympy

import numpy as np
import sympy

embedding_dim = 2
hidden_dim = 2

# Fixed seed and a dedicated generator, so that "Restart Kernel -> Run All"
# always produces the same random parameters (and therefore the same outputs).
SEED = 0
rng = np.random.RandomState(SEED)


def _normal_matrix(shape):
    """Return a sympy Matrix of `shape` with standard-normal entries drawn from `rng`."""
    return sympy.Matrix(rng.normal(size=shape))


# NOTE: these names are the same ones the TensorFlow cells bind to tensors.
# After running this cell they refer to sympy matrices, so the TensorFlow
# constants cell must be re-run before re-tracing the TensorFlow function.
delta_e = _normal_matrix((2 * embedding_dim, 1))
A = _normal_matrix((hidden_dim, 2 * embedding_dim))
A_hat = _normal_matrix(A.shape)
b = _normal_matrix((hidden_dim, 1))
b_hat = _normal_matrix(b.shape)
C = _normal_matrix((1, hidden_dim))
C_hat = _normal_matrix(C.shape)
d = _normal_matrix((1, 1))
d_hat = _normal_matrix(d.shape)

# `beta` is the single symbolic parameter everything is differentiated against.
params = sympy.symbols(["beta"])
beta = params[0]

class relu(sympy.Function):
    """Symbolic ReLU: `Max(x, 0)`, applied element-wise when given a matrix.

    Calling `relu(expr)` returns the rewritten expression built from
    `sympy.Max`, so the result can be differentiated with `.diff`.
    """

    @classmethod
    def eval(cls, x):
        """Rewrite `relu(x)` in terms of `sympy.Max`.

        Args:
            x: a matrix-like object exposing `applyfunc`, or a scalar
               expression.

        Returns:
            The same kind of object with every entry `e` replaced by
            `Max(e, 0)`; for a scalar, `Max(x, 0)`.
        """
        def clamp(elem):
            return sympy.Max(elem, 0)

        # Matrices are clamped entry by entry; anything without `applyfunc`
        # is treated as a scalar instead of raising AttributeError.
        if hasattr(x, "applyfunc"):
            return x.applyfunc(clamp)
        return clamp(x)

    def _eval_is_real(self):
        # Only consulted for an unevaluated `relu(...)` instance; presumably
        # never reached, since `eval` always returns a rewritten expression.
        return True

# Simpler expressions that are handy when debugging the pipeline:
#   x = beta * d_hat
#   x = relu(beta * d_hat)

# Hidden layer: affine map with beta-perturbed weights and bias, then ReLU.
hidden_activation = relu((A + beta * A_hat) @ delta_e + b + beta * b_hat)

# Output layer, perturbed in the same way; `x` is a 1x1 matrix in `beta`.
x = (C + beta * C_hat) @ hidden_activation + d + beta * d_hat

# Symbolic derivative of the output with respect to beta.
dx_dbeta = x.diff(beta)
print(dx_dbeta)

Matrix([[-1.68016425229171*(0.652868932416494 - 0.335767740653988*beta)*Heaviside(1.17150940479832 - 1.68016425229171*beta) + 0.472485873134512*(-1.02506115133706*beta - 0.761158444748154)*Heaviside(0.472485873134512*beta - 0.860566554393897) - 0.335767740653988*Max(0, 1.17150940479832 - 1.68016425229171*beta) - 1.02506115133706*Max(0, 0.472485873134512*beta - 0.860566554393897) - 0.0640979205763398]])
