## Lipschitz constant estimation (TensorFlow 2)

In [3]:
# for a single probability
import numpy as np
import tensorflow as tf
print(tf.__version__)

embedding_dim, hidden_dim = 2, 2

# Fixed (non-trainable) float32 tensors, each filled with standard-normal noise.
# Every "*_hat" tensor has the same shape as the tensor it is paired with.
# The draws happen strictly in the order: delta_e, A, A_hat, b, b_hat, C, C_hat, d, d_hat.
delta_e = tf.constant(np.random.normal(size=(2 * embedding_dim, 1)), tf.float32)
A, A_hat = (
    tf.constant(np.random.normal(size=(hidden_dim, 2 * embedding_dim)), tf.float32),
    tf.constant(np.random.normal(size=(hidden_dim, 2 * embedding_dim)), tf.float32),
)
b, b_hat = (
    tf.constant(np.random.normal(size=(hidden_dim, 1)), tf.float32),
    tf.constant(np.random.normal(size=(hidden_dim, 1)), tf.float32),
)
C, C_hat = (
    tf.constant(np.random.normal(size=(1, hidden_dim)), tf.float32),
    tf.constant(np.random.normal(size=(1, hidden_dim)), tf.float32),
)
d, d_hat = (
    tf.constant(np.random.normal(size=(1, 1)), tf.float32),
    tf.constant(np.random.normal(size=(1, 1)), tf.float32),
)
# Debug aid (disabled): print(delta_e, A, A_hat, b, b_hat, C, C_hat, d, d_hat)

2.1.0


In [83]:
@tf.function
def compute_q_derivative(beta):
    """Differentiate the quantity ``x`` with respect to ``beta``.

    Args:
        beta: tensor to differentiate with respect to. It is watched
            explicitly, so a plain ``tf.constant`` is accepted.

    Returns:
        The gradient of ``x`` with respect to ``beta``, as produced by
        ``tf.GradientTape.gradient``.

    Note:
        ``x`` is currently the simplified stand-in ``beta * d_hat``; the full
        expression is kept below as a comment.
    """
    with tf.GradientTape() as g:
        # Constants are not tracked automatically, so watch beta by hand.
        g.watch(beta)
        x = beta * d_hat
        #x = (C + beta * C_hat) @ tf.nn.relu((A + beta * A_hat) @ delta_e + b + beta * b_hat) + d + beta * d_hat
    # Ask for the gradient only after leaving the tape context: inside it the
    # tape would also record the gradient computation itself, which is only
    # useful for higher-order derivatives.
    return g.gradient(x, beta)

# Specialise the tf.function into a concrete execution graph. The example
# argument appears to matter only through its type (it looks like only types matter).
# NOTE: this rebinds `compute_q_derivative` to the concrete function.
compute_q_derivative = compute_q_derivative.get_concrete_function(tf.constant(0.))

# Evaluate the derivative at beta = 1 and beta = -1, one result per line.
print(
    compute_q_derivative(tf.constant(1.0)),
    compute_q_derivative(tf.constant(-1.0)),
    sep="\n",
)

# Pull out the traced graph and its protobuf (GraphDef) representation.
graph = compute_q_derivative.graph
graphdef = graph.as_graph_def()
print(type(graphdef))

# Further inspection helpers, left disabled because they are overly verbose:
# print(help(graph.outer_graph))
# print(graphdef)
# print(graph.as_default())
# print(graph.as_graph_element(compute_q_derivative))
# print(dir(graph))

tf.Tensor(0.07519977, shape=(), dtype=float32)
tf.Tensor(0.07519977, shape=(), dtype=float32)
<class 'tensorflow.core.framework.graph_pb2.GraphDef'>


In [135]:
# Let's try with sympy

import numpy as np
import sympy

# Layer sizes for the symbolic experiment (these rebind the earlier, smaller values).
embedding_dim, hidden_dim = 10, 12

def random_matr(*shape):
    """Build a sympy Matrix of the given shape from standard-normal samples.

    The samples are rounded to one decimal place before conversion.
    """
    samples = np.random.normal(size=shape)
    rounded = samples.round(1)
    return sympy.Matrix(rounded)

# Random parameters used by q(); each "*_hat" matrix has the same shape as
# the matrix it is paired with. Draw order: A, A_hat, b, b_hat, ..., f, f_hat.
A, A_hat = (
    random_matr(hidden_dim, 2 * embedding_dim),
    random_matr(hidden_dim, 2 * embedding_dim),
)
b, b_hat = random_matr(hidden_dim, 1), random_matr(hidden_dim, 1)
C, C_hat = (
    random_matr(hidden_dim, hidden_dim),
    random_matr(hidden_dim, hidden_dim),
)
d, d_hat = random_matr(hidden_dim, 1), random_matr(hidden_dim, 1)
E, E_hat = random_matr(1, hidden_dim), random_matr(1, hidden_dim)
f, f_hat = random_matr(1, 1), random_matr(1, 1)

# The single symbolic parameter.
params = sympy.symbols(["β"])
(beta,) = params

class relu(sympy.Function):
    """Rectifier ``max(x, 0)`` as a sympy function.

    Applied element-wise to matrix arguments; a scalar expression is
    rectified directly.
    """

    @classmethod
    def eval(cls, x):
        """Rewrite ``relu(x)`` eagerly in terms of ``sympy.Max``.

        Args:
            x: a sympy matrix (anything exposing ``applyfunc``) or a scalar
                expression.

        Returns:
            A matrix of ``Max(elem, 0)`` entries for matrix input, or
            ``Max(x, 0)`` for scalar input.
        """
        def rectify(elem):
            return sympy.Max(elem, 0)

        if hasattr(x, "applyfunc"):
            return x.applyfunc(rectify)
        # Scalar input has no applyfunc; rectify it directly instead of
        # failing with AttributeError.
        return rectify(x)

    def _eval_is_real(self):
        return True

# Two random input columns of height 2 * embedding_dim, rounded to one decimal
# (delta_e1 is drawn first, then delta_e2).
delta_e1, delta_e2 = (
    np.random.normal(size=(2 * embedding_dim, 1)).round(1) for _ in range(2)
)
# Both inputs share their first `embedding_dim` rows; only the rest differs.
delta_e2[:embedding_dim] = delta_e1[:embedding_dim]
# Convert to sympy matrices for the symbolic computation.
delta_e1, delta_e2 = sympy.Matrix(delta_e1), sympy.Matrix(delta_e2)

def q(beta, x):
    """Evaluate the two-hidden-layer network at input ``x``.

    Every weight and bias is shifted by ``beta`` times its ``*_hat``
    counterpart: two relu layers (A/b, then C/d) followed by a linear
    read-out (E/f).
    """
    first_hidden = relu((A + beta * A_hat) @ x + b + beta * b_hat)
    second_hidden = relu((C + beta * C_hat) @ first_hidden + d + beta * d_hat)
    return (E + beta * E_hat) @ second_hidden + f + beta * f_hat

def to_scalar(x):
    """Unwrap the single entry of a 1x1 matrix-like object.

    Fails with AssertionError when ``x.shape`` is not ``(1, 1)``.
    """
    expected_shape = (1, 1)
    assert x.shape == expected_shape
    only_entry = x[0, 0]
    return only_entry
    
# Difference of the network outputs on the two inputs, as a scalar expression in beta.
logit = to_scalar(q(beta, delta_e1) - q(beta, delta_e2))
print(logit)

# Its symbolic derivative with respect to beta.
dlogit_dbeta = sympy.diff(logit, beta)
print(dlogit_dbeta)

-0.3*β*Max(0, -0.6*β*Max(0, -4.15*β - 6.56) - 0.8*β + (0.5 - 0.3*β)*Max(0, 2.26*β + 0.81) + (0.6 - 1.7*β)*Max(0, 7.07*β - 5.96) + (0.8 - 0.7*β)*Max(0, 0.73 - 4.06*β) + (0.9 - 0.3*β)*Max(0, -0.56*β - 1.92) + (-2.3*β - 0.6)*Max(0, 10.03*β - 5.86) + (-1.4*β - 1.3)*Max(0, -0.72*β - 5.66) + (-1.1*β - 1.6)*Max(0, -1.22*β - 3.13) + (0.5*β + 0.3)*Max(0, 6.74 - 5.63*β) + (0.7*β + 0.2)*Max(0, 3.1*β - 2.56) + (0.8*β - 1.1)*Max(0, 1.46 - 0.150000000000001*β) - 0.4*Max(0, -5.19*β - 2.0) + 2.2) + 0.3*β*Max(0, -0.6*β*Max(0, 1.78*β - 1.42) - 0.8*β + (0.5 - 0.3*β)*Max(0, -4.62*β - 1.21) + (0.6 - 1.7*β)*Max(0, 1.15*β - 6.39) + (0.8 - 0.7*β)*Max(0, 3.53 - 5.51*β) + (0.9 - 0.3*β)*Max(0, -7.26*β - 4.51) + (-2.3*β - 0.6)*Max(0, 6.35*β + 2.18) + (-1.4*β - 1.3)*Max(0, 2.91*β - 3.09) + (-1.1*β - 1.6)*Max(0, -2.11*β - 0.820000000000001) + (0.5*β + 0.3)*Max(0, 7.47 - 6.49*β) + (0.7*β + 0.2)*Max(0, 3.79*β - 1.13) + (0.8*β - 1.1)*Max(0, -6.33*β - 7.1) - 0.4*Max(0, -3.38*β - 5.46) + 2.2) + (0.1 - 1.6*β)*Max(0, -0.3

In [140]:
def estimate_upper_bound(expr, param_name, abs_param_bound):
    """Recursively bound ``|expr|`` from above, given ``|param| <= abs_param_bound``.

    Nodes are dispatched on their class name:

    * ``Add``: sum of the argument bounds (triangle inequality).
    * ``Mul``: product of the argument bounds.
    * numeric constants (``Float``, ``Integer``, ``Rational``, ``Zero``,
      ``One``, ``Half``, ``NegativeOne``): their absolute value.
    * ``Symbol``: ``abs_param_bound`` if it is the parameter.
    * ``Max`` of zero and one other expression: the bound of that expression.
    * ``Heaviside``: 1.

    Args:
        expr: expression tree node (sympy-like: exposes ``args``).
        param_name: string form of the only symbol allowed in ``expr``.
        abs_param_bound: upper bound on the absolute value of that symbol.

    Returns:
        A number that is an upper bound on ``|expr|``.

    Raises:
        RuntimeError: on an unknown symbol, a ``Max`` that is not
            "zero versus one expression", or an unsupported node type.
    """
    kind = type(expr).__name__

    if kind in ("Add", "Mul"):
        arg_bounds = [
            estimate_upper_bound(arg, param_name, abs_param_bound)
            for arg in expr.args
        ]
        return sum(arg_bounds) if kind == "Add" else np.prod(arg_bounds)

    if kind in ("Float", "Integer", "Rational", "Zero", "One", "Half"):
        return np.abs(float(expr))
    if kind == "NegativeOne":
        return 1.0

    if kind == "Symbol":
        if str(expr) == param_name:
            return abs_param_bound
        raise RuntimeError(f"Unexpected symbol {expr}")

    if kind == "Max":
        # Only the relu pattern max(0, y) is supported, where |max(0, y)| <= |y|.
        # The zero may sit in either argument slot.
        non_zero_args = [arg for arg in expr.args if str(arg) != "0"]
        if len(expr.args) == 2 and len(non_zero_args) == 1:
            return estimate_upper_bound(non_zero_args[0], param_name, abs_param_bound)
        raise RuntimeError(f"Unexpected Max expression {expr}")

    if kind == "Heaviside":
        # TODO better estimate of the difference of Heavisides
        return 1.0

    raise RuntimeError(f"Unexpected type {type(expr)}")

# Crude bounds on |logit| and |d logit / d beta| for |β| <= 10, one per line.
print(
    estimate_upper_bound(logit, "β", 10),
    estimate_upper_bound(dlogit_dbeta, "β", 10),
    sep="\n",
)

# Sketch: a polynomial in three undefined functions of beta, and its derivative.
poly_params = [sympy.Function(f"p{i}") for i in range(3)]
p0, p1, p2 = poly_params
poly = 1 - p0(beta)*p1(beta) + 2*p1(beta)*p2(beta)
dpoly_dbeta = sympy.diff(poly, beta)
print(dpoly_dbeta)

# TODO
#  compute a pool of bounds
#  construct kappa
#  substitute the p_i with functions
#  get the symbolic derivative
#  recursive estimation for the symbolic derivative
#    on Derivative(p_i) -> substitute the bound
#    on a bare p_i      -> substitute the bound

# If necessary, take the second derivative?

897636.966
246301.66770000002
-p0(β)*Derivative(p1(β), β) - p1(β)*Derivative(p0(β), β) + 2*p1(β)*Derivative(p2(β), β) + 2*p2(β)*Derivative(p1(β), β)
