In [1]:
import tensorflow as tf
import tensorflow_probability as tfp
import tensorflow_addons as tfa
import numpy as np 
from shared import weighting

In [2]:
# Seed NumPy's global RNG so the synthetic batch below is identical on every
# Restart & Run All; without it each run draws different embeddings/labels.
np.random.seed(0)

# 'balanced_weights' is the *string* "True", not a bool; it is forwarded as-is
# to weighting.get_weights(balanced=...).
# NOTE(review): confirm the helper really expects a string here.
params = {'sigma': 5.0, 'balanced_weights': "True"}
# Toy batch: 10 examples with 3-d Gaussian embeddings and two independent
# Bernoulli(0.5) label columns of shape (10, 1), all stored as float32.
embedding = np.float32(np.random.normal(0, 1, (10, 3)))
main_labels = np.float32(np.random.binomial(1, 0.5, (10, 1)))
auxiliary_labels = np.float32(np.random.binomial(1, 0.5, (10, 1)))


In [3]:
# Ask the project helper for the sample weights; only the 2nd and 3rd return
# values are kept, the first is discarded.
# NOTE(review): weighting.get_weights is not visible in this notebook --
# presumably these are per-example weights for the auxiliary == 1 and
# auxiliary == 0 groups; confirm against the helper.
_, weights_pos, weights_neg = weighting.get_weights(main_labels,
			auxiliary_labels, balanced=params['balanced_weights'])


In [4]:
# Shape check on the returned weights; the recorded output is [10, 1], i.e. one
# row per example in the toy batch.
weights_pos.shape

TensorShape([10, 1])

In [5]:
# Scale every main label by the batch mean of main_labels: rows whose label is
# 1 become mean(main_labels), rows whose label is 0 stay 0.
main_label_pos = tf.reduce_mean(main_labels) * main_labels

In [9]:
# Element-wise ratio of the scaled labels to weights_pos. divide_no_nan yields
# 0 wherever the weight is 0 instead of producing NaN/inf.
tf.math.divide_no_nan(main_label_pos, weights_pos)

<tf.Tensor: shape=(10, 1), dtype=float32, numpy=
array([[0.  ],
       [0.  ],
       [0.  ],
       [0.  ],
       [0.  ],
       [0.56],
       [0.56],
       [0.  ],
       [0.56],
       [0.56]], dtype=float32)>

In [109]:

def mmd_loss_weighted(embedding, main_labels, auxiliary_labels, params, dummy):
	r"""Computes a row-weighted MMD between the two auxiliary-label groups.

	Maximum Mean Discrepancy (MMD) is an integral probability metric: it
	measures the distance between two probability distributions. Here it is the
	distance between the distribution of the embeddings of group 0 (where
	auxiliary_labels == 0) and of group 1 (where auxiliary_labels == 1):

	MMD^2(P, Q) = || \E{\phi_sigma(x)} - \E{\phi_sigma(y)} ||^2
							= \E{ K_sigma(x, x) } + \E{ K_sigma(y, y) } - 2 \E{ K_sigma(x, y) },

	where K_sigma(x, y) = <\phi_sigma(x), \phi_sigma(y)> is a radial basis
	kernel with bandwidth sigma, i.e. exp(-||x - y||^2 / (2 sigma^2)).

	Penalizing this value encourages the two distributions to be close, which is
	roughly equivalent to an adversarial setting: when the MMD is small, an
	adversary cannot distinguish the two groups from the embedding alone, so
	predicting auxiliary_labels from the embedding is penalized.

	Each expectation is estimated in two steps: for every row, the kernel is
	averaged over the columns selected by the group mask; the per-row means are
	then combined as a weighted average over rows. The cross term is estimated
	twice (positive rows with weights_pos, negative rows with weights_neg) and
	the sum of the two estimates stands in for 2 \E{ K_sigma(x, y) }.

	Args:
		embedding: tensor with learned embedding, one row per example
		main_labels: tensor with main labels; only forwarded to
			weighting.get_weights
		auxiliary_labels: tensor with the 0/1 label defining the 2 groups; a
			rank-1 tensor is expanded to a column
		params: dict; 'sigma' (kernel bandwidth) is always read,
			'balanced_weights' is read only when dummy is falsy
		dummy: if truthy, skip weighting.get_weights and give every row weight 1
	Returns:
		Tuple of scalar tensors (mmd_val, pos_kernel_mean, neg_kernel_mean,
		neg_pos_kernel_mean, pos_neg_kernel_mean); mmd_val is clipped at 0.
	"""
	sigma = params['sigma']

	# RBF kernel from squared pairwise distances. The exponent must be divided
	# by the whole (2 * sigma^2); `/ 2 * sigma**2` would multiply by sigma^2.
	sq_dist = tfa.losses.metric_learning.pairwise_distance(embedding,
		squared=True)
	kernel_mat = tf.math.exp(-sq_dist / (2.0 * sigma**2))

	if dummy:
		# One unit weight per row, sized from the batch rather than hard-coded.
		# Rows outside a group have a masked row mean of 0 but still count in
		# the weight total, so this is not the plain unweighted MMD.
		weights_pos = tf.ones_like(kernel_mat[:, :1])
		weights_neg = tf.ones_like(kernel_mat[:, :1])
	else:
		_, weights_pos, weights_neg = weighting.get_weights(main_labels,
			auxiliary_labels, balanced=params['balanced_weights'])

	if len(auxiliary_labels.shape) == 1:
		auxiliary_labels = tf.expand_dims(auxiliary_labels, axis=-1)

	is_pos = auxiliary_labels
	is_neg = 1.0 - auxiliary_labels

	def weighted_masked_mean(row_indicator, col_indicator, row_weights):
		"""Weighted average over rows of the masked per-row kernel mean."""
		mask = tf.matmul(row_indicator, tf.transpose(col_indicator))
		row_means = tf.math.divide_no_nan(
			tf.reduce_sum(kernel_mat * mask, axis=1),
			tf.reduce_sum(mask, axis=1))
		# Flatten so the weights line up element-wise with the rank-1 row
		# means; multiplying (N,) by (N, 1) would broadcast to (N, N).
		# Assumes one weight per example -- TODO confirm against get_weights.
		flat_weights = tf.reshape(row_weights, [-1])
		return tf.math.divide_no_nan(
			tf.reduce_sum(row_means * flat_weights),
			tf.reduce_sum(flat_weights))

	pos_kernel_mean = weighted_masked_mean(is_pos, is_pos, weights_pos)
	neg_kernel_mean = weighted_masked_mean(is_neg, is_neg, weights_neg)
	neg_pos_kernel_mean = weighted_masked_mean(is_neg, is_pos, weights_neg)
	pos_neg_kernel_mean = weighted_masked_mean(is_pos, is_neg, weights_pos)

	mmd_val = pos_kernel_mean + neg_kernel_mean - (
		pos_neg_kernel_mean + neg_pos_kernel_mean)
	# The finite-sample estimate can dip below zero; clip it.
	mmd_val = tf.maximum(0.0, mmd_val)

	return mmd_val, pos_kernel_mean, neg_kernel_mean, neg_pos_kernel_mean, pos_neg_kernel_mean



In [110]:

def mmd_loss_unweighted(embedding, auxiliary_labels, params):
	r"""Computes the plain (unweighted) MMD between the two label groups.

	Maximum Mean Discrepancy (MMD) is an integral probability metric: it
	measures the distance between two probability distributions. Here the two
	distributions are the embeddings of group 0 (auxiliary_labels == 0) and the
	embeddings of group 1 (auxiliary_labels == 1):

	MMD^2(P, Q) = || \E{\phi_sigma(x)} - \E{\phi_sigma(y)} ||^2
							= \E{ K_sigma(x, x) } + \E{ K_sigma(y, y) } - 2 \E{ K_sigma(x, y) },

	where K_sigma(x, y) = <\phi_sigma(x), \phi_sigma(y)> is a radial basis
	kernel with bandwidth sigma.

	Penalizing this value pushes the two distributions together, which is
	roughly equivalent to an adversarial setting: with a small MMD an adversary
	cannot tell the groups apart from the embedding alone, so predicting
	auxiliary_labels from the embedding is penalized.

	Args:
		embedding: tensor with learned embedding
		auxiliary_labels: tensor with the 0/1 label defining the 2 groups; a
			rank-1 tensor is expanded to a column
		params: dict; only 'sigma' (kernel bandwidth) is read
	Returns:
		Tuple of scalar tensors (mmd_val, pos_kernel_mean, neg_kernel_mean,
		pos_neg_kernel_mean, pos_neg_kernel_mean). The cross term appears twice,
		giving the same 5-tuple layout as the weighted variants; mmd_val is
		clipped at 0.
	"""
	bandwidth = params['sigma']

	rbf = tfp.math.psd_kernels.ExponentiatedQuadratic(
		amplitude=1.0, length_scale=bandwidth)
	gram = rbf.matrix(embedding, embedding)

	if len(auxiliary_labels.shape) == 1:
		auxiliary_labels = tf.expand_dims(auxiliary_labels, axis=-1)

	in_pos = auxiliary_labels
	in_neg = 1.0 - auxiliary_labels

	def masked_mean(row_indicator, col_indicator):
		"""Mean of the kernel entries selected by the outer-product mask."""
		mask = tf.matmul(row_indicator, tf.transpose(col_indicator))
		return tf.math.divide_no_nan(
			tf.reduce_sum(mask * gram), tf.reduce_sum(mask))

	within_pos = masked_mean(in_pos, in_pos)
	within_neg = masked_mean(in_neg, in_neg)
	across = masked_mean(in_pos, in_neg)

	# Clip at zero: the finite-sample estimate can come out slightly negative.
	mmd_val = tf.maximum(0.0, within_pos + within_neg - 2 * across)

	return mmd_val, within_pos, within_neg, across, across



In [341]:
def mmd_loss_weighted2(embedding, main_labels, auxiliary_labels, params, dummy):
	r"""Computes a row-weighted MMD between the two auxiliary-label groups.

	Maximum Mean Discrepancy (MMD) is an integral probability metric: it
	measures the distance between two probability distributions. Here it is the
	distance between the distribution of the embeddings of group 0 (where
	auxiliary_labels == 0) and of group 1 (where auxiliary_labels == 1):

	MMD^2(P, Q) = || \E{\phi_sigma(x)} - \E{\phi_sigma(y)} ||^2
							= \E{ K_sigma(x, x) } + \E{ K_sigma(y, y) } - 2 \E{ K_sigma(x, y) },

	where K_sigma(x, y) = <\phi_sigma(x), \phi_sigma(y)> is a radial basis
	kernel with bandwidth sigma.

	Penalizing this value encourages the two distributions to be close, which is
	roughly equivalent to an adversarial setting: when the MMD is small, an
	adversary cannot distinguish the two groups from the embedding alone, so
	predicting auxiliary_labels from the embedding is penalized.

	Each expectation is estimated in two steps: for every row, the kernel is
	averaged over the columns selected by the group mask; the per-row means are
	then combined as a weighted average over rows. The cross term is estimated
	twice (positive rows with weights_pos, negative rows with weights_neg) and
	the sum of the two estimates stands in for 2 \E{ K_sigma(x, y) }.

	Args:
		embedding: tensor with learned embedding
		main_labels: tensor with main labels; only forwarded to
			weighting.get_weights
		auxiliary_labels: tensor with the 0/1 label defining the 2 groups; a
			rank-1 tensor is expanded to a column
		params: dict; 'sigma' (kernel bandwidth) is always read,
			'balanced_weights' is read only when dummy is falsy
		dummy: if truthy, skip weighting.get_weights and use the group
			indicators themselves as weights (weight 1 for rows in the group, 0
			otherwise)
	Returns:
		Tuple of scalar tensors (mmd_val, pos_kernel_mean, neg_kernel_mean,
		pos_neg_kernel_mean, neg_pos_kernel_mean); mmd_val is clipped at 0.
	"""
	sigma = params['sigma']
	kernel = tfp.math.psd_kernels.ExponentiatedQuadratic(
		amplitude=1.0, length_scale=sigma)

	kernel_mat = kernel.matrix(embedding, embedding)

	if len(auxiliary_labels.shape) == 1:
		auxiliary_labels = tf.expand_dims(auxiliary_labels, axis=-1)

	is_pos = auxiliary_labels
	is_neg = 1.0 - auxiliary_labels

	if dummy:
		weights_pos = is_pos
		weights_neg = is_neg
	else:
		_, weights_pos, weights_neg = weighting.get_weights(main_labels,
			auxiliary_labels, balanced=params['balanced_weights'])

	def weighted_masked_mean(row_indicator, col_indicator, row_weights):
		"""Weighted average over rows of the masked per-row kernel mean."""
		mask = tf.matmul(row_indicator, tf.transpose(col_indicator))
		row_means = tf.math.divide_no_nan(
			tf.reduce_sum(kernel_mat * mask, axis=1),
			tf.reduce_sum(mask, axis=1))
		# Flatten explicitly so the weights line up element-wise with the
		# rank-1 row means whatever the batch size.
		# Assumes one weight per example -- TODO confirm against get_weights.
		flat_weights = tf.reshape(row_weights, [-1])
		return tf.math.divide_no_nan(
			tf.reduce_sum(row_means * flat_weights),
			tf.reduce_sum(flat_weights))

	pos_kernel_mean = weighted_masked_mean(is_pos, is_pos, weights_pos)
	neg_kernel_mean = weighted_masked_mean(is_neg, is_neg, weights_neg)
	neg_pos_kernel_mean = weighted_masked_mean(is_neg, is_pos, weights_neg)
	pos_neg_kernel_mean = weighted_masked_mean(is_pos, is_neg, weights_pos)

	mmd_val = pos_kernel_mean + neg_kernel_mean - (
		pos_neg_kernel_mean + neg_pos_kernel_mean)
	# The finite-sample estimate can dip below zero; clip it.
	mmd_val = tf.maximum(0.0, mmd_val)

	# The last slot reports neg_pos_kernel_mean, the second cross-term estimate
	# that enters mmd_val, rather than repeating pos_neg_kernel_mean.
	return mmd_val, pos_kernel_mean, neg_kernel_mean, pos_neg_kernel_mean, neg_pos_kernel_mean


In [347]:
# Evaluate the unweighted MMD and both modes of mmd_loss_weighted2
# (dummy=True / dummy=False) on the same toy batch so the returned 5-tuples can
# be compared side by side. The calls to the older mmd_loss_weighted are left
# disabled.
uw_mmd = mmd_loss_unweighted(embedding, auxiliary_labels, params)
# wd_mmd = mmd_loss_weighted(embedding, main_labels, auxiliary_labels, params, True)
# wnd_mmd = mmd_loss_weighted(embedding, main_labels, auxiliary_labels, params, False)
w2d_mmd = mmd_loss_weighted2(embedding, main_labels, auxiliary_labels, params, True)
w2nd_mmd = mmd_loss_weighted2(embedding, main_labels, auxiliary_labels, params, False)



In [348]:
# Result of mmd_loss_unweighted: (mmd_val, pos mean, neg mean, cross mean, cross mean).
uw_mmd

(<tf.Tensor: shape=(), dtype=float32, numpy=0.045209646>,
 <tf.Tensor: shape=(), dtype=float32, numpy=0.8775899>,
 <tf.Tensor: shape=(), dtype=float32, numpy=0.9357638>,
 <tf.Tensor: shape=(), dtype=float32, numpy=0.884072>,
 <tf.Tensor: shape=(), dtype=float32, numpy=0.884072>)

In [349]:
# wd_mmd

In [350]:
# wnd_mmd

In [351]:
# Result of mmd_loss_weighted2 with dummy=True (group indicators used as weights).
w2d_mmd

(<tf.Tensor: shape=(), dtype=float32, numpy=0.045209646>,
 <tf.Tensor: shape=(), dtype=float32, numpy=0.8775899>,
 <tf.Tensor: shape=(), dtype=float32, numpy=0.9357638>,
 <tf.Tensor: shape=(), dtype=float32, numpy=0.884072>,
 <tf.Tensor: shape=(), dtype=float32, numpy=0.884072>)

In [352]:
# Result of mmd_loss_weighted2 with dummy=False (weights from weighting.get_weights).
w2nd_mmd

(<tf.Tensor: shape=(), dtype=float32, numpy=0.045209765>,
 <tf.Tensor: shape=(), dtype=float32, numpy=0.8775899>,
 <tf.Tensor: shape=(), dtype=float32, numpy=0.9357638>,
 <tf.Tensor: shape=(), dtype=float32, numpy=0.884072>,
 <tf.Tensor: shape=(), dtype=float32, numpy=0.884072>)

In [353]:
# Same helper call the weighted losses make when dummy is False, repeated here
# so that weights_pos / weights_neg exist at notebook scope.
_, weights_pos, weights_neg = weighting.get_weights(main_labels,
	auxiliary_labels, balanced=params['balanced_weights'])

In [290]:
# Ratio of two hand-copied values (comes out to ~6). 0.8775899 equals the
# pos_kernel_mean shown in the recorded loss outputs.
# NOTE(review): the origin of 5.2655387 is not recorded in this notebook --
# presumably a value from an earlier run; confirm or remove.
5.2655387/0.8775899

5.99999920236092

In [78]:
# Inspect the weights returned for the negative group (third value from get_weights).
weights_neg

<tf.Tensor: shape=(10, 1), dtype=float32, numpy=
array([[1.3333334],
       [0.       ],
       [0.       ],
       [0.       ],
       [0.       ],
       [1.3333334],
       [0.       ],
       [0.       ],
       [4.       ],
       [1.3333334]], dtype=float32)>

In [79]:
# Total of weights_pos -- the quantity the weighted losses divide by when
# normalising the positive-row kernel means.
tf.reduce_sum(weights_pos)

<tf.Tensor: shape=(), dtype=float32, numpy=12.0>

In [235]:
# Broadcasting check: a has shape (1, 2) and x has shape (2, 2), so a * x
# scales column j of x by a[0, j].
x = tf.constant([[3, 5], [7, 11]], dtype=tf.int32)
a = tf.constant([[4, 8]], dtype=tf.int32)
y = a * x

In [236]:
# Show the broadcast product a * x.
y

<tf.Tensor: shape=(2, 2), dtype=int32, numpy=
array([[12, 40],
       [28, 88]], dtype=int32)>

In [291]:
# Inspect the auxiliary labels; the recorded output shows a (10, 1) float32 column.
auxiliary_labels

array([[1.],
       [1.],
       [1.],
       [1.],
       [1.],
       [0.],
       [1.],
       [0.],
       [0.],
       [0.]], dtype=float32)

In [292]:
# Appends a trailing axis. auxiliary_labels is created as a (10, 1) column in
# the setup cell, so the result here is rank 3, shape (10, 1, 1), not a column.
tf_auxiliary_labels = tf.expand_dims(auxiliary_labels, axis=-1)

In [297]:
# The recorded (10, 10, 10) shape shows the rank-3 labels broadcasting against
# the kernel matrix instead of forming a (10, 10) group mask.
# NOTE(review): kernel_mat is not assigned at notebook scope in any visible
# cell (it is only a local inside the loss functions), so this cell appears to
# rely on leftover kernel state and would fail on a fresh kernel -- confirm.
(tf_auxiliary_labels * tf.transpose(tf_auxiliary_labels) * kernel_mat).shape

TensorShape([10, 10, 10])