In [1]:
import tensorflow as tf
import numpy as np
from tensorflow.keras import layers
import time

In [2]:
## custom-defined layers

class LocationAdd(layers.Layer):
    """Layer that shifts its input by a trainable location vector.

    Args:
        input_dim: Length of the trainable vector ``w``. It is added to the
            inputs with ``tf.add``, so it must broadcast against them.
        **kwargs: Standard ``layers.Layer`` keyword arguments (e.g. ``name``).
    """

    def __init__(self, input_dim, **kwargs):
        super().__init__(**kwargs)
        self.input_dim = input_dim
        # Register the weight through add_weight so Keras tracks it explicitly
        # in `trainable_variables`, instead of relying on attribute
        # auto-tracking of a raw tf.Variable.
        self.w = self.add_weight(
            name="w",
            shape=(input_dim,),
            initializer=tf.keras.initializers.GlorotNormal(),
            trainable=True,
        )

    def call(self, inputs):
        """Return ``inputs + w``."""
        return tf.add(inputs, self.w)

    def get_config(self):
        """Return the constructor arguments so the layer can be re-created."""
        config = super().get_config()
        config.update({"input_dim": self.input_dim})
        return config
    
class RegressionAdd(layers.Layer):
    """Layer that adds a trainable linear function of X to the last column.

    The input is split along axis 1 into the first ``input_dim - 1`` columns
    (``X``) and the final column (``z``). The output is ``[X, X @ w + z]``,
    concatenated along axis 1, where ``w`` is a trainable vector of length
    ``input_dim - 1``.

    Args:
        input_dim: Total number of input columns (covariates plus one).
        **kwargs: Standard ``layers.Layer`` keyword arguments (e.g. ``name``).
    """

    def __init__(self, input_dim, **kwargs):
        super().__init__(**kwargs)
        self.input_dim = input_dim
        # Register the weight through add_weight so Keras tracks it explicitly
        # in `trainable_variables`, instead of relying on attribute
        # auto-tracking of a raw tf.Variable.
        self.w = self.add_weight(
            name="w",
            shape=(input_dim - 1,),
            initializer=tf.keras.initializers.GlorotNormal(),
            trainable=True,
        )

    def call(self, inputs):
        """Return ``[X, X @ w + z]`` for ``inputs = [X, z]``."""
        input_X, input_z = tf.split(inputs, [self.input_dim - 1, 1], axis=1)
        # matvec yields one value per row; reshape to a column so it lines up
        # with the split-off last column.
        linear_term = tf.reshape(tf.linalg.matvec(input_X, self.w), [-1, 1])
        return tf.concat([input_X, tf.add(linear_term, input_z)], axis=1)

    def get_config(self):
        """Return the constructor arguments so the layer can be re-created."""
        config = super().get_config()
        config.update({"input_dim": self.input_dim})
        return config

In [3]:
## custom-defined constraints

from tensorflow.python.keras import backend as K
from tensorflow.python.ops import math_ops
class Constraint:
    """Base weight constraint: leaves the weights untouched, has no settings."""

    def get_config(self):
        """Return the (empty) configuration dictionary of this constraint."""
        return dict()

    def __call__(self, w):
        """Return ``w`` unchanged; subclasses override this to project ``w``."""
        return w
    
class Max1Norm(Constraint):
    """1-norm weight constraint.

    Rescales the weights so that the sum of absolute values along ``axis``
    is at most ``max_value``. Slices already within the limit are left
    unchanged up to the epsilon in the denominator.

    Args:
        max_value: Upper bound on the 1-norm along ``axis``.
        axis: Axis (or axes) along which the 1-norm is computed.
    """

    def __init__(self, max_value=2, axis=0):
        self.max_value = max_value
        self.axis = axis

    def __call__(self, w):
        # Public tf.* ops are used rather than the private
        # tensorflow.python.* modules, which are not a stable API.
        norms = tf.reduce_sum(tf.abs(w), axis=self.axis, keepdims=True)
        desired = tf.clip_by_value(norms, 0, self.max_value)
        # epsilon guards against division by zero for all-zero slices.
        return w * (desired / (tf.keras.backend.epsilon() + norms))

    def get_config(self):
        """Return the constructor arguments of this constraint."""
        return {'max_value': self.max_value, 'axis': self.axis}

In [88]:
class WganError(tf.keras.losses.Loss):
    """Wasserstein-style loss: mean of ``y_pred - y_true`` over the last axis."""

    def call(self, y_true, y_pred):
        """Return ``mean(y_pred - y_true, axis=-1)`` for each sample.

        Both arguments are expected to already be tensors of the same dtype;
        no conversion or casting is performed here.
        """
        # tf.reduce_mean replaces the private backend alias `K`, which was
        # imported in a different cell from tensorflow.python.keras.
        return tf.reduce_mean(y_pred - y_true, axis=-1)

In [191]:
class WGAN:
    """Wasserstein GAN with a fixed input size.

    The generator and discriminator are built once in ``__init__`` and depend
    only on ``dim_x``, not on the dataset passed to ``train``. Because they
    are ``tf.keras.Model`` objects stored on the instance, their weights
    persist across repeated calls to ``train``, so training can be resumed
    (for example with a smaller step size).

    Args:
        dim_x: Number of columns of the data / generator output.
        target: Which quantity the generator parameterizes; one of
            ``"location"``, ``"cov-matrix"`` or ``"regression"``.
    """

    def __init__(self, dim_x, target):
        self.dim_x = dim_x
        self.generator = self.generator_model(dim_x, target)
        self.discriminator = self.discriminator_model(dim_x)

    def generator_model(self, dim, target):
        """Build the generator for the requested estimation target.

        Args:
            dim: Input and output dimension of the generator.
            target: ``"location"`` (``LocationAdd``), ``"cov-matrix"``
                (bias-free ``Dense``) or ``"regression"`` (``RegressionAdd``).

        Returns:
            A ``tf.keras.Model`` mapping noise of shape ``(batch, dim)`` to
            generated samples.

        Raises:
            ValueError: If ``target`` is not one of the supported names.
        """
        if target == "location":
            transform = LocationAdd(dim)
        elif target == "cov-matrix":
            transform = layers.Dense(units=dim, use_bias=False)
        elif target == "regression":
            transform = RegressionAdd(dim)
        else:
            # Previously an unknown target silently produced generator=None,
            # which only failed much later inside train().
            raise ValueError(
                "Unknown target {!r}; expected 'location', 'cov-matrix' or 'regression'".format(target)
            )
        inputs = tf.keras.Input(shape=(dim,))
        return tf.keras.Model(inputs=inputs, outputs=transform(inputs))

    def discriminator_model(self, dim):
        """Build the norm-constrained discriminator.

        Two hidden ``Dense`` layers (``2*dim`` sigmoid units, then ``4*dim``
        ReLU units) use ``MaxNorm(max_value=1, axis=0)`` kernel constraints.
        The single linear output unit uses ``Max1Norm(max_value=1, axis=0)``.
        """
        inputs = tf.keras.Input(shape=(dim,))
        dense1 = layers.Dense(units=2*dim, activation=tf.keras.activations.sigmoid,
                              kernel_constraint=tf.keras.constraints.MaxNorm(max_value=1, axis=0))(inputs)
        dense2 = layers.Dense(units=4*dim, activation=tf.keras.activations.relu,
                              kernel_constraint=tf.keras.constraints.MaxNorm(max_value=1, axis=0))(dense1)
        out = layers.Dense(units=1, activation=None,
                           kernel_constraint=Max1Norm(max_value=1, axis=0))(dense2)
        return tf.keras.Model(inputs=inputs, outputs=out)

    @staticmethod
    def discriminator_loss(real_output, fake_output):
        """Discriminator loss: ``WganError`` of fake scores against real scores.

        ``WganError.call`` computes ``mean(fake_output - real_output)``.
        """
        return WganError()(real_output, fake_output)

    @staticmethod
    def generator_loss(fake_output):
        """Generator loss: ``WganError`` of zeros against the fake scores.

        ``WganError.call`` computes ``mean(0 - fake_output)``.
        """
        return WganError()(fake_output, tf.zeros_like(fake_output))

    def train(self, dataset, epochs, batch_size, step_size):
        """Train generator and discriminator simultaneously with RMSprop.

        Fresh optimizers are created on every call; the model weights are
        kept. Each epoch walks the dataset in consecutive, unshuffled slices
        of ``batch_size`` rows. A trailing incomplete batch is dropped.

        Args:
            dataset: Array with samples in rows; must support ``.shape[0]``
                and row slicing, with ``dim_x`` columns.
            epochs: Number of passes over the dataset.
            batch_size: Rows per update step (also the noise batch size).
            step_size: Learning rate for both RMSprop optimizers.

        Raises:
            ValueError: If the dataset has fewer rows than ``batch_size``,
                so that no update step could be performed.
        """
        n_batches = dataset.shape[0] // batch_size
        if n_batches == 0:
            # Without any batch the per-epoch summary below would reference
            # losses that were never computed.
            raise ValueError(
                "dataset has {} rows, fewer than batch_size={}".format(dataset.shape[0], batch_size)
            )

        self.generator_optimizer = tf.keras.optimizers.RMSprop(step_size)
        self.discriminator_optimizer = tf.keras.optimizers.RMSprop(step_size)
        for epoch in range(epochs):
            start = time.time()
            for i in range(n_batches):
                noise = tf.random.normal([batch_size, self.dim_x])
                real_batch = dataset[i*batch_size:(i+1)*batch_size]
                # Only the forward pass and the losses are recorded on the tapes.
                with tf.GradientTape() as gen_tape, tf.GradientTape() as disc_tape:
                    generated = self.generator(noise, training=True)
                    real_output = self.discriminator(real_batch, training=True)
                    fake_output = self.discriminator(generated, training=True)

                    gen_loss = WGAN.generator_loss(fake_output)
                    disc_loss = WGAN.discriminator_loss(real_output, fake_output)

                # Gradients and updates happen after the tape context exits,
                # so the tapes do not also trace the backward pass and the
                # optimizer steps.
                gradients_of_generator = gen_tape.gradient(gen_loss, self.generator.trainable_variables)
                gradients_of_discriminator = disc_tape.gradient(disc_loss, self.discriminator.trainable_variables)

                self.generator_optimizer.apply_gradients(zip(gradients_of_generator, self.generator.trainable_variables))
                self.discriminator_optimizer.apply_gradients(zip(gradients_of_discriminator, self.discriminator.trainable_variables))

            print('Time for epoch {} is {} sec'.format(epoch + 1, time.time()-start))
            A = self.generator.trainable_variables[0].numpy()
            # print(np.linalg.norm(self.discriminator.trainable_variables[0].numpy(), ord=1, axis=0))
            # print(np.matmul(A, A.T))   ## for cov-matrix
            print(A)  ## for location estimation and regression
            print("generator loss:", gen_loss.numpy(), "discriminator loss: ", disc_loss.numpy())

## Covariance matrix estimation

Reference (finding a variable by name in TensorFlow 2.0): https://stackoverflow.com/questions/56201185/how-to-find-a-variable-by-name-in-tensorflow2-0

In [185]:
# Draw N standard-normal samples in p dimensions for covariance estimation.
N, p = 1000, 4

# TensorFlow expects float32 inputs.
data = np.random.normal(size=(N, p)).astype(np.float32)
print("sample covariance: \n", np.cov(data, rowvar=False))

sample covariance: 
 [[ 1.06207285  0.03006412  0.00372015 -0.04736984]
 [ 0.03006412  1.01144075 -0.06582223  0.00390109]
 [ 0.00372015 -0.06582223  1.00023798 -0.01642981]
 [-0.04736984  0.00390109 -0.01642981  0.96230879]]


In [187]:
# Hyper-parameters for the covariance-matrix run.
epochs, batch_size, step_size = 10, 32, 0.01

# Build a fresh WGAN whose generator is a bias-free linear map, then train it.
wgan = WGAN(target="cov-matrix", dim_x=p)
wgan.train(
    data,
    epochs=epochs,
    batch_size=batch_size,
    step_size=step_size,
)

Time for epoch 1 is 8.711712121963501 sec
[[ 0.9999624  -0.01137087  0.29432845  0.07042147]
 [-0.01137087  0.4587177   0.24475011  0.3217195 ]
 [ 0.29432845  0.24475011  1.0532123  -0.68895334]
 [ 0.07042147  0.3217195  -0.68895334  1.2594512 ]]
generator loss: 0.006464634 discriminator loss:  -0.015455792
Time for epoch 2 is 8.898500204086304 sec
[[ 1.1726494  -0.06395453  0.2813566  -0.0968382 ]
 [-0.06395453  0.8004384   0.53677464  0.5472659 ]
 [ 0.2813566   0.53677464  1.0877877  -0.04575237]
 [-0.0968382   0.5472659  -0.04575237  1.1446836 ]]
generator loss: -0.06882776 discriminator loss:  -0.077590466
Time for epoch 3 is 9.164647102355957 sec
[[ 1.23648     0.29594547  0.08317912 -0.28725505]
 [ 0.29594547  1.2908907  -0.09132874 -0.06942075]
 [ 0.08317912 -0.09132874  1.3612378   0.3130958 ]
 [-0.28725505 -0.06942075  0.3130958   1.5620112 ]]
generator loss: 0.011176802 discriminator loss:  -0.016457537
Time for epoch 4 is 10.340165138244629 sec
[[ 1.1685554   0.21455106  0.1

In [None]:
# Continue training the same model with a ten times smaller step size.
wgan.train(
    data,
    epochs=10,
    batch_size=32,
    step_size=0.001,
)

## Location Estimation

In [183]:
# Clean sample: N Gaussian draws in p dimensions, centred at theta.
N, p = 100, 4
theta = np.array([1, 2, 3, 4])

data = (np.random.normal(size=(N, p)) + theta).astype(np.float32)
print("sample mean: \n", data.mean(axis=0))

# Contamination: each row is replaced by noise with probability 0.2.
z = np.random.binomial(n=1, p=0.2, size=(N, 1))
# Alternative contamination distributions:
# noise = np.random.standard_cauchy(size=(N,p))
# noise = np.random.normal(2,size=(N,p))
# noise = np.random.normal(0.5, size=(N,p))
# A = np.random.uniform(size=(p,p))
# noise = np.random.normal(size=(N,p)) @ A
noise = np.random.gumbel(size=(N, p))
data_perturbed = (data * (1 - z) + noise * z).astype(np.float32)
print("noisy sample mean: \n", data_perturbed.mean(axis=0))

sample mean: 
 [1.085083  1.848611  2.9304116 3.8409376]
noisy sample mean: 
 [0.97268575 1.6706607  2.568898   3.3398514 ]


In [121]:
# Hyper-parameters for the location-estimation run.
epochs, batch_size, step_size = 100, 64, 0.01

# Build a fresh WGAN whose generator adds a trainable location vector,
# then train it on the contaminated sample.
wgan = WGAN(target="location", dim_x=p)
wgan.train(
    data_perturbed,
    epochs=epochs,
    batch_size=batch_size,
    step_size=step_size,
)

Time for epoch 1 is 28.74838399887085 sec
[0.21047485 0.67292506 1.3881041  1.0307283 ]
generator loss: 0.24857643 discriminator loss:  -0.6197872
Time for epoch 2 is 30.155303239822388 sec
[0.73737204 0.96448165 1.4212497  1.8233352 ]
generator loss: 0.19159165 discriminator loss:  -0.45163202
Time for epoch 3 is 24.991045713424683 sec
[0.7815021 1.4306906 2.2227762 2.5838559]
generator loss: 0.13382448 discriminator loss:  -0.22719237
Time for epoch 4 is 24.098383903503418 sec
[0.9397373 1.7474004 2.8358245 3.354781 ]
generator loss: 0.12292438 discriminator loss:  -0.103685796
Time for epoch 5 is 23.920121669769287 sec
[0.55565476 1.6406938  2.337537   3.4468472 ]
generator loss: -0.34470317 discriminator loss:  -0.18769215
Time for epoch 6 is 23.45095705986023 sec
[0.7737611 1.5590043 2.340589  3.583774 ]
generator loss: -0.16158211 discriminator loss:  -0.23214094
Time for epoch 7 is 24.289983987808228 sec
[0.86226773 1.5869552  2.3822947  3.654326  ]
generator loss: -0.12482942 d

Time for epoch 57 is 22.517171144485474 sec
[0.91895235 1.7662553  2.6716306  3.5053577 ]
generator loss: -0.12530866 discriminator loss:  -0.22363658
Time for epoch 58 is 22.352845191955566 sec
[0.88844323 1.7615476  2.6376572  3.5127077 ]
generator loss: -0.070527546 discriminator loss:  -0.28027764
Time for epoch 59 is 22.756879091262817 sec
[0.9861868 1.7215385 2.6505997 3.4953706]
generator loss: -0.087481424 discriminator loss:  -0.25353926
Time for epoch 60 is 22.8980610370636 sec
[0.9345974 1.7293556 2.6621509 3.5124695]
generator loss: -0.08346833 discriminator loss:  -0.24766788
Time for epoch 61 is 22.59460973739624 sec
[0.95406777 1.7405769  2.648832   3.5138578 ]
generator loss: -0.10062018 discriminator loss:  -0.2518391
Time for epoch 62 is 22.700992107391357 sec
[0.9647227 1.7309546 2.6607928 3.4729002]
generator loss: -0.11362153 discriminator loss:  -0.2361522
Time for epoch 63 is 21.88555908203125 sec
[0.85777   1.7608651 2.6544778 3.455934 ]
generator loss: -0.08724

In [117]:
# Continue training the same model for half as many epochs at half the step size.
wgan.train(
    data_perturbed,
    epochs=epochs // 2,
    batch_size=batch_size,
    step_size=step_size / 2,
)

Time for epoch 1 is 0.9796490669250488 sec
[0.77604896 1.5157262  2.1762435  3.2488031 ]
generator loss: -0.3662386 discriminator loss:  -0.2911882
Time for epoch 2 is 0.9468278884887695 sec
[0.8036269 1.5173982 2.201513  3.273699 ]
generator loss: -0.21841952 discriminator loss:  -0.3735035
Time for epoch 3 is 0.9065437316894531 sec
[0.825439  1.4986236 2.2198682 3.2916005]
generator loss: -0.19563393 discriminator loss:  -0.39427704
Time for epoch 4 is 0.9309597015380859 sec
[0.84795   1.4758803 2.2386518 3.3100014]
generator loss: -0.29642737 discriminator loss:  -0.2552811
Time for epoch 5 is 0.9330430030822754 sec
[0.8671304 1.4601793 2.2544427 3.3258483]
generator loss: -0.17755479 discriminator loss:  -0.37688398
Time for epoch 6 is 0.8796870708465576 sec
[0.8862762 1.4557648 2.270294  3.3419034]
generator loss: -0.2164856 discriminator loss:  -0.34871703
Time for epoch 7 is 0.894524097442627 sec
[0.9046401 1.4650825 2.285781  3.357752 ]
generator loss: -0.2714462 discriminator 

In [122]:
# Squared L2 distance to theta: contaminated sample mean first, then the
# location vector learned by the generator.
naive_estimate = np.mean(data_perturbed, axis=0)
wgan_estimate = wgan.generator.trainable_variables[0].numpy()
for estimate in (naive_estimate, wgan_estimate):
    print(np.linalg.norm(estimate - theta)**2)

4.02552350725706
0.5323310450902631


## Regression

In [188]:
# Linear model y = X @ beta + Gaussian noise with standard deviation sigma.
N, p, sigma = 1000, 4, 1
beta = np.array([1, 2, 3, 4], dtype=np.float32)

data_X = np.random.normal(size=(N, p)).astype(np.float32)
# Response as a column vector so it can be appended to the design matrix.
data_y = (
    data_X @ beta + np.random.normal(scale=sigma, size=(N,)).astype(np.float32)
).reshape([-1, 1])
# Rows of data_reg are [x_1, ..., x_p, y].
data_reg = np.concatenate((data_X, data_y), axis=1)

In [189]:
# Ordinary least squares via the normal equations (X^T X) b = X^T y.
gram = data_X.T @ data_X
moment = data_X.T @ data_y
betahat = np.linalg.solve(gram, moment)
print("least square estimate: \n", betahat)

least square estimate: 
 [[0.918088 ]
 [1.9790355]
 [3.0042918]
 [3.978514 ]]


In [192]:
# Hyper-parameters for the regression run.
epochs, batch_size, step_size = 10, 32, 0.01

# The model sees [X, y] rows, hence p + 1 columns.
wgan = WGAN(target="regression", dim_x=p + 1)
wgan.train(
    data_reg,
    epochs=epochs,
    batch_size=batch_size,
    step_size=step_size,
)

Time for epoch 1 is 9.458981037139893 sec
[ 0.8137293   0.45727253 -1.116679   -0.67006314]
generator loss: -0.05943303 discriminator loss:  -0.22450735
Time for epoch 2 is 9.441058874130249 sec
[ 1.0455842  0.7334564 -1.3095939 -0.5707018]
generator loss: -0.011014687 discriminator loss:  -0.2832605
Time for epoch 3 is 9.305871963500977 sec
[ 1.2688947   0.9833873  -1.432649   -0.35507995]
generator loss: 0.07638909 discriminator loss:  -0.2896044
Time for epoch 4 is 9.490269899368286 sec
[ 1.5105995   1.2216096  -1.4756104  -0.10068965]
generator loss: 0.15273888 discriminator loss:  -0.38662213
Time for epoch 5 is 9.741095304489136 sec
[ 1.7713228   1.4890993  -1.4523885   0.11344511]
generator loss: 0.18510675 discriminator loss:  -0.4062869
Time for epoch 6 is 10.159914016723633 sec
[ 2.0671592   1.7392192  -1.3528737   0.36751267]
generator loss: 0.2995221 discriminator loss:  -0.43001193
Time for epoch 7 is 9.508867979049683 sec
[ 2.3325555  2.0300498 -1.1113856  0.6464563]
gene

In [None]:
# Continue training the regression model with a smaller step size.
# The regression WGAN was built with dim_x = p + 1, so it must be fed the
# (N, p + 1) array `data_reg`; `data` only has p columns.
wgan.train(data_reg, epochs=10, batch_size=32, step_size=0.001)

## Miscellaneous

In [None]:
# correct test version of model with self defined layers
# def build_model():
#     a = tf.keras.Input(shape=(4,))
#     out = LocationAdd(input_dim=4)(a+5)
#     model = tf.keras.Model(inputs=a, outputs=out)
#     return model
# model = build_model()
# model2 = build_model()
# print(model.trainable_variables)
# print(model2.trainable_variables)
# model.compile(optimizer='rmsprop', loss=tf.keras.losses.MeanSquaredError())
# model.fit(x=data,y=data, batch_size=1, epochs=100)
# print(model.trainable_variables)
# print(model2.trainable_variables)

## tf.keras.layers.add can make variables not trainable, below is not correct
# a = tf.keras.Input(shape=(4,))
# b = tf.Variable(initial_value=tf.random_normal_initializer()(shape=(4,)), trainable=True)
# out = tf.keras.layers.add([a+5,b])