In [1]:
import tensorflow as tf
import numpy as np
from tensorflow.keras import layers
import time
from collections import defaultdict
import itertools

In [2]:
## custom-defined layers

class LocationAdd(layers.Layer):
    """Layer that adds a trainable offset vector to every input row.

    The single weight ``w`` has shape ``(input_dim,)``; the output is
    ``inputs + w`` with ``w`` broadcast across the batch dimension.

    Args:
        input_dim: Size of the last axis of the inputs (and of ``w``).
        **kwargs: Standard ``Layer`` keyword arguments (e.g. ``name``).
    """

    def __init__(self, input_dim, **kwargs):
        super().__init__(**kwargs)
        # Create the weight through add_weight so the layer itself registers it as a
        # trainable weight instead of relying on attribute auto-tracking of a raw
        # tf.Variable, which is not guaranteed across Keras versions.
        self.w = self.add_weight(
            name="w",
            shape=(input_dim,),
            initializer=tf.keras.initializers.GlorotNormal(),
            trainable=True,
        )

    def call(self, inputs):
        """Return ``inputs`` shifted by the learned offset ``w``."""
        return tf.add(inputs, self.w)
    
class RegressionAdd(layers.Layer):
    """Layer that adds a learned linear function of the covariates to the last column.

    Each input row is split into its first ``input_dim - 1`` columns (``X``) and its
    last column (``z``). The output keeps ``X`` unchanged and replaces the last
    column by ``X @ w + z``, where ``w`` is a trainable vector of length
    ``input_dim - 1``.

    Args:
        input_dim: Total number of input columns (covariates plus one).
        **kwargs: Standard ``Layer`` keyword arguments (e.g. ``name``).
    """

    def __init__(self, input_dim, **kwargs):
        super().__init__(**kwargs)
        self.input_dim = input_dim
        # Create the weight through add_weight so the layer itself registers it as a
        # trainable weight instead of relying on attribute auto-tracking of a raw
        # tf.Variable, which is not guaranteed across Keras versions.
        self.w = self.add_weight(
            name="w",
            shape=(input_dim - 1,),
            initializer=tf.keras.initializers.GlorotNormal(),
            trainable=True,
        )

    def call(self, inputs):
        """Return ``[X, X @ w + z]`` for inputs laid out as ``[X, z]`` along axis 1."""
        input_X, input_z = tf.split(inputs, [self.input_dim - 1, 1], axis=1)
        # matvec yields one value per row; reshape to a column so it lines up with z.
        linear_part = tf.reshape(tf.linalg.matvec(input_X, self.w), [-1, 1])
        return tf.concat([input_X, tf.add(linear_part, input_z)], axis=1)

In [3]:
## custom-defined constraints

from tensorflow.python.keras import backend as K
from tensorflow.python.ops import math_ops
class Constraint:
    """Base class for weight constraints; on its own it is the identity constraint."""

    def __call__(self, w):
        """Return the weights ``w`` unchanged."""
        return w

    def get_config(self):
        """Return the constraint's configuration; the base class has no parameters."""
        return dict()
    
class Max1Norm(Constraint):
    """Max-L1-norm weight constraint.

    Rescales the weights so that their L1 norm (sum of absolute values), taken
    along ``axis``, is at most ``max_value``. Slices whose norm is already within
    the bound are left (up to the epsilon fuzz factor) unchanged.

    Args:
        max_value: Upper bound on the L1 norm of each weight slice.
        axis: Axis (or axes) along which the L1 norm is computed.
    """

    def __init__(self, max_value=2, axis=0):
        self.max_value = max_value
        self.axis = axis

    def __call__(self, w):
        """Return ``w`` rescaled so each L1 norm along ``axis`` is <= ``max_value``."""
        # Public tf.* ops are used here rather than the private tensorflow.python
        # internals (math_ops / private keras backend); the computation is the same.
        norms = tf.reduce_sum(tf.abs(w), axis=self.axis, keepdims=True)
        desired = tf.clip_by_value(norms, 0, self.max_value)
        # epsilon keeps the division finite when a slice is all zeros.
        return w * (desired / (tf.keras.backend.epsilon() + norms))

    def get_config(self):
        """Return the constructor arguments as a dict."""
        return {'max_value': self.max_value, 'axis': self.axis}

In [4]:
class WganError(tf.keras.losses.Loss):
    """Wasserstein-style loss: the mean of ``y_pred - y_true`` over the last axis."""

    def call(self, y_true, y_pred):
        """Return ``mean(y_pred - y_true)`` along the last axis (one value per row)."""
        # Public tf.reduce_mean replaces the private keras-backend mean; for
        # floating-point inputs the two compute the same thing.
        return tf.reduce_mean(y_pred - y_true, axis=-1)

In [5]:
class WGAN:
    """Wasserstein-GAN style estimator made of two fixed-size Keras models.

    The generator transforms standard-normal noise through a small parametric layer
    chosen by ``target``; that layer's weights are the quantity being estimated.
    The discriminator is a small MLP whose kernels are norm-constrained.

    Both models are built once in ``__init__`` rather than inside ``train``, so the
    input width is fixed at ``dim_x`` and the weights persist across repeated calls
    to ``train`` on the same instance.
    """

    # Generator variants understood by generator_model.
    _TARGETS = ("location", "cov-matrix", "regression")

    def __init__(self, dim_x, target):
        """Build the generator and discriminator.

        Args:
            dim_x: Number of columns of the data (input width of both models).
            target: One of "location", "cov-matrix" or "regression".

        Raises:
            ValueError: If ``target`` is not a supported generator variant.
        """
        self.dim_x = dim_x
        self.target = target
        self.generator = self.generator_model(dim_x, target)
        self.discriminator = self.discriminator_model(dim_x)

    def generator_model(self, dim, target):
        """Return the generator model for ``target``.

        * "location":   ``LocationAdd`` (adds a trainable vector to the noise).
        * "cov-matrix": bias-free ``Dense(dim)`` (a trainable linear map).
        * "regression": ``RegressionAdd`` (adds a linear function of the first
          ``dim - 1`` columns to the last column).

        Raises:
            ValueError: If ``target`` is none of the above. Previously this case
                silently returned ``None`` and failed later, in ``train``.
        """
        # Validate first so an invalid target fails before any model is built.
        if target not in WGAN._TARGETS:
            raise ValueError(
                "unknown target {!r}; expected one of {}".format(target, WGAN._TARGETS)
            )

        inputs = tf.keras.Input(shape=(dim,))
        if target == "location":
            out = LocationAdd(dim)(inputs)
        elif target == "cov-matrix":
            out = layers.Dense(units=dim, use_bias=False)(inputs)
        else:  # "regression"
            out = RegressionAdd(dim)(inputs)
        return tf.keras.Model(inputs=inputs, outputs=out)

    def discriminator_model(self, dim):
        """Return the discriminator: sigmoid(dim//2) -> relu(dim//4) -> linear(1).

        The first kernel is constrained with Keras ``MaxNorm(10)``; the other two
        with ``Max1Norm(1)``. Note that ``dim < 4`` makes ``dim // 4`` equal to 0,
        i.e. a zero-width hidden layer.
        """
        inputs = tf.keras.Input(shape=(dim,))
        dense1 = layers.Dense(units=dim//2, activation=tf.keras.activations.sigmoid,
                              kernel_constraint=tf.keras.constraints.MaxNorm(max_value=10, axis=0))(inputs)
        dense2 = layers.Dense(units=dim//4, activation=tf.keras.activations.relu,
                              kernel_constraint=Max1Norm(max_value=1, axis=0))(dense1)
        out = layers.Dense(units=1, activation=None,
                           kernel_constraint=Max1Norm(max_value=1,axis=0))(dense2)
        model = tf.keras.Model(inputs=inputs, outputs=out)
        return model

    @staticmethod
    def discriminator_loss(real_output, fake_output):
        """Discriminator loss: ``WganError`` of fake scores against real scores.

        With ``WganError`` computing ``mean(y_pred - y_true)`` this is
        ``mean(fake_output - real_output)``, so minimising it pushes real scores
        up and fake scores down.
        """
        total_loss = WganError()(real_output, fake_output)
        return total_loss

    @staticmethod
    def generator_loss(fake_output):
        """Generator loss: ``WganError`` with zeros as prediction, i.e. ``-mean(fake_output)``."""
        loss = WganError()(fake_output, tf.zeros_like(fake_output))
        return loss

    def train(self, dataset, epochs, batch_size, step_size):
        """Train generator and discriminator with simultaneous RMSprop updates.

        Args:
            dataset: 2-D array of real samples (must expose ``.shape``).
            epochs: Number of passes; each pass runs ``len(dataset) // batch_size`` steps.
            batch_size: Number of real rows and noise rows per step.
            step_size: Learning rate for both RMSprop optimizers.

        Raises:
            ValueError: If ``dataset`` has fewer rows than ``batch_size``. No training
                step could run in that case, and the loss report below would
                otherwise fail with an ``UnboundLocalError``.

        Prints the losses of the last step of each of the final 10 epochs.
        """
        steps_per_epoch = dataset.shape[0] // batch_size
        if steps_per_epoch < 1:
            raise ValueError(
                "dataset has {} rows, fewer than batch_size={}; no training step can run".format(
                    dataset.shape[0], batch_size
                )
            )

        self.generator_optimizer = tf.keras.optimizers.RMSprop(step_size)
        self.discriminator_optimizer = tf.keras.optimizers.RMSprop(step_size)
        # NOTE(review): iterating `data` restarts from the first row every epoch and
        # nothing shuffles it, so each epoch sees the same first
        # steps_per_epoch * batch_size rows in the same order; any trailing
        # N % batch_size rows are never used. Kept as-is to preserve behaviour.
        data = tf.data.Dataset.from_tensor_slices(dataset).repeat().batch(batch_size).prefetch(tf.data.experimental.AUTOTUNE)
        for epoch in range(epochs):
            # Fresh standard-normal noise every epoch, cut into full batches only;
            # zip() below therefore stops after steps_per_epoch steps.
            noises = (
                tf.data.Dataset.from_tensor_slices(tf.random.normal(dataset.shape))
                .batch(batch_size)
                .take(steps_per_epoch)
                .prefetch(tf.data.experimental.AUTOTUNE)
            )
            for batch, noise in zip(data, noises):
                with tf.GradientTape() as gen_tape, tf.GradientTape() as disc_tape:
                    generated = self.generator(noise, training=True)
                    real_output = self.discriminator(batch, training=True)
                    fake_output = self.discriminator(generated, training=True)

                    gen_loss = WGAN.generator_loss(fake_output)
                    disc_loss = WGAN.discriminator_loss(real_output, fake_output)

                # Gradients are taken after leaving the tape context so the tapes do
                # not record the backward pass itself. Both gradients are computed
                # before either update is applied (simultaneous update).
                gradients_of_generator = gen_tape.gradient(gen_loss, self.generator.trainable_variables)
                gradients_of_discriminator = disc_tape.gradient(disc_loss, self.discriminator.trainable_variables)

                self.generator_optimizer.apply_gradients(zip(gradients_of_generator, self.generator.trainable_variables))
                self.discriminator_optimizer.apply_gradients(zip(gradients_of_discriminator, self.discriminator.trainable_variables))

            if epoch >= epochs - 10:
                print("generator loss: {:.4f}, discriminator loss: {:.4f}".format(gen_loss.numpy(),disc_loss.numpy()))

## Covariance matrix estimation

Reference (how to find a variable by name in TensorFlow 2.0): https://stackoverflow.com/questions/56201185/how-to-find-a-variable-by-name-in-tensorflow2-0

In [36]:
def simulate_cov(N, p, model, data=None, true_cov=None):
    """Fit a "cov-matrix" WGAN once and return its spectral-norm covariance error.

    Args:
        N: Sample size; only used to pick the batch size.
        p: Data dimension (input width of the WGAN).
        model: Contamination model label. Not used by the computation; kept so
            existing calls keep working.
        data: Float32 array of contaminated samples to train on. When omitted,
            the notebook-level global ``data_perturbed`` is used, as before.
        true_cov: True covariance matrix to compare against. When omitted, the
            notebook-level global ``cov`` is used, as before.

    Returns:
        ``||Ahat.T @ Ahat - true_cov||_2`` where ``Ahat`` is the generator's first
        trainable variable (the kernel of its bias-free Dense layer).
    """
    # Fall back to the notebook globals so existing three-argument calls behave
    # exactly as before; passing the arrays explicitly avoids the hidden state.
    if data is None:
        data = data_perturbed
    if true_cov is None:
        true_cov = cov

    if N <= 100:
        batch_size = 32
    elif N <= 5000:
        batch_size = 128
    else:
        batch_size = 256
    epochs = 200
    step_size = 0.005

    wgan = WGAN(dim_x=p, target="cov-matrix")
    wgan.train(data, epochs=epochs, batch_size=batch_size, step_size=step_size)

    Ahat = wgan.generator.trainable_variables[0].numpy()
    wgan_error = np.linalg.norm(Ahat.T@Ahat-true_cov, ord=2)
    return wgan_error

In [39]:
# Covariance-estimation experiment. For every (N, p, contamination model) combination:
# draw contaminated data, report the sample-covariance error, then fit the WGAN
# 10 times and report the mean/std of its error.
# NOTE(review): unlike the location and regression cells, no np.random.seed is set
# here, so the simulated data differs from run to run.
Ns = [100]
ps = [10, 20, 40]
models = ["Cauchy", "Normal", "Gumbel"]
res_sample_cov = dict()  # (N, p, model) -> error of the sample covariance
res_wgan_cov = defaultdict(list)  # (N, p, model) -> WGAN errors, one per repeat
# outfile = open("cov_est.txt", 'w')

for N, p, model in itertools.product(Ns, ps, models):
    # A = np.random.uniform(size=(p,p))
    A = np.eye(p)
    # True covariance of the clean data (identity, since A is the identity).
    # `cov` is read as a global inside simulate_cov.
    cov = A.T @ A
    data = np.random.normal(size=(N, p)) @ A
    # z is a per-row 0/1 indicator (probability 0.1 of being 1); rows with z == 1
    # are replaced entirely by the contamination noise below.
    z = np.random.binomial(n=1,p=0.1,size=(N,1))
    if model == "Cauchy":
        noise = np.random.standard_cauchy(size=(N,p))
    elif model == "Normal":
        noise = np.random.normal(5,size=(N,p))
    elif model == "Gumbel":
        noise = np.random.gumbel(size=(N,p)) * 5
    # `data_perturbed` is also read as a global inside simulate_cov.
    data_perturbed = data * (1-z) + noise * z
    data_perturbed = data_perturbed.astype(np.float32)
    
    t = time.time()
    print("N={}, p={}, model={} in progress.".format(N, p, model))

    # Baseline: matrix 2-norm of (sample covariance - true covariance).
    sample_2norm_error = np.linalg.norm(np.cov(data_perturbed.T)-cov, ord=2)
    res_sample_cov[(N,p,model)] = sample_2norm_error
    print("sample cov error: {:.4f}".format(sample_2norm_error))
    
    # Each repeat builds and trains a fresh WGAN on the same contaminated data.
    for i in range(10):
        wgan_error = simulate_cov(N, p, model)
        res_wgan_cov[(N,p,model)].append(wgan_error)
        print("wgan error: {:.4f}".format(wgan_error))
        
    # Summary: baseline error, then mean and std of the WGAN errors.
    key = (N, p, model)
    print(key, "{:.4f}".format(res_sample_cov[key]))
    print(key, "{:.4f}".format(np.mean(res_wgan_cov[key])), "{:.4f}".format(np.std(res_wgan_cov[key])))
    print("N={}, p={}, model={} in done.({:.2f} minutes)".format(N, p, model, (time.time()-t)/60))
#     outfile.write("N={}, p={}, model={} samp: {}\n".format(N, p, model, res_sample_cov[(N,p,model)]))
#     outfile.write("N={}, p={}, model={} wgan: {}\n".format(N, p, model, res_wgan_cov[(N,p,model)]))
#     outfile.flush()

N=100, p=10, model=Cauchy in progress.
sample cov error: 989.1726
generator loss: 0.1043, discriminator loss: -0.2424
generator loss: 0.0238, discriminator loss: -0.1643
generator loss: 0.0445, discriminator loss: -0.1857
generator loss: 0.0468, discriminator loss: -0.1893
generator loss: 0.0074, discriminator loss: -0.1529
generator loss: -0.0131, discriminator loss: -0.1336
generator loss: 0.1717, discriminator loss: -0.3180
generator loss: 0.0341, discriminator loss: -0.1805
generator loss: 0.0409, discriminator loss: -0.1885
generator loss: 0.0080, discriminator loss: -0.1595
wgan error: 2.6623
generator loss: -0.4117, discriminator loss: -0.2360
generator loss: -0.6338, discriminator loss: -0.0143
generator loss: -0.4676, discriminator loss: -0.1813
generator loss: -0.5412, discriminator loss: -0.1085
generator loss: -0.5042, discriminator loss: -0.1462
generator loss: -0.5400, discriminator loss: -0.1103
generator loss: -0.3422, discriminator loss: -0.3081
generator loss: -0.4567

generator loss: -0.5892, discriminator loss: -0.0465
wgan error: 2.6485
generator loss: -0.5887, discriminator loss: 0.0011
generator loss: -0.4768, discriminator loss: -0.1116
generator loss: -0.5526, discriminator loss: -0.0368
generator loss: -0.5571, discriminator loss: -0.0334
generator loss: -0.3998, discriminator loss: -0.1920
generator loss: -0.5181, discriminator loss: -0.0841
generator loss: -0.4483, discriminator loss: -0.1544
generator loss: -0.5526, discriminator loss: -0.0512
generator loss: -0.4346, discriminator loss: -0.1696
generator loss: -0.4578, discriminator loss: -0.1468
wgan error: 4.4542
generator loss: 0.0371, discriminator loss: -0.1483
generator loss: 0.0136, discriminator loss: -0.1225
generator loss: 0.0019, discriminator loss: -0.1134
generator loss: 0.0263, discriminator loss: -0.1370
generator loss: 0.0508, discriminator loss: -0.1631
generator loss: -0.0131, discriminator loss: -0.0964
generator loss: -0.0282, discriminator loss: -0.0801
generator loss

generator loss: -0.5004, discriminator loss: -0.1254
generator loss: -0.4883, discriminator loss: -0.1360
generator loss: -0.4659, discriminator loss: -0.1606
generator loss: -0.5537, discriminator loss: -0.0741
wgan error: 2.0250
(100, 10, 'Gumbel') 28.0165
(100, 10, 'Gumbel') 3.0510 0.6039
N=100, p=10, model=Gumbel in done.(1.87 minutes)
N=100, p=20, model=Cauchy in progress.
sample cov error: 341.9525
generator loss: -0.4290, discriminator loss: -0.1291
generator loss: -0.3253, discriminator loss: -0.2285
generator loss: -0.4429, discriminator loss: -0.1000
generator loss: -0.4089, discriminator loss: -0.1251
generator loss: -0.3499, discriminator loss: -0.1792
generator loss: -0.4245, discriminator loss: -0.1048
generator loss: -0.3996, discriminator loss: -0.1260
generator loss: -0.3190, discriminator loss: -0.2050
generator loss: -0.3593, discriminator loss: -0.1679
generator loss: -0.3864, discriminator loss: -0.1345
wgan error: 4.2819
generator loss: -0.1915, discriminator loss

generator loss: -0.3134, discriminator loss: -0.2674
generator loss: -0.3177, discriminator loss: -0.2625
generator loss: -0.3058, discriminator loss: -0.2721
generator loss: -0.4186, discriminator loss: -0.1608
generator loss: -0.3327, discriminator loss: -0.2458
generator loss: -0.3945, discriminator loss: -0.1821
generator loss: -0.3483, discriminator loss: -0.2300
generator loss: -0.2419, discriminator loss: -0.3350
generator loss: -0.3226, discriminator loss: -0.2530
generator loss: -0.2960, discriminator loss: -0.2803
wgan error: 3.6862
generator loss: -0.3700, discriminator loss: -0.2172
generator loss: -0.4600, discriminator loss: -0.1270
generator loss: -0.3685, discriminator loss: -0.2165
generator loss: -0.3414, discriminator loss: -0.2449
generator loss: -0.5236, discriminator loss: -0.0624
generator loss: -0.4549, discriminator loss: -0.1335
generator loss: -0.3328, discriminator loss: -0.2548
generator loss: -0.3239, discriminator loss: -0.2620
generator loss: -0.3561, di

generator loss: -0.1030, discriminator loss: -0.1713
generator loss: 0.1027, discriminator loss: -0.3746
wgan error: 4.2796
generator loss: 0.0316, discriminator loss: -0.2881
generator loss: 0.0906, discriminator loss: -0.3470
generator loss: 0.0578, discriminator loss: -0.3138
generator loss: 0.0476, discriminator loss: -0.3021
generator loss: -0.0994, discriminator loss: -0.1586
generator loss: -0.0090, discriminator loss: -0.2522
generator loss: -0.1147, discriminator loss: -0.1447
generator loss: -0.0194, discriminator loss: -0.2366
generator loss: 0.0437, discriminator loss: -0.2989
generator loss: 0.0642, discriminator loss: -0.3191
wgan error: 4.6759
(100, 20, 'Gumbel') 43.0277
(100, 20, 'Gumbel') 4.4205 0.6598
N=100, p=20, model=Gumbel in done.(3.58 minutes)
N=100, p=40, model=Cauchy in progress.
sample cov error: 1992.2081
generator loss: -0.3383, discriminator loss: -0.3270
generator loss: -0.3670, discriminator loss: -0.2989
generator loss: -0.4054, discriminator loss: -0.2

generator loss: -0.3007, discriminator loss: -0.2051
generator loss: -0.2225, discriminator loss: -0.2848
generator loss: -0.1202, discriminator loss: -0.3884
generator loss: -0.1474, discriminator loss: -0.3604
generator loss: -0.1563, discriminator loss: -0.3508
generator loss: -0.1875, discriminator loss: -0.3209
generator loss: -0.2645, discriminator loss: -0.2461
generator loss: -0.1391, discriminator loss: -0.3723
wgan error: 6.8349
generator loss: 0.4968, discriminator loss: -0.3007
generator loss: 0.5292, discriminator loss: -0.3325
generator loss: 0.5146, discriminator loss: -0.3186
generator loss: 0.4119, discriminator loss: -0.2142
generator loss: 0.5706, discriminator loss: -0.3729
generator loss: 0.5258, discriminator loss: -0.3274
generator loss: 0.5068, discriminator loss: -0.3075
generator loss: 0.6295, discriminator loss: -0.4327
generator loss: 0.6602, discriminator loss: -0.4651
generator loss: 0.4985, discriminator loss: -0.3060
wgan error: 4.5639
generator loss: 0.

generator loss: 0.0347, discriminator loss: -0.2160
generator loss: -0.0084, discriminator loss: -0.1735
generator loss: 0.2470, discriminator loss: -0.4299
generator loss: 0.0573, discriminator loss: -0.2434
generator loss: 0.0892, discriminator loss: -0.2749
generator loss: -0.0885, discriminator loss: -0.0981
generator loss: 0.0642, discriminator loss: -0.2522
generator loss: 0.1772, discriminator loss: -0.3655
generator loss: 0.1010, discriminator loss: -0.2894
generator loss: 0.0721, discriminator loss: -0.2608
wgan error: 6.5970
generator loss: -0.0071, discriminator loss: -0.3116
generator loss: 0.0855, discriminator loss: -0.4036
generator loss: -0.0510, discriminator loss: -0.2661
generator loss: -0.0185, discriminator loss: -0.2981
generator loss: 0.0854, discriminator loss: -0.4036
generator loss: 0.0182, discriminator loss: -0.3403
generator loss: -0.0524, discriminator loss: -0.2700
generator loss: -0.1510, discriminator loss: -0.1732
generator loss: 0.0364, discriminator 

In [8]:
# res_sample_cov = defaultdict(list)
# res_wgan_cov = defaultdict(list)
# simulate_cov(100, 10, "Cauchy")
# print(res_sample_cov)
# print(res_wgan_cov)

In [9]:
# N = 1000
# p = 4

# A = np.random.uniform(size=(p,p))
# cov = A.T @ A

# data = np.random.normal(size=(N,p)) @ A
# noise = np.random.standard_cauchy(size=(N,p))
# #noise = np.random.gumbel(size=(N,p)) * 5
# z = np.random.binomial(n=1,p=0.1,size=(N,1))
# data_perturbed = data * (1-z) + noise * z
# data_perturbed = data_perturbed.astype(np.float32)
# print("sample covariance: \n", np.cov(data_perturbed.T))
# print("true covariance: \n", cov)

In [10]:
# epochs = 100
# batch_size = 32
# step_size = 0.01

# wgan = WGAN(dim_x=p, target="cov-matrix")
# wgan.train(data_perturbed, epochs=epochs, batch_size=batch_size, step_size=step_size)

In [11]:
# print("2-norm loss, samp: ", np.linalg.norm(np.cov(data_perturbed.T)-cov, ord=2))
# Ahat = wgan.generator.trainable_variables[0].numpy()
# print("2-norm loss, wgan: ", np.linalg.norm(Ahat.T@Ahat - cov, ord=2))
# print("cov hat: \n", A.T @ A)

## Location Estimation

In [6]:
def simulate_loc(data_perturbed, N, p, model, true_theta=None):
    """Fit a "location" WGAN once and return its squared L2 estimation error.

    Args:
        data_perturbed: Float32 array of contaminated samples to train on.
        N: Sample size; only used to pick the batch size.
        p: Data dimension (input width of the WGAN).
        model: Contamination model label. Not used by the computation; kept so
            existing calls keep working.
        true_theta: True location vector to compare against. When omitted, the
            notebook-level global ``theta`` is used, as before.

    Returns:
        ``||theta_hat - true_theta||_2 ** 2`` where ``theta_hat`` is the generator's
        first trainable variable (the offset learned by ``LocationAdd``).
    """
    # Fall back to the notebook global so existing four-argument calls behave
    # exactly as before; passing the vector explicitly avoids the hidden state.
    if true_theta is None:
        true_theta = theta

    if N <= 100:
        batch_size = 32
    elif N <= 5000:
        batch_size = 128
    else:
        batch_size = 256
    epochs = 200
    step_size = 0.005

    wgan = WGAN(dim_x=p, target="location")
    wgan.train(data_perturbed, epochs=epochs, batch_size=batch_size, step_size=step_size)

    theta_hat = wgan.generator.trainable_variables[0].numpy()
    wgan_error = np.linalg.norm(theta_hat-true_theta)**2
    return wgan_error

In [None]:
# Location-estimation experiment. For every (N, contamination model, p) combination:
# draw contaminated data, report the sample-mean error, then fit the WGAN 10 times
# and report the mean/std of its error.
Ns = [1024]
models = ["Normal","Gumbel"]
ps = [10, 20, 40, 80]
res_sample_mean = dict()  # (N, p, model) -> squared error of the sample mean (rounded)
res_wgan_mean = defaultdict(list)  # (N, p, model) -> WGAN errors, one per repeat
# outfile = open("location_est_new.txt", 'w')

for N, model, p in itertools.product(Ns, models, ps):
    # Re-seeding on every iteration restarts NumPy's random stream, so each
    # configuration draws from the same stream starting point.
    np.random.seed(5)
    # theta = np.repeat(np.array([1,2,3,4,5]), p//5)
    # True location (all zeros); `theta` is read as a global inside simulate_loc.
    theta = np.zeros(shape=(p,))
    data = np.random.normal(size=(N, p)) + theta
    # print("sample mean: \n", np.mean(data, axis=0))
    # z is a per-row 0/1 indicator (probability 0.1 of being 1); rows with z == 1
    # are replaced entirely by the contamination noise below.
    z = np.random.binomial(n=1,p=0.1,size=(N,1))
    # The "Cauchy" branch is not exercised by the `models` list above.
    if model == "Cauchy":
        noise = np.random.standard_cauchy(size=(N,p))
    elif model == "Normal":
        noise = np.random.normal(2,size=(N,p))
    elif model == "Gumbel":
        noise = np.random.gumbel(size=(N,p))
    data_perturbed = data * (1-z) + noise * z
    data_perturbed = data_perturbed.astype(np.float32)
    # print("noisy sample mean: \n", np.mean(data_perturbed, axis=0))
    
    t = time.time()
    print("N={}, p={}, model={} in progress.".format(N, p, model))
    
    # Baseline: squared L2 distance between the sample mean and theta, stored
    # already rounded to 4 decimals.
    sample_mean_error = np.round(np.linalg.norm(np.mean(data_perturbed, axis=0)-theta)**2, 4)
    res_sample_mean[(N,p,model)] = sample_mean_error
    print("sample mean error: {:.4f}".format(sample_mean_error))
    
    # Each repeat builds and trains a fresh WGAN on the same contaminated data.
    for i in range(10):
        wgan_error = simulate_loc(data_perturbed, N, p, model)
        res_wgan_mean[(N,p,model)].append(wgan_error)
        print("wgan error: {:.4f}".format(wgan_error))
    
    # Summary: baseline error, then mean and std of the WGAN errors.
    key = (N, p, model)
    print(key, res_sample_mean[key])
    print(key, np.mean(res_wgan_mean[key]), np.std(res_wgan_mean[key]))
    print("N={}, p={}, model={} in done.({:.2f} minutes)".format(N, p, model, (time.time()-t)/60))
#     outfile.write("N={}, p={}, model={} samp: {}\n".format(N, p, model, res_sample_mean[(N,p,model)]))
#     outfile.write("N={}, p={}, model={} wgan: {}\n".format(N, p, model, res_wgan_mean[(N,p,model)]))
#     outfile.flush()

In [14]:
# res_sample_mean = defaultdict(list)
# res_wgan_mean = defaultdict(list)
# simulate_loc(100, 80, "Gumbel")

## Regression

In [6]:
def simulate_reg(data_reg, N, p, model, true_beta=None):
    """Fit a "regression" WGAN once and return its L2 coefficient error.

    Args:
        data_reg: Float32 array with ``p + 1`` columns; the WGAN is built with
            ``dim_x = p + 1`` (the callers in this notebook pass ``[X, y]``).
        N: Sample size; only used to pick the batch size.
        p: Number of covariates.
        model: Contamination model label. Not used by the computation; kept so
            existing calls keep working.
        true_beta: True coefficient vector to compare against. When omitted, the
            notebook-level global ``beta`` is used, as before.

    Returns:
        ``||betahat_wgan - true_beta||_2`` where ``betahat_wgan`` is the generator's
        first trainable variable (the weight vector of ``RegressionAdd``).
    """
    # Fall back to the notebook global so existing four-argument calls behave
    # exactly as before; passing the vector explicitly avoids the hidden state.
    if true_beta is None:
        true_beta = beta

    if N <= 100:
        batch_size = 32
    elif N <= 5000:
        batch_size = 128
    else:
        batch_size = 512
    epochs = 200
    step_size = 0.005

    wgan = WGAN(dim_x=p+1, target="regression")
    wgan.train(data_reg, epochs=epochs, batch_size=batch_size, step_size=step_size)

    betahat_wgan = wgan.generator.trainable_variables[0].numpy()
    wgan_error = np.linalg.norm(betahat_wgan - true_beta)
    return wgan_error

In [18]:
# Regression experiment (p = 10): OLS vs. WGAN when a fraction `epsilon` of the
# response noise is replaced by heavy-tailed, non-negative contamination.
Ns = [4096]
ps = [10]
models = ["Cauchy"]
epsilons = [0.05]
res_ols_reg = dict()  # (N, p, model, epsilon) -> OLS coefficient error
res_wgan_reg = defaultdict(list)  # (N, p, model, epsilon) -> WGAN errors, one per repeat
# outfile = open("regression_est.txt", 'w')

# beta and data_X are drawn once, before the loop, for the (p, N) given here;
# `beta` is also read as a global inside simulate_reg.
p = 10
N = 4096
np.random.seed(4)
beta = np.repeat(np.array([-1,-1,0,1,1]), p//5) * 0.05
data_X = np.random.normal(size=(N,p))
for p, N, model, epsilon in itertools.product(ps, Ns, models, epsilons):
    # The design matrix and coefficients above are fixed, so the loop's (N, p)
    # must agree with them; fail loudly if `ps`/`Ns` are ever extended.
    assert data_X.shape == (N, p) and beta.shape == (p,), \
        "data_X/beta were generated for a different (N, p) than the loop is using"
    # epsilon is part of the key so results for different contamination levels
    # are neither overwritten nor pooled together.
    key = (N, p, model, epsilon)

    # z marks the contaminated observations (probability epsilon each).
    z = np.random.binomial(n=1,p=epsilon,size=(N,))
    if model == "Cauchy":
        noise = (1-z)*np.random.normal(scale=1, size=(N,)) + z*(np.abs(np.random.standard_cauchy(size=(N,))))
    elif model == "Normal":
        noise = (1-z)*np.random.normal(scale=1, size=(N,)) + z*(np.abs(np.random.normal(loc=2, scale=5, size=(N,))))
    elif model == "Gumbel":
        noise = (1-z)*np.random.normal(scale=1, size=(N,)) + z*(np.abs(np.random.gumbel(size=(N,))*10))
    else:
        # Without this, an unknown label would silently reuse a stale `noise`.
        raise ValueError("unknown contamination model: {!r}".format(model))
    data_y = data_X @ beta + noise
    data_y = data_y.reshape([-1,1])
    data_reg = np.concatenate([data_X, data_y], axis=1)
    data_reg = data_reg.astype(np.float32)

    t = time.time()
    print("N={}, p={}, model={} in progress.".format(N, p, model))

    # Baseline: ordinary least squares via the normal equations.
    betahat = np.linalg.solve(data_X.T@data_X, (data_X.T@data_y)).reshape(-1)
    ols_error = np.linalg.norm(betahat - beta)
    res_ols_reg[key] = ols_error
    print("ols_error: {:.4f}".format(ols_error))

    # Each repeat builds and trains a fresh WGAN on the same contaminated data.
    for i in range(10):
        print("repeat: ", i+1)
        wgan_error = simulate_reg(data_reg, N, p, model)
        res_wgan_reg[key].append(wgan_error)
        print("wgan error: {:.4f}".format(wgan_error))

    print(key, "{:.4f}".format(res_ols_reg[key]))
    print(key, "{:.4f}".format(np.mean(res_wgan_reg[key])), "{:.4f}".format(np.std(res_wgan_reg[key])))
    print(key, res_wgan_reg[key])
    print("N={}, p={}, model={} in done.({:.2f} minutes)".format(N, p, model, (time.time()-t)/60))
    # outfile.write("N={}, p={}, model={} samp: {}\n".format(N, p, model, res_ols_reg[key]))
    # outfile.write("N={}, p={}, model={} wgan: {}\n".format(N, p, model, res_wgan_reg[key]))
    # outfile.flush()

N=4096, p=10, model=Cauchy in progress.
ols_error: 0.1404
repeat:  1
generator loss: 0.4922, discriminator loss: -0.0593
generator loss: 0.5399, discriminator loss: -0.0919
generator loss: 0.4916, discriminator loss: -0.0478
generator loss: 0.5103, discriminator loss: -0.0650
generator loss: 0.5716, discriminator loss: -0.1209
generator loss: 0.4737, discriminator loss: -0.0269
generator loss: 0.4914, discriminator loss: -0.0398
generator loss: 0.5367, discriminator loss: -0.0826
generator loss: 0.4319, discriminator loss: 0.0114
generator loss: 0.4955, discriminator loss: -0.0461
wgan error: 0.4268
repeat:  2
generator loss: 0.5349, discriminator loss: -0.0719
generator loss: 0.5290, discriminator loss: -0.0748
generator loss: 0.5797, discriminator loss: -0.1104
generator loss: 0.5383, discriminator loss: -0.0722
generator loss: 0.5552, discriminator loss: -0.0812
generator loss: 0.5615, discriminator loss: -0.0929
generator loss: 0.5494, discriminator loss: -0.0787
generator loss: 0.

In [14]:
# Regression experiment (p = 40): OLS vs. WGAN for several contamination levels
# `epsilon` of the response noise.
Ns = [4096]
ps = [40]
models = ["Cauchy"]
epsilons = [0.10, 0.20, 0.50]
res_ols_reg = dict()  # (N, p, model, epsilon) -> OLS coefficient error
res_wgan_reg = defaultdict(list)  # (N, p, model, epsilon) -> WGAN errors, one per repeat
# outfile = open("regression_est.txt", 'w')

# beta and data_X are drawn once, before the loop, and shared by every epsilon;
# `beta` is also read as a global inside simulate_reg.
p = 40
N = 4096
np.random.seed(5)
beta = np.repeat(np.array([-1,-1,0,1,1]), p//5) * 0.05
data_X = np.random.normal(size=(N,p))
for p, N, model, epsilon in itertools.product(ps, Ns, models, epsilons):
    # The design matrix and coefficients above are fixed, so the loop's (N, p)
    # must agree with them; fail loudly if `ps`/`Ns` are ever extended.
    assert data_X.shape == (N, p) and beta.shape == (p,), \
        "data_X/beta were generated for a different (N, p) than the loop is using"
    # epsilon must be part of the key: with (N, p, model) alone, the three epsilon
    # runs overwrote each other's OLS error and their WGAN repeats were appended to
    # one list, so the reported mean/std mixed different contamination levels.
    key = (N, p, model, epsilon)

    # z marks the contaminated observations (probability epsilon each).
    z = np.random.binomial(n=1,p=epsilon,size=(N,))
    if model == "Cauchy":
        noise = (1-z)*np.random.normal(scale=1, size=(N,)) + z*(np.abs(np.random.standard_cauchy(size=(N,))))
    elif model == "Normal":
        noise = (1-z)*np.random.normal(scale=1, size=(N,)) + z*(np.abs(np.random.normal(loc=2, scale=5, size=(N,))))
    elif model == "Gumbel":
        noise = (1-z)*np.random.normal(scale=1, size=(N,)) + z*(np.abs(np.random.gumbel(size=(N,))*10))
    else:
        # Without this, an unknown label would silently reuse a stale `noise`.
        raise ValueError("unknown contamination model: {!r}".format(model))
    data_y = data_X @ beta + noise
    data_y = data_y.reshape([-1,1])
    data_reg = np.concatenate([data_X, data_y], axis=1)
    data_reg = data_reg.astype(np.float32)

    t = time.time()
    print("N={}, p={}, model={} in progress.".format(N, p, model))

    # Baseline: ordinary least squares via the normal equations.
    betahat = np.linalg.solve(data_X.T@data_X, (data_X.T@data_y)).reshape(-1)
    ols_error = np.linalg.norm(betahat - beta)
    res_ols_reg[key] = ols_error
    print("ols_error: {:.4f}".format(ols_error))

    # Each repeat builds and trains a fresh WGAN on the same contaminated data.
    for i in range(10):
        print("repeat: ", i+1)
        wgan_error = simulate_reg(data_reg, N, p, model)
        res_wgan_reg[key].append(wgan_error)
        print("wgan error: {:.4f}".format(wgan_error))

    print(key, "{:.4f}".format(res_ols_reg[key]))
    print(key, "{:.4f}".format(np.mean(res_wgan_reg[key])), "{:.4f}".format(np.std(res_wgan_reg[key])))
    print(key, res_wgan_reg[key])
    print("N={}, p={}, model={} in done.({:.2f} minutes)".format(N, p, model, (time.time()-t)/60))
    # outfile.write("N={}, p={}, model={} samp: {}\n".format(N, p, model, res_ols_reg[key]))
    # outfile.write("N={}, p={}, model={} wgan: {}\n".format(N, p, model, res_wgan_reg[key]))
    # outfile.flush()

N=4096, p=40, model=Cauchy in progress.
ols_error: 0.4137
repeat:  1
generator loss: -0.1441, discriminator loss: -0.0956
generator loss: -0.2061, discriminator loss: -0.0314
generator loss: -0.1837, discriminator loss: -0.0526
generator loss: -0.1466, discriminator loss: -0.0928
generator loss: -0.2158, discriminator loss: -0.0269
generator loss: -0.1940, discriminator loss: -0.0432
generator loss: -0.1091, discriminator loss: -0.1268
generator loss: -0.1941, discriminator loss: -0.0394
generator loss: -0.1470, discriminator loss: -0.0854
generator loss: -0.2514, discriminator loss: 0.0157
wgan error: 0.6472
repeat:  2
generator loss: -0.2018, discriminator loss: -0.0755
generator loss: -0.2733, discriminator loss: -0.0042
generator loss: -0.2171, discriminator loss: -0.0630
generator loss: -0.2109, discriminator loss: -0.0674
generator loss: -0.2226, discriminator loss: -0.0519
generator loss: -0.2059, discriminator loss: -0.0725
generator loss: -0.2240, discriminator loss: -0.0527
g

generator loss: 0.0503, discriminator loss: -0.0453
generator loss: 0.0782, discriminator loss: -0.0694
generator loss: 0.2106, discriminator loss: -0.2046
generator loss: 0.1138, discriminator loss: -0.1114
generator loss: 0.1504, discriminator loss: -0.1444
generator loss: 0.1476, discriminator loss: -0.1428
generator loss: 0.1489, discriminator loss: -0.1448
generator loss: 0.0994, discriminator loss: -0.0943
generator loss: 0.1697, discriminator loss: -0.1642
generator loss: 0.2127, discriminator loss: -0.2021
wgan error: 0.7671
repeat:  6
generator loss: 0.2269, discriminator loss: -0.1376
generator loss: 0.2892, discriminator loss: -0.1929
generator loss: 0.2129, discriminator loss: -0.1106
generator loss: 0.2978, discriminator loss: -0.1979
generator loss: 0.2746, discriminator loss: -0.1753
generator loss: 0.2639, discriminator loss: -0.1622
generator loss: 0.1884, discriminator loss: -0.0875
generator loss: 0.2345, discriminator loss: -0.1370
generator loss: 0.2241, discrimina

generator loss: 0.0439, discriminator loss: -0.2875
generator loss: -0.0751, discriminator loss: -0.1686
generator loss: -0.0388, discriminator loss: -0.2057
wgan error: 0.7644
repeat:  9
generator loss: 0.0847, discriminator loss: -0.2275
generator loss: 0.0494, discriminator loss: -0.1918
generator loss: 0.0828, discriminator loss: -0.2253
generator loss: 0.0405, discriminator loss: -0.1825
generator loss: 0.1692, discriminator loss: -0.3101
generator loss: 0.0962, discriminator loss: -0.2375
generator loss: 0.1176, discriminator loss: -0.2598
generator loss: 0.1079, discriminator loss: -0.2506
generator loss: 0.0528, discriminator loss: -0.1949
generator loss: 0.0226, discriminator loss: -0.1629
wgan error: 0.7064
repeat:  10
generator loss: -0.1882, discriminator loss: -0.2809
generator loss: -0.1918, discriminator loss: -0.2783
generator loss: -0.3056, discriminator loss: -0.1652
generator loss: -0.2197, discriminator loss: -0.2506
generator loss: -0.2627, discriminator loss: -0.2

In [18]:
# betahat = np.linalg.solve(data_X.T@data_X, (data_X.T@data_y)).reshape(-1)
# print("least square estimate: \n", betahat)

In [19]:
# epochs = 200
# batch_size = 64
# step_size = 0.005

# wgan = WGAN(dim_x=p+1, target="regression")
# wgan.train(data_reg, epochs=epochs, batch_size=batch_size, step_size=step_size)

In [20]:
# print("OLS error: ", np.round(np.linalg.norm(betahat - beta), 4))
# betahat_wgan = wgan.generator.trainable_variables[0].numpy()
# print("wgan error: ", np.round(np.linalg.norm(betahat_wgan - beta), 4))

## Miscellaneous

In [21]:
# correct test version of model with self defined layers
# def build_model():
#     a = tf.keras.Input(shape=(4,))
#     out = LocationAdd(input_dim=4)(a+5)
#     model = tf.keras.Model(inputs=a, outputs=out)
#     return model
# model = build_model()
# model2 = build_model()
# print(model.trainable_variables)
# print(model2.trainable_variables)
# model.compile(optimizer='rmsprop', loss=tf.keras.losses.MeanSquaredError())
# model.fit(x=data,y=data, batch_size=1, epochs=100)
# print(model.trainable_variables)
# print(model2.trainable_variables)

## tf.keras.layers.add can make variables not trainable, below is not correct
# a = tf.keras.Input(shape=(4,))
# b = tf.Variable(initial_value=tf.random_normal_initializer()(shape=(4,)), trainable=True)
# out = tf.keras.layers.add([a+5,b])