In [1]:
import os
from collections import Counter

import matplotlib.pyplot as plt
from imblearn.datasets import make_imbalance
from sklearn.datasets import load_breast_cancer
from sklearn.datasets import make_moons
from sklearn.manifold import TSNE

In [2]:
import numpy as np
import tensorflow as tf

In [3]:
# Load the breast-cancer dataset: Xdat is the feature matrix and ydat the
# matching vector of class labels.
breast_cancer = load_breast_cancer()
Xdat = breast_cancer.data
ydat = breast_cancer.target

# Disabled alternative: build an imbalanced two-moons dataset instead.
# Kept for reference only; none of the names below exist at runtime.
#
# multiplier = 1.0
# desiredtotal = 100000
# nsamp = int(np.round(2*desiredtotal/(1+multiplier)))
# Xfull, yfull = make_moons(n_samples=nsamp, shuffle=True, noise=0.05, random_state=10)
#
# def ratio_func(y, multiplier, minority_class):
#     target_stats = Counter(y)
#     return {minority_class: int(multiplier * target_stats[minority_class])}
#
# Xdat, ydat = make_imbalance(Xfull, yfull, sampling_strategy=ratio_func,
#                             **{"multiplier": multiplier,
#                                "minority_class": 1})


In [4]:
# Show the distinct label values, then how many samples carry label 1 and
# label 0 (in that order).
print(np.unique(ydat))
print(np.array([(ydat == 1).sum(), (ydat == 0).sum()]))

[0 1]
[357 212]


In [5]:
# Append the label as one extra trailing column so that every row carries its
# features and its class together; the GAN is trained on these joint rows.
alldat = np.hstack([Xdat, np.expand_dims(ydat, axis=1)])
# Shuffle the rows with a private, fixed-seed generator: the row order (and
# therefore the composition of each mini-batch) is identical on every
# Restart & Run All, and the global np.random stream is left untouched.
np.random.RandomState(10).shuffle(alldat)
print(alldat.shape)

(569, 31)


In [6]:
def xavier_init(size):
    """Return a random-normal initial value for a weight tensor.

    Args:
        size: shape of the tensor to create; ``size[0]`` is taken as the
            number of inputs feeding the layer.

    Returns:
        A ``tf.random_normal`` tensor of shape ``size`` whose standard
        deviation is ``1 / sqrt(size[0] / 2)``.
    """
    return tf.random_normal(shape=size, stddev=1. / tf.sqrt(size[0] / 2.))

In [7]:
# Discriminator input: any number of rows, 31 columns each.
X = tf.placeholder(dtype=tf.float32, shape=[None, 31])

# Width shared by both hidden layers of the discriminator.
Dnn = 8

# Hidden layer 1: 31 -> Dnn
D_W1 = tf.Variable(initial_value=xavier_init([31, Dnn]))
D_b1 = tf.Variable(initial_value=tf.zeros([Dnn]))

# Hidden layer 2: Dnn -> Dnn
D_W2 = tf.Variable(initial_value=xavier_init([Dnn, Dnn]))
D_b2 = tf.Variable(initial_value=tf.zeros([Dnn]))

# Output layer: Dnn -> 1 (a single logit per row)
D_W3 = tf.Variable(initial_value=xavier_init([Dnn, 1]))
D_b3 = tf.Variable(initial_value=tf.zeros([1]))

# Every trainable discriminator variable; handed to the D optimizer so that
# it updates only these.
theta_D = [D_W1, D_W2, D_W3, D_b1, D_b2, D_b3]

Instructions for updating:
Colocations handled automatically by placer.


In [8]:
# Length of each noise vector fed to the generator.
Z_dim = 50
Z = tf.placeholder(dtype=tf.float32, shape=[None, Z_dim])

# Width shared by both hidden layers of the generator.
Gnn = 8

# Hidden layer 1: Z_dim -> Gnn
G_W1 = tf.Variable(initial_value=xavier_init([Z_dim, Gnn]))
G_b1 = tf.Variable(initial_value=tf.zeros([Gnn]))

# Hidden layer 2: Gnn -> Gnn
G_W2 = tf.Variable(initial_value=xavier_init([Gnn, Gnn]))
G_b2 = tf.Variable(initial_value=tf.zeros([Gnn]))

# Output layer: Gnn -> 31, the same width as the discriminator input.
G_W3 = tf.Variable(initial_value=xavier_init([Gnn, 31]))
G_b3 = tf.Variable(initial_value=tf.zeros([31]))

# Every trainable generator variable; handed to the G optimizer so that it
# updates only these.
theta_G = [G_W1, G_W2, G_W3, G_b1, G_b2, G_b3]

In [9]:
def sample_Z(m, n):
    """Draw an ``m`` x ``n`` array of noise, uniform on [-1, 1).

    Args:
        m: number of rows (noise vectors).
        n: number of columns (length of each noise vector).

    Returns:
        A NumPy array of shape ``(m, n)`` drawn from the global
        ``np.random`` stream.
    """
    return np.random.uniform(low=-1.0, high=1.0, size=(m, n))


def generator(z):
    """Map noise rows to generated samples.

    Args:
        z: tensor of noise rows, multiplied directly by ``G_W1``.

    Returns:
        The un-activated output of the last layer (``G_W3`` / ``G_b3``),
        reached after two ReLU hidden layers.
    """
    hidden = z
    for weights, bias in ((G_W1, G_b1), (G_W2, G_b2)):
        hidden = tf.nn.relu(tf.matmul(hidden, weights) + bias)
    # The output layer is linear: no activation is applied.
    return tf.matmul(hidden, G_W3) + G_b3


def discriminator(x):
    """Score rows of ``x`` with the discriminator network.

    Args:
        x: tensor of input rows, multiplied directly by ``D_W1``.

    Returns:
        A ``(probability, logit)`` pair: the logit is the linear output of
        the last layer after two ReLU hidden layers, and the probability is
        its sigmoid.
    """
    hidden = x
    for weights, bias in ((D_W1, D_b1), (D_W2, D_b2)):
        hidden = tf.nn.relu(tf.matmul(hidden, weights) + bias)
    logit = tf.matmul(hidden, D_W3) + D_b3
    return tf.nn.sigmoid(logit), logit


In [10]:
# Wire the graph: the generator turns noise Z into samples, and the same
# discriminator weights score both the real input X and those samples.
G_sample = generator(z=Z)
D_real, D_logit_real = discriminator(x=X)
D_fake, D_logit_fake = discriminator(x=G_sample)


In [11]:
def _mean_sigmoid_xent(logits, labels):
    """Mean sigmoid cross-entropy of ``logits`` against ``labels``."""
    return tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=logits, labels=labels))


# Discriminator objective: label real rows 1 and generated rows 0.
D_loss_real = _mean_sigmoid_xent(D_logit_real, tf.ones_like(D_logit_real))
D_loss_fake = _mean_sigmoid_xent(D_logit_fake, tf.zeros_like(D_logit_fake))
D_loss = D_loss_real + D_loss_fake
# Generator objective: have generated rows scored as 1 by the discriminator.
G_loss = _mean_sigmoid_xent(D_logit_fake, tf.ones_like(D_logit_fake))

# One Adam optimizer per network, each restricted to its own variables.
D_solver = tf.train.AdamOptimizer(learning_rate=1e-6).minimize(D_loss, var_list=theta_D)
G_solver = tf.train.AdamOptimizer(learning_rate=1e-6).minimize(G_loss, var_list=theta_G)




In [None]:
# Train the GAN: alternate one discriminator step and one generator step per
# iteration, and every 1000 iterations report the losses and save a t-SNE
# snapshot of freshly generated samples.
sess = tf.Session()
sess.run(tf.global_variables_initializer())

mb_size = 100
adr = alldat.shape[0]
# Ceiling division, so the final (shorter) slice of rows is visited too.
# Floor division silently dropped every row past the last full mini-batch
# and divided by zero whenever adr < mb_size.
n_batches = -(-adr // mb_size)

# Snapshots are written to this directory; create it up front so that
# plt.savefig cannot fail on a missing path part-way through training.
os.makedirs('out', exist_ok=True)

# Running index used to number the snapshot files.
i = 0

for it in range(10000):
    # Walk through the rows in order, wrapping around after the last batch.
    eit = it % n_batches
    si = eit * mb_size
    ei = (eit+1) * mb_size
    # Slicing clamps `ei` to the array length, so the last batch may be short.
    X_mb = alldat[si:ei, :]

    _, D_loss_curr = sess.run([D_solver, D_loss], feed_dict={X: X_mb, Z: sample_Z(mb_size, Z_dim)})
    _, G_loss_curr = sess.run([G_solver, G_loss], feed_dict={Z: sample_Z(mb_size, Z_dim)})

    if it % 1000 == 0:
        print('Iter: {}'.format(it))
        print('D loss: {:.4}'. format(D_loss_curr))
        print('G_loss: {:.4}'.format(G_loss_curr))
        print()

        # Snapshot: embed the first 30 columns of 1000 generated rows in 2-D
        # and split the points by the rounded value of the last column.
        testraw = sess.run(G_sample, feed_dict={Z: sample_Z(1000, Z_dim)})
        test = TSNE(n_components=2).fit_transform(testraw[:,:30])
        testc = np.array(np.round(testraw[:,30]),dtype='int32')
        fig = plt.figure(figsize=(4, 4))
        plt.scatter(test[testc == 0, 0], test[testc == 0, 1], label="Class #0", alpha=0.5, s=0.01)
        plt.scatter(test[testc != 0, 0], test[testc != 0, 1], label="Class #1", alpha=0.5, s=0.01)
        plt.savefig('out/{}.png'.format(str(i).zfill(3)), bbox_inches='tight')
        i += 1
        # Close the figure so repeated snapshots do not accumulate in memory.
        plt.close(fig)

        


Iter: 0
D loss: 1.013
G_loss: 0.5074

Iter: 1000
D loss: 0.9841
G_loss: 0.5399

Iter: 2000
D loss: 0.9938
G_loss: 0.5622

Iter: 3000
D loss: 0.9955
G_loss: 0.4957

Iter: 4000
D loss: 0.9976
G_loss: 0.4654

Iter: 5000
D loss: 0.9331
G_loss: 0.5292

Iter: 6000
D loss: 1.072
G_loss: 0.5373

Iter: 7000
D loss: 1.009
G_loss: 0.5317

Iter: 8000
D loss: 1.076
G_loss: 0.4991

Iter: 9000
D loss: 0.9063
G_loss: 0.5365



In [13]:
# Disabled: scatter plot of the raw two-class data. Xfull / yfull are only
# created by the commented-out make_moons line in the data-loading cell, so
# running these lines as live code raises NameError (see the saved output
# below this cell).
# plt.scatter(Xfull[yfull == 0, 0], Xfull[yfull == 0, 1], label="Class #0", alpha=0.5, s=0.01)
# plt.scatter(Xfull[yfull == 1, 0], Xfull[yfull == 1, 1], label="Class #1", alpha=0.5, s=0.01)
# plt.show()
# # ax.scatter(X_[y_ == 1, 0], X_[y_ == 1, 1], label="Class #1", alpha=0.5)

NameError: name 'Xfull' is not defined

In [13]:
# Draw 900000 synthetic rows from the trained generator in a single pass.
test = sess.run(fetches=G_sample, feed_dict={Z: sample_Z(900000, Z_dim)})

In [22]:
# Round the last column (index 30) of each generated row to an integer class,
# then report the distinct classes and the zero / non-zero counts.
testc = np.round(test[:, 30]).astype('int32')
print(np.unique(testc))
print(np.count_nonzero(testc == 0))
print(np.count_nonzero(testc != 0))

[-1  0  1  2]
789132
110868


In [None]:
# Scatter the first two columns of the generated rows, one series per class.
for cls, tag in ((0, "Class #0"), (1, "Class #1")):
    picked = testc == cls
    plt.scatter(test[picked, 0], test[picked, 1], label=tag, alpha=0.5)
plt.show()


In [None]:
# Store the rounded class back into the label column. The label is the last
# of the 31 columns (index 30), the same column testc was read from. Writing
# to index 2 overwrote a real feature and left the label column unrounded
# (index 2 is presumably a leftover from the two-feature moons data).
test[:, 30] = testc

In [None]:
# Augment the real rows with the generated rows whose rounded class is 1.
newdat = np.concatenate((alldat, test[testc == 1]), axis=0)

In [None]:
# Row / column count of the augmented dataset.
np.shape(newdat)

In [None]:
# Class balance after augmentation. The label is the last column (index 30);
# index 2 is an ordinary feature and gave meaningless counts. Rounding lets
# generated rows whose label column still holds the raw generator output
# count towards their nearest class.
newdat_labels = np.round(newdat[:, 30])
print(np.array([np.sum(newdat_labels == 1), np.sum(newdat_labels == 0)]))

In [None]:
# Scatter the first two feature columns of the augmented data, coloured by
# class. The class is taken from the last column (index 30), rounded so that
# raw generator outputs fall into their nearest class; index 2 is a feature,
# not the label.
plot_labels = np.round(newdat[:, 30])
plt.scatter(newdat[plot_labels == 0, 0], newdat[plot_labels == 0, 1], label="Class #0", alpha=0.5)
plt.scatter(newdat[plot_labels == 1, 0], newdat[plot_labels == 1, 1], label="Class #1", alpha=0.5)
# The series carry labels, so show the legend that makes them readable.
plt.legend()
plt.show()
