In [None]:
from __future__ import print_function, division
from tensorflow.python.keras.layers import Input, Dense, Reshape, Flatten, Dropout, MaxPooling2D, Activation, ZeroPadding2D
from tensorflow.python.keras.layers.convolutional import UpSampling2D, Conv2D
from tensorflow.python.keras.models import Sequential, Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.layers import LeakyReLU
from tensorflow.keras.datasets import mnist
from sklearn.metrics import confusion_matrix, roc_curve, auc

import tensorflow as tf
import matplotlib.pyplot as plt
import numpy as np

In [None]:
def discriminator():
    """Build the GAN discriminator, a small strided-convolution network.

    The input resolution is read from the module-level ``image_size``.

    Returns:
        An uncompiled Keras ``Model`` that maps an
        ``(image_size, image_size, 1)`` image to a single sigmoid output.
    """
    start = Input(shape=(image_size, image_size, 1))

    # The Input tensor already fixes the input shape, so the first layer does
    # not take an extra ``input_shape`` argument.
    x = Conv2D(8, kernel_size=3, strides=2, padding="same")(start)
    x = LeakyReLU(alpha=0.2)(x)

    x = Conv2D(16, kernel_size=3, strides=2, padding="same")(x)
    # Adds one row at the bottom and one column on the right; with
    # image_size = 28 this turns the 7x7 feature map into 8x8.
    x = ZeroPadding2D(padding=((0, 1), (0, 1)))(x)
    x = LeakyReLU(alpha=0.2)(x)

    x = Conv2D(32, kernel_size=3, strides=2, padding="same")(x)
    x = LeakyReLU(alpha=0.2)(x)

    x = Flatten()(x)
    output = Dense(1, activation='sigmoid')(x)

    return Model(start, output)


In [None]:
def generator(input_dim=32):
    """Build the GAN generator.

    Args:
        input_dim: Length of the noise vector fed to the network.

    Returns:
        An uncompiled Keras ``Model`` mapping a noise vector of length
        ``input_dim`` to a single-channel image. The 7x7 seed is upsampled
        twice (7 -> 14 -> 28) and the final activation is ``tanh``, so pixel
        values lie in [-1, 1].
    """
    noise = Input(shape=(input_dim,))

    # The Input tensor already defines the input size, so Dense does not take
    # an extra ``input_dim`` argument.
    x = Dense(7*7*32, activation="relu")(noise)
    x = Reshape((7, 7, 32))(x)

    x = UpSampling2D()(x)
    x = Conv2D(32, kernel_size=3, padding="same", activation="relu")(x)

    x = UpSampling2D()(x)
    x = Conv2D(16, kernel_size=3, padding="same", activation="relu")(x)

    x = Conv2D(1, kernel_size=3, padding="same")(x)
    img = Activation("tanh")(x)

    return Model(noise, img)


In [None]:
def classifier():
    """Build the two-class CNN classifier.

    The input resolution is read from the module-level ``image_size``.

    Returns:
        An uncompiled Keras ``Model`` that maps an
        ``(image_size, image_size, 1)`` image to a 2-way softmax.
    """
    start = Input(shape=(image_size, image_size, 1))

    # No ``input_shape`` on the conv layers: the Input tensor defines the
    # shape of the first one, and the second receives a downsampled feature
    # map rather than a full image, so declaring the image shape there was
    # incorrect.
    x = Conv2D(8, kernel_size=3, strides=2, padding='same', activation='relu')(start)
    x = MaxPooling2D(pool_size=(2, 2))(x)

    x = Conv2D(16, kernel_size=3, strides=2, padding='same', activation='relu')(x)
    x = MaxPooling2D(pool_size=(2, 2))(x)

    x = Flatten()(x)
    x = Dense(64, activation='relu')(x)
    output = Dense(2, activation='softmax')(x)

    return Model(inputs=start, outputs=output)

In [None]:
def obtain_dataset(selected_class, number_of_images):
  """Load MNIST and keep only the images of one digit class.

  Reads the module-level ``test_size`` and ``image_size``.

  Args:
    selected_class: Digit label to keep.
    number_of_images: Total number of images requested. At most
      ``int(number_of_images*(1-test_size))`` come from the MNIST training
      split and at most ``int(number_of_images*test_size)`` from the test split.

  Returns:
    ``(x_train, y_train, x_test, y_test)``. Images are float32 arrays of
    shape ``(N, image_size, image_size, 1)`` scaled to [0, 1]; labels are
    uint8 arrays of shape ``(N, 1)``.
  """
  (train_images, train_labels), (test_images, test_labels) = mnist.load_data()

  def _pick(images, labels, limit):
    # Keep the first `limit` samples of the requested class, then normalise.
    keep = labels == selected_class
    pixels = images[keep][:limit, :, :].astype('float32')
    pixels = pixels.reshape(-1, image_size, image_size, 1) / 255
    targets = labels[keep][:limit].astype('uint8').reshape((-1, 1))
    return pixels, targets

  train_count = int(number_of_images*(1-test_size))
  test_count = int(number_of_images*test_size)

  x_train, y_train = _pick(train_images, train_labels, train_count)
  x_test, y_test = _pick(test_images, test_labels, test_count)

  return x_train, y_train, x_test, y_test

In [None]:
def build_and_train_classifier(epochs, batch_size):
  """Train a fresh classifier on the current global data and return its test AUC.

  Reads the module-level ``x_train``, ``y_train``, ``x_test`` and ``y_test``.
  Prints the confusion matrix and shows the ROC curve as side effects.

  Args:
    epochs: Number of training epochs.
    batch_size: Mini-batch size passed to ``fit``.

  Returns:
    Area under the ROC curve on the test data.
  """
  # build a classifier named 'model'
  model = classifier()

  # compile 'model'
  optimizer_model = Adam(0.0001, 0.5)
  model.compile(loss='sparse_categorical_crossentropy', optimizer=optimizer_model, metrics=['accuracy'])

  # Keras takes the validation split from the END of the arrays before any
  # shuffling. The training arrays are concatenated class by class, so the
  # unshuffled tail would contain a single class; shuffle first.
  order = np.random.permutation(len(x_train))

  # train the classifier (batch_size was previously accepted but never used)
  model.fit(x_train[order], y_train[order], epochs=epochs, batch_size=batch_size, validation_split=0.1)

  # test the classifier
  y_prob = model.predict(x_test)
  y_pred = y_prob.argmax(axis=-1)
  y_true = np.ravel(y_test)

  print('\nconfusion matrix:\n', confusion_matrix(y_true, y_pred))

  # Use the class-1 probability as the score: hard 0/1 predictions collapse
  # the ROC curve to a single operating point.
  fpr, tpr, _ = roc_curve(y_true, y_prob[:, 1])

  plt.plot(fpr, tpr)
  plt.xlabel('False Positive Rate')
  plt.ylabel('True Positive Rate')
  plt.title('ROC Curve')
  plt.show()

  return auc(fpr, tpr)

In [None]:
def build_and_train_GAN(epochs, batch_size):
  """Train a GAN on the class-'0' training images and return the generator.

  Reads the module-level ``x_train_0`` (real images) and ``random_vector``
  (noise length). Prints the losses after every batch.

  Args:
    epochs: Number of passes over ``x_train_0``.
    batch_size: Number of real (and fake) images per training step. Samples
      beyond the last full batch are not used.

  Returns:
    The trained generator model.
  """
  # define hyper-parameter for GAN
  optimizer_GEN = Adam(0.0001, 0.5)
  optimizer_DIS = Adam(0.0004, 0.5)

  # build and compile the discriminator 'Dis'
  Dis = discriminator()
  Dis.compile(loss='binary_crossentropy', optimizer=optimizer_DIS, metrics=['accuracy'])

  # build a generator named 'Gen' with input random_vector
  Gen = generator(random_vector)

  # Generator training route: noise -> Gen -> Dis. 'Dis' is marked
  # non-trainable before the combined model is compiled.
  start = Input(shape=(random_vector,))
  fake_image = Gen(start)
  Dis.trainable = False
  decide = Dis(fake_image)
  comb_model = Model(start, decide)
  comb_model.compile(loss='binary_crossentropy', optimizer=optimizer_GEN)

  valid = np.ones((batch_size, 1))
  fake = np.zeros((batch_size, 1))

  # The generator ends in tanh and therefore emits values in [-1, 1], while
  # the dataset is stored in [0, 1]. Rescale the real images once so that
  # the discriminator sees real and fake samples in the same range.
  real_images = x_train_0 * 2.0 - 1.0
  steps_per_epoch = real_images.shape[0] // batch_size

  # Use train_on_batch instead of model.fit
  for epoch in range(epochs):
      for step in range(steps_per_epoch):
          offset = step * batch_size

          # Get Real&Fake sample for discriminator from dataset&generator
          noise = np.random.normal(0, 1, (batch_size, random_vector))
          gen_images = Gen.predict(noise)
          train_images = real_images[offset:offset + batch_size, :, :, :]

          # Training Discriminator
          d_loss_real = Dis.train_on_batch(train_images, valid)
          d_loss_fake = Dis.train_on_batch(gen_images, fake)
          d_loss = 0.5 * np.add(d_loss_real, d_loss_fake)

          # Training generator: fresh noise, labelled 'valid' so the generator
          # is pushed towards samples the discriminator accepts.
          noise = np.random.normal(0, 1, (batch_size, random_vector))
          g_loss = comb_model.train_on_batch(noise, valid)

          print("%d-%d [D loss: %f, acc.: %.2f%%] [G loss: %f]" % (epoch, step, d_loss[0], 100*d_loss[1], g_loss))

  return Gen

In [None]:
def create_fake_images(number_of_fake_images):
  """Sample images from the trained generator.

  Reads the module-level ``Gen`` (generator model), ``random_vector`` (noise
  length) and ``image_size``.

  Args:
    number_of_fake_images: How many images to generate.

  Returns:
    Array of shape ``(number_of_fake_images, image_size, image_size, 1)``
    with the generator output mapped from [-1, 1] to [0, 1]. An empty array
    with zero rows is returned when no images are requested.
  """
  if number_of_fake_images <= 0:
    return np.empty((0, image_size, image_size, 1))

  # One batched predict call instead of one call per image; growing the
  # result with np.append (no axis) also flattened it to 1-D and copied the
  # whole array on every iteration.
  noise = np.random.normal(0, 1, (number_of_fake_images, random_vector))
  generated = np.asarray(Gen.predict(noise))

  # Map the tanh output range [-1, 1] to the [0, 1] range of the real data.
  fake_images = 0.5 * generated + 0.5

  return fake_images.reshape((-1, image_size, image_size, 1))
    

In [None]:
# [1] Define the hyper-parameters
image_size = 28      # side length in pixels; images are reshaped to image_size x image_size
random_vector = 32   # length of the noise vector fed to the generator
test_size = 0.1      # share of the requested images drawn from the MNIST test split

In [None]:
# [2] Obtain dataset '0' and '1' from mnist
# Class '0' is requested with 100 images and class '1' with 1000, which
# produces the class imbalance studied in the following steps.
x_train_0, y_train_0, x_test_0, y_test_0 = obtain_dataset(
    selected_class=0,
    number_of_images=100,
)
x_train_1, y_train_1, x_test_1, y_test_1 = obtain_dataset(
    selected_class=1,
    number_of_images=1000,
)
print("class 0: ", *(arr.shape for arr in (x_train_0, x_test_0, y_train_0, y_test_0)))
print("class 1: ", *(arr.shape for arr in (x_train_1, x_test_1, y_train_1, y_test_1)))

In [None]:
# [3] Create imbalanced dataset by merging class '0' and '1'
# Class '0' samples come first, followed by class '1', along the sample axis.
x_train = np.concatenate([x_train_0, x_train_1], axis=0)
y_train = np.concatenate([y_train_0, y_train_1], axis=0)
x_test = np.concatenate([x_test_0, x_test_1], axis=0)
y_test = np.concatenate([y_test_0, y_test_1], axis=0)
print("merged 0 and 1 data: ", *(arr.shape for arr in (x_train, x_test, y_train, y_test)))

In [None]:
# [4] Build, train, and test a classifier
# Baseline run on the imbalanced data merged in step [3] (5 epochs, batch size 32).
auc_imbalanced = build_and_train_classifier(epochs=5, batch_size=32)
print("AUC (imbalanced data):", auc_imbalanced)

In [None]:
# [5] Build and train GAN
# 'Gen' has to be a module-level name: create_fake_images reads it as a global.
Gen = build_and_train_GAN(epochs=20, batch_size=32)

In [None]:
# [6] Create fake data for class '0'
number_of_fake_images = 900
# Sample synthetic class-'0' images from the trained generator 'Gen'.
fake_images = create_fake_images(number_of_fake_images)
# Enforce the (N, image_size, image_size, 1) layout used by the real data.
fake_images = fake_images.reshape((-1, image_size, image_size, 1))
print(fake_images.shape)

In [None]:
# [7] Merge fake data with the original data
# Split the generated images with the same train/test ratio as the real data.
train_num = int(number_of_fake_images*(1-test_size))
fake_train = fake_images[:train_num, :, :]
fake_test = fake_images[train_num:, :, :]
# Derive the count from the slice so images and labels always have equal length.
test_num = len(fake_test)

# Rebuild the merged arrays from the per-class arrays instead of extending
# x_train/x_test in place, so re-running this cell does not append the fake
# images a second time.
x_train = np.concatenate((x_train_0, x_train_1, fake_train))
x_test = np.concatenate((x_test_0, x_test_1, fake_test))

# All generated images belong to class '0'; keep the labels in the same
# uint8 dtype as the real labels.
y_train = np.concatenate((y_train_0, y_train_1, np.zeros((len(fake_train), 1), dtype='uint8')))
y_test = np.concatenate((y_test_0, y_test_1, np.zeros((test_num, 1), dtype='uint8')))
print("final data: ", x_train.shape, x_test.shape, y_train.shape, y_test.shape)

In [None]:
# [8] Build, train, and test a classifier
# Same settings as step [4], now on the data augmented with generated class-'0' images.
auc_augmented = build_and_train_classifier(epochs=5, batch_size=32)
print("AUC (with GAN-generated data):", auc_augmented)