# SGAN(Semi-supervised GAN) : 준지도학습 기반의 GAN
* DCGAN 등의 모델과 다르게, 실제 데이터셋 중 레이블링 된 일부 데이터와 레이블링되지 않은 나머지 데이터, Generator가 생성한 데이터 3가지를 입력으로 받고
* Discriminator는 해당 모델이 진짜/가짜일 확률(sigmoid)과 진짜라면 어떤 클래스에 해당되는지(softmax)에 대해 학습

In [1]:
import numpy as np 
import matplotlib.pyplot as plt
from tensorflow.keras import backend as K
from tensorflow.keras.datasets import mnist
from tensorflow.keras.layers import (Activation, BatchNormalization, Concatenate, Dense, Dropout, Flatten, Input, Lambda, Reshape, LeakyReLU, 
                                    Conv2D, Conv2DTranspose)
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import to_categorical

In [2]:
# Dataset : Train 이미지 5만개 중 num_labeled 개만 이미지를 사용하고 레이블 없는 샘플 배치 만들때는 나머지 (5만-num_labeled)개 사용

class Dataset:

  def __init__(self, num_labeled):
    self.num_labeled = num_labeled

    (self.X_train, self.y_train), (self.X_test, self.y_test) = mnist.load_data()

    def preprocess_img(x):
      '''
      image pixel 값 정규화 및 채널 추가
      '''   
      x = (x.astype(np.float32) - 127.5) / 127.5
      x = np.expand_dims(x,axis=3)
      return x
    
    def preprocess_labels(y):
      return y.reshape(-1,1)

    self.X_train = preprocess_img(self.X_train)
    self.y_train = preprocess_labels(self.y_train)
    self.X_test = preprocess_img(self.X_test)
    self.y_test = preprocess_labels(self.y_test)


  def batch_labeled(self, batch_size):
    '''
    Label이 붙은 이미지 데이터를 배치로 추출
    1에서 num_labeled 사이의 랜덤한 정수 추출
    batch_size개 만큼 추출될 것임
    '''
    idx = np.random.randint(0, self.num_labeled, batch_size)
    imgs = self.X_train[idx]
    labels = self.y_train[idx]
    return imgs, labels
  
  def batch_unlabeled(self, batch_size):
    '''
    Label이 붙지 않은 이미지 데이터를 배치로 추출
    num_labeled에서 5만개 사이의 랜덤한 정수 추출
    batch_size개 만큼 추출될 것임
    '''
    idx = np.random.randint(self.num_labeled, self.X_train.shape[0], batch_size)
    imgs = self.X_train[idx]
    return imgs
  
  def training_set(self):
    '''
    레이블이 붙은 train데이터셋 구분
    '''

    X_train = self.X_train[range(self.num_labeled)]
    y_train = self.y_train[range(self.num_labeled)]
    return X_train, y_train
  
  def test_set(self):
    return self.X_test, self.y_test

In [3]:
# 훈련에 쓸 레이블 붙은 Train데이터 수 : 100
num_labeled = 100 
dataset = Dataset(num_labeled)

In [4]:
img_rows = 28
img_cols = 28
channels = 1
img_shape = (img_rows, img_cols, channels)
z_dim = 100
num_classes = 10


# Generator - DCGAN모델과 동일(Conv2DTranspose층으로 입력벡터를 28*28*1 크기로 생성)
def build_generator(z_dim):

  model = Sequential()
  model.add(Dense(256*7*7, input_dim=z_dim))
  model.add(Reshape((7,7,256)))    # (7,7,256)
  model.add(Conv2DTranspose(128, kernel_size=3, strides=2, padding='same'))  # (14,14,128)
  model.add(BatchNormalization())
  model.add(LeakyReLU(alpha=0.01))

  model.add(Conv2DTranspose(64, kernel_size=3, strides=1, padding='same'))  # (14,14,64)
  model.add(BatchNormalization())
  model.add(LeakyReLU(alpha=0.01))

  model.add(Conv2DTranspose(1, kernel_size=3, strides=2, padding='same'))  # (28, 28, 1)
  model.add(Activation('tanh'))

  return model

# Discriminator - DCGAN모델과 거의 유사하나 Flatten()층 앞에 Dropout층이 있다는 차이가 있음
def build_discriminator(img_shape):

  model = Sequential()
  model.add(Conv2D(32, kernel_size=3, strides=2, input_shape=img_shape,
                   padding='same')) # (28,28,1) --> (14,14,32)
  model.add(LeakyReLU(alpha=0.01))

  model.add(Conv2D(64, kernel_size=3, strides=2, input_shape=img_shape, padding='same')) # (32,32,64)
  model.add(BatchNormalization())
  model.add(LeakyReLU(alpha=0.01))
  

  model.add(Conv2D(128, kernel_size=3, strides=2, input_shape=img_shape, padding='same'))
  model.add(BatchNormalization())
  model.add(LeakyReLU(alpha=0.01))
  
  model.add(Dropout(0.5))
  model.add(Flatten())
  model.add(Dense(num_classes))

  return model


# 위 Discriminator 모델을 받아 지도학습용 판별자 & 비지도학습용 판별자 생성

# 1) discriminator for supervised - 다중 분류를 수행하는 판별자
def build_discriminator_supervised(discriminator):

  model = Sequential()
  model.add(discriminator)
  model.add(Activation('softmax'))  # 진짜 클래스에 대한 예측 확률을 출력하는 Softmax 활성화 함수
  return model

# 2) discriminator for unsupervised - 이진 분류를 수행하는 판별자
def build_discriminator_unsupervised(discriminator):

  model = Sequential()
  model.add(discriminator)
  
  def predict(x):
    '''
    모델에서 온 10개 뉴런의 출력을 진짜/가짜의 이진 예측으로 변환
    '''
    prediction = 1.0 - (1.0 / K.sum(K.exp(x), axis=-1, keepdims=True) + 1.0)

    return prediction
  
  model.add(Lambda(predict))

  return model


# SGAN 모델 구성
def build_sgan(generator, discriminator):

  model = Sequential()
  model.add(generator)
  model.add(discriminator)

  return model

discriminator = build_discriminator(img_shape)

discriminator_supervised = build_discriminator_supervised(discriminator)
discriminator_supervised.compile(loss='categorical_crossentropy', metrics=['accuracy'],
                                 optimizer=Adam(learning_rate=0.0003))

discriminator_unsupervised = build_discriminator_unsupervised(discriminator)
discriminator_unsupervised.compile(loss='binary_crossentropy',
                                   optimizer=Adam())

generator = build_generator(z_dim)

discriminator_unsupervised.trainable = False   # 생성자 훈련을 위해 판별자의 모델 파라미터 freeze

gan = build_sgan(generator, discriminator_unsupervised)  # discriminator로 discriminator_unsupervised 사용
gan.compile(loss='binary_crossentropy', optimizer=Adam())

In [5]:
# SGAN Training

supervised_losses = []
iteration_checkpoints = []

def train(iterations, batch_size, sample_interval):

  real = np.ones((batch_size, 1))   # 진짜 이미지의 레이블 --> 1
  fake = np.zeros((batch_size, 1))  # 가짜 이미지의 레이블 --> 0

  for iteration in range(iterations):

    imgs, labels = dataset.batch_labeled(batch_size)   # 레이블 붙은 샘플배치를 가져옴

    labels = to_categorical(labels, num_classes=num_classes)  # 레이블을 one-hot encoding
 
    imgs_unlabeled = dataset.batch_unlabeled(batch_size)  # 레이블 안붙은 샘플배치를 가져옴

    z = np.random.normal(0, 1, (batch_size, z_dim))  # 가짜 이미지의 배치를 생성
    gen_imgs = generator.predict(z)

    d_loss_supervised, accuracy = discriminator_supervised.train_on_batch(imgs, labels) # label이 붙은 진짜 샘플에서 훈련

    d_loss_real = discriminator_unsupervised.train_on_batch(imgs_unlabeled, real) # label이 없는 진짜 샘플에서 훈련

    d_loss_fake = discriminator_unsupervised.train_on_batch(gen_imgs, fake)   # 가짜 샘플에서 훈련함

    d_loss_unsupervised = 0.5 * np.add(d_loss_real, d_loss_fake)

    z = np.random.normal(0,1,(batch_size, z_dim))   # 가짜 이미지의 배치를 생성함
    gen_imgs = generator.predict(z)

    g_loss = gan.train_on_batch(z, np.ones((batch_size, 1))) # 생성자 훈련

    if (iteration + 1) % sample_interval == 0:

      supervised_losses.append(d_loss_supervised)
      iteration_checkpoints.append(iteration + 1)
      
      print(
          "%d [D loss supervised: %.4f, acc.: %.2f%%] [D loss unsupervised: %.4f] [G loss: %f]"
          % (iteration + 1, d_loss_supervised, 100 * accuracy,
              d_loss_unsupervised, g_loss))

In [6]:
iterations = 8000
batch_size = 32
sample_interval = 800

train(iterations, batch_size, sample_interval)

800 [D loss supervised: 0.0017, acc.: 100.00%] [D loss unsupervised: 7.7125] [G loss: 15.424949]
1600 [D loss supervised: 0.0004, acc.: 100.00%] [D loss unsupervised: 7.7125] [G loss: 15.424949]
2400 [D loss supervised: 0.0004, acc.: 100.00%] [D loss unsupervised: 7.7125] [G loss: 15.424949]
3200 [D loss supervised: 0.0001, acc.: 100.00%] [D loss unsupervised: 7.7125] [G loss: 15.424949]
4000 [D loss supervised: 0.0001, acc.: 100.00%] [D loss unsupervised: 7.7125] [G loss: 15.424949]
4800 [D loss supervised: 0.0000, acc.: 100.00%] [D loss unsupervised: 7.7125] [G loss: 15.424949]
5600 [D loss supervised: 0.0000, acc.: 100.00%] [D loss unsupervised: 7.7125] [G loss: 15.424949]
6400 [D loss supervised: 0.0000, acc.: 100.00%] [D loss unsupervised: 7.7125] [G loss: 15.424949]
7200 [D loss supervised: 0.0000, acc.: 100.00%] [D loss unsupervised: 7.7125] [G loss: 15.424949]
8000 [D loss supervised: 0.0000, acc.: 100.00%] [D loss unsupervised: 7.7125] [G loss: 15.424949]


In [7]:
# test dataset에 대한 discriminator_supervised의 정확도 계산
x, y = dataset.test_set()
y = to_categorical(y, num_classes=num_classes)

_, accuracy = discriminator_supervised.evaluate(x, y)
print(np.round(100 * accuracy, 2))

68.29


In [8]:
# 샘플이미지 말고 모든 이미지를 학습한 discriminator_supervised와의 비교

imgs, labels = dataset.training_set()
labels = to_categorical(labels, num_classes=num_classes)

fully_supervised_classifier = build_discriminator_supervised(build_discriminator(img_shape))
fully_supervised_classifier.compile(loss='categorical_crossentropy', metrics=['accuracy'], optimizer=Adam())

training = fully_supervised_classifier.fit(imgs, labels, batch_size=32,
                                          epochs=30, verbose=1)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


In [9]:
x, y = dataset.test_set()
y = to_categorical(y, num_classes=num_classes)

# Compute classification accuracy on the test set
_, accuracy = fully_supervised_classifier.evaluate(x, y)
print("Test Accuracy: %.2f%%" % (100 * accuracy))

Test Accuracy: 40.55%
