<a href="https://colab.research.google.com/github/YianKim/2022_uncertainty_aware_semisupervise/blob/main/Keras_UPS_TS.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import matplotlib.pyplot as plt
from tensorflow import keras
import numpy as np
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import clone_model

import PIL
from PIL import Image

import pickle
import random
from tqdm import tqdm
from collections import Counter

from keras.layers.core import Lambda
from keras import backend as K

from keras.models import Sequential
from keras.layers import Conv2D
from keras.layers import BatchNormalization
from keras.regularizers import l2
from keras.layers import Activation
from keras.layers import Dropout
from keras.layers import MaxPooling2D, AveragePooling2D
from keras.layers import Flatten
from keras.layers import Dense
from keras.layers import Reshape
from keras import optimizers
from keras.callbacks import *
from sklearn.metrics import *
from keras.models import load_model

from torchvision import transforms

import tensorflow as tf
import tensorflow.keras.backend as backend
import math
import gc

# CIFAR-10 dataset

In [None]:
# Load CIFAR-10 through Keras, then divide the image arrays by 255.
cifar10 = keras.datasets.cifar10
(train_images, train_labels), (test_images, test_labels) = cifar10.load_data()
train_images, test_images = train_images/255, test_images/255

In [None]:
def dummy_labels(labels, num_classes=10):
  """One-hot encode integer class labels.

  Args:
    labels: NumPy array whose first axis indexes samples; each entry along
      that axis holds the class index (or indices) to mark for that sample.
    num_classes: width of the encoding. Defaults to 10, the value that was
      previously hard-coded.

  Returns:
    An int8 array of shape (labels.shape[0], num_classes) with a 1 at each
    labelled class position and 0 elsewhere.
  """
  encoded = np.zeros([labels.shape[0], num_classes], np.int8)
  # Iterating a 2-D array yields row views, so writing through `row`
  # updates `encoded` in place.
  for row, cls in zip(encoded, labels):
    row[cls] = 1
  return encoded

In [None]:
# Replace both integer label arrays with their one-hot encodings.
train_labels, test_labels = map(dummy_labels, (train_labels, test_labels))

In [None]:
# 1000 labeled, 49000 unlabeled
# Shuffle every training index once: the first 1000 shuffled positions form
# the labeled pool and the remainder the unlabeled pool.
indx = random.sample(range(train_labels.shape[0]),train_labels.shape[0])
labeled_part, unlabeled_part = indx[:1000], indx[1000:]

lbl_train_images, lbl_train_labels = train_images[labeled_part], train_labels[labeled_part]
ubl_train_images, ubl_train_labels = train_images[unlabeled_part], train_labels[unlabeled_part]

# valids1 =  train_images[indx[800:1000]]
# valids2 =  train_labels[indx[800:1000]]

# Pseudo-labeling

### Augmentation


In [None]:
def basic_augmentation(imagearray):
  """Run one image through a fixed chain of random torchvision transforms.

  The array is converted with PIL's Image.fromarray, passed in turn through
  a random affine (shear/scale), color jitter, random rotation and random
  horizontal flip, and returned as a NumPy array.
  """
  pipeline = (
      transforms.RandomAffine(0, shear=10, scale=(0.8, 1.2)),
      transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2),
      transforms.RandomRotation(10),
      transforms.RandomHorizontalFlip(),
  )
  picture = Image.fromarray(imagearray)
  for step in pipeline:
    picture = step(picture)
  return np.array(picture)

def makeaugs(n, input):
  """Build `n` augmented copies of every image in `input`.

  Args:
    n: number of passes over `input`; each pass augments every image once.
    input: iterable of images. Each image is multiplied by 255 before
      augmentation, so values are presumably in [0, 1] — confirm at caller.

  Returns:
    NumPy array of the augmented images divided by 255, ordered pass by
    pass. Empty when `n` is 0 or `input` is empty.
  """
  augs = []
  for _ in range(n):
    for img in input:
      # Round and clip before the uint8 cast: a bare cast truncates, so a
      # rescaled value that lands just below an integer would lose a level,
      # and out-of-range values would wrap around.
      pixels = np.clip(np.rint(img * 255), 0, 255).astype(np.uint8)
      augs.append(basic_augmentation(pixels))
  return np.array(augs) / 255

### Mixup Augmentation


In [None]:
def sample_beta_distribution(size, concentration_0=0.3, concentration_1=0.3):
    """Return `size` samples computed as X / (X + Y) from two gamma draws.

    X is drawn with alpha=concentration_1 and Y with alpha=concentration_0,
    both through tf.random.gamma with shape [size].
    """
    numerator = tf.random.gamma(shape=[size], alpha=concentration_1)
    complement = tf.random.gamma(shape=[size], alpha=concentration_0)
    total = numerator + complement
    return numerator / total

def mixup(size, data, alpha=0.2):
  """Create `size` mixed samples from random pairs drawn out of `data`.

  `data` is an (images, labels) pair. A per-sample weight is drawn with
  sample_beta_distribution(size, alpha, alpha); two index sets are drawn
  with np.random.choice, and images and labels are blended with the same
  weight. Returns the (mixed_images, mixed_labels) pair.
  """
  images, labels = data
  weights = sample_beta_distribution(size, alpha, alpha)

  first = np.random.choice(len(labels), size)
  second = np.random.choice(len(labels), size)

  # The same weights are reshaped so they line up with image and label arrays.
  image_w = tf.reshape(weights, (size, 1, 1, 1))
  label_w = tf.reshape(weights, (size, 1))

  mixed_images = image_w*images[first] + (1-image_w)*images[second]
  mixed_labels = label_w*labels[first] + (1-label_w)*labels[second]
  return (mixed_images, mixed_labels)

### 스케줄러

In [None]:
class SGDR(Callback):
    """Keras callback that sets the optimizer learning rate once per epoch.

    Cycle ``c`` lasts ``base_epochs * mul_epochs ** c`` epochs. During the
    first cycle the rate is interpolated linearly between ``min_lr`` and
    ``max_lr``; in later cycles it follows a cosine curve from ``max_lr``
    towards ``min_lr``. The rate is set to ``max_lr`` when a new cycle starts.
    """

    def __init__(self, min_lr=0.0, max_lr=0.03, base_epochs=20, mul_epochs=2):
        """Store the schedule parameters and zero the cycle counters.

        Args:
            min_lr: lower bound of the learning rate.
            max_lr: upper bound of the learning rate.
            base_epochs: length in epochs of the first cycle.
            mul_epochs: factor by which each later cycle is lengthened.
        """
        super(SGDR, self).__init__()

        self.min_lr = min_lr
        self.max_lr = max_lr
        self.base_epochs = base_epochs
        self.mul_epochs = mul_epochs

        # Total number of epochs seen; never reset.
        self.trn_iterations = 0.

        # Sets self.cycles and self.cycle_iterations to zero.
        self._reset()

    def _reset(self, new_min_lr=None, new_max_lr=None,
               new_base_epochs=None, new_mul_epochs=None):
        """Reset the cycle counters, optionally replacing schedule parameters.

        Any argument left as None keeps its current value.
        """
        if new_min_lr is not None:
            self.min_lr = new_min_lr
        if new_max_lr is not None:
            self.max_lr = new_max_lr
        if new_base_epochs is not None:
            self.base_epochs = new_base_epochs
        if new_mul_epochs is not None:
            self.mul_epochs = new_mul_epochs
        self.cycles = 0.
        self.cycle_iterations = 0.

    def sgdr(self):
        """Return the learning rate for the current position in the schedule."""
        if self.cycles == 0:
            # First cycle: linear interpolation between min_lr and max_lr.
            return (self.cycle_iterations * self.max_lr
                    + (self.base_epochs - self.cycle_iterations) * self.min_lr) / self.base_epochs

        # Later cycles: cosine curve over the (lengthened) cycle.
        cycle_epochs = self.base_epochs * (self.mul_epochs ** self.cycles)
        return self.min_lr + 0.5 * (self.max_lr - self.min_lr) * (
            1 + np.cos(np.pi * (self.cycle_iterations + 1) / cycle_epochs))

    def on_train_begin(self, logs=None):
        """Set the initial learning rate when training starts."""
        if self.cycle_iterations == 0:
            K.set_value(self.model.optimizer.lr, self.max_lr)
        else:
            K.set_value(self.model.optimizer.lr, self.sgdr())

    def on_epoch_end(self, epoch, logs=None):
        """Record the rate just used in ``logs['lr']`` and set the next one."""
        logs = logs or {}
        logs['lr'] = K.get_value(self.model.optimizer.lr)

        self.trn_iterations += 1
        self.cycle_iterations += 1
        if self.cycle_iterations >= self.base_epochs * (self.mul_epochs ** self.cycles):
            # Cycle finished: restart from max_lr with a longer cycle.
            self.cycles += 1
            self.cycle_iterations = 0
            K.set_value(self.model.optimizer.lr, self.max_lr)
        else:
            K.set_value(self.model.optimizer.lr, self.sgdr())

### main

In [None]:
def PermaDropout(rate):
    """Return a Lambda layer that applies K.dropout at `rate` whenever it is called."""
    def _always_drop(x):
        return K.dropout(x, level=rate)

    return Lambda(_always_drop)

In [None]:
def create_cnn_1(n):
  """Build and compile the three-convs-per-stage CNN with widths scaled by `n`.

  Layout: three 128*n convs, max-pool, dropout; three 256*n convs, max-pool,
  dropout; then 512*n (3x3), 256*n (1x1) and 128*n (1x1) convs, average
  pooling, flatten and a 10-unit Dense layer. Every conv is followed by
  batch normalization and one shared LeakyReLU(0.1) layer instance.
  Compiled with SGD(0.03, momentum=0.9) and from-logits categorical
  cross-entropy.
  """
  shared_act = keras.layers.LeakyReLU(0.1)
  stack = [keras.Input(shape=(32, 32, 3))]

  def add_convs(specs, **conv_kwargs):
    # Append a conv -> batch norm -> activation triple per (width, kernel).
    for width, kernel in specs:
      stack.extend([Conv2D(width*n, kernel, **conv_kwargs), BatchNormalization(), shared_act])

  add_convs([(128, (3,3))] * 3, padding = 'same')
  stack.extend([MaxPooling2D(2, 2), PermaDropout(0.3)])

  add_convs([(256, (3,3))] * 3, padding = 'same')
  stack.extend([MaxPooling2D(2, 2), PermaDropout(0.3)])

  add_convs([(512, (3,3)), (256, (1,1)), (128, (1,1))])
  stack.extend([AveragePooling2D(6, 2), Flatten(), Dense(10)])

  model = Sequential(stack)
  model.compile(
    optimizer = keras.optimizers.SGD(0.03, momentum = 0.9),
    loss=keras.losses.CategoricalCrossentropy(from_logits=True),
    metrics=['accuracy']
  )
  return model

In [None]:
def create_cnn_2(n):
  """Build and compile the two-convs-per-stage CNN with widths scaled by `n`.

  Layout: two 128*n convs, max-pool, dropout; two 256*n convs, max-pool,
  dropout; then 512*n (3x3) and 256*n (1x1) convs, average pooling, flatten
  and a 10-unit Dense layer. Every conv is followed by batch normalization
  and one shared LeakyReLU(0.1) layer instance. Compiled with
  SGD(0.03, momentum=0.9) and from-logits categorical cross-entropy.
  """
  shared_act = keras.layers.LeakyReLU(0.1)
  stack = [keras.Input(shape=(32, 32, 3))]

  def add_convs(specs, **conv_kwargs):
    # Append a conv -> batch norm -> activation triple per (width, kernel).
    for width, kernel in specs:
      stack.extend([Conv2D(width*n, kernel, **conv_kwargs), BatchNormalization(), shared_act])

  add_convs([(128, (3,3))] * 2, padding = 'same')
  stack.extend([MaxPooling2D(2, 2), PermaDropout(0.3)])

  add_convs([(256, (3,3))] * 2, padding = 'same')
  stack.extend([MaxPooling2D(2, 2), PermaDropout(0.3)])

  add_convs([(512, (3,3)), (256, (1,1))])
  stack.extend([AveragePooling2D(6, 2), Flatten(), Dense(10)])

  model = Sequential(stack)
  model.compile(
    optimizer = keras.optimizers.SGD(0.03, momentum = 0.9),
    loss=keras.losses.CategoricalCrossentropy(from_logits=True),
    metrics=['accuracy']
  )
  return model

In [None]:
def create_cnn_3(n):
  """Build and compile the one-conv-per-stage CNN with widths scaled by `n`.

  Layout: one 128*n conv, max-pool, dropout; one 256*n conv, max-pool,
  dropout; then a 512*n (3x3) conv, average pooling, flatten and a 10-unit
  Dense layer. Every conv is followed by batch normalization and one shared
  LeakyReLU(0.1) layer instance. Compiled with SGD(0.03, momentum=0.9) and
  from-logits categorical cross-entropy.
  """
  shared_act = keras.layers.LeakyReLU(0.1)
  stack = [keras.Input(shape=(32, 32, 3))]

  def add_conv(width, kernel, **conv_kwargs):
    # Append one conv -> batch norm -> activation triple.
    stack.extend([Conv2D(width*n, kernel, **conv_kwargs), BatchNormalization(), shared_act])

  add_conv(128, (3,3), padding = 'same')
  stack.extend([MaxPooling2D(2, 2), PermaDropout(0.3)])

  add_conv(256, (3,3), padding = 'same')
  stack.extend([MaxPooling2D(2, 2), PermaDropout(0.3)])

  add_conv(512, (3,3))
  stack.extend([AveragePooling2D(6, 2), Flatten(), Dense(10)])

  model = Sequential(stack)
  model.compile(
    optimizer = keras.optimizers.SGD(0.03, momentum = 0.9),
    loss=keras.losses.CategoricalCrossentropy(from_logits=True),
    metrics=['accuracy']
  )
  return model

In [None]:
def fit_and_labeling_cnn_13(Epoch, Batch):
  """Fit the global `model` on the labeled pool and predict the unlabeled pool.

  Trains for `Epoch` epochs with batch size `Batch` using the SGDR callback
  (its max_lr depends on the global `itr`), prints test accuracy through
  model_test_eval, then runs 10 predict passes over `ubl_train_images`.

  Returns:
    Array of the 10 softmax outputs stacked along a new leading axis.
  """
  X = lbl_train_images
  y = lbl_train_labels

  # Only `sgdr` is passed to fit(); the other two callbacks are built but
  # referenced solely by the commented-out alternative below.
  lr_reducer = ReduceLROnPlateau(monitor='val_loss', factor=0.9, patience=3)
  early_stopper = EarlyStopping(monitor='val_loss', min_delta=0, patience=30, mode='auto')
  sgdr = SGDR(min_lr=0.0, max_lr=0.03/(1+itr*0.05), base_epochs=20) # scheduler

#   size = len(y) * 7
#   newimage, newlabel = mixup(size, (X, y))
#   augimage, auglabel = makeaugs(7, X), np.concatenate((y,y,y,y,y,y,y))
#   X = np.concatenate((X, newimage, augimage))
#   y = np.concatenate((y, newlabel, auglabel))
#   del newimage, newlabel

  model.fit(
      x=X,
      y=y,
      epochs=Epoch,
      verbose=0,
#       validation_data = (valids1, valids2),
      batch_size=Batch,
#       callbacks=[sgdr, early_stopper]
      callbacks=[sgdr]
  )

  model_test_eval(model, test_images, test_labels)
  T = 1

  # One softmax output per predict pass, collected and stacked at the end.
  passes = [
      np.array(tf.nn.softmax(model.predict(ubl_train_images)/T))
      for _ in range(10)
  ]
  return np.stack(passes)

def model_test_eval(model, test_images, test_labels):
  """Print the test accuracy of `model` from 10 summed softmax predict passes.

  The argmax of the summed outputs is compared with the argmax of
  `test_labels`, and the fraction of matches is printed.
  """
  T = 1
  summed = np.array(tf.nn.softmax(model.predict(test_images)/T))
  for _ in range(9):
    summed += np.array(tf.nn.softmax(model.predict(test_images)))
  hits = np.argmax(summed,axis=1) == np.argmax(test_labels,axis=1)
  acc = np.mean(hits)
  print("test set 성능 : " + str(acc))

In [None]:
def label_selecting():
  """Select confident, low-variance pseudo-labels and rebuild the data pools.

  Reads the globals `predictions`, `itr`, `ubl_train_images`,
  `lbl_train_images` and `lbl_train_labels`. A sample is selected when the
  mean probability of its predicted class (over axis 0 of `predictions`)
  exceeds K_conf and the standard deviation of that class's probability is
  below K_uncert.

  Returns:
    (image1, label1, image2): the labeled images and labels extended with
    the accepted pseudo-labeled samples, and the remaining unlabeled images.
  """
  K_conf = 0.8
  K_uncert = 0.05
  n_classes = 10

  mean_probs = np.mean(predictions, axis=0)
  pseudo_cls = np.argmax(mean_probs, axis=1)
  conf = np.max(mean_probs, axis=1)
  # Spread of the predicted class's probability across the predict passes.
  uncert = np.std(predictions, axis=0)[np.arange(len(pseudo_cls)), pseudo_cls]

  selected = (conf > K_conf) & (uncert < K_uncert)
  lbl_idx = np.flatnonzero(selected)
  ubl_idx = np.flatnonzero(~selected)

  # One-hot pseudo-labels; stays 2-D even when there are no samples.
  pseudo = np.eye(n_classes, dtype=int)[pseudo_cls]

  ubl_append = ubl_train_images[lbl_idx]
  pseudo_append = pseudo[lbl_idx]

  if itr < 20: # temporary modification
    multlabel = pseudo_cls[lbl_idx]
    sufindx = random.sample(range(len(multlabel)), len(multlabel))

    # Visit the selected samples in random order and accept at most
    # `per_class_cap` of them per class.
    per_class_cap = (5*itr)+200
    idxcounter = [0] * n_classes
    idxsample = []
    for i in sufindx:
      if idxcounter[multlabel[i]] < per_class_cap:
        idxcounter[multlabel[i]] += 1
        idxsample.append(i)

    # A boolean mask keeps the indices integer-typed even when nothing was
    # selected; np.delete on an empty list returned a float array, which
    # cannot be used as an index.
    accepted = np.zeros(len(ubl_append), dtype=bool)
    accepted[idxsample] = True

    image1 = np.concatenate((lbl_train_images, ubl_append[idxsample]))
    label1 = np.concatenate((lbl_train_labels, pseudo_append[idxsample]))
    # Selected samples over the cap go back to the unlabeled pool.
    image2 = np.concatenate((ubl_train_images[ubl_idx], ubl_append[~accepted]))

  else:
    image1 = np.concatenate((lbl_train_images, ubl_append))
    label1 = np.concatenate((lbl_train_labels, pseudo_append))
    image2 = ubl_train_images[ubl_idx]

  return image1, label1, image2

### Knowledge Distillation

In [None]:
class Distiller(keras.Model):
    """Keras model that trains `student` on labels and on `teacher` outputs.

    Only the student's trainable variables are updated; the teacher is run
    with ``training=False``.
    """

    def __init__(self, student, teacher):
        """Store the two models; both are called directly on the input batch."""
        super(Distiller, self).__init__()
        self.teacher = teacher
        self.student = student

    def compile(
        self,
        optimizer,
        metrics,
        student_loss_fn,
        distillation_loss_fn,
        alpha=0.1,
        temperature=1,
    ):
        """Configure the distiller.

        Args:
            optimizer: optimizer applied to the student's variables.
            metrics: metrics updated with (labels, student outputs).
            student_loss_fn: loss between labels and student outputs.
            distillation_loss_fn: loss between softened teacher and student
                outputs.
            alpha: weight of the student loss; the distillation loss is
                weighted by ``1 - alpha``.
            temperature: divisor applied to both models' outputs before the
                softmax in the distillation term.
        """
        super(Distiller, self).compile(optimizer=optimizer, metrics=metrics)
        self.student_loss_fn = student_loss_fn
        self.distillation_loss_fn = distillation_loss_fn
        self.alpha = alpha
        self.temperature = temperature

    def train_step(self, data):
        """Run one optimisation step on the student and return metric values."""
        # Unpack data
        x, y = data

        # Forward pass of teacher (outside the tape: it is not trained)
        teacher_predictions = self.teacher(x, training=False)

        with tf.GradientTape() as tape:
            # Forward pass of student
            student_predictions = self.student(x, training=True)

            # Compute losses
            student_loss = self.student_loss_fn(y, student_predictions)
            distillation_loss = self.distillation_loss_fn(
                tf.nn.softmax(teacher_predictions / self.temperature, axis=1),
                tf.nn.softmax(student_predictions / self.temperature, axis=1),
            )
            loss = self.alpha * student_loss + (1 - self.alpha) * distillation_loss

        # Compute gradients
        trainable_vars = self.student.trainable_variables
        gradients = tape.gradient(loss, trainable_vars)

        # Update weights
        self.optimizer.apply_gradients(zip(gradients, trainable_vars))

        # Update the metrics configured in `compile()`.
        self.compiled_metrics.update_state(y, student_predictions)

        # Return a dict of performance
        results = {m.name: m.result() for m in self.metrics}
        results.update(
            {"student_loss": student_loss, "distillation_loss": distillation_loss}
        )
        return results

    def test_step(self, data):
        """Evaluate the student on one batch and return metric values."""
        # Unpack the data
        x, y = data

        # Compute predictions
        y_prediction = self.student(x, training=False)

        # Calculate the loss
        student_loss = self.student_loss_fn(y, y_prediction)

        # Update the metrics.
        self.compiled_metrics.update_state(y, y_prediction)

        # Return a dict of performance
        results = {m.name: m.result() for m in self.metrics}
        results.update({"student_loss": student_loss})
        return results

    def call(self, inputs, *args, **kwargs):
        """Forward `inputs` through the student model.

        The distiller has no `model` attribute, so the call is delegated to
        `self.student`; extra arguments are passed through unchanged.
        """
        return self.student(inputs, *args, **kwargs)

### 실행

In [None]:
import time
start = time.time()

# Iterative teacher/student loop. The first round trains a fresh
# create_cnn_1(1); each later round distills the previous teacher into a new
# student, fine-tunes it on the labeled pool, and uses its predictions to
# move pseudo-labeled samples from the unlabeled pool into the labeled one.
for itr in range(20):
  if itr == 0:
    model = create_cnn_1(1)
  else:
    # Student architecture schedule. An ordered chain of upper bounds is used
    # because `itr > 2 & itr <= 5` parses as `itr > (2 & itr) <= 5`, which is
    # true for every itr >= 3 and kept the student at create_cnn_2(1/2).
    if itr <= 2:
        student = create_cnn_1(1/2)
    elif itr <= 5:
        student = create_cnn_2(1/2)
    elif itr <= 9:
        student = create_cnn_2(1/4)
    elif itr <= 13:
        student = create_cnn_2(1/8)
    elif itr <= 16:
        student = create_cnn_3(1/8)
    else:
        student = create_cnn_3(1/16)
    model = student
    print("**********itr " + str(itr) + " student evaluation (before & after)**********")
    model_test_eval(model, test_images, test_labels)
    distiller = Distiller(student=student, teacher=teacher)
    distiller.compile(
        optimizer=keras.optimizers.Adam(),
        metrics=[keras.metrics.CategoricalAccuracy()],
        student_loss_fn=keras.losses.CategoricalCrossentropy(from_logits=True),
        distillation_loss_fn=keras.losses.KLDivergence(),
        alpha=0.1,
        temperature=1,
    )

    # Distill teacher to student
    distiller.fit(lbl_train_images, lbl_train_labels, epochs=1000, batch_size=128, verbose=0)
    model = student
    model_test_eval(model, test_images, test_labels)

  print(Counter(np.argmax(lbl_train_labels, axis=1)))
  print("**********itr " + str(itr+1) + " teacher evaluation**********")
  print("layer 개수 : " + str(len(model.layers)))
  # fit_and_labeling_cnn_13 and label_selecting read the globals `model`,
  # `itr` and `predictions`, so these names must stay as they are.
  predictions = fit_and_labeling_cnn_13(1000, 128)
  lbl_train_images, lbl_train_labels, ubl_train_images = label_selecting()
  del predictions
  # The model trained this round is the teacher for the next one.
  teacher = model
  gc.collect()


print("time :", time.time() - start)

Counter({6: 117, 8: 115, 4: 109, 9: 104, 3: 99, 1: 98, 5: 97, 7: 95, 2: 85, 0: 81})
**********itr 1 teacher evaluation**********
layer 개수 : 26
test set 성능 : 0.5702
**********itr 1 student evaluation (before & after)**********
test set 성능 : 0.0932
test set 성능 : 0.6036
Counter({6: 317, 8: 315, 4: 309, 9: 304, 3: 299, 1: 298, 5: 297, 7: 295, 2: 285, 0: 281})
**********itr 2 teacher evaluation**********
layer 개수 : 26
test set 성능 : 0.6247
**********itr 2 student evaluation (before & after)**********
test set 성능 : 0.1
test set 성능 : 0.6321
Counter({6: 522, 8: 520, 4: 514, 9: 509, 3: 504, 1: 503, 5: 502, 7: 500, 2: 490, 0: 486})
**********itr 3 teacher evaluation**********
layer 개수 : 26
test set 성능 : 0.6458
**********itr 3 student evaluation (before & after)**********
test set 성능 : 0.1
test set 성능 : 0.6465
Counter({6: 732, 8: 730, 4: 724, 9: 719, 3: 714, 1: 713, 5: 712, 7: 710, 2: 700, 0: 696})
**********itr 4 teacher evaluation**********
layer 개수 : 20
test set 성능 : 0.6483
**********itr 4 stud