<a href="https://colab.research.google.com/github/YianKim/2022_uncertainty_aware_semisupervise/blob/main/Keras_UPS.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import matplotlib.pyplot as plt
from tensorflow import keras
import numpy as np
from sklearn.model_selection import train_test_split

import PIL
import pickle
import random
from tqdm import tqdm
from collections import Counter

from keras.layers.core import Lambda
from keras import backend as K

from keras.models import Sequential
from keras.layers import Conv2D
from keras.layers import BatchNormalization
from keras.regularizers import l2
from keras.layers import Activation
from keras.layers import Dropout
from keras.layers import MaxPooling2D, AveragePooling2D
from keras.layers import Flatten
from keras.layers import Dense
from keras.layers import Reshape
from keras import optimizers
from keras.callbacks import *
from sklearn.metrics import *
from keras.models import load_model

import tensorflow as tf
import tensorflow.keras.backend as backend
import math

from tqdm import tqdm

# cifar10 dataset

In [2]:
# Load CIFAR-10 through Keras and rescale the pixel values by 1/255.
cifar10 = keras.datasets.cifar10
(train_images, train_labels), (test_images, test_labels) = cifar10.load_data()
# Cast to float32 before dividing: dividing the integer image arrays directly
# yields float64 copies, doubling memory for no benefit since Keras computes
# in float32 by default.
train_images = train_images.astype("float32") / 255
test_images = test_images.astype("float32") / 255

In [3]:
def dummy_labels(labels, num_classes=10):
  """One-hot encode integer class labels.

  Args:
    labels: array-like of integer class ids with shape (N,) or (N, k).
      Every id found in row ``i`` sets the matching column of output row
      ``i`` to 1 (for the usual (N,) / (N, 1) input this is a plain one-hot
      encoding).
    num_classes: number of output columns; defaults to 10.

  Returns:
    ``np.int8`` array of shape (N, num_classes).

  Raises:
    IndexError: if a label is outside the valid column range.
  """
  labels = np.asarray(labels)
  n = labels.shape[0]
  one_hot = np.zeros([n, num_classes], np.int8)
  if n:
    # View the labels as (N, k) so 1-D and column-vector inputs share one
    # vectorized assignment; the row index broadcasts against the k columns.
    one_hot[np.arange(n)[:, None], labels.reshape(n, -1)] = 1
  return one_hot

In [4]:
# Replace the integer class ids of both splits with their one-hot encoding.
train_labels, test_labels = (
    dummy_labels(split) for split in (train_labels, test_labels)
)

In [5]:
# 1000 labeled, 49000 unlabeled
# Size of the labeled subset; every remaining training sample is treated as
# unlabeled.
NUM_LABELED = 1000

# Random permutation of all training indices (unseeded, so the split differs
# between runs).
indx = random.sample(range(train_labels.shape[0]), train_labels.shape[0])
labeled_idx, unlabeled_idx = indx[:NUM_LABELED], indx[NUM_LABELED:]

lbl_train_images = train_images[labeled_idx]
ubl_train_images = train_images[unlabeled_idx]

lbl_train_labels = train_labels[labeled_idx]
# True labels of the "unlabeled" pool, kept for reference.
ubl_train_labels = train_labels[unlabeled_idx]

# pseudo labeling

### 스케줄러 (learning-rate scheduler)

In [6]:
class SGDR(Callback):
    """Learning-rate schedule: linear first cycle, then cosine warm restarts.

    Based on Stochastic Gradient Descent with warm Restarts (SGDR),
    as proposed by Loshchilov & Hutter (https://arxiv.org/abs/1608.03983).

    The rate is updated once per epoch, in ``on_epoch_end``. With ``i`` the
    number of epochs completed in the current cycle and
    ``num_epochs = base_epochs * mul_epochs ** cycle_index``:

    * Training starts at ``max_lr``, and the rate is reset to ``max_lr``
      whenever a cycle finishes (``i >= num_epochs``).
    * Within the first cycle the rate is interpolated linearly:
      ``lr(i) = (i * max_lr + (base_epochs - i) * min_lr) / base_epochs``
    * Within every later cycle it follows a cosine:
      ``lr(i) = min_lr + 0.5 * (max_lr - min_lr)
                * (1 + cos(pi * (i + 1) / num_epochs))``

    # Example
        ```python
            sgdr = SGDR(min_lr=0.0, max_lr=0.05,
                        base_epochs=10, mul_epochs=2)
            model.compile(optimizer=keras.optimizers.SGD(momentum=0.9),
                          loss=loss)
            model.fit(X_train, Y_train, callbacks=[sgdr])
        ```

    # Arguments
        min_lr: lower bound of the schedule.
        max_lr: rate used at the start of training and after each restart.
        base_epochs: number of epochs in the first cycle.
        mul_epochs: factor with which the number of epochs is multiplied
                after each cycle.
    """

    def __init__(self, min_lr=0.0, max_lr=0.03, base_epochs=20, mul_epochs=2):
        super(SGDR, self).__init__()

        self.min_lr = min_lr
        self.max_lr = max_lr
        self.base_epochs = base_epochs
        self.mul_epochs = mul_epochs

        # Total number of epochs seen by this callback.
        self.trn_iterations = 0.

        # Initializes self.cycles and self.cycle_iterations.
        self._reset()

    def _reset(self, new_min_lr=None, new_max_lr=None,
               new_base_epochs=None, new_mul_epochs=None):
        """Restart the schedule, optionally overriding its parameters.

        Any argument left as ``None`` keeps the current value.
        """
        if new_min_lr is not None:
            self.min_lr = new_min_lr
        if new_max_lr is not None:
            self.max_lr = new_max_lr
        if new_base_epochs is not None:
            self.base_epochs = new_base_epochs
        if new_mul_epochs is not None:
            self.mul_epochs = new_mul_epochs
        self.cycles = 0.
        self.cycle_iterations = 0.

    def sgdr(self):
        """Return the learning rate for the current position in the cycle."""
        if self.cycles == 0:
            # First cycle: linear interpolation between min_lr and max_lr.
            done = self.cycle_iterations
            return (done * self.max_lr
                    + (self.base_epochs - done) * self.min_lr) / self.base_epochs

        # Later cycles: cosine annealing from max_lr down to min_lr.
        cycle_epochs = self.base_epochs * (self.mul_epochs ** self.cycles)
        phase = np.pi * (self.cycle_iterations + 1) / cycle_epochs
        return self.min_lr + 0.5 * (self.max_lr - self.min_lr) * (1 + np.cos(phase))

    def on_train_begin(self, logs=None):
        """Set the initial rate: max_lr at a cycle start, else the scheduled value."""
        if self.cycle_iterations == 0:
            K.set_value(self.model.optimizer.lr, self.max_lr)
        else:
            K.set_value(self.model.optimizer.lr, self.sgdr())

    def on_epoch_end(self, epoch, logs=None):
        """Record the rate just used in ``logs['lr']`` and set the next one."""
        logs = logs or {}
        logs['lr'] = K.get_value(self.model.optimizer.lr)

        self.trn_iterations += 1
        self.cycle_iterations += 1
        if self.cycle_iterations >= self.base_epochs * (self.mul_epochs ** self.cycles):
            # Cycle finished: warm restart at max_lr with a longer next cycle.
            self.cycles += 1
            self.cycle_iterations = 0
            K.set_value(self.model.optimizer.lr, self.max_lr)
        else:
            K.set_value(self.model.optimizer.lr, self.sgdr())

### main

In [7]:
def PermaDropout(rate):
    """Build a layer that applies dropout on every call.

    The layer wraps ``K.dropout`` in a ``Lambda``, so the dropout op is part
    of the layer's forward computation itself rather than depending on a
    training flag.

    Args:
        rate: value forwarded to ``K.dropout`` as ``level``.

    Returns:
        The ``Lambda`` layer.
    """
    always_on = Lambda(lambda tensor: K.dropout(tensor, level=rate))
    return always_on

def create_cnn_13(mc_dropout=True):
  """Build the (uncompiled) 13-layer CNN classifier for 32x32x3 inputs.

  Layout: three 3x3 'same' conv units with 128 filters, max-pool, dropout;
  three 3x3 'same' conv units with 256 filters, max-pool, dropout; then
  conv units of 512 (3x3), 256 (1x1) and 128 (1x1) filters with default
  padding, average pooling, flatten and a 10-way softmax. Each conv unit is
  Conv2D(relu) -> BatchNormalization -> LeakyReLU(0.1).

  Args:
    mc_dropout: when True (default) the two dropout stages are built with
      ``PermaDropout`` so they stay active in ``model.predict``; repeated
      predictions then differ, which is what Monte-Carlo uncertainty
      estimates need. When False, standard ``Dropout`` layers are used; Keras
      disables those outside training, so repeated predictions are identical.

  Returns:
    A ``Sequential`` model.
  """
  # A single LeakyReLU instance is reused after every BatchNormalization; it
  # has no weights, so sharing it does not tie any parameters together.
  activ = keras.layers.LeakyReLU(0.1)

  def conv_unit(filters, kernel, **conv_kwargs):
    # NOTE(review): the convolution already applies ReLU before the batch norm
    # and LeakyReLU that follow; kept as in the original architecture, confirm
    # that the double activation is intended.
    return [Conv2D(filters, kernel, activation='relu', **conv_kwargs),
            BatchNormalization(),
            activ]

  def drop(rate):
    return PermaDropout(rate) if mc_dropout else Dropout(rate)

  layers = [keras.Input(shape=(32, 32, 3))]

  # Two stages of three 'same'-padded 3x3 conv units, each followed by
  # max-pooling and dropout.
  for filters in (128, 256):
    for _ in range(3):
      layers += conv_unit(filters, (3, 3), padding='same')
    layers += [MaxPooling2D(2, 2), drop(0.3)]

  # Final stage: 3x3 conv followed by two 1x1 convs, default padding.
  layers += conv_unit(512, (3, 3))
  layers += conv_unit(256, (1, 1))
  layers += conv_unit(128, (1, 1))

  layers += [AveragePooling2D(6, 2), Flatten(), Dense(10, activation='softmax')]

  return Sequential(layers)

def compile_cnn_13(model):
  """Compile ``model`` for classification and hand it back.

  Uses SGD (learning rate 0.03, momentum 0.9), categorical cross-entropy
  loss and the accuracy metric.

  Args:
    model: object exposing a Keras-style ``compile`` method.

  Returns:
    The same ``model`` object, after ``compile`` has been called on it.
  """
  model.compile(
    loss='categorical_crossentropy',
    metrics=['accuracy'],
    optimizer=keras.optimizers.SGD(0.03, momentum=0.9),
  )
  return model

def cnn_13():
  """Create a fresh CNN via ``create_cnn_13`` and return it compiled."""
  return compile_cnn_13(create_cnn_13())

def fit_and_labeling_cnn_13(Epoch, Batch, num_mc_samples=30):
  """Train the global ``model`` on a class-balanced subset and predict the pool.

  Reads the module-level ``model``, ``lbl_train_images``,
  ``lbl_train_labels``, ``test_images``, ``test_labels`` and
  ``ubl_train_images``.

  Steps:
    1. Draw a random subset of the labeled data holding at most
       ``int(0.9 * smallest_class_count)`` samples per class (at least 1).
    2. Fit ``model`` on it with the SGDR schedule and early stopping on
       ``val_loss``, holding out 20% for validation.
    3. Print the test-set accuracy and the class counts of the subset.
    4. Predict the unlabeled pool ``num_mc_samples`` times.

  Note: the repeated predictions only differ if the model applies dropout at
  prediction time (e.g. through ``PermaDropout``); a standard ``Dropout``
  layer is inactive in ``model.predict``.

  Args:
    Epoch: maximum number of training epochs.
    Batch: batch size.
    num_mc_samples: number of prediction passes over the unlabeled pool.

  Returns:
    Array stacking the passes along a new first axis, i.e. of shape
    ``(num_mc_samples,) + model.predict(ubl_train_images).shape``.

  Raises:
    ValueError: if ``num_mc_samples`` is smaller than 1.
  """
  if num_mc_samples < 1:
    raise ValueError("num_mc_samples must be at least 1")

  class_ids = np.argmax(lbl_train_labels, axis=1)
  # Cap every class at 90% of the rarest class so the subset stays balanced.
  per_class_cap = max(1, int(min(Counter(class_ids).values()) * 0.9))
  shuffled = random.sample(range(len(class_ids)), len(class_ids))

  taken = Counter()
  idxsample = []
  for i in shuffled:
    cls = class_ids[i]
    # Strict comparison: the cap is the maximum count per class.
    if taken[cls] < per_class_cap:
      taken[cls] += 1
      idxsample.append(i)

  early_stopper = EarlyStopping(monitor='val_loss', min_delta=0, patience=30, mode='auto')
  sgdr = SGDR(min_lr=0.0, max_lr=0.03, base_epochs=20)  # learning-rate scheduler

  model.fit(
      x=lbl_train_images[idxsample],
      y=lbl_train_labels[idxsample],
      epochs=Epoch,
      verbose=0,
      validation_split=0.2,
      batch_size=Batch,
      callbacks=[sgdr, early_stopper]
  )

  pred = model.predict(test_images)
  accuracy = np.mean(np.argmax(pred, axis=1) == np.argmax(test_labels, axis=1))
  print("test set 성능 : " + str(accuracy))
  print(Counter(class_ids[idxsample]))

  # Stack all passes at once instead of growing an array with repeated
  # concatenation, which re-copies every earlier pass on each iteration.
  predictions = np.stack(
      [model.predict(ubl_train_images) for _ in tqdm(range(num_mc_samples))]
  )

  return predictions

In [8]:
def label_selecting(conf_threshold=0.9, uncert_threshold=1e-8):
  """Move confidently pseudo-labeled samples from the unlabeled to the labeled set.

  Reads the module-level ``predictions`` (indexed as
  ``[pass, sample, class]``), ``lbl_train_images``, ``lbl_train_labels`` and
  ``ubl_train_images``; none of them is modified.

  For every unlabeled sample the pseudo-label is the argmax of the mean
  prediction over all passes, the confidence is that mean probability, and
  the uncertainty is the standard deviation over passes of the probability
  assigned to the pseudo-label. A sample is selected when
  ``confidence > conf_threshold`` and ``uncertainty < uncert_threshold``.

  Args:
    conf_threshold: exclusive lower bound on the confidence.
    uncert_threshold: exclusive upper bound on the uncertainty.

  Returns:
    Tuple ``(image1, label1, image2)``: the labeled images followed by the
    selected unlabeled images, the labeled labels followed by the one-hot
    pseudo-labels of the selected samples, and the unlabeled images that
    were not selected.
  """
  mean_probs = np.mean(predictions, axis=0)
  pseudo = np.argmax(mean_probs, axis=1)
  conf = np.max(mean_probs, axis=1)
  # Per-sample spread of the probability given to the chosen class.
  uncert = np.std(predictions, axis=0)[np.arange(len(pseudo)), pseudo]

  selected = (conf > conf_threshold) & (uncert < uncert_threshold)

  # One-hot rows in the dtype of the existing labels, so concatenating does
  # not promote the label array to a wider integer type. Indexing an identity
  # matrix also yields a correctly shaped (0, classes) array for an empty pool.
  one_hot = np.eye(mean_probs.shape[1], dtype=lbl_train_labels.dtype)[pseudo]

  image1 = np.concatenate((lbl_train_images, ubl_train_images[selected]))
  label1 = np.concatenate((lbl_train_labels, one_hot[selected]))
  image2 = ubl_train_images[~selected]

  return image1, label1, image2

In [None]:
# Iterative pseudo-labeling: every round trains a fresh model, predicts the
# remaining unlabeled pool and moves the selected samples to the labeled set.
for itr in range(500):
  # Stop once the pool is exhausted; there is nothing left to predict on.
  if len(ubl_train_images) == 0:
    break
  # Release the Keras global state left by the previous round's model;
  # creating many models in a loop otherwise keeps consuming more memory.
  K.clear_session()
  model = cnn_13()
  predictions = fit_and_labeling_cnn_13(3000, 64)
  lbl_train_images, lbl_train_labels, ubl_train_images = label_selecting()