Please download the following zip file, extract it, and place the extracted folder in your Google Drive
(the code below expects it at `My Drive/heavy_makeup_CelebA_s`):

https://tinyurl.com/bsff25hs


In this example, we will perform binary classification of heavy-makeup vs. no-heavy-makeup face images taken from the CelebA dataset.


In [None]:
# Mount Google Drive at /content/drive; the cells below read the dataset from that mount point.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# if you mount Google drive correctly, the following commands should be able to be executed correctly
!ls /content/drive/
%cd "/content/drive/My Drive"
% cd "heavy_makeup_CelebA_s"

!ls

MyDrive  Shareddrives
/content/drive/My Drive
/content/drive/My Drive/heavy_makeup_CelebA_s
train  val


In [None]:
# step 1: preparing HOG feature vectors for heavy makeup & no-heavy makeup classification in training & validation, respectively

from skimage import data, color, feature
import skimage.data
import cv2
from google.colab.patches import cv2_imshow
import glob
import numpy as np
from timeit import default_timer as timer

# image size before cropping
resize_img_w = 256
resize_img_h = 256

# image size after cropping
crop_img_w = 224
crop_img_h = 224

# Length of the HOG descriptor of a 224x224 grayscale image with the default
# skimage.feature.hog settings (9 orientations, 8x8-pixel cells, 3x3-cell blocks):
# 28x28 cells -> 26x26 blocks -> 26 * 26 * 3 * 3 * 9 = 54756 values.
# This constant must be updated if the crop size or the HOG settings change.
HOG_dim = 54756

# the folder of training data in YOUR Google drive
training_heavy_makeup_imgs_dir = "/content/drive/My Drive/heavy_makeup_CelebA_s/train/heavy_makeup/"
training_non_heavy_makeup_imgs_dir = "/content/drive/My Drive/heavy_makeup_CelebA_s/train/no_heavy_makeup/"

# the folder of validation data in YOUR Google drive
validation_heavy_makeup_imgs_dir = "/content/drive/My Drive/heavy_makeup_CelebA_s/val/heavy_makeup/"
validation_non_heavy_makeup_imgs_dir = "/content/drive/My Drive/heavy_makeup_CelebA_s/val/no_heavy_makeup/"

# the file lists of training data
# (glob returns files in arbitrary order, so sort them to keep runs reproducible)
training_heavy_makeup_img_files = sorted(glob.glob(training_heavy_makeup_imgs_dir + '*.jpg'))
training_no_heavy_makeup_img_files = sorted(glob.glob(training_non_heavy_makeup_imgs_dir + '*.jpg'))

# the file lists of validation data (sorted for the same reason)
validation_heavy_makeup_img_files = sorted(glob.glob(validation_heavy_makeup_imgs_dir + '*.jpg'))
validation_no_heavy_makeup_img_files = sorted(glob.glob(validation_non_heavy_makeup_imgs_dir + '*.jpg'))

# the number of training images found for the positive and negative class
training_heavy_makeup_img_file_num = len(training_heavy_makeup_img_files)
training_no_heavy_makeup_img_file_num = len(training_no_heavy_makeup_img_files)

# the number of validation images found for the positive and negative class
validation_heavy_makeup_img_file_num = len(validation_heavy_makeup_img_files)
validation_no_heavy_makeup_img_file_num = len(validation_no_heavy_makeup_img_files)

# A count of 0 below means the corresponding folder path is wrong or empty.
print("the number of heavy makeup images in training folder:{} ".format(training_heavy_makeup_img_file_num))
print("the number of no heavy makeup images in training folder:{} ".format(training_no_heavy_makeup_img_file_num))

print("the number of heavy makeup images in validation folder:{}".format(validation_heavy_makeup_img_file_num))
print("the number of no heavy makeup images in validation folder:{} ".format(validation_no_heavy_makeup_img_file_num))

def img_crop(img, resize_img_w, resize_img_h, target_img_w, target_img_h):
  """Resize `img` and return the centered crop of the resized image.

  Args:
    img: image array accepted by `cv2.resize`.
    resize_img_w, resize_img_h: width and height the image is resized to
      (converted with int() before resizing).
    target_img_w, target_img_h: width and height of the centered crop.

  Returns:
    The sub-array of the resized image covering the centered crop window.
  """
  resized = cv2.resize(img, (int(resize_img_w), int(resize_img_h)))

  # Margin removed on each side of the resized image. It is kept as a float and
  # truncated with int() only when the slice bounds are built.
  margin_y = (resize_img_h - target_img_h) / 2
  margin_x = (resize_img_w - target_img_w) / 2

  rows = slice(int(margin_y), int(resize_img_h - margin_y))
  cols = slice(int(margin_x), int(resize_img_w - margin_x))

  return resized[rows, cols]

def _extract_hog_matrix(img_files):
  """Return one HOG descriptor per image in `img_files`, stacked as matrix rows.

  Each image is read with OpenCV, resized and center-cropped with `img_crop`,
  converted to grayscale and described with `feature.hog` (default settings).

  Args:
    img_files: list of image file paths.

  Returns:
    Array of shape (len(img_files), HOG_dim); row i belongs to img_files[i].

  Raises:
    IOError: if an image file cannot be read.
  """
  hog_matrix = np.zeros([len(img_files), HOG_dim])
  for row, img_file in enumerate(img_files):
    img = cv2.imread(img_file)
    # cv2.imread signals a missing or corrupt file by returning None instead of raising
    if img is None:
      raise IOError("failed to read image: {}".format(img_file))
    img_cropped = img_crop(img, resize_img_w, resize_img_h, crop_img_w, crop_img_h)
    image = cv2.cvtColor(img_cropped, cv2.COLOR_BGR2GRAY)
    hog_matrix[row, :] = feature.hog(image)
  return hog_matrix


print("start to prepare HOG features for postive training images")
start = timer()
HOG_postitive_matrix = _extract_hog_matrix(training_heavy_makeup_img_files)
end = timer()
print("finish preparing HOG features for postive training images and totally {}-seconds are consumed".format(end-start))


print("start to prepare HOG features for negative training images")
start = timer()
HOG_negative_matrix = _extract_hog_matrix(training_no_heavy_makeup_img_files)
end = timer()
print("finish preparing HOG features for negative training images and totally {}-seconds are consumed".format(end-start))

# stack HOG positive & negative features vertically (positive rows first)
training_matrix = np.vstack((HOG_postitive_matrix, HOG_negative_matrix))

# label vector aligned with training_matrix: 1 for the leading positive rows, 0 for the rest
label_matrix = np.zeros(training_matrix.shape[0])
label_matrix[0:HOG_postitive_matrix.shape[0]] = 1


the number of heavy makeup images in training folder:200 
the number of no heavy makeup images in training folder:200 
the number of heavy makeup images in validation folder:40
the number of no heavy makeup images in validation folder:40 
start to prepare HOG features for postive training images
finish preparing HOG features for postive training images and totally 6.524903999999879-seconds are consumed
start to prepare HOG features for negative training images
finish preparing HOG features for negative training images and totally 6.382667414000025-seconds are consumed


In [None]:
## Optuna is not installed in colab so we need to manually install it.
!pip install optuna



In [None]:
# training and testing function used by Optuna

from sklearn.svm import LinearSVC
from sklearn.svm import SVC
#from sklearn.grid_search import GridSearchCV
from sklearn.model_selection import GridSearchCV
import optuna

def _validation_hog_matrix(img_files):
    """Return the HOG descriptors of `img_files` stacked as rows of a 2-D array.

    Images go through the same pipeline as the training images: read, resize and
    center-crop with `img_crop`, convert to grayscale, then `feature.hog`.

    Raises:
        IOError: if an image file cannot be read.
    """
    hog_rows = []
    for img_file in img_files:
        img = cv2.imread(img_file)
        # cv2.imread signals a missing or corrupt file by returning None instead of raising
        if img is None:
            raise IOError("failed to read image: {}".format(img_file))
        img_cropped = img_crop(img, resize_img_w, resize_img_h, crop_img_w, crop_img_h)
        image = cv2.cvtColor(img_cropped, cv2.COLOR_BGR2GRAY)
        hog_rows.append(feature.hog(image))
    return np.asarray(hog_rows)


def _count_predictions(model, img_files, expected_label):
    """Count the images in `img_files` that `model` classifies as `expected_label`."""
    if not img_files:
        return 0
    # one batched predict call instead of one call per image
    predictions = model.predict(_validation_hog_matrix(img_files))
    return int(np.count_nonzero(predictions == expected_label))


def SVM_training_n_testing(trial):
    """Optuna objective: train a polynomial-kernel SVM and return its validation accuracy.

    Args:
        trial: Optuna trial used to sample the hyperparameters `degree` and `C`.

    Returns:
        Fraction of validation images (heavy makeup + no heavy makeup) that are
        classified correctly.

    Raises:
        ValueError: if both validation file lists are empty.
        IOError: if a validation image cannot be read.
    """
    validation_img_num = len(validation_heavy_makeup_img_files) + len(validation_no_heavy_makeup_img_files)
    if validation_img_num == 0:
        raise ValueError("no validation images found; check the validation folders")

    # hyperparameters to be optimized by Optuna in training SVM
    # `degree` is sampled as an integer: SVC expects an integer polynomial degree, and
    # a fractional value below 1 degenerates to degree 0 (a constant kernel).
    cfg = {
        'degree': trial.suggest_int('degree', 1, 10),
        'C': trial.suggest_float('C', 0.1, 100),
    }

    print("start to perform training")
    start = timer()

    ## poly
    # probability estimates are not requested: only predict() is used below, and enabling
    # them makes fit() run an additional internal cross-validation.
    model = SVC(kernel='poly', degree=cfg['degree'], C=cfg['C'])
    model.fit(training_matrix, label_matrix)
    ##

    end = timer()
    print("training is done and totally {}-seconds are consumed".format(end-start))

    # step 3: accuracy estimation by classifying all the images in the validation folder

    # for positive validation data (label 1)
    TP_num = _count_predictions(model, validation_heavy_makeup_img_files, 1)
    print("TP_num={}".format(TP_num))

    # for negative validation data (label 0)
    TN_num = _count_predictions(model, validation_no_heavy_makeup_img_files, 0)
    print("TN_num={}".format(TN_num))

    accuracy = (TP_num + TN_num) / validation_img_num
    print("accuracy={}".format(accuracy))

    return accuracy

In [None]:
# step 2: hyperparameter search with Optuna; every trial trains an SVM and scores it on the validation images

# A fixed sampler seed makes the sequence of suggested hyperparameters reproducible across runs.
sampler = optuna.samplers.TPESampler(seed=0)
study = optuna.create_study(sampler=sampler, direction='maximize')
study.optimize(func=SVM_training_n_testing, n_trials=10)

print("best accuracy={} with parameters {}".format(study.best_value, study.best_params))



[32m[I 2021-07-01 23:57:51,465][0m A new study created in memory with name: no-name-d2cdb9c9-33a6-45be-890f-a935e84a4317[0m


start to perform training
training is done and totally 66.46260433299994-seconds are consumed
TP_num=35


[32m[I 2021-07-01 23:59:02,782][0m Trial 0 finished with value: 0.825 and parameters: {'degree': 1.4372439781692703, 'C': 75.0667503221738}. Best is trial 0 with value: 0.825.[0m


TN_num=31
accuracy=0.825
start to perform training
training is done and totally 68.89115977900019-seconds are consumed
TP_num=36


[32m[I 2021-07-02 00:00:16,816][0m Trial 1 finished with value: 0.85 and parameters: {'degree': 2.9239468556213097, 'C': 28.223966362317242}. Best is trial 1 with value: 0.85.[0m


TN_num=32
accuracy=0.85
start to perform training
training is done and totally 70.75636533299985-seconds are consumed
TP_num=34


[32m[I 2021-07-02 00:01:32,872][0m Trial 2 finished with value: 0.8 and parameters: {'degree': 4.77269303180191, 'C': 29.322955373296402}. Best is trial 1 with value: 0.85.[0m


TN_num=30
accuracy=0.8
start to perform training
training is done and totally 70.90990822899948-seconds are consumed
TP_num=33


[32m[I 2021-07-02 00:02:49,151][0m Trial 3 finished with value: 0.8125 and parameters: {'degree': 5.1585720767384275, 'C': 72.92051409436228}. Best is trial 1 with value: 0.85.[0m


TN_num=32
accuracy=0.8125
start to perform training
training is done and totally 70.25441670000055-seconds are consumed
TP_num=0


[32m[I 2021-07-02 00:04:04,757][0m Trial 4 finished with value: 0.5 and parameters: {'degree': 0.356805284004159, 'C': 1.366546419443867}. Best is trial 1 with value: 0.85.[0m


TN_num=40
accuracy=0.5
start to perform training
training is done and totally 71.09533503900002-seconds are consumed
TP_num=31


[32m[I 2021-07-02 00:05:21,301][0m Trial 5 finished with value: 0.7875 and parameters: {'degree': 6.703974028376497, 'C': 81.40421419187254}. Best is trial 1 with value: 0.85.[0m


TN_num=32
accuracy=0.7875
start to perform training
training is done and totally 70.1564828419996-seconds are consumed
TP_num=37


[32m[I 2021-07-02 00:06:36,741][0m Trial 6 finished with value: 0.8625 and parameters: {'degree': 3.8118878620436583, 'C': 8.705112719869602}. Best is trial 6 with value: 0.8625.[0m


TN_num=32
accuracy=0.8625
start to perform training
training is done and totally 70.83179823799946-seconds are consumed
TP_num=34


[32m[I 2021-07-02 00:07:52,922][0m Trial 7 finished with value: 0.8 and parameters: {'degree': 4.419242094850716, 'C': 74.77386741417952}. Best is trial 6 with value: 0.8625.[0m


TN_num=30
accuracy=0.8
start to perform training
training is done and totally 69.72425390100034-seconds are consumed
TP_num=40


[32m[I 2021-07-02 00:09:08,003][0m Trial 8 finished with value: 0.5 and parameters: {'degree': 0.11352086286656904, 'C': 7.011572501104452}. Best is trial 6 with value: 0.8625.[0m


TN_num=0
accuracy=0.5
start to perform training
training is done and totally 71.17529613699935-seconds are consumed
TP_num=31


[32m[I 2021-07-02 00:10:24,622][0m Trial 9 finished with value: 0.7875 and parameters: {'degree': 6.427872916350362, 'C': 55.62770926330358}. Best is trial 6 with value: 0.8625.[0m


TN_num=32
accuracy=0.7875
