# Practical Work: Classification for facial beauty detection
## Part 1: Creating a simple beauty detector from an existing dataset

Please keep in mind that this is a not-so-ethical use of Machine Learning.  
With great technological power comes great responsibility.  
Please, students, don't be evil.

_Hints: https://github.com/ustcqidi/BeautyPredict_

In [None]:
import os
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

## 1- Get the data
- Get the dataset from here: https://github.com/HCIILAB/SCUT-FBP5500-Database-Release
- Put it in the datasets folder

In [None]:
# Root folder: the Polytech Annecy desktop directory when it exists, otherwise None
# (every path below then stays relative to the current working directory).
local_path = os.path.abspath('C:/poubelle/MECA653')  # Specific to Polytech Annecy
if not os.path.isdir(local_path):
    local_path = None
else:
    print('Will use a local path on Polytech Annecy desktop', local_path)

# Dataset and model folders, prefixed with the local root when there is one
dataset_path = os.path.relpath('datasets/SCUT-FBP5500_v2')
models_path = os.path.relpath('models')
if local_path is not None:
    dataset_path = os.path.join(local_path, dataset_path)
    models_path = os.path.join(local_path, models_path)

# Files and folders inside the dataset
images_path = os.path.join(dataset_path, 'Images')
csv_file_path = os.path.join(dataset_path, 'train_test_files', 'All_labels.txt')

# Summary of the resolved locations
for _label, _value in (
    ("local_path:", local_path),
    ("dataset_path:", dataset_path),
    ("csv_file_path:", csv_file_path),
    ("images_path:", images_path),
    ("models_path:", models_path),
):
    print(_label, _value)

In [None]:
# Mean rating of each image, averaged over the 60 raters
# (exercise placeholder: replace the string below with your own pandas code)
df = 'add your code to read the csv with pandas (columns names=["filename", "rating"] and the separator is a space))'

# Check the first 5 rows
# df.head()

# Describe the dataset
# df.describe()

# Ratings histogram
# df.rating.hist(bins=30, density=1)

## 2- Configure Keras

In [None]:
# pip3 install tensorflow==1.15.2
# pip3 install keras
# pip3 install Pillow

from keras.applications.resnet50 import ResNet50, preprocess_input
# from keras.applications.mobilenet_v2 import MobileNetV2, preprocess_input

from keras_preprocessing.image import ImageDataGenerator

# Create the batch generators
img_size = 224
batch_size = 32  # 64
validation_split = 0.2

# Step counts must be integers (the float floor-division used before gave e.g. 137.0).
# The validation share is truncated to a whole number of samples, the training
# subset gets the remaining rows.
n_validation_samples = int(len(df) * validation_split)
n_training_samples = len(df) - n_validation_samples
training_steps_per_epoch = n_training_samples // batch_size
validation_steps = n_validation_samples // batch_size


datagen = ImageDataGenerator(
    preprocessing_function=preprocess_input,  # This is what modifies the image colors for ImageNet normalization
    # rescale=1/255.0,  # not used because of preprocessing_function
    validation_split=validation_split
    )

# Options shared by the training and the validation generators
# NOTE(review): `has_ext` and class_mode='other' look deprecated in recent
# keras_preprocessing releases (class_mode='raw' replaces 'other') — confirm
# against the installed version before upgrading.
flow_options = dict(
    dataframe=df,
    directory=images_path,
    x_col='filename',  # name of col in data frame that contains file names
    y_col='rating',  # name of col with labels
    has_ext=True,
    batch_size=batch_size,
    target_size=(img_size, img_size),
    class_mode='other',  # : regression, 'categorical' for classification task
    interpolation='nearest',  # 'bilinear'
)

# Training batches are shuffled
train_generator = datagen.flow_from_dataframe(subset='training', shuffle=True, **flow_options)

# Validation batches are NOT shuffled: the evaluation cells pair the predictions with
# `validation_generator.labels` by position, which is only valid when the generator
# yields the samples in dataframe order.
validation_generator = datagen.flow_from_dataframe(subset='validation', shuffle=False, **flow_options)

In [None]:
# Preview one training batch: a 4 x 8 grid of images, each titled with its index and label
x_batch, y_batch = next(train_generator)

n_rows, n_cols = 4, 8
n_preview = n_rows * n_cols

plt.figure(figsize=(12, 8), dpi=150)
plt.subplots_adjust(bottom=0, left=.01, right=1.2, top=0.9, hspace=.01)
preview = zip(x_batch[:n_preview], y_batch[:n_preview])
for i, (image, label) in enumerate(preview):
    plt.subplot(n_rows, n_cols, i + 1)
    plt.axis('off')
    # NOTE(review): the batch went through preprocess_input, so the uint8 cast
    # presumably shows altered colors — confirm
    plt.imshow(image.astype(np.uint8))
    plt.title('N°%i | Beauty: %.2f' % (i, label))

### Create your neural network
We will use transfer learning.  
We use a pre-trained model without its last (top) layer.  
We add our own custom last layers on top of the model.  
Then we train only our last layers.  
It is important to choose a good last-layer architecture: you should try several, or use AutoML (Bayesian/genetic search).  

In [None]:
from keras.models import Sequential
from keras.layers import Activation, Dense, Dropout

# Exercise skeleton: each string passed to model.add() below is a placeholder
# to replace with a real Keras layer.
model = Sequential()

# 1st layer: the pre-trained network without its top layer
# (ImageNet weights, e.g. resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5)
# NOTE that this layer is set below as NOT TRAINABLE, i.e., it is used as is
# model.add(ResNet50(include_top = False, pooling = 'avg', weights='imagenet'))

model.add('add a ResNet50 or a MobileNetV2 network to your model with include_top=False')

# 2nd layer: Dense with ReLU activation
model.add('add a first Dense layer')

# 3rd layer: Dense for regression
model.add('add a second Dense layer if you want')

# Freeze the first layer (the pre-trained network): it is already trained
model.layers[0].trainable = False

# Print the layer-by-layer summary of the model
model.summary()

In [None]:
from keras.optimizers import Adam, SGD

# SGD optimizer with Nesterov momentum and a small learning-rate decay
sgd = SGD(lr=0.001, decay=1e-6, momentum=0.9, nesterov=True)
# adam = Adam()  # lr=1e-3, decay=1e-3/200

# Compile for regression: the training loss is the mean absolute error (other candidates in the trailing comment)
model.compile(optimizer=sgd, loss='mean_absolute_error')  # 'mean_absolute_percentage_error', 'mean_squared_error', 'kullback_leibler_divergence'

## 3- Train the model

In [None]:
import multiprocessing

# Leave two CPUs to the rest of the system, but always keep at least one worker:
# cpu_count() - 2 is zero or negative on machines with one or two CPUs.
n_cpus = multiprocessing.cpu_count()
n_workers = max(1, n_cpus - 2)
print('We have %d logical CPUs. We will use %d workers for preprocessing.' % (n_cpus, n_workers))

n_epochs = 100  # You can reduce the number of epochs to train faster

history = model.fit_generator(
    train_generator,
    epochs=n_epochs,
    steps_per_epoch=int(training_steps_per_epoch),  # step counts must be integers
    validation_steps=int(validation_steps),
    validation_data=validation_generator,
    max_queue_size=30,  # default=10
    workers=n_workers,  # default=1
    use_multiprocessing=False  # default=False
)

# Save the final model (create the models folder first so that saving cannot fail on a missing directory)
os.makedirs(models_path, exist_ok=True)
model.save(os.path.join(models_path, 'beauty_model_untuned.h5'))  # creates a HDF5 file of our model

In [None]:
# Training and validation loss curves recorded during training, one point per epoch
plt.figure(1, figsize=(15, 8))
for curve_name in ('loss', 'val_loss'):
    plt.plot(history.history[curve_name])

plt.title('Model loss during training')
plt.xlabel('Epoch')
plt.ylabel('Mean Absolute Error')
plt.legend(['train', 'validation'])

plt.show()
# plt.savefig('performance.png')

## 4 - Validate the trained model performance
We will use the model to predict the score on every image of the __validation__ dataset.  
Then we will compute some statistics to study the model performance.  

In [None]:
# Predict with the trained model on `validation_steps` batches of the validation generator,
# then flatten the predictions to a 1-D array
y_predicted = model.predict_generator(validation_generator, steps = validation_steps).reshape(-1)

In [None]:
# Get the reference (true) values, truncated to the number of predictions
# NOTE(review): pairing labels and predictions by position assumes that the generator
# yields the samples in the same order as `labels` (i.e. it was created with
# shuffle=False) — confirm
y_test = validation_generator.labels[:y_predicted.shape[0]]

In [None]:
# Compute some performance metrics
from sklearn.metrics import mean_squared_error, mean_absolute_error
from scipy.stats import pearsonr

# Root mean squared error between the reference and the predicted ratings
rmse = np.sqrt(mean_squared_error(y_test, y_predicted))
print('Root Mean Squared Error: %.3f' % rmse)

# Mean absolute error (this value used to be stored under the misleading name `mse`)
mae = mean_absolute_error(y_test, y_predicted)
mse = mae  # backward-compatible alias for code that still reads `mse`
print('Mean Absolute Error: %.3f' % mae)

# Linear correlation between the reference and the predicted ratings
r, p_value = pearsonr(y_test, y_predicted.reshape(-1))
print("Pearson's Correlation Coefficient: %.3f" % r)
# %.3g keeps very small p-values readable instead of rounding them to 0.000
print('Two-tailed p-value: %.3g' % p_value)

## 5- Use the model to predict on any image

In [None]:
from keras.models import load_model
from keras.applications.resnet50 import preprocess_input
from keras.preprocessing import image

from IPython.display import Image
import numpy as np
import os
import pandas as pd
import PIL
try:
    import ipywidgets as widgets
    from ipywidgets import interact, interact_manual
except ImportError:
    raise ImportError('ipywidgets is not installed.', 'Check the comment to install on current env.')
# How to install ipywidgets in current Jupyter env
#   Execute in a Jupyter field:
#     import sys
#     !{sys.executable} -m pip install ipywidgets
#
#   Then run in CLI:
#     $jupyter nbextension enable --py widgetsnbextension  # for Jupyter Notebook
#     $jupyter labextension install @jupyter-widgets/jupyterlab-manager  # for Jupyter Lab

# Define paths: the Polytech Annecy desktop folder is used as root when it exists
_candidate_root = os.path.abspath('C:/poubelle/MECA653')  # Specific to Polytech Annecy
local_path = _candidate_root if os.path.isdir(_candidate_root) else None

dataset_path = os.path.relpath('datasets/SCUT-FBP5500_v2')
models_path = os.path.relpath('models')
if local_path is not None:
    dataset_path = os.path.join(local_path, dataset_path)
    models_path = os.path.join(local_path, models_path)

images_path = os.path.join(dataset_path, 'Images')
csv_file_path = os.path.join(dataset_path, 'train_test_files', 'All_labels.txt')

# Labels file: space-separated, no header row, columns named 'filename' and 'rating'
df = pd.read_csv(csv_file_path, sep=' ', header=None, names=['filename', 'rating'])

In [None]:
# Load the trained model from the HDF5 file saved in the models folder
model = load_model(os.path.join(models_path, 'beauty_model_untuned.h5'))

In [None]:
@interact  # Create an ipywidgets selector
def show_images(file=os.listdir(images_path)):
    """Display a dataset image with its reference rating and the model prediction.

    Args:
        file: Name of an image file located in ``images_path``. The default value
            (the directory listing) is handed to ``interact`` to build the selector.
    """
    image_file = os.path.join(images_path, file)

    # Preprocess the image as a model input
    img_size = 224
    img = image.load_img(image_file, target_size=(img_size, img_size))
    img = image.img_to_array(img)
    img = preprocess_input(img)
    img = np.expand_dims(img, axis=0)

    # Get the reference from the dataset; a file without a row in the labels
    # dataframe has no reference (indexing [0] directly raised IndexError)
    ratings = df.loc[df['filename'] == file, 'rating'].values
    reference = ratings[0] if len(ratings) else None

    prediction = model.predict(img)[0][0]  # Use the model

    display(Image(image_file))  # Display the image

    print('Reference beauty:', reference)
    print('Predicted beauty:', prediction)

In [None]:
@interact
def predict_from_url(URL='https://upload.wikimedia.org/wikipedia/commons/c/c0/Nicolas_Cage_Deauville_2013.jpg'):
    """Download an image from a URL, display it and print the predicted beauty.

    Args:
        URL: Address of the image to download.
    """
    # Imported here because no earlier cell brings these two names into scope
    import urllib.request
    from io import BytesIO

    img_size = 224

    # Download the image into RAM; the connection is closed as soon as the bytes are read
    with urllib.request.urlopen(URL) as response:
        raw_bytes = response.read()

    img = image.load_img(BytesIO(raw_bytes), target_size=(img_size, img_size))
    display(img)  # Display the image

    # Preprocess image
    img = image.img_to_array(img)
    img = preprocess_input(img)
    img = np.expand_dims(img, axis=0)

    prediction = model.predict(img)[0][0]  # Use the model
    print('Predicted beauty:', prediction)

## 6 - Predict beauty in real time from a webcam

Hints to fix jupyterlab IPython plugins:
```bash
pip3 install ipywidgets -U
jupyter nbextension enable --py widgetsnbextension
jupyter labextension install @jupyter-widgets/jupyterlab-manager
jupyter lab clean
jupyter lab build
```

In [None]:
import os

# Locate the models folder: under the Polytech Annecy desktop directory when it
# exists, otherwise relative to the current working directory.
_polytech_root = os.path.abspath('C:/poubelle/MECA653')  # Specific to Polytech Annecy
local_path = _polytech_root if os.path.isdir(_polytech_root) else None

if local_path is None:
    models_path = os.path.relpath('models')
else:
    print('Will use a local path on Polytech Annecy desktop', local_path)
    models_path = os.path.join(local_path, os.path.relpath('models'))

In [None]:
from keras.models import load_model

# Load the trained model from the HDF5 file saved in the models folder
model = load_model(os.path.join(models_path, 'beauty_model_untuned.h5'))

In [None]:
import cv2
from keras.applications.resnet50 import preprocess_input
import numpy as np

import PIL.Image
from io import BytesIO
import IPython.display
from matplotlib import pyplot as plt
import threading
import time

In [None]:
# Simple live stream with face beauty ranking using Deep Transfer Learning

#Use 'jpeg' instead of 'png' (~5 times faster)
def array_to_image(a, fmt='jpeg'):
    """Encode an image array in memory and wrap it in an ``IPython.display.Image``.

    Args:
        a: Array handed to ``PIL.Image.fromarray``.
        fmt: Image format name given to PIL's ``save`` ('jpeg' by default).

    Returns:
        An ``IPython.display.Image`` built from the encoded bytes.
    """
    # In-memory binary stream receiving the encoded picture
    buffer = BytesIO()

    pil_image = PIL.Image.fromarray(a)
    pil_image.save(buffer, fmt)

    encoded = buffer.getvalue()
    return IPython.display.Image(data=encoded)

# Load a VideoCapture from a webcam
cap = cv2.VideoCapture(0)
if not cap.isOpened():
    print('Could not open video device 0.')
    print('Will use a video file.')
    # local_path is None when the Polytech folder is absent: fall back to a path
    # relative to the working directory instead of failing in os.path.join(None, ...)
    video_file = os.path.join(local_path or '', 'datasets/head-pose-face-detection-female.mp4')
    cap = cv2.VideoCapture(os.path.abspath(video_file))

cap.set(cv2.CAP_PROP_FRAME_WIDTH, 320)  # 640
cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 240)  # 480

print('Webcam frame size: %d x %d' % (cap.get(cv2.CAP_PROP_FRAME_WIDTH), cap.get(cv2.CAP_PROP_FRAME_HEIGHT)))


# Display handles updated in place at every frame (picture and FPS counter)
d = IPython.display.display("", display_id=1)
d_fps = IPython.display.display("", display_id=2)

# Display and beauty ranking loop
img_size = 224
crop_box_add = 30  # not used by the loop below; kept for compatibility
debounce_detector = 0  # countdown: the prediction is computed when it reaches 0
debounce_detector_frames_nb = 10  # predict only every 10 frames, should be > 0
y_pred = 0.0  # last predicted score, drawn on every frame

try:
    while True:
        # Capture frame-by-frame
        t1 = time.time()
        ret, frame = cap.read()
        if not ret:
            # No frame: end of the video file, or the capture device failed
            print('No frame received, stopping the stream.')
            break

        frame = cv2.resize(frame, (img_size, img_size))
        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        frame = cv2.flip(frame, 1)

        if debounce_detector <= 0:  # Compute beauty every 10 frames
            debounce_detector = debounce_detector_frames_nb

            # Work on a float copy so that the displayed frame is never modified
            preprocessed_image = preprocess_input(frame.astype(np.float32))
            preprocessed_image = np.expand_dims(preprocessed_image, axis=0)
            y_pred = model.predict(preprocessed_image)[0][0]

        # Scores
        cv2.putText(frame, str('%.2f' % (y_pred)), (10, img_size - 10), cv2.FONT_HERSHEY_SIMPLEX,
                    1, (0, 255, 0), 2)

        # Display the video output (the name `image` is avoided on purpose: an earlier
        # cell binds it to the Keras image module)
        frame_image = array_to_image(frame)
        d.update(frame_image)

        # Guard against a zero elapsed time on coarse clocks
        elapsed = time.time() - t1
        fps = int(1 / elapsed) if elapsed > 0 else 0
        s = f"""{fps} FPS"""
        d_fps.update(IPython.display.HTML(s))

        debounce_detector = debounce_detector - 1

except KeyboardInterrupt:
    print()
    IPython.display.clear_output()
    print("Stream stopped")
finally:
    # Always free the capture device, whatever ended the loop
    cap.release()

## 7 - Rank scaling analysis

In [None]:
from scipy.stats import norm
from sklearn.preprocessing import MinMaxScaler, QuantileTransformer

# Ratings as a single-column 2-D array, the layout used to fit and apply the scalers
ratings_column = df['rating'].values.reshape(-1, 1)

# Define the scalers and fit each of them on the ratings
min_max_scaler = MinMaxScaler(feature_range=(0, 5), copy=True)
quantile_normal_scaler = QuantileTransformer(output_distribution='normal', copy=True)
quantile_uniform_scaler = QuantileTransformer(output_distribution='uniform', copy=True)
for _scaler in (min_max_scaler, quantile_normal_scaler, quantile_uniform_scaler):
    _scaler.fit(ratings_column)


def _plot_distribution(ax, values, title):
    """Draw on *ax* the density histogram of *values* and the normal curve fitted to them."""
    _, bin_edges, _ = ax.hist(values, bins=n_bins, density=True)
    mu, sigma = norm.fit(values)
    ax.plot(bin_edges, norm.pdf(bin_edges, mu, sigma), 'r--', linewidth=1)
    ax.set_ylabel('Probability')
    ax.set_title(title)


# Plots: one row per scaling, all sharing the same x axis
n_bins = 100
f, (ax0, ax1, ax2, ax3) = plt.subplots(ncols=1, nrows=4, sharex=True, figsize=(18, 8))

_plot_distribution(ax0, df['rating'].values, 'Original rank')
ax0.set_xlim([0, 5])

_plot_distribution(ax1, min_max_scaler.transform(ratings_column), 'Min Max')

# The quantile-normal output is rescaled to the [0, 5] range before plotting
normal_scaled = quantile_normal_scaler.transform(ratings_column)
normal_scaled -= normal_scaled.min()
normal_scaled /= normal_scaled.max()
normal_scaled *= 5.0
_plot_distribution(ax2, normal_scaled, 'Quantile Normal')

# The quantile-uniform output is multiplied by 5 before plotting
uniform_scaled = quantile_uniform_scaler.transform(ratings_column) * 5.0
_plot_distribution(ax3, uniform_scaled, 'Quantile Uniform')
ax3.set_xlabel('Rank')

f.suptitle('Rank Scaling Analysis', y=0.035)
f.tight_layout(rect=[0.05, 0.05, 0.95, 0.95])

plt.show()