In [None]:
import cv2
import numpy as np
import matplotlib.pyplot as plt
from skimage.feature import hog
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPooling2D
from keras.utils import np_utils
import pandas as pd
from PIL import Image
from keras.preprocessing.image import ImageDataGenerator, array_to_img, img_to_array, load_img
from keras.utils import np_utils
import seaborn as sns
from keras.applications.inception_v3 import preprocess_input, InceptionV3

In [None]:
import tensorflow as tf
# Print the installed TensorFlow version string
print(tf.__version__)

In [None]:
# Dataset location. Both paths keep their trailing slash because file names
# are appended to them with plain string concatenation further down.
MAIN_DIR = '/content/celeba-dataset/'
IMAGES = MAIN_DIR + 'img_align_celeba/img_align_celeba/'

# Image geometry in pixels
IMG_WIDTH = 178
IMG_HEIGHT = 218
# Channel-last image arrays are laid out as (rows, cols, channels),
# i.e. height comes first, matching the (218, 178, 3) arrays used below.
IMG_SHAPE = (IMG_HEIGHT, IMG_WIDTH, 3)


In [None]:
# Per-image attribute table: read the CSV, index the rows by image file name
# and recode the -1 flags as 0 so every attribute is a 0/1 indicator.
attributes = (
    pd.read_csv(MAIN_DIR + 'list_attr_celeba.csv')
    .set_index('image_id')
    .replace(to_replace=[-1], value=0)
)
attributes.shape

In [None]:
# List the attribute columns available for every image
attributes.columns


In [None]:
# Show one sample picture from the dataset.
# load_img takes target_size as (height, width); passing (width, height)
# would squash the portrait image into a landscape frame.
image_sample = load_img(IMAGES + '000001.jpg', target_size=(IMG_HEIGHT, IMG_WIDTH))
plt.imshow(image_sample)


In [None]:
# Horizontal bar chart of how many images carry each value of the 'Male' flag
ax = sns.countplot(y='Male', data=attributes, color="c")
ax.set_title('Count of Male and female')
plt.show()

In [None]:
# Read the recommended train / validation / test split and preview the first rows
partitions_csv = MAIN_DIR + 'list_eval_partition.csv'
partitions = pd.read_csv(partitions_csv)
partitions.head(5)

In [None]:
# Number of images per partition, ordered by partition id:
#   0 -> TRAINING
#   1 -> VALIDATION
#   2 -> TEST
partition_counts = partitions['partition'].value_counts()
partition_counts.sort_index()

In [None]:
# Join the partitions with the 'Male' attribute on the image file name.
# set_index returns a new frame here instead of mutating `partitions` in
# place, so the cell can be re-run without a KeyError on the already
# consumed 'image_id' column.
partitions_attributes = partitions.set_index('image_id').join(attributes['Male'], how='inner')
partitions_attributes.head()

In [None]:
def load_reshape_img(fname):
    """Load an image file as a float array scaled by 1/255 with a leading batch axis.

    Args:
        fname: path of the image file to load.

    Returns:
        The pixel array divided by 255, with one extra leading axis of length 1.
    """
    pixels = img_to_array(load_img(fname)) / 255.
    # prepend the batch axis: (h, w, c) -> (1, h, w, c)
    return pixels.reshape((1,) + pixels.shape)


def generate_df(partition, attr, num_samples, random_state=None):
    """Draw a class-balanced sample from one partition and load its images.

    Half of ``num_samples`` rows are sampled where ``attr`` is 0 and half where
    it is 1, from the rows of ``partitions_attributes`` that belong to
    ``partition``.

    Args:
        partition: partition id (0 = training, 1 = validation, 2 = test).
        attr: name of the binary attribute column to balance on, e.g. 'Male'.
        num_samples: total number of images to draw (split evenly per class).
        random_state: optional seed forwarded to ``DataFrame.sample`` so the
            draw can be reproduced; ``None`` keeps the previous unseeded
            behaviour.

    Returns:
        ``(x_, y_)``.
        For partitions other than 2, ``x_`` is an array of shape
        (n, IMG_HEIGHT, IMG_WIDTH, 3) scaled to [0, 1] and ``y_`` is the
        one-hot encoded label array with 2 classes.
        For partition 2, ``x_`` is a list of arrays of shape
        (1, IMG_HEIGHT, IMG_WIDTH, 3) scaled to [0, 1] and ``y_`` is a list
        of the raw attribute values.

    Raises:
        FileNotFoundError: if a test image cannot be read from disk.
    """
    per_class = int(num_samples / 2)
    in_partition = partitions_attributes[partitions_attributes['partition'] == partition]

    # negatives first, then positives (same order as the rows are drawn)
    df_ = pd.concat([
        in_partition[in_partition[attr] == 0].sample(per_class, random_state=random_state),
        in_partition[in_partition[attr] == 1].sample(per_class, random_state=random_state),
    ])

    # for Train and Validation
    if partition != 2:
        x_ = np.array([load_reshape_img(IMAGES + fname) for fname in df_.index])
        # drop the per-image batch axis: (n, 1, h, w, 3) -> (n, h, w, 3)
        x_ = x_.reshape(x_.shape[0], IMG_HEIGHT, IMG_WIDTH, 3)
        y_ = np_utils.to_categorical(df_[attr], 2)
    # for Test
    else:
        x_ = []
        y_ = []

        for index, target in df_.iterrows():
            im = cv2.imread(IMAGES + index)
            if im is None:
                # cv2.imread signals a missing/unreadable file by returning None
                raise FileNotFoundError('Could not read image: ' + IMAGES + index)
            # cv2.resize takes dsize as (width, height); the previous
            # (IMG_WIDTH, IMG_WIDTH) squashed every test image to a square.
            im = cv2.resize(cv2.cvtColor(im, cv2.COLOR_BGR2RGB), (IMG_WIDTH, IMG_HEIGHT)).astype(np.float32) / 255.0
            im = np.expand_dims(im, axis=0)
            x_.append(im)
            y_.append(target[attr])

    return x_, y_

    # generate dataframe for training, validation and test set as well as the labels




In [None]:
# Augmentation settings used to illustrate what the generator produces.
# ImageDataGenerator randomly perturbs images (rotation, shifts, shear, zoom,
# horizontal flip) so each pass yields a different variant of the input.
datagen = ImageDataGenerator(
    #preprocessing_function=preprocess_input,
    rotation_range=30,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True
)

# load one image, scale it by 1/255 and add the batch axis
img = load_img(IMAGES + '000001.jpg')
x = img_to_array(img) / 255.
x = x.reshape((1,) + x.shape)

# plot 10 augmented versions of the loaded image
plt.figure(figsize=(20, 10))
plt.suptitle('Data Augmentation', fontsize=28)

# datagen.flow never ends on its own; zipping with range(10) stops after 10 batches
for i, batch in zip(range(10), datagen.flow(x, batch_size=1)):
    plt.subplot(3, 5, i + 1)
    plt.grid(False)
    # each batch holds a single image: (1, 218, 178, 3) -> (218, 178, 3)
    plt.imshow(batch[0])

plt.show()

In [None]:
# Train data: class-balanced sample of the training partition.
# generate_df already returns the pixels scaled to [0, 1].
x_train, y_train = generate_df(0, 'Male', 8000)

# Training data augmentation.
# preprocess_input is deliberately NOT applied here: it expects raw [0, 255]
# pixels, so running it on the already rescaled [0, 1] data compressed every
# training image into roughly [-1, -0.99], while the validation images are
# fed to the network in [0, 1]. Without it, training and validation inputs
# share the same [0, 1] range.
train_datagen = ImageDataGenerator(
    rotation_range=30,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
)

# No train_datagen.fit(x_train): fitting is only required for featurewise
# centering / std normalisation / ZCA whitening, none of which is enabled.
train_generator = train_datagen.flow(
    x_train, y_train,
    batch_size=16,
)


In [None]:
# Validation data: class-balanced sample of 2000 images from partition 1
x_valid, y_valid = generate_df(partition=1, attr='Male', num_samples=2000)

#

In [2]:
from keras.layers import GlobalAveragePooling2D
from keras.models import Sequential, Model 
# checkpointer
from keras.callbacks import ModelCheckpoint

# InceptionV3 pretrained on ImageNet is used as a feature extractor:
# include_top=False drops its original classification head so a new
# classifier can be trained on top of the convolutional features.
inception_model = InceptionV3(weights='imagenet', include_top=False, input_shape=(IMG_HEIGHT, IMG_WIDTH, 3))

# add custom layers: pooled features -> two dense layers -> 2-way softmax
x = inception_model.output
x = GlobalAveragePooling2D()(x)
x = Dense(1024, activation='relu')(x)
x = Dropout(0.5)(x)
x = Dense(512, activation='relu')(x)
predictions = Dense(2, activation='softmax')(x)

# create the model: the InceptionV3 base with the custom layers on top of it
model = Model(inputs=inception_model.input, outputs=predictions)

# freeze the pretrained base so that only the newly added layers are trained
for layer in inception_model.layers:
    layer.trainable = False

# compile the model with the RMSprop optimizer and categorical cross-entropy,
# which matches the one-hot encoded labels and the softmax output
model.compile(optimizer='rmsprop', loss='categorical_crossentropy', metrics=['accuracy'])

# checkpointer: because save_best_only=True, a file is written only after an
# epoch that improves the monitored quantity, not after every epoch
checkpointer = ModelCheckpoint(filepath='model.{epoch:02d}.h5', verbose=1, save_best_only=True)

# Train on the augmented generator. The returned History object records the
# per-epoch training and validation metrics that are plotted below.
# Older Keras needs fit_generator for generators; newer releases dropped it
# in favour of fit, so use whichever this installation provides.
fit_with_generator = getattr(model, 'fit_generator', model.fit)
history = fit_with_generator(
        train_generator,
        steps_per_epoch=100,
        epochs=10,
        validation_data=(x_valid, y_valid),
        callbacks=[checkpointer],
        verbose=1
        )

# plot the training and validation loss
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('model loss')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['train', 'validation'], loc='upper left')
plt.show()

# plot the training and validation accuracy
# The history key is 'acc' on older Keras and 'accuracy' on newer releases.
acc_key = 'acc' if 'acc' in history.history else 'accuracy'
plt.plot(history.history[acc_key])
plt.plot(history.history['val_' + acc_key])
plt.title('model accuracy')
plt.ylabel('accuracy')
plt.xlabel('epoch')
plt.legend(['train', 'validation'], loc='upper left')
plt.show()



# build another model, SVM classifier using HOG features
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

# HOG
from skimage.feature import hog
# sklearn.externals.joblib is gone from current scikit-learn releases and the
# failed import would abort the whole cell; fall back to the standalone
# joblib package (a scikit-learn dependency) when it is missing.
try:
    from sklearn.externals import joblib
except ImportError:
    import joblib
from sklearn.pipeline import Pipeline
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report

def get_hog_features(img, orient, pix_per_cell, cell_per_block, vis=False, feature_vec=True):
    """Compute HOG (histogram of oriented gradients) features for one image channel.

    Args:
        img: 2-D image (single channel) to describe.
        orient: number of orientation bins.
        pix_per_cell: cell size in pixels (cells are square).
        cell_per_block: block size in cells (blocks are square).
        vis: when truthy, also return the HOG visualisation image.
        feature_vec: forwarded to ``hog`` as ``feature_vector``; when True the
            features are returned flattened.

    Returns:
        ``features`` when ``vis`` is falsy, otherwise ``(features, hog_image)``.
    """
    # hog only returns the (features, image) pair when visualize=True is
    # passed; without it the vis=True branch could not unpack two values.
    return hog(img, orientations=orient, pixels_per_cell=(pix_per_cell, pix_per_cell),
               cells_per_block=(cell_per_block, cell_per_block), block_norm='L2-Hys',
               visualize=bool(vis), feature_vector=feature_vec)


def extract_features(imgs, cspace='RGB', orient=9,
                        pix_per_cell=8, cell_per_block=2, hog_channel=0):
    """Extract HOG features from a collection of RGB images.

    Args:
        imgs: iterable of RGB images, each an (h, w, 3) array.
        cspace: colour space to compute HOG in: 'RGB' (no conversion), 'HSV',
            'LUV', 'HLS', 'YUV' or 'YCrCb'.
        orient: number of HOG orientation bins.
        pix_per_cell: HOG cell size in pixels.
        cell_per_block: HOG block size in cells.
        hog_channel: index of the channel to describe, or 'ALL' to concatenate
            the features of every channel.

    Returns:
        Array with one row of HOG features per input image.

    Raises:
        ValueError: if ``cspace`` is not one of the supported colour spaces.
    """
    # The inputs are RGB ('RGB' means "no conversion"), so the conversions
    # must start from RGB; the BGR codes swapped the red and blue channels.
    conversions = {
        'HSV': cv2.COLOR_RGB2HSV,
        'LUV': cv2.COLOR_RGB2LUV,
        'HLS': cv2.COLOR_RGB2HLS,
        'YUV': cv2.COLOR_RGB2YUV,
        'YCrCb': cv2.COLOR_RGB2YCrCb,
    }
    # Fail early: an unknown name used to leave feature_image unbound (or
    # silently reuse the image converted in the previous iteration).
    if cspace != 'RGB' and cspace not in conversions:
        raise ValueError('Unsupported cspace: {!r}'.format(cspace))

    features = []
    for img in imgs:
        if cspace == 'RGB':
            feature_image = np.copy(img)
        else:
            feature_image = cv2.cvtColor(img, conversions[cspace])

        if hog_channel == 'ALL':
            # one HOG vector per channel, flattened into a single row
            hog_features = np.ravel([
                get_hog_features(feature_image[:, :, channel],
                                 orient, pix_per_cell, cell_per_block,
                                 vis=False, feature_vec=True)
                for channel in range(feature_image.shape[2])
            ])
        else:
            hog_features = get_hog_features(feature_image[:, :, hog_channel], orient,
                                            pix_per_cell, cell_per_block,
                                            vis=False, feature_vec=True)
        features.append(hog_features)
    return np.array(features)

        # extract features from the test data
# Same HOG settings for both sets: all three YCrCb channels, 9 orientations,
# 8x8-pixel cells, 2x2-cell blocks.
hog_params = dict(cspace='YCrCb', orient=9, pix_per_cell=8,
                  cell_per_block=2, hog_channel='ALL')
x_train_features = extract_features(x_train, **hog_params)
x_valid_features = extract_features(x_valid, **hog_params)


# use the SVM classifier to classify the test data
from sklearn.svm import LinearSVC
from sklearn.pipeline import Pipeline
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split

# Training / validation sets for the SVM.
# The HOG features of the official validation partition are already computed
# (x_valid_features), so they serve as the hold-out set; re-splitting the
# training features would also overwrite x_train / x_valid / y_train / y_valid
# and make the cell fail when run a second time.
# y_train / y_valid are one-hot encoded for the Keras model, but LinearSVC
# needs a 1-D vector of class ids, so collapse them with argmax.
svm_y_train = np.argmax(y_train, axis=1)
svm_y_valid = np.argmax(y_valid, axis=1)

# define the parameters to search
# Parameters of a Pipeline step are addressed as '<step name>__<parameter>';
# a bare 'C' is rejected as an invalid parameter of the Pipeline itself.
param_grid = {'clf__C': [0.1, 1, 10, 100]}

# create a pipeline
pipeline = Pipeline([
    ('clf', LinearSVC())
])

# search for the best parameters with 5-fold cross-validation
grid_search = GridSearchCV(pipeline, param_grid=param_grid, cv=5, verbose=1)

# fit the model
grid_search.fit(x_train_features, svm_y_train)

# print the best parameters
print('Best parameters: {}'.format(grid_search.best_params_))

# print the best score
print('Best score: {}'.format(grid_search.best_score_))

# report how the best estimator does on the held-out validation partition
print(classification_report(svm_y_valid, grid_search.predict(x_valid_features)))

ModuleNotFoundError: No module named 'keras'