In [1]:
import matplotlib
matplotlib.use("Agg")

In [2]:
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import img_to_array
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.utils import plot_model

In [3]:
from sklearn.model_selection import train_test_split
from smallervggnet import SmallerVGGNet
import matplotlib.pyplot as plt
import numpy as np
import argparse
import random

In [4]:
'''random (imported in the previous cell) is a built-in Python
module; it is used here to shuffle the image paths with a fixed seed.'''

import cv2
'''OpenCV-Python (cv2) is a Python library for solving
computer vision problems.'''

import os
import glob
'''The glob module retrieves files/pathnames matching a
specified pattern. Benchmarks reportedly show it to be
faster than other ways of matching pathnames in
directories.'''

'The glob module retrieves files/pathnames matching a\nspecified pattern. Benchmarks reportedly show it to be\nfaster than other ways of matching pathnames in\ndirectories.'

In [5]:
# handle command line arguments
# The parser turns a list of argument strings into three paths: the input
# dataset directory, the output model file and the output accuracy/loss plot.
#
# Inside a Jupyter kernel nothing can be passed as "-d" on the command line,
# so the dataset path may also come from the DATASET_DIR environment variable
# (e.g. run `%env DATASET_DIR=path/to/images` in an earlier cell).  When the
# variable is unset, "-d/--dataset" stays mandatory exactly as before.
dataset_default = os.environ.get("DATASET_DIR")

ap = argparse.ArgumentParser()
ap.add_argument("-d", "--dataset", default=dataset_default,
                required=dataset_default is None,
                help="path to input dataset (i.e., directory of images)")

ap.add_argument("-m", "--model", type=str, default="gender_detection.model",
                help="path to output model")

ap.add_argument("-p", "--plot", type=str, default="plot.png",
                help="path to output accuracy/loss plot")

# parse_known_args() tolerates the extra "-f <connection file>" argument that
# ipykernel places in sys.argv; parse_args() would abort on it.
args, _unknown_args = ap.parse_known_args()

# initial parameters
epochs = 100          # number of training epochs
lr = 1e-3             # initial learning rate
batch_size = 64       # samples per training batch
img_dims = (96,96,3)  # indexed below as (width, height, depth)

# filled by the image-loading loop: one entry per image, in the same order
data = []
labels = []


# load image files recursively from the dataset
# glob returns matches in an arbitrary, filesystem-dependent order, so the
# paths are sorted first; without this the seeded shuffle below would give a
# different order (and a different train/validation split) on another machine.
image_files = sorted(
    f for f in glob.glob(os.path.join(args.dataset, "**", "*"), recursive=True)
    if not os.path.isdir(f)
)
random.seed(42)
# shuffle the list in place so the classes are mixed before splitting
random.shuffle(image_files)


# create ground-truth label from the image path
# The label of each image is taken from the name of the directory that
# contains it: "woman" -> 1, any other directory name -> 0.
for img in image_files:

    # cv2.imread() returns None instead of raising when the file cannot be
    # decoded as an image (e.g. stray metadata files inside the dataset).
    # Skip those before anything is appended so data and labels stay aligned.
    image = cv2.imread(img)
    if image is None:
        print("[WARN] skipping unreadable file: {}".format(img))
        continue

    # resize to the network input size; cv2.resize expects (width, height)
    image = cv2.resize(image, (img_dims[0], img_dims[1]))

    # img_to_array() (Keras) returns the image as a NumPy array
    image = img_to_array(image)
    data.append(image)

    # the parent directory name is the second-to-last path component
    parent_dir = img.split(os.path.sep)[-2]
    labels.append([1 if parent_dir == "woman" else 0])


# pre-processing
# Turn the Python lists into arrays; the image values are divided by 255.
labels = np.array(labels)
data = np.array(data, dtype="float") / 255.0


# split dataset for training and validation
# 20% of the samples are held out; random_state pins the split.
trainX, testX, trainY, testY = train_test_split(
    data, labels, random_state=42, test_size=0.2
)

# one-hot encode the 0/1 labels into two columns
trainY, testY = (to_categorical(y, num_classes=2) for y in (trainY, testY))


# augmenting dataset
# Data augmentation increases the amount of training data by generating
# slightly modified copies (rotations, shifts, shears, zooms, flips) of the
# existing images.
augmentation_options = dict(
    rotation_range=25,
    width_shift_range=0.1,
    height_shift_range=0.1,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode="nearest",
)
aug = ImageDataGenerator(**augmentation_options)

# build model
# two output classes, input dimensions taken from img_dims
model = SmallerVGGNet.build(
    classes=2,
    width=img_dims[0],
    height=img_dims[1],
    depth=img_dims[2],
)

# compile the model
# Adam's `lr=` and `decay=` arguments are legacy: newer Keras releases
# deprecate `lr` and no longer accept `decay`.  The legacy decay rule was
#     lr_t = lr / (1 + decay * iterations)
# which InverseTimeDecay reproduces when decay_steps=1, so the optimizer keeps
# the same learning-rate curve while using only supported arguments.
lr_schedule = tf.keras.optimizers.schedules.InverseTimeDecay(
    initial_learning_rate=lr, decay_steps=1, decay_rate=lr / epochs)
opt = Adam(learning_rate=lr_schedule)
model.compile(loss="binary_crossentropy", optimizer=opt, metrics=["accuracy"])

# train the model
# Model.fit accepts generators directly; fit_generator is deprecated.
# max(1, ...) keeps steps_per_epoch valid when there are fewer training
# samples than one batch.
H = model.fit(aug.flow(trainX, trainY, batch_size=batch_size),
              validation_data=(testX,testY),
              steps_per_epoch=max(1, len(trainX) // batch_size),
              epochs=epochs, verbose=1)

# save the model to disk
model.save(args.model)

# plot training/validation loss/accuracy
history = H.history

# The accuracy metric is recorded as "accuracy"/"val_accuracy" by TF 2.x and
# as "acc"/"val_acc" by older tf.keras; use whichever key is present instead
# of failing with a KeyError.
acc_key = "accuracy" if "accuracy" in history else "acc"

# use the number of epochs actually recorded rather than the requested count,
# so the x-axis always matches the length of the curves
N = len(history["loss"])
epoch_axis = np.arange(0, N)

plt.style.use("ggplot")
fig, ax = plt.subplots()
ax.plot(epoch_axis, history["loss"], label="train_loss")
ax.plot(epoch_axis, history["val_loss"], label="val_loss")
ax.plot(epoch_axis, history[acc_key], label="train_acc")
ax.plot(epoch_axis, history["val_" + acc_key], label="val_acc")

ax.set_title("Training Loss and Accuracy")
ax.set_xlabel("Epoch #")
ax.set_ylabel("Loss/Accuracy")
ax.legend(loc="upper right")

# save plot to disk
fig.savefig(args.plot)
# release the figure once it has been written
plt.close(fig)


usage: ipykernel_launcher.py [-h] -d DATASET [-m MODEL] [-p PLOT]
ipykernel_launcher.py: error: the following arguments are required: -d/--dataset


SystemExit: 2

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)
