In [83]:
# import the necessary packages
from sklearn.preprocessing import LabelEncoder

from imutils import paths

from tensorflow.keras.applications import VGG16
from tensorflow.keras.applications.vgg16 import preprocess_input
from tensorflow.keras.preprocessing.image import img_to_array
from tensorflow.keras.preprocessing.image import load_img

import numpy as np
import pickle
import random
import os

In [84]:
# build the ImageNet-pretrained VGG16 without its fully-connected head so
# its convolutional output can serve as a feature vector; the label
# encoder stays unset until class labels have been collected
print("[INFO] loading network...")
model = VGG16(include_top=False, weights="imagenet")
le = None

[INFO] loading network...


In [85]:
# expose Google Drive inside the Colab runtime under /content/drive
from google.colab import drive
drive.mount("/content/drive")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [86]:
# --- dataset layout on the mounted drive ---
# root folder that contains one sub-folder per data split
BASE_PATH = '/content/drive/My Drive/Face_Mask_Dataset'

# sub-folder name of each split
TRAIN, TEST, VAL = 'Training', 'Testing', 'Validation'

# per-class folder suffixes (note the leading slash)
MASK, NO_MASK = '/WithMask', '/WithoutMask'

# --- output artefacts ---
# folder receiving the feature CSVs plus the pickled encoder / classifier
BASE_CSV_PATH = '/content/drive/My Drive/Face_Mask_Dataset/Output'
LE_PATH = os.path.join(BASE_CSV_PATH, "le.cpickle")
MODEL_PATH = os.path.join(BASE_CSV_PATH, "model.cpickle")

# number of images sent through the network per forward pass
BATCH_SIZE = 32

In [87]:
# (re)create the headless, ImageNet-pretrained VGG16 feature extractor and
# clear the label encoder so the extraction loop below fits a fresh one.
# Reloading here matters when cells are re-run: `model` is later rebound
# to the logistic-regression classifier.
print("[INFO] loading network...")
model = VGG16(include_top=False, weights="imagenet")
le = None

[INFO] loading network...


In [88]:
# Feature extraction: for every data split, push all images through the
# VGG16 network in batches and write one CSV row per image in the form
# "<encoded label>,<flattened feature values...>".

# make sure the output folder exists before any CSV is opened for writing
os.makedirs(BASE_CSV_PATH, exist_ok=True)

# loop over the data splits
for split in (VAL, TRAIN):
    # grab all image paths in the current split
    print("[INFO] processing '{} split'...".format(split))
    splitDir = os.path.sep.join([BASE_PATH, split])
    imagePaths = list(paths.list_images(splitDir))

    # an empty split would fit the label encoder on zero classes and make
    # every later transform() fail, so skip it with a visible message
    if not imagePaths:
        print("[WARNING] no images found in '{}', skipping".format(splitDir))
        continue

    # randomly shuffle the image paths, then take the class label from
    # the name of the directory that directly contains each image
    random.shuffle(imagePaths)
    labels = [path.split(os.path.sep)[-2] for path in imagePaths]

    # the encoder is fitted once, on the first non-empty split, and then
    # reused so every split shares the same label -> integer mapping
    if le is None:
        le = LabelEncoder()
        le.fit(labels)

    csvPath = os.path.sep.join([BASE_CSV_PATH, "{}.csv".format(split)])

    # total number of batches, computed once for the progress messages
    numBatches = int(np.ceil(len(imagePaths) / float(BATCH_SIZE)))

    # the context manager closes (and flushes) the CSV even if loading or
    # predicting raises half-way through the split
    with open(csvPath, "w") as csvFile:
        # loop over the images in batches
        for (b, i) in enumerate(range(0, len(imagePaths), BATCH_SIZE)):
            print("[INFO] processing batch {}/{}".format(b + 1, numBatches))
            batchPaths = imagePaths[i:i + BATCH_SIZE]
            batchLabels = le.transform(labels[i:i + BATCH_SIZE])
            batchImages = []

            # loop over the images in the current batch
            for imagePath in batchPaths:
                # load the image resized to 224x224 pixels
                image = load_img(imagePath, target_size=(224, 224))
                image = img_to_array(image)

                # add a leading batch axis, then apply the VGG16
                # preprocessing function
                image = np.expand_dims(image, axis=0)
                image = preprocess_input(image)

                batchImages.append(image)

            # stack into one array and run it through the network
            batchImages = np.vstack(batchImages)
            features = model.predict(batchImages, batch_size=BATCH_SIZE)

            # flatten each per-image volume into one row; -1 infers the
            # length (7 * 7 * 512 for the 224x224 inputs used here)
            features = features.reshape((features.shape[0], -1))

            # write "<label>,<f1>,<f2>,..." for every image of the batch
            for (label, vec) in zip(batchLabels, features):
                vec = ",".join([str(v) for v in vec])
                csvFile.write("{},{}\n".format(label, vec))

[INFO] processing 'Validation split'...
[INFO] processing batch 1/45
[INFO] processing batch 2/45
[INFO] processing batch 3/45
[INFO] processing batch 4/45
[INFO] processing batch 5/45
[INFO] processing batch 6/45
[INFO] processing batch 7/45
[INFO] processing batch 8/45
[INFO] processing batch 9/45
[INFO] processing batch 10/45
[INFO] processing batch 11/45
[INFO] processing batch 12/45
[INFO] processing batch 13/45
[INFO] processing batch 14/45
[INFO] processing batch 15/45
[INFO] processing batch 16/45
[INFO] processing batch 17/45
[INFO] processing batch 18/45
[INFO] processing batch 19/45
[INFO] processing batch 20/45
[INFO] processing batch 21/45
[INFO] processing batch 22/45
[INFO] processing batch 23/45
[INFO] processing batch 24/45
[INFO] processing batch 25/45
[INFO] processing batch 26/45
[INFO] processing batch 27/45
[INFO] processing batch 28/45
[INFO] processing batch 29/45
[INFO] processing batch 30/45
[INFO] processing batch 31/45
[INFO] processing batch 32/45
[INFO] pr

In [89]:
# serialize the label encoder to disk; the context manager closes the
# file even if pickling raises part-way through
with open(LE_PATH, "wb") as f:
    pickle.dump(le, f)

In [90]:
# import the necessary packages
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report

def load_data_split(splitPath):
    """Load a feature CSV into NumPy arrays.

    Every non-blank line is read as ``label,f1,f2,...``: the first field
    is kept as the (string) label and the remaining fields are parsed as
    floats. Blank lines are ignored.

    Args:
        splitPath: path of the CSV file to read.

    Returns:
        A tuple ``(data, labels)`` where ``data`` is an array holding one
        float feature row per CSV line and ``labels`` is an array of the
        label strings in the same order.

    Raises:
        OSError: if the file cannot be opened.
        ValueError: if a feature field cannot be parsed as a float.
    """
    # initialize the data and labels
    data = []
    labels = []

    # the context manager releases the file handle once reading is done
    with open(splitPath) as splitFile:
        for line in splitFile:
            line = line.strip()

            # a blank (e.g. trailing) line would otherwise yield an empty
            # feature vector and break the stacking below
            if not line:
                continue

            # extract the class label and features from the row
            fields = line.split(",")
            labels.append(fields[0])
            data.append(np.array(fields[1:], dtype="float"))

    # convert the data and labels to NumPy arrays
    data = np.array(data)
    labels = np.array(labels)

    # return a tuple of the data and labels
    return (data, labels)

In [91]:
# locate the feature CSVs: the training split is used for fitting and the
# validation split is used as the held-out evaluation set
trainingPath = os.path.join(BASE_CSV_PATH, TRAIN + ".csv")
testingPath = os.path.join(BASE_CSV_PATH, VAL + ".csv")

In [92]:
# read the extracted features and their labels back from the CSV files
print("[INFO] loading data...")
trainX, trainY = load_data_split(trainingPath)
testX, testY = load_data_split(testingPath)

[INFO] loading data...


In [94]:
# load the label encoder from disk, closing the file handle afterwards
# NOTE: unpickling can execute arbitrary code, so LE_PATH must only point
# at a file that was written by this notebook
with open(LE_PATH, "rb") as f:
    le = pickle.load(f)

In [95]:
# fit a logistic-regression classifier on the extracted training features
# (this rebinds `model`, which previously held the VGG16 network)
print("[INFO] training model...")
model = LogisticRegression(max_iter=150, multi_class="auto", solver="lbfgs")
model.fit(trainX, trainY)

[INFO] training model...


LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=150,
                   multi_class='auto', n_jobs=None, penalty='l2',
                   random_state=None, solver='lbfgs', tol=0.0001, verbose=0,
                   warm_start=False)

In [96]:
# evaluate the model on the held-out features
print("[INFO] evaluating...")
preds = model.predict(testX)

# The CSV stores each label as the string form of its encoder index, so
# list those strings in encoder order explicitly. Without `labels`, the
# report sorts the string labels lexicographically ("10" before "2"),
# which would pair rows with the wrong class names at 10+ classes.
encodedLabels = [str(i) for i in range(len(le.classes_))]
print(classification_report(testY, preds, labels=encodedLabels,
                            target_names=le.classes_))

[INFO] evaluating...
              precision    recall  f1-score   support

    WithMask       1.00      1.00      1.00       777
 WithoutMask       1.00      1.00      1.00       660

    accuracy                           1.00      1437
   macro avg       1.00      1.00      1.00      1437
weighted avg       1.00      1.00      1.00      1437



In [97]:
# serialize the trained classifier to disk; the context manager closes
# the file even if pickling raises part-way through
print("[INFO] saving model...")
with open(MODEL_PATH, "wb") as f:
    pickle.dump(model, f)

[INFO] saving model...


In [98]:
# show the ground-truth labels followed by the predicted labels
for labelArray in (testY, preds):
    print(labelArray)

['1' '0' '0' ... '0' '1' '1']
['1' '0' '0' ... '0' '1' '1']
