# Data Preparation
In this notebook we convert the images into numeric arrays so that they can be fed to the neural network as training input.
We assign label 0 to images of people wearing a mask and label 1 to images of people not wearing a mask.

In [19]:
import os
import numpy as np
from sklearn.preprocessing import LabelBinarizer
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.preprocessing.image import img_to_array
from tensorflow.keras.preprocessing.image import load_img
import random
import pickle

## Read Names of the images

In [20]:
# Dataset layout: <path>/<class directory>/<image files>.
path = "dataset"
with_mask = "with_mask"        # class directory loaded with label 0
without_mask = "without_mask"  # class directory loaded with label 1
img_size = 224                 # images are resized to img_size x img_size on load

# os.listdir returns entries in arbitrary order; sorting the names makes the
# file order deterministic across runs and platforms.
names_images_with_mask = np.array(sorted(os.listdir(os.path.join(path,with_mask))))
names_images_without_mask = np.array(sorted(os.listdir(os.path.join(path,without_mask))))

## Read Images

In [21]:
def read_images(general_path,names_images,index_class):
    """Load the images of one class and pair each with its class label.

    Each file is loaded resized to ``img_size`` x ``img_size`` (a
    notebook-level setting), converted to an array and divided by 255.
    Files that cannot be loaded are reported and skipped.

    Parameters
    ----------
    general_path : str
        Directory that contains the image files.
    names_images : iterable of str
        File names located inside ``general_path``.
    index_class : int
        Label stored next to every image that loads successfully.

    Returns
    -------
    list
        One ``[image_array, index_class]`` pair per successfully loaded image.
    """
    images_classes = []
    for name in names_images:
        try:
            # os.path.join picks the platform's separator; a hard-coded "\\"
            # only produces a valid path on Windows.
            image_path = os.path.join(general_path, name)
            img = load_img(image_path, target_size=(img_size,img_size))
            img = img_to_array(img)
            img = img/255
            images_classes.append([img,index_class])
        except Exception as e:
            # Best-effort loading: report which file failed and keep going.
            print(f"Skipping {name!r}: {e}")
    return images_classes

In [22]:
# Load both classes; the class index passed here becomes the label of every
# image in that class (0 = with mask, 1 = without mask).
images_with_mask = read_images(
    general_path=os.path.join(path, with_mask),
    names_images=names_images_with_mask,
    index_class=0,
)
images_without_mask = read_images(
    general_path=os.path.join(path, without_mask),
    names_images=names_images_without_mask,
    index_class=1,
)



## Create Dataset

In [23]:
def create_dataset(images_with_mask,images_without_mask):
    """Concatenate the two per-class collections into a single dataset.

    The with-mask entries come first, followed by the without-mask entries.
    """
    combined = images_with_mask + images_without_mask
    return combined

In [24]:
dataset = create_dataset(images_with_mask,images_without_mask)

# Shuffle in place with a dedicated, seeded generator: the same input order
# always yields the same shuffled order, and the global `random` state is
# left untouched.
SHUFFLE_SEED = 42
random.Random(SHUFFLE_SEED).shuffle(dataset)

## Design Matrix and Vector of Labels

In [25]:
# Separate each (image, label) pair of the dataset into the design matrix X
# and the label vector y.
X = np.array([features for features, _ in dataset], dtype="float32")
y = np.array([label for _, label in dataset])

# Binarize the labels, then one-hot encode them over the two classes.
y = to_categorical(LabelBinarizer().fit_transform(y), num_classes=2)

## Split into Training, Validation and Test Set


In [26]:
# 60% / 20% split along the first axis; the test set takes whatever remains
# after rounding the first two sizes.
n_samples = X.shape[0]
training_size = round(0.6 * n_samples)
validation_size = round(0.2 * n_samples)
test_size = n_samples - training_size - validation_size

# Slice boundaries shared by the features and the labels.
train_end = training_size
validation_end = train_end + validation_size

X_train = X[:train_end, :, :, :]
X_validation = X[train_end:validation_end, :, :, :]
X_test = X[validation_end:, :, :, :]

y_train = y[:train_end]
y_validation = y[train_end:validation_end]
y_test = y[validation_end:]

## Save Training, Validation and Test Data

In [27]:
# Persist every split as its own pickle file inside the "data" directory.
output_dir = "data"
os.makedirs(output_dir, exist_ok=True)  # open() does not create missing directories

splits = {
    "X_train": X_train,
    "X_validation": X_validation,
    "X_test": X_test,
    "y_train": y_train,
    "y_validation": y_validation,
    "y_test": y_test,
}

for split_name, split_array in splits.items():
    # os.path.join avoids the backslash literals ("data\X_...", "data\y_..."),
    # which are invalid escape sequences and only form a path on Windows.
    split_path = os.path.join(output_dir, split_name + ".data")
    # The context manager closes the file even if pickle.dump raises.
    with open(split_path, "wb") as pickle_out:
        # Protocol 4 supports objects larger than 4 GB (needed for the image arrays).
        pickle.dump(split_array, pickle_out, protocol=4)