In [1]:
from google.colab import drive
drive.mount('/content/gdrive')

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [2]:
# Import Necessary Librariee
import sys
import os
import cv2 as cv
from google.colab.patches import cv2_imshow
import json
import csv
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import numpy as np

In [3]:
# Load Y labels and bounding Boxes
y_labels = pd.read_csv("/content/gdrive/MyDrive/Face_mask_detection/face_mask_data/train.csv")
y_labels

Unnamed: 0,name,x1,x2,y1,y2,classname
0,2756.png,69,126,294,392,face_with_mask
1,2756.png,505,10,723,283,face_with_mask
2,2756.png,75,252,264,390,mask_colorful
3,2756.png,521,136,711,277,mask_colorful
4,6098.jpg,360,85,728,653,face_no_mask
...,...,...,...,...,...,...
15407,1894.jpg,437,121,907,644,face_with_mask
15408,1894.jpg,557,363,876,636,mask_surgical
15409,1894.jpg,411,3,940,325,hat
15410,3216.png,126,69,409,463,face_with_mask


In [4]:
# Remove Duplicates and other Labels
options = ['face_with_mask','face_no_mask']
y_labels = y_labels[y_labels['classname'].isin(options)].sort_values('name')

from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
# encode classname column since it has inly to classes
y_labels['target'] = le.fit_transform(y_labels.classname)
print("Number of unique images in y_labels set: ", y_labels.name.nunique())
y_labels.head()


Number of unique images in y_labels set:  3390


Unnamed: 0,name,x1,x2,y1,y2,classname,target
13381,1801.jpg,451,186,895,697,face_no_mask,0
3463,1802.jpg,110,71,273,272,face_with_mask,1
14835,1803.jpg,126,75,303,333,face_with_mask,1
5867,1804.jpg,112,113,262,307,face_with_mask,1
6194,1805.jpg,728,180,853,336,face_with_mask,1


In [5]:
# Get names of Images in Data
train_images = y_labels.name.unique().tolist()
print(train_images)

['1801.jpg', '1802.jpg', '1803.jpg', '1804.jpg', '1805.jpg', '1806.jpg', '1808.jpg', '1809.jpg', '1810.jpg', '1811.jpg', '1812.jpg', '1813.jpg', '1814.jpg', '1815.jpg', '1818.jpg', '1819.jpg', '1821.jpg', '1822.jpg', '1823.jpg', '1825.jpg', '1826.jpg', '1827.jpg', '1828.jpg', '1829.jpg', '1831.jpg', '1833.jpg', '1835.jpg', '1836.jpg', '1837.jpg', '1838.jpg', '1839.jpg', '1840.jpg', '1841.jpg', '1843.jpg', '1844.jpg', '1845.jpg', '1847.jpg', '1848.jpg', '1849.jpg', '1850.jpg', '1851.jpg', '1852.jpg', '1854.jpg', '1856.jpg', '1857.jpg', '1858.jpg', '1864.jpg', '1867.jpg', '1870.jpg', '1873.png', '1874.jpg', '1880.jpg', '1881.jpg', '1882.jpg', '1885.jpg', '1886.jpg', '1887.jpg', '1888.jpg', '1889.jpg', '1891.jpg', '1892.jpg', '1893.jpg', '1894.jpg', '1895.jpg', '1896.jpg', '1897.jpg', '1899.jpeg', '1900.png', '1901.jpg', '1902.jpg', '1903.jpg', '1904.jpg', '1905.jpg', '1906.jpg', '1909.jpg', '1910.jpg', '1911.jpg', '1912.jpg', '1913.jpg', '1914.jpg', '1915.png', '1916.jpg', '1917.jpg', '1

IMAGE SIZE = 128

In [6]:
# create a list that would contain bounding boxes for each face
bounding_box=[]
for i in range(len(y_labels)):
    lst = []
    # extract coordinates of bounding box
    for box in y_labels.iloc[i][["x1",'x2','y1','y2']]:
        lst.append(box)
    bounding_box.append(lst)
    
#add new column with bounding boxes
y_labels["bounding_box"] = bounding_box  

# get box(es) for each unique image
def get_boxes(filename):
    boxes = []
    # get bounding_box column for all rows where y_labels["name"] == filename
    for box in y_labels[y_labels["name"] == filename]["bounding_box"]:
        boxes.append(box)
    return boxes

In [7]:
def draw_facebox(image, boxes):
    # plot the image
    plt.imshow(image)
    # get the context for drawing boxes
    ax = plt.gca()
    # plot each box
    for box in boxes:
    # get coordinates
        x, y, width, height = box[0], box[1], box[2], box[3],
        # create the shape
        rect = plt.Rectangle((x, y), width-x, height-y, 
                             fill=False, color='b', linewidth=1)
        # draw the box
        ax.add_patch(rect)
    # show the plot
    plt.show()

### Resizing Images and Bounding Boxes

## **SIZE = 256**

In [8]:
train_images_1 = y_labels.name.to_list()
len(train_images_1)

5749

In [9]:
# Load all images and resize them
size = 128
image_dir = '/content/gdrive/MyDrive/Face_mask_detection/face_mask_data/data/images'
images = np.empty((len(train_images_1),size,size,3))

width_scale = []
height_scale = []
i = 0
for i in range(len(train_images_1)):
  image = cv.imread(os.path.join(image_dir, train_images_1[i]))
  height,width,depth = image.shape
  width_scale.append(size/width)
  height_scale.append(size/height)
  image = cv.resize(image, (size, size), cv.INTER_AREA)
  images[i] = image
# width_scale = pd.DataFrame(width_scale)
# height_scale = pd.DataFrame(height_scale)


In [10]:
print(len(width_scale), len(height_scale))

5749 5749


In [12]:
bounding_boxes = []
for i in range(len(train_images_1)):
  x1 = int(y_labels.iloc[i]['x1']*width_scale[i])
  x2 = int(y_labels.iloc[i]['x2']*height_scale[i])
  y1 = int(y_labels.iloc[i]['y1']*width_scale[i])
  y2 = int(y_labels.iloc[i]['y1']*height_scale[i])
  bounding_boxes.append([x1,x2,y1,y2])

In [15]:
y_labels

Unnamed: 0,name,x1,x2,y1,y2,classname,target,bounding_box,0,1
0,2756.png,12,23,54,72,face_with_mask,1.0,"[69, 126, 294, 392]",0.184704,0.184838
1,2756.png,211,6,302,177,face_with_mask,1.0,"[505, 10, 723, 283]",0.418301,0.627451
4,6098.jpg,76,27,155,208,face_no_mask,0.0,"[360, 85, 728, 653]",0.213333,0.320000
7,4591.png,62,1,136,59,face_with_mask,1.0,"[239, 9, 522, 342]",0.261224,0.174150
11,5525.jpg,69,6,128,119,face_no_mask,0.0,"[262, 18, 484, 319]",0.264463,0.375918
...,...,...,...,...,...,...,...,...,...,...
5731,4778.png,53,58,68,79,face_with_mask,1.0,"[156, 170, 200, 232]",0.343624,0.342704
5740,4778.png,90,61,102,75,face_with_mask,1.0,"[262, 178, 297, 218]",0.345479,0.344086
5741,4778.png,0,77,11,96,face_with_mask,1.0,"[1, 211, 33, 262]",0.348299,0.367816
5744,5325.jpg,39,28,130,225,face_with_mask,1.0,"[153, 63, 509, 507]",0.256000,0.444444


In [11]:
import pickle

In [12]:
with open('/content/gdrive/MyDrive/Face_mask_detection/face_mask_data/resized_images.txt', 'wb') as file:
  pickle.dump(images, file, protocol= 4)

In [16]:
with open('/content/gdrive/MyDrive/Face_mask_detection/face_mask_data/bounding_boxes.txt', 'wb') as file:
  pickle.dump(bounding_boxes, file, protocol= 4)