In [None]:
import random
from os import listdir, makedirs, path
from shutil import copy, rmtree
from tqdm import tqdm
from glob import glob
from PIL import Image

In [12]:
WEBCAM_DIR = "webcam_dataset"
SAMPLED_DATASET = "resized-sampled-dataset"
OUTPUT_DIRECTORY = "dataset"
TRAIN_RATIO = 0.8 # range from 0 to 1
SEED = 27
NUM_SAMPLE = 3000 # -1 for all samples, > 0 for sampling data
RESIZED_SHAPE = (64,64)
classes = ['incorrect_mask', 'no_mask', 'mask']

In [13]:
if path.exists(path.join(OUTPUT_DIRECTORY,'train')):
    rmtree(path.join(OUTPUT_DIRECTORY,'train'))
if path.exists(path.join(OUTPUT_DIRECTORY,'val')):
    rmtree(path.join(OUTPUT_DIRECTORY,'val'))
makedirs(path.join(OUTPUT_DIRECTORY,'train'))
makedirs(path.join(OUTPUT_DIRECTORY,'val'))

## Processing Webcam Dataset

In [14]:
for _class in classes:
    print('Processing: {}'.format(_class))
    images = glob(path.join(WEBCAM_DIR,_class,'*'))
    
    if not path.exists(path.join(OUTPUT_DIRECTORY,'train',_class)):
        makedirs(path.join(OUTPUT_DIRECTORY,'train',_class))
    if not path.exists(path.join(OUTPUT_DIRECTORY,'val',_class)):
        makedirs(path.join(OUTPUT_DIRECTORY,'val',_class))
    
    # randomize
    random.seed(SEED)
    random.shuffle(images)
    if NUM_SAMPLE > 0:
        images = images[:NUM_SAMPLE]
    
    # Copy files to the output directory
    positive_sample = int(TRAIN_RATIO * len(images))
    corrupt_files = []
    for img in tqdm(images):
        im_pil = Image.open(img)
        imResize = im_pil.resize(RESIZED_SHAPE, Image.ANTIALIAS)
        file_name = path.split(img)[1]
        if img in images[:positive_sample]:
            imResize.save(path.join(OUTPUT_DIRECTORY,'train',_class,file_name), 'JPEG', quality=90)
        else:
            imResize.save(path.join(OUTPUT_DIRECTORY,'val',_class,file_name), 'JPEG', quality=90)

Processing: incorrect_mask


100%|████████████████████████████████████████| 930/930 [00:01<00:00, 907.62it/s]


Processing: no_mask


100%|████████████████████████████████████████| 908/908 [00:00<00:00, 939.19it/s]


Processing: mask


100%|████████████████████████████████████████| 904/904 [00:00<00:00, 923.90it/s]


## Processing Public Dataset

In [15]:
for _class in classes:
    print('Processing: {}'.format(_class))
    images_train = glob(path.join(SAMPLED_DATASET,'train',_class,'*'))
    images_val = glob(path.join(SAMPLED_DATASET,'val',_class,'*'))
    images = images_train + images_val
    
    if not path.exists(path.join(OUTPUT_DIRECTORY,'train',_class)):
        makedirs(path.join(OUTPUT_DIRECTORY,'train',_class))
    if not path.exists(path.join(OUTPUT_DIRECTORY,'val',_class)):
        makedirs(path.join(OUTPUT_DIRECTORY,'val',_class))
    
    # randomize
    random.seed(SEED)
    random.shuffle(images)
    if NUM_SAMPLE > 0:
        images = images[:NUM_SAMPLE]
    
    # Copy files to the output directory
    positive_sample = int(TRAIN_RATIO * len(images))
    corrupt_files = []
    for img in tqdm(images):
        im_pil = Image.open(img)
        imResize = im_pil.resize(RESIZED_SHAPE, Image.ANTIALIAS)
        file_name = path.split(img)[1]
        if img in images[:positive_sample]:
            imResize.save(path.join(OUTPUT_DIRECTORY,'train',_class,file_name), 'JPEG', quality=90)
        else:
            imResize.save(path.join(OUTPUT_DIRECTORY,'val',_class,file_name), 'JPEG', quality=90)

Processing: incorrect_mask


100%|██████████████████████████████████████| 3000/3000 [00:05<00:00, 580.58it/s]


Processing: no_mask


100%|██████████████████████████████████████| 3000/3000 [00:05<00:00, 592.69it/s]


Processing: mask


100%|██████████████████████████████████████| 3000/3000 [00:05<00:00, 591.38it/s]


## Get mean std

In [24]:
import torch
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader
import torchvision.transforms as transforms
import numpy as np

train_set = ImageFolder(root=OUTPUT_DIRECTORY, transform=transforms.ToTensor())

data = torch.cat([d[0] for d in DataLoader(train_set)])
mean, std = data.mean(dim=[0, 2, 3]), data.std(dim=[0, 2, 3])
print('mean: {}\tstd: {}'.format(list(np.array(mean)),list(np.array(std))))

mean: [0.51156753, 0.45862445, 0.43074608]	std: [0.2624124, 0.2608746, 0.26630473]
