# Mask pre-processing
In this notebook you can perform two tasks:
*   **Check** if all masks taken from a dataset are binary;
*   **Binarize** all masks taken from a dataset.


CAREFUL: DON'T RUN THE LAST PIECE OF CODE IF THE MASKS ARE ALREADY OK — it overwrites the mask files in place.


In [1]:
# Colab-only: mount Google Drive at /content/drive. DATA_DIR (defined in the
# configuration cell) points inside this mount, so this cell must run first.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
import random
import cv2
from tqdm.autonotebook import tqdm
import numpy as np
import glob
import os
from PIL import Image

# Target size handed to cv2.resize as `dsize`, which OpenCV interprets as
# (width, height) -- note this is the reverse of NumPy's (rows, cols) order.
IMAGE_SIZE = (384, 216)
# Folder holding the mask PNGs. The trailing slash is required: file paths are
# built from it by plain string concatenation in the cells below.
DATA_DIR = "/content/drive/MyDrive/Uni/CV Project/datasets/handsoverface_prof/DATA_MASKS/"

  This is separate from the ipykernel package so we can avoid doing imports until


*Checking masks*

In [3]:
NUM_IMAGES = 30 # Checking only NUM_IMAGES because this is a slow process

# Every PNG mask in the dataset folder, in a stable (sorted) order.
data_masks = sorted(glob.glob(DATA_DIR + '*png'))

# Draw a random subset of at most NUM_IMAGES masks. Sampling from the full list
# (instead of slicing the first NUM_IMAGES and shuffling afterwards) is what
# actually makes the check look at different masks on each run.
data_masks = random.sample(data_masks, min(NUM_IMAGES, len(data_masks)))

masks = []
for mask_path in tqdm(data_masks, desc="Reading images"):
  mask = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE)
  if mask is None:
    # cv2.imread signals an unreadable/corrupt file by returning None rather
    # than raising, which would otherwise crash cv2.resize below.
    print("Could not read mask, skipping : ", mask_path)
    continue
  # Nearest-neighbour interpolation never invents new grey levels, so resizing
  # cannot turn a binary mask into a non-binary one.
  # NOTE(review): it also only samples a subset of the original pixels, so a
  # few stray grey pixels in the full-resolution file could go unnoticed.
  masks.append(cv2.resize(mask, IMAGE_SIZE, interpolation = cv2.INTER_NEAREST))

if masks:
  # Stack into one (N, height, width) array and add a trailing channel axis.
  mask_dataset = np.array(masks)
  mask_dataset = np.expand_dims(mask_dataset, axis = 3)

  print("Labels in the mask are : ", np.unique(mask_dataset)) # Must be [0 255] for binary masks
else:
  # Nothing to stack: expand_dims(axis=3) would fail on an empty 1-D array.
  print("No readable masks found in : ", DATA_DIR)

Reading images:   0%|          | 0/30 [00:00<?, ?it/s]

Labels in the mask are :  [  0 255]


*Binarizing masks*

In [None]:
# Collect every PNG mask in the dataset folder. The list is wrapped in tqdm
# only once (in the loop below) so that a single progress bar is displayed.
images = glob.glob(DATA_DIR + '*png')

thresh = 127 # If grey image this will work
for e in tqdm(images, desc="Binarizing masks"):
    image_name = os.path.basename(os.path.splitext(e)[0])
    img_grey = cv2.imread(e, cv2.IMREAD_GRAYSCALE)
    if img_grey is None:
        # cv2.imread returns None instead of raising on an unreadable file;
        # skip it rather than crash in cv2.threshold and abort the whole run.
        print("Could not read mask, skipping : ", e)
        continue
    # Pixels strictly above `thresh` become 255, everything else becomes 0.
    img_binary = cv2.threshold(img_grey, thresh, 255, cv2.THRESH_BINARY)[1]
    # Destructive step: the binarized mask is written back over the source file.
    if not cv2.imwrite(DATA_DIR + image_name + ".png", img_binary):
        # cv2.imwrite reports failure through its boolean return value.
        print("Could not write mask : ", DATA_DIR + image_name + ".png")

  0%|          | 0/290 [00:00<?, ?it/s]

  0%|          | 0/290 [00:00<?, ?it/s]