In [1]:
import os
import numpy as np
from PIL import Image
%matplotlib inline
import matplotlib.pyplot as plt
import pickle
from tqdm import tqdm
%pylab inline
import matplotlib.image as mpimg
import re
import cv2

Populating the interactive namespace from numpy and matplotlib


In [78]:
# Get lists of images and masks
# Single place to change when the data folder moves.
DATA_DIR = '/Users/Claude/Documents/GitHub/DSCI591-Fall21-RecommendationSystem/data'
image_path = os.path.join(DATA_DIR, 'image_data')
mask_path = os.path.join(DATA_DIR, 'mask_pkls')


def list_top_level_files(directory):
    """Return the names of the files located directly inside ``directory``.

    Sub-directories are not descended into. When ``os.walk`` produces no
    entry at all (for example because the directory does not exist) an
    empty list is returned instead of raising.
    """
    # The first (root, dirs, files) triple yielded by os.walk describes
    # `directory` itself; later triples belong to sub-directories.
    _, _, files = next(os.walk(directory), (directory, [], []))
    return list(files)


# Each listing is wrapped in a one-element list because the flattening cell
# that follows expects a list of lists.
image_files = [list_top_level_files(image_path)]
mask_files = [list_top_level_files(mask_path)]

In [79]:
# Flatten the lists
def flatten_once(items):
    """Remove one level of list nesting, leaving non-list entries untouched.

    File names that are already at the top level are kept whole, so running
    this cell a second time is harmless. A plain nested comprehension would
    iterate over each file name and split it into single characters.
    """
    flat = []
    for item in items:
        if isinstance(item, (list, tuple)):
            flat.extend(item)
        else:
            flat.append(item)
    return flat


image_files = flatten_once(image_files)
mask_files = flatten_once(mask_files)

In [80]:
# Report how many entries each directory listing produced.
image_count = len(image_files)
mask_count = len(mask_files)
print(f"There are {image_count} image files and {mask_count} mask pickle files")

There are 2834 image files and 1424 mask pickle files


In [81]:
# Pair each image with its corresponding mask
# Raw string: '\d' in a normal string literal is an invalid escape sequence.
# '+' instead of '*': a name without a leading number must not produce the
# empty ID '', which would pair unrelated files with each other.
num_pattern = r'(\d+)'


def leading_number(file_name, pattern=num_pattern):
    """Return the digits at the start of ``file_name``, or None if there are none.

    Names such as ".DS_Store" or ".gitignore" have no leading digits and
    therefore yield None.
    """
    found = re.match(pattern, file_name)
    return found.group() if found else None


# Index the images by ID once instead of rescanning the whole image list for
# every mask. setdefault keeps the first image seen for an ID, which is the
# image the original first-match search would have selected.
images_by_ID = {}
for image_i in image_files:
    image_ID = leading_number(image_i)
    if image_ID is not None:
        images_by_ID.setdefault(image_ID, image_i)

# ID -> (image file name, mask file name); masks without an image are skipped.
mask_image_pairs = {}
for mask_i in mask_files:
    mask_ID = leading_number(mask_i)
    if mask_ID in images_by_ID:
        mask_image_pairs[mask_ID] = (images_by_ID[mask_ID], mask_i)

In [82]:
# Number of (image, mask) pairs that were matched by ID.
k = len(mask_image_pairs)
print(f"Total pairs are: {k}")

Total pairs are: 1419


In [83]:
def mask_image(image_i, mask_i, image_path, mask_path,
               output_dir="/Users/Claude/Documents/GitHub/DSCI591-Fall21-RecommendationSystem/data/masked_image_data",
               circle_center=(260, 300), circle_radius=225):
    """Apply a circular crop and a pickled mask to one image and save it as PNG.

    Parameters
    ----------
    image_i : str
        File name of the image inside ``image_path``.
    mask_i : str
        File name of the mask pickle inside ``mask_path``. The unpickled
        object is indexed as ``obj[0]['masks'][:, :, 0]``; pixels where that
        plane is zero are blacked out.
        (presumably a per-instance segmentation mask -- TODO confirm)
    image_path, mask_path : str
        Directories containing the image and the mask pickle.
    output_dir : str, optional
        Directory the result is written to; created if it does not exist.
    circle_center : tuple of int, optional
        Centre of the circular crop, passed to ``cv2.circle`` (x, y order).
    circle_radius : int, optional
        Radius of the circular crop in pixels.

    Returns
    -------
    str
        Path of the written file, ``<output_dir>/masked_<ID>.png``, where ID
        is the leading number of ``image_i`` (or its stem when the name has
        no leading number).

    Raises
    ------
    IndexError
        If the image is not multi-channel or the mask plane does not have the
        image's height and width.
    """
    # Load image
    # NOTE(review): the second positional argument of plt.imread is `format`,
    # not a cv2-style read flag. It is kept so the decoding path is unchanged.
    image = plt.imread(os.path.join(image_path, image_i), 0)
    if image.ndim != 3:
        raise IndexError(f"expected a multi-channel image, got shape {image.shape}")

    # Load mask
    # NOTE: pickle.load can execute arbitrary code; only open trusted pickles.
    with open(os.path.join(mask_path, mask_i), 'rb') as file:
        mask_dict = pickle.load(file)

    # True where the first mask plane is non-zero, i.e. the pixels to keep.
    keep = np.asarray(mask_dict[0]['masks'][:, :, 0] != 0)
    if keep.shape != image.shape[:2]:
        raise IndexError(
            f"mask shape {keep.shape} does not match image shape {image.shape[:2]}"
        )

    # Filled white disc on a black canvas of the same shape as the image
    mask = np.zeros(image.shape, dtype=np.uint8)
    mask = cv2.circle(mask, tuple(circle_center), circle_radius, (255, 255, 255), -1)

    # Keep only the pixels inside the disc
    result = cv2.bitwise_and(image, mask)

    # Color the area outside the disc white. The multiplication below still
    # zeroes every pixel outside the pickled mask, so white only survives
    # where the mask plane is non-zero.
    result[mask == 0] = 255

    # Broadcast the 2-D keep-mask over the channel axis
    img = result * keep[..., None]

    # Use the leading number of image_i as unique ID for each file; fall back
    # to the file stem so names without a number do not all map to "masked_.png".
    id_match = re.match(r"\d+", image_i)
    file_ID = id_match.group() if id_match else os.path.splitext(image_i)[0]
    file_name = "masked_" + file_ID + ".png"

    # Save the output
    os.makedirs(output_dir, exist_ok=True)
    output_file = os.path.join(output_dir, file_name)
    Image.fromarray(img).save(output_file)
    return output_file

In [85]:
# Iterate through the image and mask pairs and combine them together
k = len(mask_image_pairs)
# (image file, mask file, exception) for every pair that could not be processed
failed_pairs = []
with tqdm(total=k) as pbar:
    for image_i, mask_i in mask_image_pairs.values():
        try:
            mask_image(image_i, mask_i, image_path, mask_path)
        except Exception as e:
            # Best effort: one bad pair must not abort the whole batch.
            # tqdm.write prints without garbling the progress bar.
            tqdm.write(str(e))
            tqdm.write(f"{image_i} {mask_i}")
            failed_pairs.append((image_i, mask_i, e))
        finally:
            # Advance only after the pair has actually been attempted.
            pbar.update(1)

# #load examples

# image = plt.imread(os.path.join(image_path, image_i), 0)

100%|██████████| 1419/1419 [03:20<00:00,  7.06it/s]


In [86]:
result_path = '/Users/Claude/Documents/GitHub/DSCI591-Fall21-RecommendationSystem/data/masked_image_data'
# os.walk yields the top-level directory first; an empty triple stands in
# when the walk yields nothing, which leaves result_files empty.
top_level_entry = next(os.walk(result_path), (result_path, [], []))
result_files = list(top_level_entry[2])

In [87]:
# Display the file names found directly inside result_path
result_files

['masked_2619.png',
 'masked_1476.png',
 'masked_1462.png',
 'masked_69.png',
 'masked_202.png',
 'masked_2625.png',
 'masked_41.png',
 'masked_2143.png',
 'masked_1338.png',
 'masked_2631.png',
 'masked_2802.png',
 'masked_2816.png',
 'masked_82.png',
 'masked_96.png',
 'masked_1674.png',
 'masked_1660.png',
 'masked_2341.png',
 'masked_2427.png',
 'masked_2433.png',
 'masked_2396.png',
 'masked_160.png',
 'masked_174.png',
 'masked_2553.png',
 'masked_2547.png',
 'masked_148.png',
 'masked_1728.png',
 'masked_2221.png',
 'masked_2590.png',
 'masked_362.png',
 'masked_404.png',
 'masked_410.png',
 'masked_376.png',
 'masked_2751.png',
 'masked_438.png',
 'masked_2792.png',
 'masked_2786.png',
 'masked_389.png',
 'masked_388.png',
 'masked_2787.png',
 'masked_2793.png',
 'masked_2750.png',
 'masked_2036.png',
 'masked_439.png',
 'masked_1517.png',
 'masked_411.png',
 'masked_377.png',
 'masked_2778.png',
 'masked_363.png',
 'masked_1265.png',
 'masked_405.png',
 'masked_1503.png',
 'ma