# Image Pre-processing for Laver's Law Revisited

This notebook contains functions to remove backgrounds (adapted from https://github.com/aadityavikram/Background-Removal) and resize runway photo images

In [None]:
# Mount Google Drive at /content/drive so that files stored there can be read
# and written by the cells below (Colab-only API).
from google.colab import drive
drive.mount("/content/drive")

## Install and Import
Note: Many of these are old versions, pinned so that the background-removal code works. The scikit-image version may conflict with other packages; if errors occur, install or disable packages as needed.

In [None]:
!pip3 uninstall Keras
!pip3 uninstall scipy
!pip3 uninstall numpy
!pip3 uninstall tensorflow
!pip3 uninstall tensorflow-gpu
!pip3 install -I tensorflow-gpu==1.15.0
!pip3 install -I Keras==2.0.9
!pip3 install Pillow
!pip3 install -I scipy==1.1.0
!pip3 uninstall h5py
!pip3 install -I h5py==2.10.0 
!pip3 install numpy
!pip3 install scikit-image 
!apt-get -qq install -y libsm6 libxext6 && pip install -q -U opencv-python


In [None]:
import tensorflow as tf
import keras
from keras.models import load_model
from PIL import Image
from scipy.misc import imresize
import numpy as np
import os
from skimage import io, img_as_float, transform
import cv2
import drive
import glob

## Perform Background Removal
Load the model as linked from https://github.com/aadityavikram/Background-Removal. The majority of the three code blocks following come directly from https://github.com/aadityavikram/Background-Removal/blob/master/person.py.

In [None]:
# Load the background-removal model from ./drive/MyDrive. compile=False loads
# the network without compiling it, which is sufficient for running predictions.
model = load_model('./drive/MyDrive/main_model.hdf5', compile=False)
# Keep a handle on TensorFlow's default graph (TF 1.x API); predict() re-enters
# this graph before calling the model.
graph = tf.get_default_graph()

In [None]:
def predict(image):
    """Run the loaded model on a single image and return its prediction.

    Args:
        image: A 3-D array; a leading batch axis of size 1 is added before it
            is passed to the model.

    Returns:
        The model output reshaped to ``(224, 224, -1)``.
    """
    # The model expects a batch, so wrap the single image in one.
    batch = image[None, :, :, :]
    with graph.as_default():
        raw_output = model.predict(batch)
    return raw_output.reshape((224, 224, -1))

In [None]:
def remove(in_folder, out_folder, filename):
    """Remove the background of one image and save it on a yellow backdrop.

    The image is resized to 224x224 and passed to ``predict``. Channel 1 of the
    prediction is resized back to the original image size, thresholded at 50%,
    and used as the alpha channel. The result is pasted onto a solid yellow
    canvas, scaled to a width of 399 px (aspect ratio preserved), written to
    ``out_folder`` as a JPEG, and then handed to ``getLargestContour``.

    Args:
        in_folder: Directory containing the source image.
        out_folder: Directory the processed JPEG is written to.
        filename: File name of the image; used for both the input and output path.

    Returns:
        None. This is best-effort batch processing: any ``Exception`` is
        reported on stdout (with the file path and cause) and the image is
        skipped. ``KeyboardInterrupt`` is allowed to propagate so a long batch
        run can still be stopped.
    """
    in_path = os.path.join(in_folder, filename)
    out_path = os.path.join(out_folder, filename)
    try:
        # The context manager closes the file handle even if a later step fails.
        with Image.open(in_path) as image:
            image1 = imresize(image, (224, 224)) / 255.0

            prediction = predict(image1[:, :, 0:3])
            prediction = imresize(prediction[:, :, 1], (image.height, image.width))
            # Binarize the mask: fully opaque or fully transparent.
            prediction[prediction > 0.5 * 255] = 255
            prediction[prediction < 0.5 * 255] = 0

            # Stack the first three image channels with the mask as a fourth (alpha) channel.
            transparency = np.append(np.array(image)[:, :, 0:3], prediction[:, :, None], axis=-1)

        png = Image.fromarray(transparency)
        # Create bright yellow background (as opposed to white etc.) to help ensure
        # distinction from clothing items
        new_image = Image.new("RGBA", png.size, "YELLOW")
        new_image.paste(png, (0, 0), png)  # Paste the image on the background.
        basewidth = 399
        wpercent = basewidth / float(new_image.size[0])
        hsize = int(float(new_image.size[1]) * float(wpercent))
        # Image.LANCZOS is the same filter as the older Image.ANTIALIAS alias,
        # which newer Pillow releases no longer provide.
        new_image = new_image.resize((basewidth, hsize), Image.LANCZOS)
        # Could probably remove the step below of resaving then editing again in another format
        # with minor refactoring...
        new_image.convert('RGB').save(out_path, "JPEG")  # Save as JPEG
        getLargestContour(out_path)
    except Exception as exc:
        # Report which file failed and why, then carry on with the batch.
        print("error")
        print(in_path, repr(exc))

## Remove Artifacts
Some photos don't have cleanly removed backgrounds, especially photos with many models visible or busy foregrounds.

This code takes the bg-removed image and removes everything but the biggest contour.

The getLargestContour code was largely lifted from https://stackoverflow.com/questions/58754961/how-to-remove-the-object-marked-by-the-biggest-contour-from-an-image-and-save-it

In [None]:
def getLargestContour(img):
  """Crop the image file at ``img`` to its largest non-yellow region, in place.

  Pixels inside a yellow BGR range are treated as backdrop. The bounding box of
  the largest remaining contour is cropped, resized to a height of 440 px
  (aspect ratio preserved), placed on a 445x399 yellow canvas, and written back
  to the same path. The file is deleted instead when no contour is found, when
  the resized crop is wider than 200 px or narrower than 85 px, or when
  processing raises an exception.

  Args:
    img: Path of the image file to read, overwrite, or delete.

  Returns:
    None.
  """
  try:
    # Read image, create blank masks, color threshold
    image = cv2.imread(img)

    # Yellow backdrop boundaries in OpenCV's [B, G, R] channel order.
    lower = np.array([0, 200, 200], dtype="uint8")
    upper = np.array([20, 255, 255], dtype="uint8")

    # inRange marks backdrop pixels with 255; inverting leaves 255 on
    # everything that is NOT backdrop.
    mask = 255 - cv2.inRange(image, lower, upper)

    thresh = cv2.threshold(mask, 40, 255, 0)[1]
    # findContours returns (contours, hierarchy) in OpenCV 4.x but
    # (image, contours, hierarchy) in 3.x; [-2] selects the contours in both.
    contours = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)[-2]
    if len(contours) > 0:
      # find the biggest contour (c) by the area
      c = max(contours, key=cv2.contourArea)
      x, y, w, h = cv2.boundingRect(c)

      out = image[y:(y + h), x:(x + w)].copy()
      height = out.shape[0]
      width = out.shape[1]
      ratio = width / height
      new_height = 440
      new_width = int(new_height * ratio)
      # if the largest contour is super big or super small
      # we assume the pic is badly formatted and we remove it
      if new_width > 200 or new_width < 85:
        os.remove(img)
      else:
        resized = cv2.resize(out, (new_width, new_height))
        H, W = 445, 399
        canvas = np.zeros((H, W, 3), np.uint8)
        canvas[:] = [0, 255, 255]  # yellow in BGR
        # NOTE(review): the horizontal offset is computed from 445 (the canvas
        # height) rather than the canvas width 399, so the crop sits about
        # 23 px right of center. Left unchanged on purpose: the later face
        # position filter may be tuned to this placement - confirm before changing.
        x_offset = int(445 / 2 - new_width / 2)
        canvas[2:resized.shape[0] + 2, x_offset:x_offset + resized.shape[1]] = resized
        cv2.imwrite(img, canvas)
    # delete any images where there are no contours
    else:
      print(img)
      os.remove(img)
  except Exception as exc:
    # Catch Exception rather than everything, so that interrupting the cell
    # does not delete the image that happened to be in flight.
    print("error")
    print(img)
    print(repr(exc))
    # The file may already be gone (or never have existed); do not raise again
    # from inside the handler.
    if os.path.exists(img):
      os.remove(img)


## Check for Humans

Some of the output is pretty grim (weirdly scaled or random body parts). Here we look for faces in the top 100 pixels of the image and check if they are the right scale.

In [None]:
from google.colab.patches import cv2_imshow

# Load the cascade
# NOTE(review): these paths use './drive/My Drive/' while the model is loaded
# from './drive/MyDrive/' - confirm both spellings resolve on the mounted Drive.
face_cascade = cv2.CascadeClassifier('./drive/My Drive/haarcascade_frontalface_default.xml')
body_cascade = cv2.CascadeClassifier('./drive/My Drive/haarcascade_upperbody.xml')

# CascadeClassifier does not raise on a missing or invalid file; it just stays
# empty and fails later inside detectMultiScale. Fail here with a clear message.
if face_cascade.empty():
  raise IOError("could not load face cascade from './drive/My Drive/haarcascade_frontalface_default.xml'")

# NOTE(review): new_folder_base is not defined in the cells shown here; it is
# presumably the output folder of the background-removal step and must already
# exist in the kernel - confirm.
index = 0
remove_count = 0
for filename in glob.glob(new_folder_base + "/batch_*/*.jpg"):
  index = index + 1
  # sanity check
  if (index % 1000 == 0):
    print(index)
    print(remove_count)
  # Read the input image
  img = cv2.imread(filename)
  # imread returns None for files it cannot decode; skip those instead of
  # crashing the whole loop on the slice below.
  if img is None:
    print("unreadable image, skipped: " + filename)
    continue
  # Convert the top 100 rows into grayscale
  gray = cv2.cvtColor(img[0:100], cv2.COLOR_BGR2GRAY)
  # faces has very loose detection criteria and may over-detect faces
  faces = face_cascade.detectMultiScale(gray, 1.05, 3, minSize=(20,20), maxSize=(100,100))
  # strict faces has stricter parameters and may miss faces
  strict_faces = face_cascade.detectMultiScale(gray, 1.05, 6, minSize=(30,30), maxSize=(65,65))
  # remove pics with no faces
  if len(faces) < 1:
    remove_count = remove_count + 1
    os.remove(filename)
    # cv2_imshow(img)
  # if a strict face was found, take the topmost one (smallest y) and delete
  # the picture when that face lies too far left or right of x = 200
  elif len(strict_faces) > 0:
    # each detection is (x, y, w, h); min() keeps the first of any ties
    main_face = min(strict_faces, key=lambda f: f[1])
    if (main_face[0] > (200 + main_face[2])) or (main_face[0] < (200 - main_face[2]/2)):
      os.remove(filename)
      remove_count = remove_count + 1