# Initial Setup

In [1]:
# Mount Google Drive at /content/drive; the cascade file and input video paths configured below live under that mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
#imports
import cv2, os
import numpy as np
import matplotlib.pyplot as plt
import math, decimal, copy, glob
from PIL import Image as im

In [72]:
# Run configuration: edit these values before executing the notebook.
CascadeFilePath = "/content/drive/MyDrive/TrainingParseScript/lbpcascade_frontalface.xml" # cascade XML passed to cv2.CascadeClassifier for face detection
videoPath = "/content/drive/MyDrive/TrainingParseScript/headFixed.mp4" # input video to split into frames (original note: head3 and headfixed)
areaCaptureChoice = 2 # region to label: 1 = top half of face, 2 = eye area only, any other value (e.g. 0) = entire face
displayCaptureImages = False # change to True to plot each frame with its label box drawn on it
LBP = False # change to True to convert frames into local binary pattern format
fixedlabel = False # change to True to write the same label (fixedlabelCoor below) for every frame instead of detecting a face
fixedlabelCoor = "0 0.502083 0.487153 0.995833 0.906250" # label line replicated across all frames when fixedlabel is True ("class x_center y_center width height", same layout the generated labels use)
parseFramesOnly = False # change to True to only extract frames and skip labelling (such as for resnet)
rotate90 = True # change to True to rotate every frame 90 degrees clockwise - use for mobile phone videos

# Parse Video Function

In [54]:

#Extract frames from video
#https://www.geeksforgeeks.org/extract-images-from-video-in-python/
def parseVideo(path, rotate: bool):
  """Split a video into JPEG frames written to the current directory.

  Frames are saved as ``./frame<N>.jpg`` where N counts up from 0.

  Args:
    path: Location of the video file handed to ``cv2.VideoCapture``.
    rotate: When True, every frame is rotated 90 degrees clockwise before it
      is saved.

  Returns:
    A list with the frame numbers as strings (``"0"``, ``"1"``, ...), one per
    frame written. The list is empty when no frame could be read.
  """
  cam = cv2.VideoCapture(path)

  #Parse video to frames
  currentFrame = 0
  frameFileNameCount = []
  try:
    while True:
      ret, frame = cam.read()
      if not ret:
        #no more frames (or the video could not be read at all)
        break

      #rotate only after a successful read: a failed read yields no frame to rotate
      if rotate:
        frame = cv2.rotate(frame, cv2.ROTATE_90_CLOCKWISE)

      name = "./frame" + str(currentFrame) + ".jpg"
      frameFileNameCount.append(str(currentFrame))
      print('Creating...' + name)
      cv2.imwrite(name, frame)
      currentFrame += 1
  finally:
    #release the capture even if writing or rotating a frame raised
    cam.release()
    cv2.destroyAllWindows()
  return frameFileNameCount

# Label Determination and Creation Functions

## Supporting Functions


In [6]:
#Loaded cascade classifiers keyed by file path, so the XML is parsed once instead of once per frame
_faceCascadeCache = {}

#This is to detect IF there is a face in a picture and extract said face
def getFaceCoordinates(image, cascadeFilePath):
    """Detect a face in a BGR image and return its bounding box.

    Args:
        image: Colour image in OpenCV's BGR channel order.
        cascadeFilePath: Path of the cascade XML file to load.

    Returns:
        The first entry returned by ``detectMultiScale`` (x, y, width, height),
        or ``None`` when no face was detected.

    Raises:
        IOError: If the cascade file could not be loaded.
    """
    faceCascade = _faceCascadeCache.get(cascadeFilePath)
    if faceCascade is None:
        faceCascade = cv2.CascadeClassifier(cascadeFilePath) #load face detector
        if faceCascade.empty():
            #fail with a readable message instead of an assertion deep inside detectMultiScale
            raise IOError(f"Unable to load cascade classifier from {cascadeFilePath}")
        _faceCascadeCache[cascadeFilePath] = faceCascade

    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) #the detector is run on the grayscale version of the image

    faces = faceCascade.detectMultiScale(gray, scaleFactor=1.2, minNeighbors=5) #returns a list of faces in case there's more than 1 face in pic

    if len(faces) == 0: #nothing detected
        return None

    #Assume that there is one face: only the first detection is used
    return faces[0]

#Grow a box by 10% of its own size on every side
def expandLabelArea(box):
  """Enlarge an [x, y, w, h] box in place and return it.

  The origin moves up/left by 10% of the height/width and the width and
  height each grow by 20%; every resulting value is truncated with ``int``.
  The passed-in ``box`` object itself is modified.
  """
  left, top, width, height = box[0], box[1], box[2], box[3]

  grown = (
    int(left - width*.10),
    int(top - height*.10),
    int(width + width*.20),
    int(height + height*.20),
  )

  #write the new values back into the caller's box
  for slot, value in enumerate(grown):
    box[slot] = value

  return box

#Second label: the same box limited to its upper half
def topHalfLabelCVCoor(boxFull):
  """Return a copy of an [x, y, w, h] box whose height is halved.

  The input box is left untouched; x, y and width are carried over unchanged.
  """
  upper = copy.deepcopy(boxFull)
  fullHeight = upper[3]
  upper[3] = fullHeight / 2
  return upper

#Convert Bounding Box to YOLO. Taken and modified from: https://github.com/tzutalin/labelImg/blob/master/libs/yolo_io.py
def CVBoxToYOLO(box,img_size):
  """Convert an [x, y, w, h] pixel box into normalised centre/size values.

  Args:
    box: Sequence whose first four items are the left edge, top edge, width
      and height of the box.
    img_size: Sequence whose item 0 divides the vertical values and item 1
      divides the horizontal values (i.e. height first, then width).

  Returns:
    Tuple ``(x_center, y_center, w, h)``, each divided by the matching image
    dimension.
  """
  left, top, boxW, boxH = box[0], box[1], box[2], box[3]

  #opposite corner of the box
  right = left + boxW
  bottom = top + boxH

  imgH, imgW = img_size[0], img_size[1]

  return (
    float(left + right) / 2 / imgW,
    float(top + bottom) / 2 / imgH,
    float(right - left) / imgW,
    float(bottom - top) / imgH,
  )

#Generate Yolo coordinates as string
def YOLOCoorGen(label: int, imageCVCoor, imageShape):
  """Build one label line: ``"<label> <x_center> <y_center> <w> <h>"``.

  Args:
    label: Class id placed at the start of the line.
    imageCVCoor: [x, y, w, h] pixel box, passed on to ``CVBoxToYOLO``.
    imageShape: Image shape, passed on to ``CVBoxToYOLO``.

  Returns:
    The label line, with each of the four coordinates written in fixed-point
    notation with exactly six decimal places (e.g. ``0.500000``).
  """
  YOLOCoor = CVBoxToYOLO(imageCVCoor, imageShape) #convert to YOLO format. Returns a tuple

  #fixed-point formatting rounds and zero-pads in one step, and never falls back
  #to scientific notation for very small values
  YOLOString = " ".join(f"{item:.6f}" for item in YOLOCoor)

  return f'{label} {YOLOString}'

#write label for face recognition
def writeLabel(filePath: str, labelInfo):
  """Write ``labelInfo`` to ``filePath``, replacing any existing content.

  Args:
    filePath: Destination of the label file.
    labelInfo: Text to store in the file.
  """
  print(f"writing: {filePath}")
  #the with-block closes the file even when the write fails
  with open(filePath, 'w') as label:
    label.write(labelInfo)

In [7]:
#Narrow the face box to the eye strip instead of eye area + hair:
# - keep only the top half of the box
# - then trim the upper 45% of that half by moving the top edge down
#
#Image coordinates used here: x grows to the right as usual, but the y origin is
#at the TOP of the image and y grows downwards. For a box (x1, y1, w, h):
#  x1+w = x2
#  y1+h = y2
#  e.g. cv2.rectangle(image, (x1, y1), (x1+w, y1+h), (255, 0, 0), 3)
def eyeAreaOnlyCoor(boxFull):
  """Return a copy of an [x, y, w, h] box reduced to the eye region.

  The input box is not modified. x and width are carried over unchanged; y is
  increased by 45% of half the original height, and the new height is the
  remaining 55% of that half.
  """
  eyeBox = copy.deepcopy(boxFull)

  topHalf = eyeBox[3]/2 #height of the top half of the box
  trimmed = topHalf*.45 #slice removed from the top of that half

  #top edge moves down by the trimmed amount; the height is what is left of the top half
  eyeBox[1] = eyeBox[1] + trimmed
  eyeBox[3] = topHalf - trimmed

  return eyeBox

## Functions to convert image into LBP format

In [8]:
#https://www.geeksforgeeks.org/create-local-binary-pattern-of-an-image-using-opencv-python/   
def get_pixel(img, center, x, y):
    """Compare one neighbourhood pixel with the centre value.

    Args:
        img: Two-level indexable image (``img[x][y]``), e.g. a 2-D array or a
            list of rows.
        center: Value of the centre pixel.
        x: First (row) index of the neighbour.
        y: Second (column) index of the neighbour.

    Returns:
        1 if the neighbour lies inside the image and is greater than or equal
        to ``center``; 0 otherwise, including for neighbours that fall outside
        the image.
    """
    # Explicit bounds checks are needed because a negative index does not
    # raise: it silently wraps around to the opposite edge of the image.
    if x < 0 or x >= len(img):
        return 0

    row = img[x]
    if y < 0 or y >= len(row):
        return 0

    return 1 if row[y] >= center else 0
   
# Function for calculating LBP
def lbp_calculated_pixel(img, x, y):
    """Compute the local binary pattern code of the pixel at ``img[x][y]``.

    Each of the eight surrounding pixels is compared with the centre through
    ``get_pixel``; the resulting 0/1 flags are combined into one integer, the
    first neighbour contributing 1 and the last contributing 128.
    """
    center = img[x][y]

    # Neighbour offsets (first-index delta, second-index delta), walked
    # clockwise from the top-left corner. Entry k carries the weight 2**k.
    ring = (
        (-1, -1),  # top_left     -> 1
        (-1, 0),   # top          -> 2
        (-1, 1),   # top_right    -> 4
        (0, 1),    # right        -> 8
        (1, 1),    # bottom_right -> 16
        (1, 0),    # bottom       -> 32
        (1, -1),   # bottom_left  -> 64
        (0, -1),   # left         -> 128
    )

    # Convert the binary flags to a decimal value
    code = 0
    for k, (dx, dy) in enumerate(ring):
        code += get_pixel(img, center, x + dx, y + dy) * (1 << k)

    return code

In [9]:
def lbpConversion(imagePath):
  """Load an image from disk and return its local binary pattern map.

  The file at ``imagePath`` is read with ``cv2.imread`` (flag 1), converted to
  grayscale, and every pixel is replaced by the code computed by
  ``lbp_calculated_pixel``.

  Returns:
    A ``np.uint8`` array of shape (height, width).
  """
  source = cv2.imread(imagePath, 1)
  rows, cols, _ = source.shape

  #the pattern is computed on a single channel, so switch to grayscale first
  gray = cv2.cvtColor(source, cv2.COLOR_BGR2GRAY)

  #output map with the same height and width as the loaded image
  lbpMap = np.zeros((rows, cols),np.uint8)

  #visit every pixel position row by row
  for r, c in np.ndindex(rows, cols):
    lbpMap[r, c] = lbp_calculated_pixel(gray, r, c)

  return lbpMap

## Functions for Labeling

In [10]:
def displayOpenCVLabelImage(image, labelCoordinates):
  """Draw a label box on an image and show it with matplotlib.

  Args:
    image: Image array as loaded by OpenCV. The rectangle is drawn directly
      onto this array.
    labelCoordinates: Sequence whose first four items are x, y, width and
      height of the box in pixels. Values are truncated to ``int``.
  """
  #cv2.rectangle only accepts integer pixel coordinates
  x, y, w, h = (int(value) for value in labelCoordinates[:4])
  cv2.rectangle(image, (x, y), (x+w, y+h), (0, 255, 0), 10)

  #OpenCV keeps colour images in BGR order while matplotlib expects RGB;
  #without the conversion red and blue are swapped in the plot
  if image.ndim == 3 and image.shape[2] == 3:
    shown = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
  else:
    shown = image

  plt.imshow(shown)
  plt.show()
  cv2.destroyAllWindows()

In [28]:
#Determine label for each frame. 

#frameFileNames: -a list of frame numbers as strings ("0", "1", etc.)
#captureArea: 0 - whole face, 1 - top half of face, 2 - eye region only | 0 is assumed default
def labelFrames(frameFileNames, cascadeFilePath, captureArea: int, displayLabeledImages: bool, lbp: bool):
  """Detect a face in every frame and write a matching label file.

  For each name N the image ``./frameN.jpg`` is read and a face is searched
  with ``getFaceCoordinates``. Frames that cannot be read or contain no
  detected face are deleted and skipped. For the others the label line from
  ``YOLOCoorGen`` (class 0) is written to ``./frameN.txt``.

  Args:
    frameFileNames: Frame numbers as strings.
    cascadeFilePath: Cascade XML path passed to ``getFaceCoordinates``.
    captureArea: 1 labels the top half of the face, 2 labels the eye area,
      any other value labels the whole detected face.
    displayLabeledImages: When True, each labelled frame is shown with its box.
    lbp: When True, each frame file is replaced by its local binary pattern
      version.

  Returns:
    List of the frame names that were skipped.
  """
  badFrames = []

  for fileName in frameFileNames:
    filePath = "./frame" + fileName + ".jpg"
    image = cv2.imread(filePath)
    
    #if failed to read
    if image is None:
      #imread also gives None for a file that does not exist, so only delete what is there
      if os.path.exists(filePath):
        os.remove(filePath)
      badFrames.append(fileName)
      print(f"Removing {filePath}: Unable to read file")
      continue

    #detect WHOLE FACE coordinates via openCV LBP cascade
    imageOpenCVCoor = getFaceCoordinates(image, cascadeFilePath)

    #If didn't detect face 
    if imageOpenCVCoor is None:
      os.remove(filePath)
      badFrames.append(fileName)
      print(f"Removing {filePath}: no face detected")
      continue

    #The capture modes are mutually exclusive
    if captureArea == 1:
      #Change to TOP HALF ONLY if setting is set
      imageOpenCVCoor = topHalfLabelCVCoor(imageOpenCVCoor)
    elif captureArea == 2:
      #Change to EYE AREA ONLY if setting is set
      imageOpenCVCoor = eyeAreaOnlyCoor(imageOpenCVCoor)

    #Change frames into LOCAL BINARY PATTERN format if setting is set
    if lbp:
      #convert frames into local binary pattern format
      numpyImage = lbpConversion(filePath)

      #replace frame with lbp version
      os.remove(filePath)
      cv2.imwrite(filePath, numpyImage)

    #convert openCVLabel coordinates into YOLO Format as String
    labelInfo = YOLOCoorGen(0, imageOpenCVCoor, image.shape)
    print(labelInfo)

    #write labels
    labelFilePath = "./frame" + fileName + ".txt"
    writeLabel(labelFilePath,labelInfo)

    if displayLabeledImages:
      #re-read the frame since the file on disk has been replaced by the lbp version
      if lbp:
        image = cv2.imread(filePath)
      displayOpenCVLabelImage(image, imageOpenCVCoor)

  return badFrames

In [24]:
def labelFramesFixed(frameFileNames, fixedLabelString, displayLabeledImages):
  """Write the same label line for every readable frame.

  For each name N the image ``./frameN.jpg`` is read; unreadable frames are
  deleted (when present) and skipped, and for all others ``fixedLabelString``
  is written to ``./frameN.txt``.

  Args:
    frameFileNames: Frame numbers as strings.
    fixedLabelString: Label line of the form
      ``"class x_center y_center width height"`` with the four coordinates
      given as fractions of the image size.
    displayLabeledImages: When True, each frame is shown with the label box.

  Returns:
    List of the frame names that were skipped.
  """
  labelInfo = fixedLabelString
  badFrames = []

  for fileName in frameFileNames:
    filePath = "./frame" + fileName + ".jpg"
    image = cv2.imread(filePath)
    
    #if failed to read
    if image is None:
      #imread also gives None for a file that does not exist, so only delete what is there
      if os.path.exists(filePath):
        os.remove(filePath)
      badFrames.append(fileName)
      print(f"Removing {filePath}: Unable to read file")
      continue

    #write labels
    labelFilePath = "./frame" + fileName + ".txt"
    writeLabel(labelFilePath,labelInfo)

    if displayLabeledImages:
      #the drawing helper works in pixels, so scale the fixed label to this frame
      imageOpenCVCoor = _fixedLabelToCVBox(fixedLabelString, image.shape)
      displayOpenCVLabelImage(image, imageOpenCVCoor)

  return badFrames

#Turn a "class x_center y_center width height" label (fractions of the image) into a pixel [x, y, w, h] box
def _fixedLabelToCVBox(fixedLabelString, imageShape):
  _, xCenter, yCenter, boxW, boxH = fixedLabelString.split()[:5]
  imgH, imgW = imageShape[0], imageShape[1]

  w = float(boxW) * imgW
  h = float(boxH) * imgH

  #the label stores the box centre; the pixel box starts at its top-left corner
  x = float(xCenter) * imgW - w / 2
  y = float(yCenter) * imgH - h / 2

  return [int(round(x)), int(round(y)), int(round(w)), int(round(h))]

# Execution

In [73]:
#Always split the video into frames; label them unless only the frames were requested
frameFilesCount = parseVideo(videoPath, rotate90)
if not parseFramesOnly:
  if fixedlabel:
    #same label line for every frame
    labelFramesFixed(frameFilesCount, fixedlabelCoor, displayCaptureImages)
  else:
    #label derived from face detection on each frame
    labelFrames(frameFilesCount, CascadeFilePath, areaCaptureChoice, displayCaptureImages, LBP)


Creating..../frame0.jpg
Creating..../frame1.jpg
Creating..../frame2.jpg
Creating..../frame3.jpg
Creating..../frame4.jpg
Creating..../frame5.jpg
Creating..../frame6.jpg
Creating..../frame7.jpg
Creating..../frame8.jpg
Creating..../frame9.jpg
Creating..../frame10.jpg
Creating..../frame11.jpg
Creating..../frame12.jpg
Creating..../frame13.jpg
Creating..../frame14.jpg
Creating..../frame15.jpg
Creating..../frame16.jpg
Creating..../frame17.jpg
Creating..../frame18.jpg
Creating..../frame19.jpg
Creating..../frame20.jpg
Creating..../frame21.jpg
Creating..../frame22.jpg
Creating..../frame23.jpg
Creating..../frame24.jpg
Creating..../frame25.jpg
Creating..../frame26.jpg
Creating..../frame27.jpg
Creating..../frame28.jpg
Creating..../frame29.jpg
Creating..../frame30.jpg
Creating..../frame31.jpg
Creating..../frame32.jpg
Creating..../frame33.jpg
Creating..../frame34.jpg
Creating..../frame35.jpg
Creating..../frame36.jpg
Creating..../frame37.jpg
Creating..../frame38.jpg
Creating..../frame39.jpg
Creating..

# Zip Extracted Frames + Labels

In [74]:
# Install the zip tool, then bundle every frame image (*.jpg) and label file (*.txt) from the working directory into the labeledFrames archive.
!apt install zip
!zip labeledFrames *.jpg *.txt

Reading package lists... Done
Building dependency tree       
Reading state information... Done
zip is already the newest version (3.0-11build1).
0 upgraded, 0 newly installed, 0 to remove and 37 not upgraded.
  adding: frame0.jpg (deflated 0%)
  adding: frame100.jpg (deflated 0%)
  adding: frame101.jpg (deflated 0%)
  adding: frame102.jpg (deflated 0%)
  adding: frame103.jpg (deflated 0%)
  adding: frame104.jpg (deflated 0%)
  adding: frame105.jpg (deflated 0%)
  adding: frame106.jpg (deflated 0%)
  adding: frame107.jpg (deflated 0%)
  adding: frame108.jpg (deflated 0%)
  adding: frame109.jpg (deflated 0%)
  adding: frame10.jpg (deflated 0%)
  adding: frame110.jpg (deflated 0%)
  adding: frame111.jpg (deflated 0%)
  adding: frame112.jpg (deflated 0%)
  adding: frame113.jpg (deflated 0%)
  adding: frame114.jpg (deflated 0%)
  adding: frame115.jpg (deflated 0%)
  adding: frame116.jpg (deflated 0%)
  adding: frame117.jpg (deflated 0%)
  adding: frame118.jpg (deflated 0%)
  adding: frame1

In [75]:
# Clean up: delete the extracted frame images and label files from the working directory.
%rm *.jpg *.txt