In [1]:
# Python script that captures face images from the webcam
# and stores the face data as a NumPy array.

# Steps:
# 1. Read and show the video stream and capture frames
# 2. Detect faces and show a bounding box
# 3. Flatten the largest face image and save it in a NumPy array
# 4. Repeat the above for multiple people to generate training data

In [1]:
import os

import cv2
import numpy as np

In [12]:
# Capture face samples of one person from the webcam and save them, one
# flattened 100x100 colour crop per row, to <dataset_path><name>.npy.
# Press 'q' in an OpenCV window to stop capturing.

# face detection model (Haar cascade shipped with OpenCV)
face_cascade = cv2.CascadeClassifier("haarcascade_frontalface_alt.xml")
if face_cascade.empty():
    # A missing or unreadable XML file does not raise by itself; it only
    # fails later inside detectMultiScale with an obscure error.
    raise RuntimeError("Could not load haarcascade_frontalface_alt.xml")

padding = 15            # extra pixels kept around the detected face box
sample_every = 10       # keep one crop out of this many frames that contain a face
max_failed_reads = 50   # stop after this many consecutive failed camera reads

skip = 0
face_data = []
dataset_path = './data/'
face_section = None     # last extracted face crop; None until a face is seen
failed_reads = 0

file_name = input("Enter name of Person: ")

#starting the camera
cap = cv2.VideoCapture(0)
try:
    if not cap.isOpened():
        raise RuntimeError("Could not open the webcam (device 0)")

    while True:
        ret, frame = cap.read()

        if not ret:
            # Tolerate a few dropped frames, but do not spin forever on a
            # dead camera (the 'q' key is never polled on this path).
            failed_reads += 1
            if failed_reads >= max_failed_reads:
                print("Camera stopped delivering frames, stopping capture")
                break
            continue
        failed_reads = 0

        # The cascade works on intensity only, so detect on the gray image.
        gray_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

        # 1.3 is the scale factor: how much the image is shrunk at each level
        # of the scale pyramid. A smaller factor (e.g. 1.03) tries more sizes,
        # which finds more faces but is slower.
        # 5 is minNeighbors: how many neighbouring candidate rectangles a
        # detection needs to be kept. Higher values give fewer detections of
        # higher quality.
        faces = face_cascade.detectMultiScale(gray_frame, 1.3, 5)

        if len(faces) > 0:
            # Only the largest face (by box area w*h) is used as the sample.
            x, y, w, h = (int(v) for v in max(faces, key=lambda f: f[2] * f[3]))

            # Clamp the padded box to the frame. A negative start index would
            # wrap around and yield an empty slice, which crashes cv2.resize.
            frame_h, frame_w = frame.shape[:2]
            x0, y0 = max(x - padding, 0), max(y - padding, 0)
            x1, y1 = min(x + w + padding, frame_w), min(y + h + padding, frame_h)

            # Extract the face area BEFORE drawing on the frame, so the yellow
            # box does not end up inside the saved training samples.
            face_section = cv2.resize(frame[y0:y1, x0:x1], (100, 100))

            # plotting rectangle around the face
            cv2.rectangle(frame, (x - padding, y - padding),
                          (x + w + padding, y + h + padding), (0, 255, 255), 4)

            skip += 1
            if skip % sample_every == 0:
                # only every 10th face frame is appended to face_data
                face_data.append(face_section)
                print(len(face_data))

        cv2.imshow("Frame", frame)
        if face_section is not None:
            cv2.imshow("Face Section", face_section)

        # keep capturing until the q key is pressed
        key_pressed = cv2.waitKey(1) & 0xFF
        if key_pressed == ord('q'):
            break
finally:
    # Always free the camera and close the windows, even after an error.
    cap.release()
    cv2.destroyAllWindows()

if face_data:
    # converting the list of face crops into a 2-D numpy array (one row per sample)
    face_data = np.asarray(face_data)
    face_data = face_data.reshape((face_data.shape[0], -1))
    print(face_data.shape)

    # Create the output folder if needed so the captured session is not lost.
    os.makedirs(dataset_path, exist_ok=True)
    np.save(dataset_path + file_name + '.npy', face_data)
    print("data saved at" + dataset_path + file_name + '.npy')
else:
    print("No face samples were captured; nothing was saved")

Enter name of Person: Tushar
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
(37, 30000)
data saved at./data/Tushar.npy


In [8]:
# Building the face classifier


# 1. Load the training data (NumPy arrays of all persons)
#      - x-values are the pixel data stored in the .npy files
#      - y-values (labels) need to be assigned for each person
# 2. Read the video stream using OpenCV
# 3. Extract the faces out of each frame
# 4. Use KNN to predict the id of each face and map it to the user's name
# 5. Display the prediction on screen - bounding box and name

In [2]:
import os

In [4]:
def distance(x,y):
    """Return the Euclidean (L2) distance between two equally shaped vectors.

    The inputs are converted to float64 before subtracting. Raw image pixels
    are usually uint8, and unsigned arithmetic wraps around (e.g. 0 - 20
    becomes 236), which would silently give a wrong distance.
    """
    diff = np.asarray(x, dtype=np.float64) - np.asarray(y, dtype=np.float64)
    return np.sqrt((diff ** 2).sum())

def KNN(train , test,k=5):
    """Classify `test` by majority vote among its k nearest training rows.

    Parameters
    ----------
    train : 2-D array; each row is a feature vector followed by its label
        in the last column.
    test : 1-D feature vector with as many entries as a training row has
        features (i.e. one less than the number of columns of `train`).
    k : number of nearest neighbours that take part in the vote.

    Returns
    -------
    The label (taken from the last column of `train`) that occurs most often
    among the k nearest rows. On a tie in counts the smallest label wins.

    Raises
    ------
    ValueError if `train` is not a non-empty 2-D array or if k < 1.
    """
    train = np.asarray(train)
    if train.ndim != 2 or train.shape[0] == 0:
        raise ValueError("train must be a non-empty 2-D array")
    if k < 1:
        raise ValueError("k must be at least 1")

    # Work in float64 so unsigned pixel values cannot wrap around on subtraction.
    query = np.asarray(test, dtype=np.float64)

    # Euclidean distance from the test point to every training row at once,
    # instead of one Python-level iteration per row.
    dists = np.sqrt(((train[:, :-1] - query) ** 2).sum(axis=1))

    # Indices of the k closest rows; the stable sort keeps the original row
    # order among equal distances.
    nearest = np.argsort(dists, kind="stable")[:k]

    # Frequency of each label among the neighbours, then the most frequent one.
    values, counts = np.unique(train[nearest, -1], return_counts=True)
    return values[np.argmax(counts)]






# Live face recognition: load every saved person from dataset_path, then
# label each face found in the webcam stream with the KNN prediction.
# Press 'q' in the OpenCV window to stop.

dataset_path = './data/'
face_data = []
labels = []

class_id = 0 #Labels for given file
names = {} #mapping between id and names

#Data Preparation
# (done before the camera is opened, so a data problem cannot leave the
#  webcam locked by this kernel)
# sorted() makes the id -> name assignment reproducible; os.listdir returns
# entries in arbitrary order.
for fx in sorted(os.listdir(dataset_path)):
    if fx.endswith('.npy'):
        #mapping between id and name
        #e.g. for a file called ayush.npy: names[class_id] = 'ayush'
        names[class_id] = fx[:-4]
        #loading the samples of this person (one flattened face per row)
        data_item = np.load(dataset_path + fx)
        face_data.append(data_item)

        #one label per sample: a vector filled with this person's class_id
        target = class_id*np.ones((data_item.shape[0],))
        class_id += 1
        labels.append(target)

if not face_data:
    # np.concatenate on an empty list fails with an unhelpful message.
    raise FileNotFoundError("No .npy training files found in " + dataset_path)

face_dataset = np.concatenate(face_data,axis = 0)
face_labels = np.concatenate(labels , axis = 0).reshape((-1,1))

# Each row of trainset: flattened face pixels followed by the label.
trainset = np.concatenate((face_dataset , face_labels),axis = 1)
# e.g. (19, 30001) means 19 pictures with 30,000 features and 1 label each
print(trainset.shape)

#face Detection
face_cascade = cv2.CascadeClassifier("haarcascade_frontalface_alt.xml")
if face_cascade.empty():
    # A missing or unreadable XML file does not raise by itself; it only
    # fails later inside detectMultiScale with an obscure error.
    raise RuntimeError("Could not load haarcascade_frontalface_alt.xml")

offset = 10             # extra pixels kept around the detected face box
max_failed_reads = 50   # stop after this many consecutive failed camera reads
failed_reads = 0

#starting the camera
cap = cv2.VideoCapture(0)
try:
    if not cap.isOpened():
        raise RuntimeError("Could not open the webcam (device 0)")

    #testing phase
    while True:
        ret,frame = cap.read()
        if not ret:
            # Tolerate a few dropped frames, but do not spin forever on a
            # dead camera (the 'q' key is never polled on this path).
            failed_reads += 1
            if failed_reads >= max_failed_reads:
                print("Camera stopped delivering frames, stopping")
                break
            continue
        failed_reads = 0

        faces = face_cascade.detectMultiScale(frame,1.3,5)
        frame_h, frame_w = frame.shape[:2]

        for face in faces:
            x,y,w,h = (int(v) for v in face)

            #Get face Region Of Interest
            # Clamp the padded box to the frame. A negative start index would
            # wrap around and yield an empty slice, which crashes cv2.resize.
            x0, y0 = max(x - offset, 0), max(y - offset, 0)
            x1, y1 = min(x + w + offset, frame_w), min(y + h + offset, frame_h)
            face_section = cv2.resize(frame[y0:y1, x0:x1],(100,100))

            # KNN returns the numeric class id; map it back to the person's name.
            out = KNN(trainset , face_section.flatten())
            pred = names[int(out)]

            cv2.putText(frame , pred , (x,y-10),cv2.FONT_HERSHEY_SIMPLEX,1,(255,0,0),2,cv2.LINE_AA)
            cv2.rectangle(frame , (x,y),(x+w,y+h) , (0,255,255),2)

        cv2.imshow("Faces",frame)
        key = cv2.waitKey(1) & 0xFF
        if key == ord('q'):
            break
finally:
    # Always free the camera and close the windows, even after an error.
    cap.release()
    cv2.destroyAllWindows()
        