<a href="https://colab.research.google.com/github/agg-shambhavi/MLH-code/blob/master/IV_ML_pipeline_documented.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Installing and importing essential libraries

In [None]:
pip install face-recognition

Collecting face-recognition
  Downloading https://files.pythonhosted.org/packages/1e/95/f6c9330f54ab07bfa032bf3715c12455a381083125d8880c43cbe76bb3d0/face_recognition-1.3.0-py2.py3-none-any.whl
Collecting face-recognition-models>=0.3.0
[?25l  Downloading https://files.pythonhosted.org/packages/cf/3b/4fd8c534f6c0d1b80ce0973d01331525538045084c73c153ee6df20224cf/face_recognition_models-0.3.0.tar.gz (100.1MB)
[K     |████████████████████████████████| 100.2MB 43kB/s 
Building wheels for collected packages: face-recognition-models
  Building wheel for face-recognition-models (setup.py) ... [?25l[?25hdone
  Created wheel for face-recognition-models: filename=face_recognition_models-0.3.0-py2.py3-none-any.whl size=100566173 sha256=fb17fa9cf114dd2675e7b57aa781458a7d058b29f9f89bc2064499990bae473b
  Stored in directory: /root/.cache/pip/wheels/d2/99/18/59c6c8f01e39810415c0e63f5bede7d83dfb0ffc039865465f
Successfully built face-recognition-models
Installing collected packages: face-recognition-m

In [None]:
# importing the libraries
import cv2
import matplotlib.pyplot as plt
import face_recognition
import pandas as pd
from sklearn.cluster import DBSCAN
from sklearn.preprocessing import StandardScaler
import numpy as np
import os
from sklearn.svm import SVC
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score

In [None]:
class ResizeUtils:
    """Resize helpers that keep an image's aspect ratio intact."""

    def rescale_by_height(self, image, target_height, method = cv2.INTER_LANCZOS4):
        """Return `image` resized to `target_height` rows.

        The new width is the original width scaled by the same factor and
        rounded to the nearest integer. `method` is passed to `cv2.resize`
        as the interpolation flag.
        """
        src_height, src_width = image.shape[0], image.shape[1]
        new_width = int(round(target_height * src_width / src_height))
        # cv2.resize takes the output size as (width, height).
        return cv2.resize(image, (new_width, target_height), interpolation = method)

    def rescale_by_width(self, image, target_width, method = cv2.INTER_LANCZOS4):
        """Return `image` resized to `target_width` columns.

        The new height is the original height scaled by the same factor and
        rounded to the nearest integer. `method` is passed to `cv2.resize`
        as the interpolation flag.
        """
        src_height, src_width = image.shape[0], image.shape[1]
        new_height = int(round(target_width * src_height / src_width))
        # cv2.resize takes the output size as (width, height).
        return cv2.resize(image, (target_width, new_height), interpolation = method)

In [None]:
class FramesResizing:
    """Shrinks frames extracted from the video so they fit inside a bounding box."""

    def AutoResize(self, frame, max_height = 500, max_width = 700):
        """Return `frame` scaled down to fit within `max_height` x `max_width`.

        The aspect ratio is preserved and frames already inside the box are
        returned unchanged.

        Args:
            frame: Image array whose first two shape entries are (height, width).
            max_height: Largest allowed height in pixels (default 500).
            max_width: Largest allowed width in pixels (default 700).

        Returns:
            The (possibly resized) frame.
        """
        resizeUtils = ResizeUtils()
        height, width = frame.shape[:2]
        if height > max_height:
            frame = resizeUtils.rescale_by_height(frame, max_height)
            # Re-read the size: the width check below must see the resized
            # frame, not the original dimensions.
            height, width = frame.shape[:2]
        if width > max_width:
            # Shrinking by width can only reduce the height further, so the
            # height bound established above still holds afterwards.
            frame = resizeUtils.rescale_by_width(frame, max_width)

        return frame

# Extraction of frames from the video

In [None]:
# Extract roughly one frame per second from a video and save each one as a JPEG.
# Returns a list of dictionaries, one per saved frame, holding the image file
# path, the running frame number and the capture timestamp.
def GenerateFrames(source, output_dir = '/content/drive/My Drive/Intelligent Vision/Frames'):
    """Sample frames from the video at `source` and write them to `output_dir`.

    A frame is saved whenever more than `fps` frames have been read since the
    previous save, so consecutive saved frames are slightly more than one
    second apart. Use the recorded timestamp, not the frame number, when an
    exact time is needed.

    Args:
        source: Video path (anything accepted by `cv2.VideoCapture`).
        output_dir: Directory the JPEG frames are written to; created if missing.

    Returns:
        A list of dicts with keys 'filepath', 'frame_number' (1-based count of
        saved frames) and 'timestamp' (value of `cv2.CAP_PROP_POS_MSEC` when the
        frame was saved).
    """
    cap = cv2.VideoCapture(source)
    try:
        # The first frame is read and discarded; counting starts at 1 below.
        _, frame = cap.read()

        fps = cap.get(cv2.CAP_PROP_FPS)
        TotalFrames = cap.get(cv2.CAP_PROP_FRAME_COUNT)

        print("[INFO] Total Frames ", TotalFrames, " @ ", fps, " fps")
        print("[INFO] Calculating number of frames per second")

        # cv2.imwrite does not create missing directories, so make sure it exists.
        os.makedirs(output_dir, exist_ok=True)

        autosize = FramesResizing()
        output_list = []

        CurrentFrame = 1
        fpsCounter = 0
        FrameWrittenCount = 1
        while CurrentFrame < TotalFrames:
            _, frame = cap.read()

            # A failed read yields None. The counters below still advance, so
            # the loop always terminates even if the reported frame count is
            # larger than the number of frames that can actually be decoded.
            if frame is not None and fpsCounter > fps:
                fpsCounter = 0
                frame = autosize.AutoResize(frame)

                filename = "frame_" + str(FrameWrittenCount) + ".jpg"
                filepath = os.path.join(output_dir, filename)
                cv2.imwrite(filepath, frame)
                timestamp = cap.get(cv2.CAP_PROP_POS_MSEC)
                output_dict = {'filepath':filepath, 'frame_number':FrameWrittenCount,'timestamp':timestamp}
                output_list.append(output_dict)
                FrameWrittenCount += 1

            fpsCounter += 1
            CurrentFrame += 1
    finally:
        # Always free the underlying video handle, even if something above raised.
        cap.release()

    print('[INFO] Frames extracted')
    return output_list

# Input Video

In [None]:
# Sample frames from the input video; each saved frame is described by one dictionary
input_list = GenerateFrames(
    source="/content/drive/My Drive/Intelligent Vision/hinal_video.mp4"
)

[INFO] Total Frames  2029.0  @  30.0  fps
[INFO] Calculating number of frames per second
[INFO] Frames extracted


In [None]:
# What do the frame dictionaries look like? Show the first two.
input_list[:2]

[{'filepath': '/content/drive/My Drive/Intelligent Vision/Frames/frame_1.jpg',
  'frame_number': 1,
  'timestamp': 1066.6666666666667},
 {'filepath': '/content/drive/My Drive/Intelligent Vision/Frames/frame_2.jpg',
  'frame_number': 2,
  'timestamp': 2100.0}]

## Detection of faces in the frames

In [None]:
# Detects faces in the extracted frames.
# Takes the list of frame dictionaries and returns a list with one dictionary
# per detected face (frames without faces contribute nothing).
def face_detector(input_list):
    """Locate and encode every face in the given frames.

    Args:
        input_list: List of dicts with keys 'filepath', 'timestamp' and
            'frame_number', one per frame image on disk.

    Returns:
        A list of dicts, one per detected face, with keys 'timestamp',
        'filepath', 'frame_number', 'box_loc' (the face location returned by
        `face_recognition.face_locations`) and 'encodings' (the encoding of
        that same face).
    """
    output_list = []
    for frame_dict in input_list:
        imagepath = frame_dict['filepath']
        bgr_image = cv2.imread(imagepath)
        if bgr_image is None:
            # cv2.imread returns None for a missing/unreadable file; skip that
            # frame instead of failing inside cvtColor.
            print("[WARN] Could not read frame image {}".format(imagepath))
            continue
        image = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2RGB)
        boxes = face_recognition.face_locations(image, model = 'cnn')
        if len(boxes) == 0:
            continue
        encodings = face_recognition.face_encodings(image,boxes, model ='large')
        # Encodings come back in the same order as the boxes passed in, so
        # pair them up; each face must keep its own encoding.
        for box, encoding in zip(boxes, encodings):
            output_subdict = {'timestamp':frame_dict['timestamp'],
                            'filepath': frame_dict['filepath'],
                            'frame_number': frame_dict['frame_number'],
                            'box_loc':box,
                            'encodings':encoding}
            output_list.append(output_subdict)
    return output_list

In [None]:
# Run the detector and inspect the result: number of faces and the first two records
output = face_detector(input_list)
print(len(output))
print(output[:2])

68
[{'timestamp': 1066.6666666666667, 'filepath': '/content/drive/My Drive/Intelligent Vision/Frames/frame_1.jpg', 'frame_number': 1, 'box_loc': (164, 401, 262, 303), 'encodings': array([-0.09204042, -0.01314281,  0.03511099,  0.00645922, -0.0141924 ,
       -0.08487569, -0.07276684, -0.09745202,  0.14139694, -0.07149032,
        0.19016232, -0.00948468, -0.10748965, -0.1009154 ,  0.03988665,
        0.05982405, -0.11614557, -0.05562395, -0.01980149, -0.07372502,
        0.00770692,  0.07971621,  0.08769965,  0.09991093, -0.16798052,
       -0.41128671, -0.11024234, -0.18832588, -0.02607009, -0.11204962,
       -0.07710136,  0.0967841 , -0.14902157, -0.01965883,  0.03253515,
        0.11028294,  0.10250863,  0.00179904,  0.17233141,  0.09450549,
       -0.15812048,  0.00464545, -0.00149993,  0.3206335 ,  0.16735914,
       -0.03496288,  0.01498056,  0.001847  ,  0.11346167, -0.14322783,
        0.14416163,  0.04844698,  0.10729957, -0.02870406,  0.06169754,
       -0.08768573, -0.02735

In [None]:
# Turn the list of detected faces into a DataFrame (one row per face)
df = pd.DataFrame(data=output)
print(df.shape)
df.head(n=5)

(68, 5)


Unnamed: 0,timestamp,filepath,frame_number,box_loc,encodings
0,1066.666667,/content/drive/My Drive/Intelligent Vision/Fra...,1,"(164, 401, 262, 303)","[-0.09204041957855225, -0.013142808340489864, ..."
1,2100.0,/content/drive/My Drive/Intelligent Vision/Fra...,2,"(164, 411, 262, 313)","[-0.08225470036268234, 0.014796493574976921, 0..."
2,3133.333333,/content/drive/My Drive/Intelligent Vision/Fra...,3,"(137, 401, 255, 283)","[-0.10313364863395691, 0.00907590426504612, 0...."
3,4166.666667,/content/drive/My Drive/Intelligent Vision/Fra...,4,"(149, 401, 267, 283)","[-0.1145082488656044, -0.025201357901096344, 0..."
4,5200.0,/content/drive/My Drive/Intelligent Vision/Fra...,5,"(164, 381, 262, 283)","[-0.10884711146354675, -0.010970329865813255, ..."


In [None]:
# Expand the face encodings into their own DataFrame: one row per face, one column per encoding component
derived_df = pd.DataFrame(df["encodings"].tolist())
derived_df.head(n=5)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,...,88,89,90,91,92,93,94,95,96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127
0,-0.09204,-0.013143,0.035111,0.006459,-0.014192,-0.084876,-0.072767,-0.097452,0.141397,-0.07149,0.190162,-0.009485,-0.10749,-0.100915,0.039887,0.059824,-0.116146,-0.055624,-0.019801,-0.073725,0.007707,0.079716,0.0877,0.099911,-0.167981,-0.411287,-0.110242,-0.188326,-0.02607,-0.11205,-0.077101,0.096784,-0.149022,-0.019659,0.032535,0.110283,0.102509,0.001799,0.172331,0.094505,...,0.156633,0.05341,-0.050636,0.232027,-0.024876,0.068637,0.091124,0.001615,-0.091291,-0.026707,-0.141475,-0.010826,0.023155,-0.04317,-0.017232,0.082316,-0.230024,0.173763,0.055405,-0.074039,0.066088,0.076008,-0.127651,-0.109028,0.115269,-0.255761,0.093448,0.107997,0.09771,0.218541,0.003399,0.063316,0.007738,0.005917,-0.135763,0.000548,0.054091,0.014914,0.036682,0.015394
1,-0.082255,0.014796,0.026123,-0.002456,-0.028046,-0.052704,-0.045156,-0.107173,0.141906,-0.085737,0.219632,-0.011728,-0.091192,-0.123608,0.032062,0.074019,-0.098014,-0.129223,0.01364,-0.079296,-0.020864,0.018336,0.070188,0.086684,-0.200014,-0.401637,-0.089891,-0.172755,-0.006143,-0.146842,-0.050004,0.106456,-0.156302,0.005996,0.022478,0.109998,0.088335,0.011428,0.180467,0.077977,...,0.190013,0.054094,-0.041964,0.200747,-0.055562,0.083527,0.055504,0.030217,-0.102705,-0.016075,-0.121851,-0.018974,0.010805,-0.043042,-0.027058,0.088542,-0.243977,0.164504,0.019945,-0.062896,0.056447,0.116459,-0.126383,-0.124499,0.082909,-0.258404,0.078266,0.131148,0.075511,0.194087,0.060432,0.038205,0.026126,0.019413,-0.097268,-0.02818,0.065913,-0.026172,0.030011,0.001396
2,-0.103134,0.009076,0.054216,-0.045221,-0.015502,-0.055624,-0.07529,-0.083699,0.124457,-0.08952,0.203505,-0.001025,-0.132225,-0.129175,0.039239,0.069112,-0.134668,-0.09755,-0.009361,-0.104052,0.001922,0.083103,0.065253,0.064775,-0.168008,-0.413111,-0.092113,-0.1792,-0.020886,-0.115215,-0.062315,0.057054,-0.150779,-0.003368,0.039125,0.107389,0.098985,-0.001716,0.156597,0.066227,...,0.170519,0.05474,-0.049636,0.220004,-0.049024,0.106197,0.080352,0.025706,-0.102025,-0.050952,-0.161041,-0.015319,0.03249,-0.042095,-0.011309,0.08147,-0.202557,0.127904,0.031205,-0.093307,0.028299,0.078147,-0.115495,-0.079403,0.093427,-0.268056,0.114279,0.121991,0.079272,0.204564,0.024431,0.010375,0.01772,-0.015893,-0.10241,-0.036578,0.036613,0.002605,0.054833,0.013384
3,-0.114508,-0.025201,0.048241,-0.057659,-0.034296,-0.06796,-0.087113,-0.071205,0.125594,-0.108827,0.170899,-0.016271,-0.130479,-0.118472,0.045015,0.05829,-0.135256,-0.096086,0.020629,-0.090158,-0.016846,0.070146,0.066341,0.100093,-0.192958,-0.398122,-0.087711,-0.175604,-0.049446,-0.11042,-0.05519,0.080735,-0.16308,-0.029423,0.02999,0.128438,0.07937,0.010186,0.155813,0.074058,...,0.186003,0.055424,-0.050231,0.208173,-0.019013,0.103115,0.08128,0.031907,-0.11591,-0.020215,-0.154549,-0.006111,0.049097,-0.027628,0.000757,0.102759,-0.199788,0.13473,0.010032,-0.10253,0.047646,0.088294,-0.123244,-0.116018,0.08873,-0.256512,0.102524,0.131143,0.071689,0.210076,0.024468,0.035111,0.021257,0.000174,-0.105335,-0.035882,0.06934,0.005195,0.063994,0.001513
4,-0.108847,-0.01097,0.039654,-0.05658,-0.046086,-0.051101,-0.072413,-0.058738,0.125877,-0.086487,0.178358,-0.007109,-0.115533,-0.108141,0.037151,0.071415,-0.162532,-0.081398,0.017761,-0.079818,-0.011067,0.094773,0.069971,0.080715,-0.201124,-0.409394,-0.096712,-0.16384,-0.023337,-0.070317,-0.053345,0.096493,-0.168384,-0.024606,0.059536,0.13275,0.064042,0.012,0.153062,0.065547,...,0.20236,0.043926,-0.046381,0.211903,-0.022075,0.091521,0.08639,0.02142,-0.104468,-0.024826,-0.158792,-0.004256,0.041175,-0.050274,0.001017,0.103325,-0.195385,0.129035,0.020665,-0.090345,0.051867,0.073522,-0.12795,-0.100602,0.099897,-0.257201,0.114366,0.108534,0.048984,0.212177,0.012098,0.037871,-0.003642,-0.013886,-0.109271,-0.029397,0.06817,-0.004663,0.081541,0.004952


# Input Image
Reading the query images and encoding them.

In [None]:
# Read the four query images ...
yash_pic, kalp_pic, sham_pic, tanvi_pic = map(plt.imread, (
    "/content/drive/My Drive/Intelligent Vision/yash.jpg",
    "/content/drive/My Drive/Intelligent Vision/kalp_1.jpg",
    "/content/drive/My Drive/Intelligent Vision/shambhavii.jpg",
    "/content/drive/My Drive/Intelligent Vision/tanvi_1.jpg",
))

# ... and encode the face(s) found in each one; every encoding becomes one DataFrame row.
yash_encoding, kalp_encoding, sham_encoding, tanvi_encoding = (
    pd.DataFrame(face_recognition.face_encodings(pic, model ='large'))
    for pic in (yash_pic, kalp_pic, sham_pic, tanvi_pic)
)

# Clustering Using DBSCAN
Clustering the faces of the same person appearing in different frames into one cluster

In [None]:
# Fit DBSCAN on the face encodings (Euclidean distance, neighbourhood radius eps = 0.4)
db = DBSCAN(metric ="euclidean", eps = 0.4).fit(X=derived_df)

In [None]:
# Count the clusters DBSCAN found; labels below 0 are not counted as a face
labelIDs = np.unique(db.labels_)
numUniqueFaces = int((labelIDs > -1).sum())
print("[INFO] # unique faces: {}".format(numUniqueFaces))

[INFO] # unique faces: 4


In [None]:
# Distinct DBSCAN labels; scikit-learn assigns -1 to noise samples that belong to no cluster
labelIDs

array([-1,  0,  1,  2,  3])

In [None]:
# MAPPING THE CLUSTERING RESULTS WITH REAL RESULTS

# Hand-labelled frame ranges (inclusive) for each person. Ranges are tested in
# order and the first match wins, so the last entry effectively covers 57-65.
true_ranges = [(1, 16, 'Yash'), (18, 33, 'Tanvi'), (35, 56, 'Sham'), (35, 65, 'Kalp')]

# One [frame_number, dbscan_label, true_name] row per detected face;
# frames outside every range get the string 'None'.
dbscan_results = [
    [
        face['frame_number'],
        cluster_label,
        next((name for low, high, name in true_ranges
              if low <= face['frame_number'] <= high), 'None'),
    ]
    for face, cluster_label in zip(output, db.labels_)
]

dbscan_results_df = pd.DataFrame(dbscan_results, columns=['frame_number','dbscan_label','True_result'])
dbscan_results_df.sample(n=5)

Unnamed: 0,frame_number,dbscan_label,True_result
7,8,0,Yash
31,31,1,Tanvi
12,13,0,Yash
20,20,1,Tanvi
62,60,-1,Kalp


## Converting the unsupervised learning problem into a supervised learning problem using the labels obtained from the clustering algorithm

In [None]:
# Features are the face encodings; targets are the DBSCAN cluster labels of those faces
x_train, y_train = derived_df, np.array(db.labels_)

In [None]:
# Re-encode the cluster labels as consecutive integers starting at 0
# (with the labels seen above, -1 becomes 0 and each cluster id shifts up by one)
out_encoder = LabelEncoder()
y_train = out_encoder.fit_transform(y_train)

In [None]:
# MAPPING THE ENCODED y_train LABELS WITH REAL RESULTS

# Hand-labelled frame ranges (inclusive) for each person. Ranges are tested in
# order and the first match wins, so the last entry effectively covers 57-65.
true_ranges = [(1, 16, 'Yash'), (18, 33, 'Tanvi'), (35, 56, 'Sham'), (35, 65, 'Kalp')]

# One [frame_number, y_label, true_name] row per detected face;
# frames outside every range get the string 'None'.
mapping_y_train = pd.DataFrame(
    [
        [
            face['frame_number'],
            encoded_label,
            next((name for low, high, name in true_ranges
                  if low <= face['frame_number'] <= high), 'None'),
        ]
        for face, encoded_label in zip(output, y_train)
    ],
    columns=['frame_number','y_label','True_result'],
)
mapping_y_train.sample(n=10)

Unnamed: 0,frame_number,y_label,True_result
26,26,2,Tanvi
61,59,4,Kalp
41,40,3,Sham
17,17,0,
32,32,2,Tanvi
28,28,2,Tanvi
47,46,3,Sham
35,34,3,
38,37,3,Sham
25,25,2,Tanvi


In [None]:
# Attach the encoded cluster label and a readable video time to every detected face

back_mapping = []
for y_lbl, frame_dict in zip(y_train,output):
  # 'timestamp' is the capture position in milliseconds recorded when the frame
  # was saved. Saved frames are slightly more than one second apart, so the
  # time is derived from the timestamp rather than from the frame number.
  time_min, time_sec = divmod(int(frame_dict['timestamp'] // 1000), 60)
  final_dict = {'timestamp':frame_dict['timestamp'],
                'filepath': frame_dict['filepath'],
                'frame_number': frame_dict['frame_number'],
                'box_loc':frame_dict['box_loc'],
                'encodings':frame_dict['encodings'],
                'labels': y_lbl,
                # m:ss with zero-padded seconds, so 1:05 cannot be misread as 1:50
                'time_in_min':'{}:{:02d}'.format(time_min, time_sec)}
  back_mapping.append(final_dict)

Mapping_df = pd.DataFrame(back_mapping)
Mapping_df.sample(n=5)

Unnamed: 0,timestamp,filepath,frame_number,box_loc,encodings,labels,time_in_min
20,20700.0,/content/drive/My Drive/Intelligent Vision/Fra...,20,"(137, 401, 255, 283)","[-0.1576208770275116, 0.004803510382771492, 0....",2,0:20
56,56866.666667,/content/drive/My Drive/Intelligent Vision/Fra...,55,"(43, 414, 185, 272)","[-0.09919435530900955, 0.06980714946985245, 0....",3,0:55
67,67200.0,/content/drive/My Drive/Intelligent Vision/Fra...,65,"(104, 421, 203, 323)","[-0.07170265913009644, 0.04573126137256622, 0....",4,1:5
7,8300.0,/content/drive/My Drive/Intelligent Vision/Fra...,8,"(144, 401, 242, 303)","[-0.10893487930297852, -0.010787680745124817, ...",1,0:8
10,11400.0,/content/drive/My Drive/Intelligent Vision/Fra...,11,"(125, 413, 243, 295)","[-0.07877559214830399, 0.02332720160484314, 0....",1,0:11


## Training SVM to predict the cluster of the query image

In [None]:
# Train a linear SVM (with probability estimates enabled) on encodings -> encoded cluster labels
svm_dbscan = SVC(probability=True, kernel='linear')
svm_dbscan.fit(X=x_train, y=y_train)

SVC(C=1.0, break_ties=False, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma='scale', kernel='linear',
    max_iter=-1, probability=True, random_state=None, shrinking=True, tol=0.001,
    verbose=False)

In [None]:
# Checking the accuracy of the SVM on its own training data
yhat_train = svm_dbscan.predict(x_train)
score_train = accuracy_score(y_true=y_train, y_pred=yhat_train)
print(score_train)

0.9705882352941176


# Predicting the cluster of the query image

In [None]:
# Predict the cluster of each query image. The expected labels quoted in the
# messages are hard-coded from the encoded labels observed in this run; they
# would need updating if clustering produced different cluster ids.
# [0] takes the prediction for the first encoding row of each query image.
print("Yash's label is 1 and model predicts {} ".format(svm_dbscan.predict(yash_encoding)[0]))
print("Tanvi's label is 2 and model predicts {} ".format(svm_dbscan.predict(tanvi_encoding)[0]))
print("Sham's label is 3 and model predicts {} ".format(svm_dbscan.predict(sham_encoding)[0]))
print("Kalp's label is 4 and model predicts {} ".format(svm_dbscan.predict(kalp_encoding)[0]))

Yash's label is 1 and model predicts 1 
Tanvi's label is 2 and model predicts 2 
Sham's label is 3 and model predicts 3 
Kalp's label is 4 and model predicts 4 


# Retrieving frames of the predicted cluster 

In [None]:
# Time and image path of every detected face whose encoded label is 1
return_info = Mapping_df.loc[Mapping_df['labels'] == 1, ['time_in_min', 'filepath']]
return_info

Unnamed: 0,time_in_min,filepath
0,0:1,/content/drive/My Drive/Intelligent Vision/Fra...
1,0:2,/content/drive/My Drive/Intelligent Vision/Fra...
2,0:3,/content/drive/My Drive/Intelligent Vision/Fra...
3,0:4,/content/drive/My Drive/Intelligent Vision/Fra...
4,0:5,/content/drive/My Drive/Intelligent Vision/Fra...
5,0:6,/content/drive/My Drive/Intelligent Vision/Fra...
6,0:7,/content/drive/My Drive/Intelligent Vision/Fra...
7,0:8,/content/drive/My Drive/Intelligent Vision/Fra...
8,0:9,/content/drive/My Drive/Intelligent Vision/Fra...
9,0:10,/content/drive/My Drive/Intelligent Vision/Fra...


# Organizing the code into classes
So that it can be moved into a Python script

In [None]:
class ResizeUtils:
    """Resize helpers that keep an image's aspect ratio intact."""

    def rescale_by_height(self, image, target_height, method = cv2.INTER_LANCZOS4):
        """Return `image` resized to `target_height` rows.

        The new width is the original width scaled by the same factor and
        rounded to the nearest integer. `method` is passed to `cv2.resize`
        as the interpolation flag.
        """
        src_height, src_width = image.shape[0], image.shape[1]
        new_width = int(round(target_height * src_width / src_height))
        # cv2.resize takes the output size as (width, height).
        return cv2.resize(image, (new_width, target_height), interpolation = method)

    def rescale_by_width(self, image, target_width, method = cv2.INTER_LANCZOS4):
        """Return `image` resized to `target_width` columns.

        The new height is the original height scaled by the same factor and
        rounded to the nearest integer. `method` is passed to `cv2.resize`
        as the interpolation flag.
        """
        src_height, src_width = image.shape[0], image.shape[1]
        new_height = int(round(target_width * src_height / src_width))
        # cv2.resize takes the output size as (width, height).
        return cv2.resize(image, (target_width, new_height), interpolation = method)


class FramesResizing:
    """Shrinks frames extracted from the video so they fit inside a bounding box."""

    def AutoResize(self, frame, max_height = 500, max_width = 700):
        """Return `frame` scaled down to fit within `max_height` x `max_width`.

        The aspect ratio is preserved and frames already inside the box are
        returned unchanged.

        Args:
            frame: Image array whose first two shape entries are (height, width).
            max_height: Largest allowed height in pixels (default 500).
            max_width: Largest allowed width in pixels (default 700).

        Returns:
            The (possibly resized) frame.
        """
        resizeUtils = ResizeUtils()
        height, width = frame.shape[:2]
        if height > max_height:
            frame = resizeUtils.rescale_by_height(frame, max_height)
            # Re-read the size: the width check below must see the resized
            # frame, not the original dimensions.
            height, width = frame.shape[:2]
        if width > max_width:
            # Shrinking by width can only reduce the height further, so the
            # height bound established above still holds afterwards.
            frame = resizeUtils.rescale_by_width(frame, max_width)

        return frame

class FrameGenerator:
  """Generates still frames from a video file."""

  def GenerateFrames(self,vid_fp, output_dir = '/content/drive/My Drive/Intelligent Vision/Frames'):
    """Sample frames from the video at `vid_fp` and write them to `output_dir`.

    A frame is saved whenever more than `fps` frames have been read since the
    previous save, so consecutive saved frames are slightly more than one
    second apart. Use the recorded timestamp, not the frame number, when an
    exact time is needed.

    Args:
        vid_fp: Video path (anything accepted by `cv2.VideoCapture`).
        output_dir: Directory the JPEG frames are written to; created if missing.

    Returns:
        A list of dicts with keys 'filepath', 'frame_number' (1-based count of
        saved frames) and 'timestamp' (value of `cv2.CAP_PROP_POS_MSEC` when the
        frame was saved).
    """
    cap = cv2.VideoCapture(vid_fp)
    try:
        # The first frame is read and discarded; counting starts at 1 below.
        _, frame = cap.read()

        fps = cap.get(cv2.CAP_PROP_FPS)
        TotalFrames = cap.get(cv2.CAP_PROP_FRAME_COUNT)

        print("[INFO] Total Frames ", TotalFrames, " @ ", fps, " fps")
        print("[INFO] Calculating number of frames per second")

        # cv2.imwrite does not create missing directories, so make sure it exists.
        os.makedirs(output_dir, exist_ok=True)

        autosize = FramesResizing()
        output_list = []

        CurrentFrame = 1
        fpsCounter = 0
        FrameWrittenCount = 1
        while CurrentFrame < TotalFrames:
            _, frame = cap.read()

            # A failed read yields None. The counters below still advance, so
            # the loop always terminates even if the reported frame count is
            # larger than the number of frames that can actually be decoded.
            if frame is not None and fpsCounter > fps:
                fpsCounter = 0
                frame = autosize.AutoResize(frame)

                filename = "frame_" + str(FrameWrittenCount) + ".jpg"
                filepath = os.path.join(output_dir, filename)
                cv2.imwrite(filepath, frame)
                timestamp = cap.get(cv2.CAP_PROP_POS_MSEC)
                output_dict = {'filepath':filepath, 'frame_number':FrameWrittenCount,'timestamp':timestamp}
                output_list.append(output_dict)
                FrameWrittenCount += 1

            fpsCounter += 1
            CurrentFrame += 1
    finally:
        # Always free the underlying video handle, even if something above raised.
        cap.release()

    print('[INFO] Frames extracted')
    return output_list

class DetectFaces:
  """Detects faces in a list of frames and returns one record per detected face."""

  def face_detector(self,frame_list):
    """Locate and encode every face in the given frames.

    Args:
        frame_list: List of dicts with keys 'filepath', 'timestamp' and
            'frame_number', one per frame image on disk.

    Returns:
        A list of dicts, one per detected face, with keys 'timestamp',
        'filepath', 'frame_number', 'box_loc' (the face location returned by
        `face_recognition.face_locations`) and 'encodings' (the encoding of
        that same face). Frames without faces contribute nothing.
    """
    output_list = []
    for frame_dict in frame_list:
        imagepath = frame_dict['filepath']
        bgr_image = cv2.imread(imagepath)
        if bgr_image is None:
            # cv2.imread returns None for a missing/unreadable file; skip that
            # frame instead of failing inside cvtColor.
            print("[WARN] Could not read frame image {}".format(imagepath))
            continue
        image = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2RGB)
        boxes = face_recognition.face_locations(image, model = 'cnn')
        if len(boxes) == 0:
            continue
        encodings = face_recognition.face_encodings(image,boxes, model ='large')
        # Encodings come back in the same order as the boxes passed in, so
        # pair them up; each face must keep its own encoding.
        for box, encoding in zip(boxes, encodings):
            output_subdict = {'timestamp':frame_dict['timestamp'],
                            'filepath': frame_dict['filepath'],
                            'frame_number': frame_dict['frame_number'],
                            'box_loc':box,
                            'encodings':encoding}
            output_list.append(output_subdict)
    return output_list

class DataFrameUtils:
  """Small pandas helpers used by the pipeline."""

  def createDataFrame(self,input_list):
    """Build a DataFrame from a list of dictionaries, print its shape and return it."""
    frame = pd.DataFrame(data=input_list)
    shape_message = "[INFO] The shape of df is {}".format(frame.shape)
    print(shape_message)
    return frame

# Reads the query image from disk and encodes the face(s) in it.
# The constructor takes the path of the query image.
class ImageReaderEncoder:
  def __init__(self,image_fp):
    # Path of the query image; only read when image_read_encode() is called.
    self.image_fp = image_fp

  def image_read_encode(self):
    """Return a DataFrame with one row per face encoding found in the query image."""
    face_vectors = face_recognition.face_encodings(plt.imread(self.image_fp), model ='large')
    return pd.DataFrame(face_vectors)

# Runs DBSCAN clustering on the face encodings
class dbscan_clustering:
  def db_cluster(self,derived_df):
    """Cluster the rows of `derived_df`, print the cluster count and return the per-row labels."""
    cluster_labels = DBSCAN(eps = 0.4, metric ="euclidean").fit(derived_df).labels_
    distinct_labels = np.unique(cluster_labels)
    # Labels below 0 are excluded from the cluster count.
    cluster_count = int((distinct_labels > -1).sum())
    print("[INFO] Number of unique clusters {}".format(cluster_count))
    return cluster_labels

# Prepares the training set, fits a linear SVM and predicts the cluster of the query image.
# Takes the DataFrame of face encodings, the DBSCAN labels of those encodings
# and the encoding of the query image.
class TrainSVM:
  def __init__(self, derived_df, labels_of_dbscan,image_enc):
    self.derived_df, self.labels_of_dbscan, self.image_enc = derived_df, labels_of_dbscan, image_enc

  def prepare_dataset(self):
    """Return (features, targets): the encodings and the label-encoded DBSCAN labels."""
    raw_labels = np.array(self.labels_of_dbscan)
    encoded_labels = LabelEncoder().fit_transform(raw_labels)
    return self.derived_df, encoded_labels

  def fit_model(self):
    """Fit the SVM on the prepared dataset, print its training accuracy and return it."""
    classifier = SVC(kernel='linear', probability=True)
    features, targets = self.prepare_dataset()
    classifier.fit(features, targets)
    train_accuracy = accuracy_score(targets, classifier.predict(features))
    print("[INFO] Accuracy of SVM Classifier is {}".format(train_accuracy))
    return classifier

  def predict_img_cluster(self):
    """Fit a fresh model and return its prediction for the first row of the query encoding."""
    return self.fit_model().predict(self.image_enc)[0]

# Class that retrieves the frames belonging to the predicted cluster
class Retrieve_instances:
  """Maps cluster labels back onto the detected faces and filters by cluster.

  Args:
      y_train_lbl: Encoded cluster label of each detected face, in the same
          order as `detected_faces_list`.
      detected_faces_list: List of dicts with keys 'timestamp', 'filepath',
          'frame_number', 'box_loc' and 'encodings'.
      predicted_cluster_number: Encoded cluster label to retrieve.
  """

  def __init__(self, y_train_lbl, detected_faces_list, predicted_cluster_number):
    self.y_train_lbl = y_train_lbl
    self.detected_faces_list = detected_faces_list
    self.predicted_cluster_number = predicted_cluster_number

  def back_mapping(self):
    """Return a DataFrame of all detected faces with 'labels' and 'time_in_min' added."""
    back_mapping = []
    for y_lbl, frame_dict in zip(self.y_train_lbl,self.detected_faces_list):
      # 'timestamp' is the capture position in milliseconds recorded when the
      # frame was saved. Saved frames are slightly more than one second apart,
      # so the time is derived from the timestamp rather than the frame number.
      time_min, time_sec = divmod(int(frame_dict['timestamp'] // 1000), 60)
      final_dict = {'timestamp':frame_dict['timestamp'],
                'filepath': frame_dict['filepath'],
                'frame_number': frame_dict['frame_number'],
                'box_loc':frame_dict['box_loc'],
                'encodings':frame_dict['encodings'],
                'labels': y_lbl,
                # m:ss with zero-padded seconds, so 1:05 cannot be misread as 1:50
                'time_in_min':'{}:{:02d}'.format(time_min, time_sec)}
      back_mapping.append(final_dict)
    Mapping_df = pd.DataFrame(back_mapping)
    return Mapping_df

  def df_to_retrieve(self):
    """Return the 'time_in_min' and 'filepath' columns for faces in the predicted cluster."""
    Mapping_df = self.back_mapping()
    in_cluster = Mapping_df['labels']==self.predicted_cluster_number
    cluster_df = Mapping_df.loc[in_cluster, ['time_in_min','filepath']]
    return cluster_df

# Final class: takes only the query image and the input video and returns
# the instances at which the person in the image appears in the video.
class img_to_vid_match:
  def __init__(self, image_fp, video_fp):
    self.image_fp, self.video_fp = image_fp, video_fp

  def Final_Match(self):
    """Run the whole pipeline and return the matching times and frame paths.

    Steps, in order: extract frames, detect and encode faces, encode the query
    image, cluster the face encodings with DBSCAN, train an SVM on the cluster
    labels, predict the query image's cluster and retrieve that cluster's faces.
    """
    frames = FrameGenerator().GenerateFrames(self.video_fp)
    faces = DetectFaces().face_detector(frames)
    faces_df = DataFrameUtils().createDataFrame(faces)
    face_encodings = pd.DataFrame(faces_df.encodings.to_list())
    query_encoding = ImageReaderEncoder(self.image_fp).image_read_encode()
    cluster_labels = dbscan_clustering().db_cluster(face_encodings)
    trainer = TrainSVM(face_encodings, cluster_labels, query_encoding)
    # Only the encoded labels are needed here; the features are discarded.
    encoded_labels = trainer.prepare_dataset()[1]
    query_cluster = trainer.predict_img_cluster()
    return Retrieve_instances(encoded_labels, faces, query_cluster).df_to_retrieve()

In [None]:
# End-to-end run of the final class: where does the person in the query image appear in the video?
match = img_to_vid_match(
    image_fp='/content/drive/My Drive/Intelligent Vision/kalp_1.jpg',
    video_fp='/content/drive/My Drive/Intelligent Vision/hinal_video.mp4',
)
final_df = match.Final_Match()
final_df

[INFO] Total Frames  2029.0  @  30.0  fps
[INFO] Calculating number of frames per second
[INFO] Frames extracted
[INFO] The shape of df is (68, 5)
[INFO] Number of unique clusters 4
[INFO] Accuracy of SVM Classifier is 0.9705882352941176


Unnamed: 0,time_in_min,filepath
61,0:59,/content/drive/My Drive/Intelligent Vision/Fra...
63,1:1,/content/drive/My Drive/Intelligent Vision/Fra...
64,1:2,/content/drive/My Drive/Intelligent Vision/Fra...
66,1:4,/content/drive/My Drive/Intelligent Vision/Fra...
67,1:5,/content/drive/My Drive/Intelligent Vision/Fra...
