In [None]:
import cv2 as cv
import os
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
from mtcnn.mtcnn import MTCNN
from keras_facenet import FaceNet
import os
from tensorflow.keras.preprocessing.image import ImageDataGenerator, load_img, img_to_array, array_to_img

## Data scrapping

In [None]:
from google.colab import files
uploaded = files.upload()

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
!cp /content/drive/MyDrive/Augmented_photos.zip /content/


In [None]:
import zipfile
with zipfile.ZipFile('/content/Augmented_photos.zip', 'r') as zip_ref:
    zip_ref.extractall('/content/Augmented_photos')

## Data preparation

In [None]:
import os
import shutil

# Define the directory containing student images and the target directory
source_directory = "/content/Student Photos/Student Photos"
target_directory = "organized_student_images"

# Create the target directory if it doesn't exist
os.makedirs(target_directory, exist_ok=True)

# Loop through each file in the source directory
for filename in os.listdir(source_directory):
    if filename.endswith(".jpg"):
        # Extract the student name from the filename (e.g., John_Doe.jpg -> John_Doe)
        student_name = os.path.splitext(filename)[0]

        # Create a directory for each student
        student_directory = os.path.join(target_directory, student_name)
        os.makedirs(student_directory, exist_ok=True)

        # Copy the image to the student's directory
        source_file = os.path.join(source_directory, filename)
        target_file = os.path.join(student_directory, filename)
        shutil.copy(source_file, target_file)
        print(f"Copied {filename} to {student_directory}")

print("Images organized into folders.")


Copied Lakkam Rakesh_231030030_HALL14_186.jpg to organized_student_images/Lakkam Rakesh_231030030_HALL14_186
Copied Prashant Bisht_231050065_HALL14_501.jpg to organized_student_images/Prashant Bisht_231050065_HALL14_501
Copied Vishw Dipak Patel_231010087_HALL14_55.jpg to organized_student_images/Vishw Dipak Patel_231010087_HALL14_55
Copied Dhiraj Pareek_231110012_HALL14_216.jpg to organized_student_images/Dhiraj Pareek_231110012_HALL14_216
Copied Viswanath E D_231050410_HALL14_487.jpg to organized_student_images/Viswanath E D_231050410_HALL14_487
Copied Devansh Jindal_231140007_HALL14_327.jpg to organized_student_images/Devansh Jindal_231140007_HALL14_327
Copied Vishal Kumar_231050101_HALL14_540.jpg to organized_student_images/Vishal Kumar_231050101_HALL14_540
Copied Shashank Singh_231290014_HALL14_582.jpg to organized_student_images/Shashank Singh_231290014_HALL14_582
Copied Abhay Sharma_231030001_HALL14_190.jpg to organized_student_images/Abhay Sharma_231030001_HALL14_190
Copied Debc

In [None]:
import os
from tensorflow.keras.preprocessing.image import ImageDataGenerator, load_img, img_to_array, array_to_img

# Initialize the ImageDataGenerator with augmentation parameters
datagen = ImageDataGenerator(
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest'
)

# Path to the organized student images directory
organized_directory = "/content/organized_student_images"

# Loop through each student's folder and apply augmentation
for student_name in os.listdir(organized_directory):
    student_directory = os.path.join(organized_directory, student_name)

    for filename in os.listdir(student_directory):
        if filename.endswith(".jpg"):
            # Load the image
            img_path = os.path.join(student_directory, filename)
            img = load_img(img_path)
            x = img_to_array(img)
            x = x.reshape((1,) + x.shape)  # Reshape for the ImageDataGenerator

            # Generate and save augmented images
            i = 0
            for batch in datagen.flow(x, batch_size=1, save_to_dir=student_directory, save_prefix=student_name, save_format='jpg'):
                i += 1
                if i >= 10:  # Generate 10 augmented images per original image
                    break

print("Data augmentation completed.")


Data augmentation completed.


In [None]:

from google.colab import files

# Specify the path to your file
#file_path = "/content/organized_student_images"

# Download the file
files.download("/content/organized_student_images")

In [None]:
import shutil
from google.colab import files

# Specify the folder you want to zip
folder_to_zip = "/content/organized_student_images"
zip_filename = "Augmented_photos.zip"

# Create a zip file
shutil.make_archive(zip_filename.replace('.zip', ''), 'zip', folder_to_zip)

# Download the zip file
files.download(zip_filename)


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

## Facenet model


In [5]:
class FACELOADING:
    def __init__(self, directory):
        self.directory = directory
        self.target_size = (160,160)
        self.X = []
        self.Y = []
        self.detector = MTCNN()
        self.count = 0

    def extract_face(self, filename):
        img = cv.imread(filename)
        img = cv.cvtColor(img, cv.COLOR_BGR2RGB)
        x,y,w,h = self.detector.detect_faces(img)[0]['box']
        x,y = abs(x), abs(y)
        face = img[y:y+h, x:x+w]
        face_arr = cv.resize(face, self.target_size)
        return face_arr


    def load_faces(self, dir):
        FACES = []
        for im_name in os.listdir(dir):
            try:
                path = dir + im_name
                single_face = self.extract_face(path)
                FACES.append(single_face)
            except Exception as e:
                pass
        return FACES

    def load_classes(self):
        for sub_dir in os.listdir(self.directory):
            path = self.directory +'/'+ sub_dir+'/'
            FACES = self.load_faces(path)
            labels = [sub_dir for _ in range(len(FACES))]
            print(f"Loaded successfully: {len(labels)}")
            self.count += 1
            print(f"count of this student is {self.count}")
            print(self.count)
            self.X.extend(FACES)
            self.Y.extend(labels)

        return np.asarray(self.X), np.asarray(self.Y)


    def plot_images(self):
        plt.figure(figsize=(18,16))
        for num,image in enumerate(self.X):
            ncols = 3
            nrows = len(self.Y)//ncols + 1
            plt.subplot(nrows,ncols,num+1)
            plt.imshow(image)
            plt.axis('off')

In [None]:
faceloading = FACELOADING("/content/Augmented_photos")
X, Y = faceloading.load_classes()





[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Loaded successfully: 11
count of this student is 532
532
Loaded successfully: 11
count of this student is 533
533
Loaded successfully: 11
count of this student is 534
534
Loaded successfully: 11
count of this student is 535
535
Loaded successfully: 11
count of this student is 536
536
Loaded successfully: 11
count of this student is 537
537
Loaded successfully: 11
count of this student is 538
538
Loaded successfully: 11
count of this student is 539
539
Loaded successfully: 11
count of this student is 540
540
Loaded successfully: 11
count of this student is 541
541
Loaded successfully: 11
count of this student is 542
542
Loaded successfully: 11
count of this student is 543
543
Loaded successfully: 11
count of this student is 544
544
Loaded successfully: 11
count of this student is 545
545
Loaded successfully: 11
count of this student is 546
546
Loaded successfully: 11
count of this student is 547
547
Loaded successfully: 11

In [None]:
import numpy as np
from google.colab import files

# Save the face images and labels to .npz files
np.savez('/content/faces_labels.npz', faces=X, labels=Y)

# Download the .npz file to your local machine
files.download('/content/faces_labels.npz')


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
from google.colab import drive



# Save the face images and labels to .npz files
np.savez_compressed('/content/drive/MyDrive/faces_labels.npz', faces=X, labels=Y)


In [3]:
from keras_facenet import FaceNet
embedder = FaceNet()

def get_embedding(face_img):
    face_img = face_img.astype('float32') # 3D(160x160x3)
    face_img = np.expand_dims(face_img, axis=0)
    # 4D (Nonex160x160x3)
    yhat= embedder.embeddings(face_img)

    return yhat[0] # 512D image (1x1x512)

In [None]:
EMBEDDED_X = []
count = 0
for img in X:
    EMBEDDED_X.append(get_embedding(img))
    count +=1
    print(count)

EMBEDDED_X = np.asarray(EMBEDDED_X)

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
3855
3856
3857
3858
3859
3860
3861
3862
3863
3864
3865
3866
3867
3868
3869
3870
3871
3872
3873
3874
3875
3876
3877
3878
3879
3880
3881
3882
3883
3884
3885
3886
3887
3888
3889
3890
3891
3892
3893
3894
3895
3896
3897
3898
3899
3900
3901
3902
3903
3904
3905
3906
3907
3908
3909
3910
3911
3912
3913
3914
3915
3916
3917
3918
3919
3920
3921
3922
3923
3924
3925
3926
3927
3928
3929
3930
3931
3932
3933
3934
3935
3936
3937
3938
3939
3940
3941
3942
3943
3944
3945
3946
3947
3948
3949
3950
3951
3952
3953
3954
3955
3956
3957
3958
3959
3960
3961
3962
3963
3964
3965
3966
3967
3968
3969
3970
3971
3972
3973
3974
3975
3976
3977
3978
3979
3980
3981
3982
3983
3984
3985
3986
3987
3988
3989
3990
3991
3992
3993
3994
3995
3996
3997
3998
3999
4000
4001
4002
4003
4004
4005
4006
4007
4008
4009
4010
4011
4012
4013
4014
4015
4016
4017
4018
4019
4020
4021
4022
4023
4024
4025
4026
4027
4028
4029
4030
4031
4032
4033
4034
4035
4036
4037
4038
4039
4040
4041


In [None]:
np.savez_compressed('faces_embeddings_done_4classes.npz', EMBEDDED_X, Y)

In [None]:
from google.colab import files

# Download the .npz file
files.download('faces_embeddings_done_4classes.npz')


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
from google.colab import drive
# Save the face images and labels to .npz files
np.savez_compressed('/content/drive/MyDrive/faces_embeddings_done_4classes.npz', embeddings=EMBEDDED_X, labels=Y)


### Filtering Data

This are the students with improper face image, so i  have to  remove them from the dataset.

In [3]:
# Aravind Potluri_231040024_HALL14_322,
# Balu Savarapu_231040034_HALL14_391,
# Gourav Salla_231040047_HALL14_350,
# Hemang Mohanlal Khatri_231110016_HALL14_184,
# Pratik Kumar_230781_HALL14_412,
# Sai Rama Sekhar Kancharla_231050079_HALL14_467,
# Sameem Nazir Lone_231010066_HALL14_106,
# Suraj Jaiswal_231040414_HALL14_289,
# Vedant Salphale_231010085_HALL14_78,
# Vishal Kumar_231110058_HALL14_236

In [5]:
import numpy as np

# Assuming faces_embeddings and Y_Labels are already loaded
# faces_embeddings: array of embeddings
# Y_Labels: array of labels with names in the format 'Name_RollNo_Hall_Room'

roll_numbers_to_remove = ['231110058', '231010085', '231040414', '231010066', '231050079', '230781', '231110016', '231040047', '231040034', '231040024']

# Load your embeddings and labels
faces_embeddings = np.load("faces_embeddings_done_4classes.npz")['arr_0']  # Assuming embeddings are in 'arr_0'
Y_Labels = np.load("faces_embeddings_done_4classes.npz")['arr_1']  # Assuming labels are in 'arr_1'

# Count the occurrences of each roll number in the unfiltered data
roll_number_counts = {roll_no: 0 for roll_no in roll_numbers_to_remove}
for label in Y_Labels:
    for roll_no in roll_numbers_to_remove:
        if roll_no in label:
            roll_number_counts[roll_no] += 1

# Display the counts
for roll_no, count in roll_number_counts.items():
    print(f"Roll number {roll_no} occurs {count} times in the unfiltered data.")

# Filter the labels and embeddings
filtered_indices = [i for i, label in enumerate(Y_Labels) if not any(roll_no in label for roll_no in roll_numbers_to_remove)]
filtered_embeddings = faces_embeddings[filtered_indices]
filtered_labels = np.array(Y_Labels)[filtered_indices]

# Verify the removal
print(f"Original number of embeddings: {len(Y_Labels)}")
print(f"Number of embeddings after removal: {len(filtered_labels)}")
print(f"No of embedding removed is : {len(Y_Labels)-len(filtered_labels)}")



Roll number 231110058 occurs 11 times in the unfiltered data.
Roll number 231010085 occurs 9 times in the unfiltered data.
Roll number 231040414 occurs 11 times in the unfiltered data.
Roll number 231010066 occurs 11 times in the unfiltered data.
Roll number 231050079 occurs 11 times in the unfiltered data.
Roll number 230781 occurs 11 times in the unfiltered data.
Roll number 231110016 occurs 11 times in the unfiltered data.
Roll number 231040047 occurs 11 times in the unfiltered data.
Roll number 231040034 occurs 11 times in the unfiltered data.
Roll number 231040024 occurs 11 times in the unfiltered data.
Original number of embeddings: 6354
Number of embeddings after removal: 6246
No of embedding removed is : 108


In [6]:
# Save the filtered embeddings and labels if needed
np.savez_compressed("filtered_faces_embeddings.npz", arr_0=filtered_embeddings, arr_1=filtered_labels)

In [8]:
faces_embeddings = np.load("filtered_faces_embeddings.npz")
EMBEDDED_X = faces_embeddings['arr_0']
Y = faces_embeddings['arr_1']
print(len(EMBEDDED_X))
print(len(Y))

6246
6246


## SVM Model 

In [9]:
from sklearn.preprocessing import LabelEncoder

encoder = LabelEncoder()
encoder.fit(Y)
Y = encoder.transform(Y)

In [10]:
plt.plot(EMBEDDED_X[89])
plt.ylabel(Y[89])

NameError: name 'plt' is not defined

In [11]:
from sklearn.model_selection import train_test_split

X_train, X_test, Y_train, Y_test = train_test_split(EMBEDDED_X, Y, shuffle=True, random_state=17)

In [12]:
from sklearn.svm import SVC
model = SVC(kernel='linear', probability=True)
model.fit(X_train, Y_train)

In [13]:
ypreds_train = model.predict(X_train)
ypreds_test = model.predict(X_test)

In [14]:
from sklearn.metrics import accuracy_score

accuracy_score(Y_train, ypreds_train)

0.9995730145175064

In [15]:
accuracy_score(Y_test,ypreds_test)

0.9974391805377721

In [60]:
t_im = cv.imread("D:\SSearch project\pooja\pooja.jpg")
t_im = cv.cvtColor(t_im, cv.COLOR_BGR2RGB)
x,y,w,h = detector.detect_faces(t_im)[0]['box']
t_im = t_im[y:y+h, x:x+w]
t_im = cv.resize(t_im, (160,160))
test_im = get_embedding(t_im)

  t_im = cv.imread("D:\SSearch project\pooja\pooja.jpg")


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 100ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 66ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 56ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 40ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 49ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 43ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m

In [55]:
test_im = [test_im]
ypreds = model.predict(test_im)

In [56]:
ypreds

array([105], dtype=int64)

In [59]:
encoder.inverse_transform(ypreds)

array(['Ashish Kumar_231040028_HALL14_347'], dtype='<U60')

In [16]:
import pickle
#save the model
with open('svm_model_160x160.pkl','wb') as f:
    pickle.dump(model,f)

In [None]:
# Download the saved model file to your local machine
files.download('svm_model_160x160.pkl')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

### Checking SVM model


In [20]:
import cv2 as cv
import os
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
from mtcnn.mtcnn import MTCNN

In [19]:
from keras_facenet import FaceNet
embedder = FaceNet()




In [21]:
detector = MTCNN()

In [17]:
import numpy as np

# Load the .npz file
data = np.load('filtered_faces_embeddings.npz')

In [22]:
# Access the embeddings and labels
EMBEDDED_X_faces = data['arr_0']
Y_label = data['arr_1']

# Verify the loaded data
print(f'Embeddings shape: {EMBEDDED_X_faces.shape}')
print(f'Labels shape: {Y_label.shape}')

Embeddings shape: (6246, 512)
Labels shape: (6246,)


In [23]:
import pickle

with open("svm_model_160x160.pkl", 'rb') as f:
    model = pickle.load(f)

In [24]:
from sklearn.preprocessing import LabelEncoder

encoder = LabelEncoder()
encoder.fit(Y_label)
Y = encoder.transform(Y_label)

In [25]:
EMBEDDED_X = EMBEDDED_X_faces

### checking for a test image

In [11]:
def get_embedding(face_img):
    face_img = face_img.astype('float32') # 3D(160x160x3)
    face_img = np.expand_dims(face_img, axis=0)
    # 4D (Nonex160x160x3)
    yhat= embedder.embeddings(face_img)

    return yhat[0] # 512D image (1x1x512)

In [12]:
t_im = cv.imread("D:\SSearch project\student_photo_3.jpg")
t_im = cv.cvtColor(t_im, cv.COLOR_BGR2RGB)
x,y,w,h = detector.detect_faces(t_im)[0]['box']
t_im = t_im[y:y+h, x:x+w]
t_im = cv.resize(t_im, (160,160))
test_im = get_embedding(t_im)
test_im = [test_im]
ypreds = model.predict(test_im)
names = encoder.inverse_transform(ypreds)
print(names)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 113ms/step


  t_im = cv.imread("D:\SSearch project\student_photo_3.jpg")


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 105ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 138ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 145ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3s/step
['Akshat Hemang Jani_231010008_HALL14_3']


In [22]:
def  verify(t_im,model=model,encode=encoder,threshold=0.01):
    #t_im = cv.imread("D:\SSearch project\student_photo_3.jpg")
    t_im = cv.cvtColor(t_im, cv.COLOR_BGR2RGB)
    x,y,w,h = detector.detect_faces(t_im)[0]['box']
    t_im = t_im[y:y+h, x:x+w]
    t_im = cv.resize(t_im, (160,160))
    test_im = get_embedding(t_im)
    test_im = [test_im]
    probabilities = model.predict_proba(test_im)[0]
    max_prob = np.max(probabilities)
    print(max_prob)
    if max_prob < threshold:
        return "unknown"
    else:
        return encoder.inverse_transform([np.argmax(probabilities)])[0]
    # ypreds = model.predict(test_im)
    # names = encoder.inverse_transform(ypreds)
    # return names


In [18]:
threshold

0.01

In [53]:
t_im = cv.imread("D:\SSearch project\pooja\pooja.jpg")
t_im = cv.cvtColor(t_im, cv.COLOR_BGR2RGB)
x,y,w,h = detector.detect_faces(t_im)[0]['box']
t_im = t_im[y:y+h, x:x+w]
t_im = cv.resize(t_im, (160,160))
test_im = get_embedding(t_im)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 114ms/step

  t_im = cv.imread("D:\SSearch project\pooja\pooja.jpg")


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 114ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 61ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 43ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m

In [1]:
# face recognition part II
#IMPORT
import cv2 as cv
import numpy as np
import os
os.environ['TF_CPP_MIN_LOG_LEVEL']='2'
import tensorflow as tf
from sklearn.preprocessing import LabelEncoder
import pickle
from keras_facenet import FaceNet
#INITIALIZE
facenet = FaceNet()
faces_embeddings = np.load("faces_embeddings_done_4classes.npz")
Y = faces_embeddings['arr_1']
encoder = LabelEncoder()
encoder.fit(Y)
haarcascade = cv.CascadeClassifier("haarcascade_frontalface_default.xml")
model = pickle.load(open("svm_model_160x160.pkl", 'rb'))






In [2]:
from mtcnn.mtcnn import MTCNN
detector = MTCNN()
cap = cv.VideoCapture(0)
# WHILE LOOP

while cap.isOpened():
    _, frame = cap.read()
    rgb_img = cv.cvtColor(frame, cv.COLOR_BGR2RGB)
    gray_img = cv.cvtColor(frame, cv.COLOR_BGR2GRAY)

    
    faces = haarcascade.detectMultiScale(gray_img, 1.3, 5)
    for x,y,w,h in faces:
        img = rgb_img[y:y+h, x:x+w]
        img = cv.resize(img, (160,160)) # 1x160x160x3
        img = np.expand_dims(img,axis=0)
        
        ypred = facenet.embeddings(img)
        face_name = model.predict(ypred)
        final_name = encoder.inverse_transform(face_name)[0]
        cv.rectangle(frame, (x,y), (x+w,y+h), (0,255,255), 3)
        cv.putText(frame, str(final_name), (x,y-10), cv.FONT_HERSHEY_SIMPLEX,1, (0,0,255), 3, cv.LINE_AA)

    cv.imshow("Face Recognition:", frame)
    
    
    if cv.waitKey(10) & 0xFF == ord('q'):
        break

cap.release()
cv.destroyAllWindows()

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 56ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 64ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 56ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 81ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 65ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 82ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 68ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 59ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 85ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 82ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 83ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 68ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 85ms

In [1]:
# IMPORT
import cv2 as cv
import numpy as np
import os
os.environ['TF_CPP_MIN_LOG_LEVEL']='2'
import tensorflow as tf
from sklearn.preprocessing import LabelEncoder
import pickle
from keras_facenet import FaceNet
import pandas as pd
from datetime import datetime
from mtcnn.mtcnn import MTCNN

# INITIALIZE
facenet = FaceNet()
faces_embeddings = np.load("filtered_faces_embeddings.npz")
Y = faces_embeddings['arr_1']
encoder = LabelEncoder()
encoder.fit(Y)
haarcascade = cv.CascadeClassifier("haarcascade_frontalface_default.xml")
model = pickle.load(open("svm_model_160x160.pkl", 'rb'))
detector = MTCNN()




In [1]:
import numpy as np

# Load the embeddings and labels
faces_embeddings = np.load("filtered_faces_embeddings.npz")
Y = faces_embeddings['arr_1']

# Find unique labels
unique_labels = np.unique(Y)

# Print the number of unique labels and the unique labels themselves
print(f"Number of unique labels: {len(unique_labels)}")
print(f"Unique labels: {unique_labels}")


Number of unique labels: 570
Unique labels: ['Aakash Gupta_231040401_HALL14_335'
 'Aakashdeep Singh_231110001_HALL14_233'
 'Aayush Chaudhari_231040001_HALL14_399'
 'Abhay Sharma_231030001_HALL14_190' 'Abhay_231050001_HALL14_563'
 'Abhijeet Gangwar_231010001_HALL14_17' 'Abhijeet_231030002_HALL14_238'
 'Abhinandan Singh Baghel_231110002_HALL14_224'
 'Abhinav Pratap Singh_231140001_HALL14_299'
 'Abhinav Saumya_231050003_HALL14_546'
 'Abhiram Krishna Chaitanya Sirigi_231010002_HALL14_14'
 'Abhirup Dasgupta_231060001_HALL14_431'
 'Abhishek Binodkumar Tiwari_231020001_HALL14_93'
 'Abhishek Chaudhary_231050004_HALL14_478'
 'Abhishek Dubey_231110003_HALL14_183'
 'Abhishek Kumar_231050005_HALL14_535'
 'Abhishek Kumar_231050006_HALL14_510'
 'Abhishek Kumar_231050007_HALL14_536' 'Abhishek M_231010401_HALL14_28'
 'Abhishek Semwal_231040002_HALL14_262'
 'Abhishek Verma_231020002_HALL14_79'
 'Abhishek Verma_231290001_HALL14_581'
 'Abhishekkumar Kishoriprasad Verma_231040003_HALL14_261'
 'Abhishikat_

In [2]:
import pickle

# Load your trained SVM model
model = pickle.load(open("svm_model_160x160.pkl", 'rb'))

# Check the number of classes
if hasattr(model, 'classes_'):
    num_classes = len(model.classes_)
    print(f"Number of classes: {num_classes}")
    print(f"Class labels: {model.classes_}")
else:
    print("The model does not have the 'classes_' attribute.")

Number of classes: 570
Class labels: [  0   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16  17
  18  19  20  21  22  23  24  25  26  27  28  29  30  31  32  33  34  35
  36  37  38  39  40  41  42  43  44  45  46  47  48  49  50  51  52  53
  54  55  56  57  58  59  60  61  62  63  64  65  66  67  68  69  70  71
  72  73  74  75  76  77  78  79  80  81  82  83  84  85  86  87  88  89
  90  91  92  93  94  95  96  97  98  99 100 101 102 103 104 105 106 107
 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125
 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143
 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161
 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179
 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197
 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215
 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233
 234 235 236 2

### Verification with unknown

In [16]:
unknown_count = 4
# Path to the directory containing images
image_directory = r"D:\SSearch project\imposters\Unidentified persons"

In [25]:
image_path = os.path.join(image_directory,f"{unknown_count}.jpg")
image = cv2.imread(image_path)



error: OpenCV(4.10.0) D:\a\opencv-python\opencv-python\opencv\modules\imgproc\src\color.cpp:196: error: (-215:Assertion failed) !_src.empty() in function 'cv::cvtColor'


In [34]:
# Path to the directory containing images
known_unknowns = []
image_directory = r"D:\SSearch project\imposters\Unidentified persons"
def verify_unknown(image):
    
    # Iterate over all images in the directory
    
    image = cv.cvtColor(image, cv.COLOR_BGR2RGB)
    x,y,w,h = detector.detect_faces(image)[0]['box']
    image = image[y:y+h, x:x+w]
    image = cv.resize(image, (160,160))
    new_embedding = get_embedding(image)


    if image is not None:
        embedding = get_embedding(image)
        known_unknowns.append(embedding)
        print(f"Processed new unknown {unknown_count}")

In [27]:
verify_unknown(3)

error: OpenCV(4.10.0) D:\a\opencv-python\opencv-python\opencv\modules\imgproc\src\color.cpp:196: error: (-215:Assertion failed) !_src.empty() in function 'cv::cvtColor'


In [48]:
import os
import cv2 as cv
import numpy as np
import pandas as pd
from datetime import datetime

cap = cv.VideoCapture(0)

attendance_list = []
unknown_count = 0
known_unknowns = []  # Initialize known unknown embeddings list

# Load existing attendance file if it exists
if os.path.exists("attendance.xlsx"):
    df_existing = pd.read_excel("attendance.xlsx")
    existing_names = df_existing['Name'].tolist()
else:
    df_existing = pd.DataFrame(columns=["Name", "Time"])
    existing_names = []

# Function to save attendance to Excel
def save_attendance(attendance_list):
    df_new = pd.DataFrame(attendance_list, columns=["Name", "Time"])
    df_combined = pd.concat([df_existing, df_new]).drop_duplicates(subset=['Name'], keep='first')
    df_combined.to_excel("attendance.xlsx", index=False)

# Function to save image of unknown person
def save_unknown(frame, count):
    filename = f"D:\\SSearch project\\imposters\\Unidentified persons\\unknown_{count}.jpg"
    cv.imwrite(filename, frame)
    print(f"Unknown person saved as {filename}")

# Function to calculate distance between embeddings
def distance(embedding1, embedding2):
    return np.linalg.norm(embedding1 - embedding2)

threshold_main = 0.01

while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        break
    
    rgb_img = cv.cvtColor(frame, cv.COLOR_BGR2RGB)
    gray_img = cv.cvtColor(frame, cv.COLOR_BGR2GRAY)
    
    faces = haarcascade.detectMultiScale(gray_img, 1.3, 5)
    recognized_names = []

    for x, y, w, h in faces:
        img = rgb_img[y:y+h, x:x+w]
        img = cv.resize(img, (160, 160))
        img = np.expand_dims(img, axis=0)
        
        ypred = facenet.embeddings(img)

        probabilities = model.predict_proba([ypred[0]])[0]
        max_prob = np.max(probabilities)
        if max_prob < threshold_main:
            final_name = "unknown"
        else:
            final_name = encoder.inverse_transform([np.argmax(probabilities)])[0]

        recognized_names.append(final_name)

        cv.rectangle(frame, (x, y), (x+w, y+h), (0, 255, 255), 3)
        cv.putText(frame, str(final_name), (x, y-10), cv.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 3, cv.LINE_AA)
    
    cv.imshow("Face Recognition:", frame)
    
    key = cv.waitKey(10) & 0xFF
    if key == ord('q'):
        break
    elif key == ord('v'):
        current_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        for name in recognized_names:
            if name == "unknown":
                if known_unknowns:
                    distances = [distance(ypred[0], emb) for emb in known_unknowns]
                    min_distance = min(distances)
                    threshold = 0.85  # Set a suitable threshold based on validation data

                    if min_distance > threshold:
                        print("Unknown person....")
                        unknown_count += 1
                        save_unknown(frame, unknown_count)
                        known_unknowns.append(ypred[0])
                        print("Embeddings Updated")
                    else:
                        print("This unknown is already there....")
                else:
                    unknown_count += 1
                    save_unknown(frame, unknown_count)
                    known_unknowns.append(ypred[0])
            elif name not in existing_names:
                attendance_list.append([name, current_time])
                existing_names.append(name)
        save_attendance(attendance_list)

cap.release()
cv.destroyAllWindows()


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 67ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 64ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 67ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 61ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 60ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 57ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 61ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 56ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 50ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 54ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 46ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 60ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 48ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 48

## Direct verification

In [49]:
import os
import cv2 as cv
import numpy as np
import pandas as pd
from datetime import datetime

cap = cv.VideoCapture(0)

attendance_list = []
unknown_count = 0
known_unknowns = []  # Initialize known unknown embeddings list

# Load existing attendance file if it exists
if os.path.exists("attendance.xlsx"):
    df_existing = pd.read_excel("attendance.xlsx")
    existing_names = df_existing['Name'].tolist()
else:
    df_existing = pd.DataFrame(columns=["Name", "Time"])
    existing_names = []

# Function to save attendance to Excel
def save_attendance(attendance_list):
    df_new = pd.DataFrame(attendance_list, columns=["Name", "Time"])
    df_combined = pd.concat([df_existing, df_new]).drop_duplicates(subset=['Name'], keep='first')
    df_combined.to_excel("attendance.xlsx", index=False)

# Function to save image of unknown person
def save_unknown(frame, count):
    filename = f"D:\\SSearch project\\imposters\\Unidentified persons\\unknown_{count}.jpg"
    cv.imwrite(filename, frame)
    print(f"Unknown person saved as {filename}")

# Function to calculate distance between embeddings
def distance(embedding1, embedding2):
    return np.linalg.norm(embedding1 - embedding2)

threshold_main = 0.01

while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        break
    
    rgb_img = cv.cvtColor(frame, cv.COLOR_BGR2RGB)
    gray_img = cv.cvtColor(frame, cv.COLOR_BGR2GRAY)
    
    faces = haarcascade.detectMultiScale(gray_img, 1.3, 5)
    recognized_names = []

    for x, y, w, h in faces:
        img = rgb_img[y:y+h, x:x+w]
        img = cv.resize(img, (160, 160))
        img = np.expand_dims(img, axis=0)
        
        ypred = facenet.embeddings(img)

        probabilities = model.predict_proba([ypred[0]])[0]
        max_prob = np.max(probabilities)
        if max_prob < threshold_main:
            final_name = "unknown"
        else:
            final_name = encoder.inverse_transform([np.argmax(probabilities)])[0]

        recognized_names.append(final_name)

        cv.rectangle(frame, (x, y), (x+w, y+h), (0, 255, 255), 3)
        cv.putText(frame, str(final_name), (x, y-10), cv.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 3, cv.LINE_AA)
    
    cv.imshow("Face Recognition:", frame)
    
    
    
    
    current_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    for name in recognized_names:
        if name == "unknown":
            if known_unknowns:
                distances = [distance(ypred[0], emb) for emb in known_unknowns]
                min_distance = min(distances)
                threshold = 0.85  # Set a suitable threshold based on validation data

                if min_distance > threshold:
                    print("Unknown person....")
                    unknown_count += 1
                    save_unknown(frame, unknown_count)
                    known_unknowns.append(ypred[0])
                    print("Embeddings Updated")
                else:
                    print("This unknown is already there....")
            else:
                unknown_count += 1
                save_unknown(frame, unknown_count)
                known_unknowns.append(ypred[0])
        elif name not in existing_names:
            attendance_list.append([name, current_time])
            existing_names.append(name)
    save_attendance(attendance_list)

    key = cv.waitKey(10) & 0xFF
    if key == ord('q'):
        break

cap.release()
cv.destroyAllWindows()


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 76ms/step
Unknown person saved as D:\SSearch project\imposters\Unidentified persons\unknown_1.jpg
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 52ms/step
This unknown is already there....
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 59ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 53ms/step
This unknown is already there....
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 66ms/step
This unknown is already there....
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 70ms/step
This unknown is already there....
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 64ms/step
This unknown is already there....
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 45ms/step
This unknown is already there....
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 93ms/step
This unknown is already there....
[1m1/1[0m [32m━━━━━━━━━

In [44]:
cap.release()
cv.destroyAllWindows()

In [47]:

def distance(embedding1, embedding2):
    return np.linalg.norm(embedding1 - embedding2)


def  verify2(t_im,model=model,encode=encoder,threshold=0.01):
    #t_im = cv.imread("D:\SSearch project\student_photo_3.jpg")
    t_im = cv.cvtColor(t_im, cv.COLOR_BGR2RGB)
    x,y,w,h = detector.detect_faces(t_im)[0]['box']
    t_im = t_im[y:y+h, x:x+w]
    t_im = cv.resize(t_im, (160,160))
    new_embedding = get_embedding(t_im)
    # probabilities = model_unknown.predict_proba([ypred[0]])[0]
    # print("Prob is ",probabilities)
    # Calculate distances to all known embeddings
    distances = [distance(new_embedding, emb) for emb in known_unknowns]
    min_distance = min(distances)
    print("Min distance is ", min_distance)
    threshold = 0.85  # Set a suitable threshold based on validation data

    if min_distance > threshold:
        print("Unknown person....")
    else:
        print("This unknown is already there....")
        # prediction = clf.predict([new_embedding])
        # print(f'Predicted: {prediction[0]}')

    # test_im = [test_im]
    # probabilities = model_unknown.predict_proba(test_im)[0]
    # max_prob = np.max(probabilities)
    # print(max_prob)
    # if max_prob < threshold:
    #     return "unknown"
    # else:
    #     return encoder_unknown.inverse_transform([np.argmax(probabilities)])[0]
    # ypreds = model.predict(test_im)
    # names = encoder.inverse_transform(ypreds)
    # return names


In [48]:
cap = cv.VideoCapture(0)

while cap.isOpened():
    ret, frame = cap.read()

    cv.imshow("verification",frame)
    
    
    if cv.waitKey(10) & 0xFF == ord('v'):
        print("verifying...")
        name = verify2(frame)
        print(name)

    if cv.waitKey(10) & 0xFF == ord('q'):
        break

cap.release()
cv.destroyAllWindows()


verifying...
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step 
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 57ms/step
Min distance is  0.988499
Unknown person....
None
v

In [25]:

cap.release()
cv.destroyAllWindows()

## Verification of unknown

#### Identify face and make embedding


#### Make SVM model for this embeddings
#### Identify person from this model.


## Method 1

In [20]:
## Data preparation
import os
import shutil
import os
from tensorflow.keras.preprocessing.image import ImageDataGenerator, load_img, img_to_array, array_to_img
from sklearn.preprocessing import LabelEncoder
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

# Define the directory containing student images and the target directory
source_directory = r"D:\SSearch project\imposters\Unidentified persons"
target_directory = r"D:\SSearch project\imposters\organized_photos"

# Create the target directory if it doesn't exist
os.makedirs(target_directory, exist_ok=True)

# Loop through each file in the source directory
for filename in os.listdir(source_directory):
    if filename.endswith(".jpg"):
        # Extract the student name from the filename (e.g., John_Doe.jpg -> John_Doe)
        student_name = os.path.splitext(filename)[0]

        # Create a directory for each student
        student_directory = os.path.join(target_directory, student_name)
        os.makedirs(student_directory, exist_ok=True)

        # Copy the image to the student's directory
        source_file = os.path.join(source_directory, filename)
        target_file = os.path.join(student_directory, filename)
        shutil.copy(source_file, target_file)
        print(f"Copied {filename} to {student_directory}")

print("Images organized into folders.")

# Initialize the ImageDataGenerator with augmentation parameters
datagen = ImageDataGenerator(
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest'
)
# Path to the organized student images directory
organized_directory = r"D:\SSearch project\imposters\organized_photos"

# Loop through each student's folder and apply augmentation
for student_name in os.listdir(organized_directory):
    student_directory = os.path.join(organized_directory, student_name)

    for filename in os.listdir(student_directory):
        if filename.endswith(".jpg") or filename.endswith(".jpeg"):
            # Load the image
            img_path = os.path.join(student_directory, filename)
            img = load_img(img_path)
            x = img_to_array(img)
            x = x.reshape((1,) + x.shape)  # Reshape for the ImageDataGenerator

            # Generate and save augmented images
            i = 0
            for batch in datagen.flow(x, batch_size=1, save_to_dir=student_directory, save_prefix=student_name, save_format='jpg'):
                i += 1
                if i >= 9:  # Generate 10 augmented images per original image
                    break

print("Data augmentation completed.")

## Generating embeddings
## Generating embeddings
faceloading = FACELOADING(r"D:\SSearch project\imposters\organized_photos")
X_unknown, Y_unknown = faceloading.load_classes()
embedder = FaceNet()
EMBEDDED_X_unknown = []
count = 0
for img in X_unknown:
    EMBEDDED_X_unknown.append(get_embedding(img))
    count +=1
    print(count, " is number of new images")
print("embedding generated.")
EMBEDDED_X_unknown = np.asarray(EMBEDDED_X_unknown)
print("embedding saved")

## New ENcoder training
encoder_unknown = LabelEncoder()
encoder_unknown.fit(Y_unknown)
Y_unknown = encoder_unknown.transform(Y_unknown)

## Model training
model_unknown = SVC(kernel='linear', probability=True)
model_unknown.fit(EMBEDDED_X_unknown, Y_unknown)
print("model trained succesfully")


Copied modiji.jpg to D:\SSearch project\imposters\organized_photos\modiji
Copied Shubham.jpg to D:\SSearch project\imposters\organized_photos\Shubham
Images organized into folders.
Data augmentation completed.
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 95ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 105ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 129ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 150ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/

## Method 2 (Working)

In [8]:
import cv2
import numpy as np
import os

# Path to the directory containing images
image_directory = r"D:\SSearch project\imposters\Unidentified persons"

# List to store embeddings of known unknowns


# # Placeholder for the embedding model
# class DummyEmbedder:
#     def embeddings(self, face_img):
#         # Dummy embedding function, replace with actual model inference
#         return np.random.rand(1, 512)

embedder = FaceNet()

# Function to get embedding from image
def get_embedding(face_img):
    face_img = face_img.astype('float32')  # 3D (160x160x3)
    face_img = np.expand_dims(face_img, axis=0)  # 4D (1x160x160x3)
    yhat = embedder.embeddings(face_img)
    return yhat[0]  # 512D image (1x512)
known_unknowns = []
# Iterate over all images in the directory
for filename in os.listdir(image_directory):
    if filename.endswith(".jpg") or filename.endswith(".png"):  # Add other image formats if necessary
        image_path = os.path.join(image_directory, filename)
        image = cv2.imread(image_path)
        
        
        image = cv.cvtColor(image, cv.COLOR_BGR2RGB)
        x,y,w,h = detector.detect_faces(image)[0]['box']
        image = image[y:y+h, x:x+w]
        image = cv.resize(image, (160,160))
        new_embedding = get_embedding(image)


        if image is not None:
            embedding = get_embedding(image)
            known_unknowns.append(embedding)
            print(f"Processed {filename}")

print(f"Total embeddings saved: {len(known_unknowns)}")


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 79ms/step
Processed modiji.jpg
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 366ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 195ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[

In [5]:
print(len(known_unknowns))

30


## Addition of new image

### Data augmentation

In [76]:
import os
import shutil

# Define the directory containing student images and the target directory
source_directory = r"D:\SSearch project\new_images"
target_directory = r"D:\SSearch project\new_images\organized_photos"

# Create the target directory if it doesn't exist
os.makedirs(target_directory, exist_ok=True)

# Loop through each file in the source directory
for filename in os.listdir(source_directory):
    if filename.endswith(".jpg"):
        # Extract the student name from the filename (e.g., John_Doe.jpg -> John_Doe)
        student_name = os.path.splitext(filename)[0]

        # Create a directory for each student
        student_directory = os.path.join(target_directory, student_name)
        os.makedirs(student_directory, exist_ok=True)

        # Copy the image to the student's directory
        source_file = os.path.join(source_directory, filename)
        target_file = os.path.join(student_directory, filename)
        shutil.copy(source_file, target_file)
        print(f"Copied {filename} to {student_directory}")

print("Images organized into folders.")

Copied pooja.jpg to D:\SSearch project\new_images\organized_photos\pooja
Copied Shubham.jpg to D:\SSearch project\new_images\organized_photos\Shubham
Images organized into folders.


In [69]:
import os
from tensorflow.keras.preprocessing.image import ImageDataGenerator, load_img, img_to_array, array_to_img

# Initialize the ImageDataGenerator with augmentation parameters
datagen = ImageDataGenerator(
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest'
)


In [82]:
# Path to the organized student images directory
organized_directory = r"D:\SSearch project\new_images\organized_photos"

# Loop through each student's folder and apply augmentation
for student_name in os.listdir(organized_directory):
    student_directory = os.path.join(organized_directory, student_name)

    for filename in os.listdir(student_directory):
        if filename.endswith(".jpg"):
            # Load the image
            img_path = os.path.join(student_directory, filename)
            img = load_img(img_path)
            x = img_to_array(img)
            x = x.reshape((1,) + x.shape)  # Reshape for the ImageDataGenerator

            # Generate and save augmented images
            i = 0
            for batch in datagen.flow(x, batch_size=1, save_to_dir=student_directory, save_prefix=student_name, save_format='jpg'):
                i += 1
                if i >= 10:  # Generate 10 augmented images per original image
                    break

print("Data augmentation completed.")

Data augmentation completed.


### Generating embedding and label

In [49]:
faceloading = FACELOADING(r"D:\SSearch project\new_images\organized_photos")
X_new, Y_new = faceloading.load_classes()
embedder = FaceNet()
EMBEDDED_X_new = []
count = 0
for img in X_new:
    EMBEDDED_X_new.append(get_embedding(img))
    count +=1
    print(count, " is number of new images")
print("embedding generated.")
EMBEDDED_X_new = np.asarray(EMBEDDED_X_new)
print("embedding saved")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 439ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 264ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 122ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 75ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 48ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 39ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m

### Adding it to original data

In [42]:
# original embeddings and labels are as numpy nd array
#EMBEDDED_X_faces
#Y_label

In [75]:
# Load the .npz file
data = np.load('faces_embeddings_done_4classes.npz')
# Access the embeddings and labels
EMBEDDED_X_faces_old = data['arr_0']
Y_label_old = data['arr_1']

# Verify the loaded data
print(f'Embeddings shape: {EMBEDDED_X_faces_old.shape}')
print(f'Labels shape: {Y_label_old.shape}')

Embeddings shape: (6354, 512)
Labels shape: (6354,)


In [74]:
print(len(EMBEDDED_X_new))
print(len(Y_new))

11
11


In [76]:
# Verify new data shapes
print(f'New embeddings shape: {EMBEDDED_X_new.shape}')
print(f'New labels shape: {Y_new.shape}')
print(f'old embeddings shape: {EMBEDDED_X_faces_old.shape}')
print(f'old labels shape: {Y_label_old.shape}')
# Concatenate existing data with new data
EMBEDDED_X_combined = np.concatenate((EMBEDDED_X_faces_old, EMBEDDED_X_new), axis=0)
Y_combined = np.concatenate((Y_label_old, Y_new), axis=0)

# Save the updated data
np.savez_compressed('updated_faces_embeddings.npz', EMBEDDED_X=EMBEDDED_X_combined, Y=Y_combined)

# Verify the updated data
print(f'Updated embeddings shape: {EMBEDDED_X_combined.shape}')
print(f'Updated labels shape: {Y_combined.shape}')


New embeddings shape: (11, 512)
New labels shape: (11,)
old embeddings shape: (6354, 512)
old labels shape: (6354,)
Updated embeddings shape: (6365, 512)
Updated labels shape: (6365,)


### New model and encoder training

### Encoder training

In [77]:
from sklearn.preprocessing import LabelEncoder

encoder2 = LabelEncoder()
encoder2.fit(Y_combined)
Y_combined = encoder2.transform(Y_combined)

### Model training

In [78]:
from sklearn.model_selection import train_test_split

X_train, X_test, Y_train, Y_test = train_test_split(EMBEDDED_X_combined, Y_combined, shuffle=True, random_state=17)
from sklearn.svm import SVC
model2 = SVC(kernel='linear', probability=True)
model2.fit(X_train, Y_train)
ypreds_train = model2.predict(X_train)
ypreds_test = model2.predict(X_test)
from sklearn.metrics import accuracy_score

print("accuracy score of training : ", accuracy_score(Y_train, ypreds_train))
print("accuracy score of testing : ",accuracy_score(Y_test,ypreds_test))

accuracy score of training :  0.9987429289754871
accuracy score of testing :  0.9993718592964824


In [79]:
def  verify2(t_im,encoder = encoder2, model=model2,threshold=0.01):
    #t_im = cv.imread("D:\SSearch project\student_photo_3.jpg")
    t_im = cv.cvtColor(t_im, cv.COLOR_BGR2RGB)
    x,y,w,h = detector.detect_faces(t_im)[0]['box']
    t_im = t_im[y:y+h, x:x+w]
    t_im = cv.resize(t_im, (160,160))
    test_im = get_embedding(t_im)
    test_im = [test_im]
    probabilities = model.predict_proba(test_im)[0]
    max_prob = np.max(probabilities)
    print(max_prob)
    if max_prob < threshold:
        return "unknown"
    else:
        return encoder2.inverse_transform([np.argmax(probabilities)])[0]
    # ypreds = model.predict(test_im)
    # names = encoder.inverse_transform(ypreds)
    # return names


In [81]:
cap = cv.VideoCapture(0)

while cap.isOpened():
    ret, frame = cap.read()

    cv.imshow("verification",frame)
    
    
    if cv.waitKey(10) & 0xFF == ord('v'):
        print("verifying...")
        name = verify2(frame)
        print(name)

    if cv.waitKey(10) & 0xFF == ord('q'):
        break

cap.release()
cv.destroyAllWindows()


verifying...
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step 
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 69ms/step
0.016558720830224957
Shubham
verifying...
[1m1/1