# VOICE AND FACE AUTHENTICATION SYSTEM

### RUN THE CELLS ONE BY ONE AFTER READING THE INSTRUCTIONS

## Please install the required libraries first

In [1]:
import tensorflow as tf
import numpy as np
import os
import glob
import pickle
import cv2
import time
from numpy import genfromtxt

from keras import backend as K
from keras.models import load_model

K.set_image_data_format('channels_first')
np.set_printoptions(threshold=np.inf)


import pyaudio
from IPython.display import Audio, display, clear_output
import wave
from scipy.io.wavfile import read
#from sklearn.mixture import GMM 
from sklearn.mixture import GaussianMixture 
import warnings
warnings.filterwarnings("ignore")

from sklearn import preprocessing
import python_speech_features as mfcc
import face_recognition
import cv2
import numpy as np

# Audio processing

### - After installing all the necessary libraries, please run the cell below


In [2]:
def calculate_delta(array):
    """Return a frame-to-frame difference ("delta") matrix for *array*.

    *array* is a 2-D matrix with one row per frame.  For each row the two
    nearest neighbouring rows are collected, looking at most two rows away
    and checking the previous row before the next one at each distance.
    With two neighbours the delta is ``(second - first) / 4``; with only one
    neighbour (two-row input) it is ``(neighbour - row) / 2``; with none
    (single-row input) the delta stays zero.

    The result has the same shape as *array*.
    """
    n_frames, n_coeffs = array.shape
    result = np.zeros((n_frames, n_coeffs))
    reach = 2  # how many rows away a neighbour may be

    for frame in range(n_frames):
        neighbours = []
        for offset in range(1, reach + 1):
            if len(neighbours) >= 2:
                break
            before, after = frame - offset, frame + offset
            if before >= 0:
                neighbours.append(before)
            if after < n_frames:
                neighbours.append(after)

        if len(neighbours) == 2:
            first, second = neighbours
            result[frame] = (array[second] - array[first]) / (2 * reach)
        elif len(neighbours) == 1:
            result[frame] = (array[neighbours[0]] - array[frame]) / reach

    return result


#convert audio to mfcc features
def extract_features(audio, rate):
    """Turn raw audio samples into a per-frame feature matrix.

    Computes 12 cepstral coefficients per frame (25 ms window, 10 ms step,
    energy appended, 2048-point FFT), standardises them with
    ``preprocessing.scale`` and appends the output of ``calculate_delta``
    column-wise, so each row holds the scaled MFCCs followed by their deltas.

    Parameters:
        audio: sample array as returned by ``scipy.io.wavfile.read``.
        rate: sampling rate of *audio* in Hz.

    Returns:
        2-D array, one row per frame.
    """
    # NOTE(review): the recording cells in this notebook save 2-channel WAV
    # files; confirm how python_speech_features treats a 2-D sample array.
    cepstra = preprocessing.scale(
        mfcc.mfcc(audio, rate, 0.025, 0.01, 12, appendEnergy=True, nfft=2048)
    )
    return np.hstack((cepstra, calculate_delta(cepstra)))

# Registering  New User voice

### Run the code below to register a new person's voice.

### WARNING:- RECORD IN SILENCE


##### -------------------- follow steps below --------------------------------------------

#### - RUN THE CODE
#### - ENTER YOUR NAME
#### - SPEAK "HELLO COMPUTER" WHEN RECORDING STARTS
#### - REPEAT THE SAME PHRASE TWO MORE TIMES WHEN PROMPTED
#### - YOUR VOICE IS NOW REGISTERED

#### - REPEAT THESE STEPS TO REGISTER THE VOICES OF 2 OR 3 PEOPLE
#### - EACH VOICE IS REGISTERED ONCE THE CELL PRINTS "<name> added successfully"

###### -----------------------------------------------------------------------------------------------------------------

## TIP: also register silence or background noise under the name "unknown" so that such input is classified as unknown




In [57]:
# Register a new speaker: record three short clips from the microphone, then
# fit ONE Gaussian mixture model on the features of all recorded clips and
# pickle it as ./gmm_models/<name>.pkl.
name = input("Enter your name: ").strip()
if not name:
    # An empty name would put the clips straight into ./voice_database/ and
    # save a model file called ".pkl".
    raise ValueError("A non-empty name is required to register a voice")

FORMAT = pyaudio.paInt16
CHANNELS = 2
RATE = 44100
CHUNK = 1024
RECORD_SECONDS = 3


source = "./voice_database/" + name

# exist_ok=True lets the same person register again (clips 1-3 are simply
# overwritten); makedirs also creates ./voice_database on the first run.
os.makedirs(source, exist_ok=True)

for i in range(3):
    if i == 0:
        # Countdown 3..0 before the first clip.
        for j in range(3, -1, -1):
            time.sleep(1.0)
            print("Speak your name in {} seconds".format(j))
            clear_output(wait=True)

    elif i == 1:
        print("Speak your name one more time")
        time.sleep(0.5)

    else:
        print("Speak your name one last time")
        time.sleep(0.5)

    recorder = pyaudio.PyAudio()
    try:
        # Ask for the sample width while the PyAudio instance is still alive.
        sample_width = recorder.get_sample_size(FORMAT)

        # start Recording
        stream = recorder.open(format=FORMAT, channels=CHANNELS,
                               rate=RATE, input=True,
                               frames_per_buffer=CHUNK)
        try:
            print("recording...")
            frames = [stream.read(CHUNK)
                      for _ in range(int(RATE / CHUNK * RECORD_SECONDS))]
        finally:
            # stop Recording, even if a read failed
            stream.stop_stream()
            stream.close()
    finally:
        recorder.terminate()

    # saving wav file of speaker
    with wave.open(source + '/' + str(i + 1) + '.wav', 'wb') as waveFile:
        waveFile.setnchannels(CHANNELS)
        waveFile.setsampwidth(sample_width)
        waveFile.setframerate(RATE)
        waveFile.writeframes(b''.join(frames))
    print("Done")

dest = "./gmm_models/"
os.makedirs(dest, exist_ok=True)

# Collect the MFCC + delta features of every recorded clip.  The list lives
# outside the loop so the clips accumulate instead of replacing one another.
clip_features = []
for path in sorted(os.listdir(source)):
    if not path.lower().endswith('.wav'):
        continue  # ignore anything that is not a recording

    # reading audio files of speaker
    sr, samples = read(os.path.join(source, path))
    clip_features.append(extract_features(samples, sr))

features = np.vstack(clip_features)

# Train a single model on the concatenated features of all clips.
gmm = GaussianMixture(n_components=16, max_iter=200, covariance_type='diag', n_init=3)
gmm.fit(features)

# save trained model
with open(dest + name + '.pkl', 'wb') as file:
    pickle.dump(gmm, file)
print(name + ' added successfully')


recording...
Done
Speak your name one more time
recording...
Done
Speak your name one last time
recording...
Done
unknown3 added successfully


# Registering a New User face 

### Run the code below to register a new person's face.




##### --------------------------------------------------------follow steps below--------------------------------------------------------------------------------------- 

#### - RUN THE CODE
#### - ENTER YOUR NAME
#### - THE WEBCAM PREVIEW STARTS
#### - PRESS S TO SAVE AN IMAGE
#### - WAIT UNTIL "Image saved successfully!" IS PRINTED BELOW THE CELL
#### - PRESS Q TO EXIT
 
#### --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------

## TIP: register 2 to 3 images per person to help the model classify

In [None]:
import cv2
import os

# Capture face images from the webcam into captured_images/<name>/.
# 's' saves the current frame, 'q' closes the preview.

# Root directory that holds one sub-directory per person
dir_name = 'captured_images'

# Get name of the person to capture images
name = input("Enter your name: ").strip()
if not name:
    # Without a name the images would land directly in the root directory.
    raise ValueError("A non-empty name is required to register a face")

# Create the person's directory (and the root directory if needed)
person_dir = os.path.join(dir_name, name)
os.makedirs(person_dir, exist_ok=True)

# Initialize the webcam
cap = cv2.VideoCapture(0)

try:
    if not cap.isOpened():
        raise RuntimeError("Could not open the webcam")

    # Set the width and height of the capture window
    cap.set(cv2.CAP_PROP_FRAME_WIDTH, 640)
    cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 480)

    # Capture frames continuously
    while True:
        ret, frame = cap.read()
        if not ret or frame is None:
            print("Could not read a frame from the webcam.")
            break

        # Draw the instructions on a copy so the saved image stays clean.
        preview = frame.copy()
        cv2.putText(preview, "Press 'q' to exit", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
        cv2.putText(preview, "Press 's' to save image", (10, 60), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)

        # Display the frame
        cv2.imshow('frame', preview)

        # Poll the keyboard once per frame: a second waitKey call would
        # swallow the key press that the first one did not match.
        key = cv2.waitKey(1) & 0xFF

        # Exit the camera if 'q' is pressed
        if key == ord('q'):
            break

        # Save the image if 's' is pressed
        if key == ord('s'):
            # Number the file after the images already in the directory
            filename = os.path.join(person_dir, name + "_" + str(len(os.listdir(person_dir)) + 1) + ".jpg")

            # imwrite reports failure through its return value
            if cv2.imwrite(filename, frame):
                print("Image saved successfully!")
            else:
                print("Could not save image to " + filename)
finally:
    # Release the webcam and close all windows, even after an error
    cap.release()
    cv2.destroyAllWindows()




# Automatically build the lists of face encodings and their names


### just run the below code

In [4]:
# Build the parallel lists of known face encodings and names from
# captured_images/<person>/<image>.
known_face_encodings = []
known_face_names = []
path = "captured_images"

for file in sorted(os.listdir(path)):

    person_path = os.path.join(path, file)

    # Stray files in the root directory are not person folders.
    if not os.path.isdir(person_path):
        continue

    # The folder name is the person's name.  It is used as-is so that a name
    # containing a dot is not truncated.
    name = file

    # Encode every image of the person, not just one of them.
    for image_file in sorted(os.listdir(person_path)):
        if not image_file.lower().endswith(('.jpg', '.jpeg', '.png')):
            continue

        # Load the image file
        image = face_recognition.load_image_file(os.path.join(person_path, image_file))

        # face_encodings returns an empty list when no face is detected
        encodings = face_recognition.face_encodings(image)
        if not encodings:
            print("No face found in", os.path.join(person_path, image_file), "- skipped")
            continue

        # Append the face encoding and name to their respective lists
        known_face_encodings.append(encodings[0])
        known_face_names.append(name)

# Print the arrays of known face encodings and their names
print("Known face encodings:", known_face_encodings)
print("Known face names:", known_face_names)


Known face encodings: [array([-0.10603727,  0.13406637,  0.09755316,  0.01717214, -0.0177455 ,
       -0.0443629 , -0.06481581, -0.03275372,  0.08441459,  0.01632946,
        0.23954946, -0.08707264, -0.25597417, -0.14971158, -0.0085438 ,
        0.11727355, -0.05354901, -0.17429779, -0.05566221, -0.06684638,
        0.01994321,  0.03799533,  0.04113675,  0.00206695, -0.08606327,
       -0.44688386, -0.03061007, -0.111617  ,  0.09729519, -0.06653468,
       -0.03572546,  0.00345196, -0.21564899, -0.05102433,  0.03085935,
        0.11574043, -0.04740578, -0.02374438,  0.18887484,  0.0093974 ,
       -0.1249586 , -0.02845469,  0.06041468,  0.26397482,  0.1454096 ,
        0.0742325 ,  0.04354211, -0.04550374, -0.01831933, -0.19970518,
        0.09574576,  0.15648206,  0.13781491,  0.05862052,  0.15629384,
       -0.13437159,  0.00889811,  0.08292488, -0.22097458,  0.05514112,
       -0.01189425,  0.01842091, -0.05770644, -0.05067457,  0.27391085,
        0.0429178 , -0.0594479 , -0.13464

# ----------------------------VOICE RECOGNITION---------------------------

### - SPEAK THE SAME PHRASE YOU REGISTERED, IN THE SAME PITCH, SO YOUR VOICE CAN BE RECOGNISED

In [54]:
# Name of the most recently recognised speaker.  Defined up front so code
# that reads it never hits an undefined name.
identity = "unknown"


def recognize_voice():
    """Record a 3-second clip and identify the speaker.

    The clip is saved to ./test.wav, converted with ``extract_features`` and
    scored against every ``*.pkl`` model in ./gmm_models/; the model with
    the highest score wins.

    Side effects:
        Sets the module-level ``identity`` to the winning speaker name, or
        to ``'unknown'`` when there are no models.

    Returns:
        The recognised speaker name, or ``None`` when there are no models or
        the winning model is the one named ``'unknown'``.
    """
    global identity
    # Reset first so a failed attempt never leaves a previous speaker's name
    # behind for the face check to match against.
    identity = "unknown"

    FORMAT = pyaudio.paInt16
    CHANNELS = 2
    RATE = 44100
    CHUNK = 1024
    RECORD_SECONDS = 3
    FILENAME = "./test.wav"

    recorder = pyaudio.PyAudio()
    try:
        # Ask for the sample width while the PyAudio instance is still alive.
        sample_width = recorder.get_sample_size(FORMAT)

        # start Recording
        stream = recorder.open(format=FORMAT, channels=CHANNELS,
                               rate=RATE, input=True,
                               frames_per_buffer=CHUNK)
        try:
            print("recording... say hello computer for authentication")
            frames = [stream.read(CHUNK)
                      for _ in range(int(RATE / CHUNK * RECORD_SECONDS))]
            print("finished recording")
        finally:
            # stop Recording, even if a read failed
            stream.stop_stream()
            stream.close()
    finally:
        recorder.terminate()

    # saving wav file
    with wave.open(FILENAME, 'wb') as waveFile:
        waveFile.setnchannels(CHANNELS)
        waveFile.setsampwidth(sample_width)
        waveFile.setframerate(RATE)
        waveFile.writeframes(b''.join(frames))

    modelpath = "./gmm_models/"

    model_files = []
    if os.path.isdir(modelpath):
        model_files = sorted(fname for fname in os.listdir(modelpath)
                             if fname.endswith('.pkl'))

    # Without models there is nothing to score against; argmax over an empty
    # array would raise.
    if not model_files:
        print("No Users in the Database!")
        return None

    speakers = [fname[:-len('.pkl')] for fname in model_files]

    models = []
    for fname in model_files:
        # NOTE: unpickling runs arbitrary code; only models written by this
        # notebook should ever be placed in ./gmm_models/.
        with open(os.path.join(modelpath, fname), 'rb') as model_file:
            models.append(pickle.load(model_file))

    # read test file
    sr, samples = read(FILENAME)

    # extract mfcc features
    vector = extract_features(samples, sr)
    log_likelihood = np.zeros(len(models))

    # checking with each model one by one
    for i, gmm in enumerate(models):
        log_likelihood[i] = np.sum(gmm.score(vector[:, :40]))

    pred = int(np.argmax(log_likelihood))
    identity = speakers[pred]

    # if voice not recognized than terminate the process
    if identity == 'unknown':
        print("Not Recognized! Try again...")
        return None

    print("Recognized as - ", identity)

    return identity


# Example usage: record a clip and run speaker identification.  The return
# value of the call is discarded here; the printed name is read from the
# module-level `identity` that recognize_voice() assigns.
recognize_voice()
print(identity)

                  


recording... say hello computer for authentication
finished recording
Recognized as -  adarsh
adarsh


# ----------------------------FACE AND VOICE AUTHENTICATION--------------------------

#### - KEEP YOUR FACE IN FRONT OF THE CAMERA
### ------------------------------------- THE RESULT WILL BE SHOWN AS BELOW -----------------------------------
#### - IF THE FACE MATCHES THE VOICE: "Authenticaation successful !welcome"
#### - IF THE VOICE IDENTITY AND THE FACE IDENTITY ARE DIFFERENT: "Voice identity not matching with face !Try again...."
#### - IF THE VOICE IDENTITY IS UNKNOWN: "Voice not recognized , !Try again....."
#### - IF THE FACE IDENTITY IS UNKNOWN: "Face not registered,! Unsuccessful"


In [55]:
#FACE RECOGNITION
# Live check: when exactly one face is in view, label it with the outcome of
# comparing the recognised face name against the voice identity.
# Press 'q' in the video window to stop.

# Initialize some variables
face = []                  # detections in the most recent frame
face_locations = []
face_encodings = []
face_names = []
process_this_frame = True

# Treat a missing voice result as "voice not recognised" instead of failing
# with a NameError in the middle of the video loop.
try:
    voice_identity = identity
except NameError:
    voice_identity = "unknown"

print("Keep Your face infront of the camera")
cap = cv2.VideoCapture(0)
cap.set(cv2.CAP_PROP_FRAME_WIDTH, 640)
cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 480)


time.sleep(1.0)
try:
    while True:
        ok, frame = cap.read()
        if not ok or frame is None:
            print("Could not read a frame from the camera.")
            break

        # Detect on a quarter-size frame to keep the loop responsive.
        small_frame = cv2.resize(frame, (0, 0), fx=0.25, fy=0.25)

        # OpenCV delivers BGR; face_recognition expects RGB.  cvtColor
        # returns a contiguous array, unlike a reversed-slice view.
        rgb_small_frame = cv2.cvtColor(small_frame, cv2.COLOR_BGR2RGB)

        face = face_recognition.face_locations(rgb_small_frame)

        if len(face) == 1:

            # Only every other frame is encoded; the frames in between reuse
            # the previous locations and names.
            if process_this_frame:

                # Reuse the detection above instead of detecting twice.
                face_locations = face
                face_encodings = face_recognition.face_encodings(rgb_small_frame, face_locations)

                # Start a fresh list so names stay aligned with locations.
                face_names = []

                for face_encoding in face_encodings:
                    facename = "Unknown"

                    if len(known_face_encodings) > 0:
                        # See if the face is a match for the known face(s)
                        matches = face_recognition.compare_faces(known_face_encodings, face_encoding)

                        # Use the known face with the smallest distance to the new face
                        face_distances = face_recognition.face_distance(known_face_encodings, face_encoding)
                        best_match_index = int(np.argmin(face_distances))

                        if matches[best_match_index]:
                            facename = known_face_names[best_match_index]

                    # Record unmatched faces too, so they get a label.
                    face_names.append(facename)

            process_this_frame = not process_this_frame

            # Display the results
            for (top, right, bottom, left), facename in zip(face_locations, face_names):
                # Scale back up face locations since the frame we detected in was scaled to 1/4 size
                top *= 4
                right *= 4
                bottom *= 4
                left *= 4

                # Draw a box around the face
                cv2.rectangle(frame, (left, top), (right, bottom), (0, 0, 255), 2)

                # Draw a label with a name below the face
                cv2.rectangle(frame, (left, bottom - 35), (right, bottom), (0, 0, 0), cv2.FILLED)
                font = cv2.FONT_HERSHEY_DUPLEX

                # Rule out the two "unknown" cases before testing for a
                # match, so an unrecognised face or voice can never be
                # reported as a successful authentication.
                if facename == "Unknown":
                    message = facename + " Face not registered,! Unsuccessful"
                    colour = (0, 0, 255)
                elif voice_identity == "unknown":
                    message = facename + " Voice not recognized , !Try again....."
                    colour = (0, 0, 255)
                elif facename == voice_identity:
                    message = facename + " Authenticaation successful !welcome"
                    colour = (0, 255, 0)
                else:
                    message = facename + " Voice identity not matching with face !Try again...."
                    colour = (0, 0, 255)

                cv2.putText(frame, message, (left + 6, bottom - 6), font, 0.5, colour, 1)

        # Show every frame and poll the keyboard on every frame, so the
        # window stays live and 'q' works whatever the number of faces.
        cv2.imshow('Video', frame)

        # Hit 'q' on the keyboard to quit!
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Release handle to the webcam
    cap.release()
    cv2.destroyAllWindows()

# Report on the last frame that was analysed.
if len(face) == 0:
    print('There was no face found in the frame. Try again...')


elif len(face) > 1:
    print("More than one faces found. Try again...")



               


                   
            




Keep Your face infront of the camera
