In [None]:
# Mount Google Drive at /content/gdrive; the dataset and model paths used by
# the later cells all live under this mount point.
from google.colab import drive
drive.mount('/content/gdrive')

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [None]:
import cv2
import os
import numpy as np
from tensorflow.keras.models import load_model
import pickle
import matplotlib.pyplot as plt
import shutil
from skimage import io

In [None]:
def get_images(data_dir,number_of_samples):
  """Collect up to ``number_of_samples`` file paths from each category folder.

  Reads the module-level ``categories`` list, so the cell that defines it
  must have been executed before this function is called.

  Args:
    data_dir: Directory that contains one sub-folder per category.
    number_of_samples: Maximum number of files to take from each category;
      zero or a negative value selects nothing.

  Returns:
    A flat list of file paths, grouped by category in ``categories`` order
    and sorted by file name within each category.
  """
  limit = max(int(number_of_samples), 0)
  image_path = []
  for category in categories:
    path = os.path.join(data_dir,category)
    # os.listdir() returns entries in arbitrary order; sorting makes the
    # selected sample identical on every run.
    for img in sorted(os.listdir(path))[:limit]:
      image_path.append(os.path.join(path,img))
  return image_path

In [None]:
def visualize_dataset(image_path,rows,cols):
  """Display images in a ``rows`` x ``cols`` grid, labelled by parent folder.

  Args:
    image_path: List of image file paths; each image's label is the name of
      the directory that contains it.
    rows: Number of grid rows.
    cols: Number of grid columns.

  At most ``rows * cols`` images are drawn; if the list is shorter, the
  remaining grid cells are left empty.
  """
  fig = plt.figure(figsize=(20,20))
  # Spacing is a property of the whole figure, so set it once.
  fig.subplots_adjust(hspace=1)
  shown = min(rows * cols, len(image_path))
  for i in range(shown):
    path = image_path[i]
    ax = fig.add_subplot(rows,cols,i + 1)
    # skimage's io.imread already returns colour images in RGB order, which
    # is what imshow expects; applying a BGR->RGB conversion here would
    # exchange the red and blue channels.
    ax.imshow(io.imread(path))
    ax.set_xlabel(os.path.basename(os.path.dirname(path)))
  plt.show()

In [None]:
# Working directory of the kernel; later code in this notebook still refers
# to this name.
rootdir = os.getcwd()

# Both locations are absolute paths. os.path.join(rootdir, <absolute path>)
# discards `rootdir`, so the paths are assigned directly to make that explicit.
dataset_dir = '/content/gdrive/MyDrive/DATN/dataset/train'

model_path = '/content/gdrive/MyDrive/DATN/models/facenet_keras.h5'
facenet_model = load_model(model_path)

# One sub-folder per category. Entries that are not directories (stray files
# in the dataset root) are skipped because the loaders below call
# os.listdir() on every category; sorting keeps the order reproducible.
categories = sorted(
    entry for entry in os.listdir(dataset_dir)
    if os.path.isdir(os.path.join(dataset_dir, entry))
)
def check_pretrained_file(embeddings_model):
	"""Load a pickled embeddings file and list the distinct names stored in it.

	Args:
		embeddings_model: Path to a pickle file holding a dict with a
			"names" entry.

	Returns:
		[data, unique_names] -- the unpickled dict and a sorted list of the
		distinct values found under data["names"].
	"""
	# NOTE: unpickling can execute arbitrary code; only load files that were
	# produced by this notebook.
	with open(embeddings_model,"rb") as f:
		data = pickle.load(f)
	unique_names = np.unique(np.array(data["names"])).tolist()
	return [data,unique_names]

def get_remaining_names(unique_names):
	"""Return the categories that are not present in ``unique_names``.

	The comparison is made against the module-level ``categories`` list;
	np.setdiff1d yields the sorted, de-duplicated difference, which is
	converted to a plain Python list.
	"""
	return np.setdiff1d(categories,unique_names).tolist()

def get_all_face_pixels():
	"""Read every image of every category under ``dataset_dir``.

	Uses the module-level ``dataset_dir`` and ``categories``. Files that
	OpenCV cannot decode are reported and skipped so that no ``None`` entry
	ends up among the pixel arrays.

	Returns:
		[image_ids, image_paths, image_arrays, names] -- four parallel lists
		holding, per image, its file name, its full path, the array returned
		by cv2.imread and the category (folder) name.
	"""
	image_ids = []
	image_paths = []
	image_arrays = []
	names = []
	for category in categories:
		path = os.path.join(dataset_dir,category)
		for img in os.listdir(path):
			img_path = os.path.join(path,img)
			img_array = cv2.imread(img_path)
			if img_array is None:
				# cv2.imread reports an unreadable or non-image file by
				# returning None instead of raising.
				print("Skipping unreadable image: " + img_path)
				continue
			image_paths.append(img_path)
			image_ids.append(img)
			image_arrays.append(img_array)
			names.append(category)
	return [image_ids,image_paths,image_arrays,names]


def get_remaining_face_pixels(remaining_names):
	"""Read the images of the given categories under ``dataset_dir``.

	Uses the module-level ``dataset_dir``. Files that OpenCV cannot decode
	are reported and skipped so that no ``None`` entry ends up among the
	pixel arrays.

	Args:
		remaining_names: Category (folder) names whose images should be read.

	Returns:
		None when ``remaining_names`` is empty; otherwise
		[image_ids, image_paths, image_arrays, names] -- four parallel lists
		holding, per image, its file name, its full path, the array returned
		by cv2.imread and the category name.
	"""
	if len(remaining_names) == 0:
		return None

	image_ids = []
	image_paths = []
	image_arrays = []
	names = []
	for category in remaining_names:
		path = os.path.join(dataset_dir,category)
		for img in os.listdir(path):
			img_path = os.path.join(path,img)
			img_array = cv2.imread(img_path)
			if img_array is None:
				# cv2.imread reports an unreadable or non-image file by
				# returning None instead of raising.
				print("Skipping unreadable image: " + img_path)
				continue
			image_paths.append(img_path)
			image_ids.append(img)
			image_arrays.append(img_array)
			names.append(category)
	return [image_ids,image_paths,image_arrays,names]


def normalize_pixels(imagearrays):
	"""Standardise a stack of images to zero mean and unit standard deviation.

	The mean and standard deviation are computed once over the whole input
	(every image, pixel and channel together), not per image.
	NOTE(review): each image's result therefore depends on the other images
	passed in the same call -- confirm that the code which embeds faces at
	recognition time normalises them the same way.

	Args:
		imagearrays: Sequence of arrays that np.array can stack; presumably
			equally sized face crops -- verify against the dataset.

	Returns:
		A float32 ndarray with the same shape as the stacked input. An empty
		input is returned unchanged, and a constant input yields all zeros.
	"""
	face_pixels = np.array(imagearrays).astype('float32')
	if face_pixels.size == 0:
		# Nothing to standardise; mean()/std() of an empty array would be NaN.
		return face_pixels
	mean, std = face_pixels.mean(), face_pixels.std()
	if std == 0:
		# Every value is identical: centring alone gives the result and
		# avoids a 0/0 division that would fill the output with NaN.
		return face_pixels - mean
	return (face_pixels - mean) / std



# Absolute Drive path of the embeddings cache. It is used for both the
# existence check and the writes below so the two can never drift apart.
embeddings_model_file = "/content/gdrive/MyDrive/DATN/models/embeddings.pickle"


def _extract_embeddings(face_pixels):
	"""Run ``facenet_model`` on each image and return a list of flat vectors."""
	embeddings = []
	for face_pixel in face_pixels:
		# predict() is called on one image at a time, wrapped in a leading
		# axis of size 1; the output is flattened to a 1-D vector.
		sample = np.expand_dims(face_pixel,axis=0)
		embeddings.append(facenet_model.predict(sample).reshape(-1))
	return embeddings


def _save_embeddings(payload):
	"""Pickle ``payload`` to ``embeddings_model_file``."""
	# Serialise before opening the target so that a pickling error cannot
	# leave a truncated file behind.
	serialized = pickle.dumps(payload)
	with open(embeddings_model_file,"wb") as f:
		f.write(serialized)


if not os.path.exists(embeddings_model_file):
	# First run: embed every image in the dataset.
	[image_ids,image_paths,image_arrays,names] = get_all_face_pixels()
	if len(image_arrays) == 0:
		# Writing an empty cache would make the next run believe the
		# embeddings had already been extracted.
		print("No images found in " + dataset_dir + "; nothing was written")
	else:
		face_pixels = normalize_pixels(imagearrays = image_arrays)
		embeddings = _extract_embeddings(face_pixels)
		# Build the payload once, after all embeddings exist.
		data = {"paths":image_paths, "names":names,"imageIDs":image_ids,"embeddings":embeddings}
		_save_embeddings(data)

else:
	# Incremental run: embed only the categories missing from the cache.
	[old_data,unique_names] = check_pretrained_file(embeddings_model_file)
	remaining_names = get_remaining_names(unique_names)
	data = get_remaining_face_pixels(remaining_names)
	if data is not None:
		[image_ids,image_paths,image_arrays,names] = data
		face_pixels = normalize_pixels(imagearrays = image_arrays)
		embeddings = _extract_embeddings(face_pixels)
		new_data = {"paths":image_paths, "names":names,"imageIDs":image_ids,"embeddings":embeddings}
		# Merge exactly the keys that this script writes. A "face_ids" key
		# is never produced by either branch, so looking it up raised
		# KeyError and the merged file was never saved.
		combined_data = {key: list(old_data[key]) + new_data[key] for key in new_data}
		_save_embeddings(combined_data)
	else:
		print("No new data found... Embeddings has already extracted for this user")



In [None]:
from sklearn.preprocessing import LabelEncoder
from sklearn.svm import LinearSVC
import pickle
import numpy as np
import os
from sklearn.calibration import CalibratedClassifierCV

In [None]:
# Kernel working directory at the time this cell runs.
rootdir = os.getcwd()

# The second component is an absolute path, so on Colab's POSIX filesystem
# os.path.join() drops `rootdir` and the result is the Drive path itself.
embeddings_path = os.path.join(
    rootdir,
    '/content/gdrive/MyDrive/DATN/models/embeddings.pickle',
)

def load_embeddings_and_labels():
    """Load the pickled embeddings and integer-encode their names.

    Reads the file at the module-level ``embeddings_path``, which must hold a
    dict with "names" and "embeddings" entries.

    Returns:
        [label, labels, Embeddings, names] -- the fitted LabelEncoder, the
        encoded label for every sample, the embeddings as an ndarray and the
        names as an ndarray.
    """
    # NOTE: unpickling can execute arbitrary code; only load files that were
    # produced by this notebook.
    with open(embeddings_path, "rb") as f:
        data = pickle.load(f)
    names = np.array(data["names"])
    # Encode each name as an integer class id.
    label = LabelEncoder()
    labels = label.fit_transform(names)
    Embeddings = np.array(data["embeddings"])
    return [label,labels,Embeddings,names]

def create_svm_model(labels,embeddings):
    """Fit a calibrated linear SVM on the embeddings.

    A LinearSVC is wrapped in CalibratedClassifierCV (both with their default
    settings) and the wrapper is fitted on ``embeddings`` against ``labels``.

    Returns:
        The fitted CalibratedClassifierCV instance.
    """
    calibrated = CalibratedClassifierCV(LinearSVC())
    calibrated.fit(embeddings,labels)
    return calibrated


[label,labels,Embeddings,names] = load_embeddings_and_labels()
recognizer = create_svm_model(labels=labels,embeddings=Embeddings)

# NOTE(review): only the classifier is saved here; the fitted encoder `label`
# is not -- confirm that the recognition code rebuilds the name mapping.
# Serialise before opening the target so that a pickling error cannot leave
# a truncated recognizer.pickle behind; `with` closes the file on any exit.
recognizer_bytes = pickle.dumps(recognizer)
with open('/content/gdrive/MyDrive/DATN/models/recognizer.pickle', "wb") as f1:
    f1.write(recognizer_bytes)
print("Training done successfully")

Training done successfully
