In [1]:
!pip install facenet-pytorch

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting facenet-pytorch
  Downloading facenet_pytorch-2.5.3-py3-none-any.whl (1.9 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.9/1.9 MB[0m [31m17.9 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: facenet-pytorch
Successfully installed facenet-pytorch-2.5.3


In [2]:
from facenet_pytorch import MTCNN, InceptionResnetV1, fixed_image_standardization, training
import torch
from torch.utils.data import DataLoader, SubsetRandomSampler
from torch import optim
from torch.optim.lr_scheduler import MultiStepLR
from torch.utils.tensorboard import SummaryWriter
from torchvision import datasets, transforms
import numpy as np
import os
from facenet_pytorch import MTCNN, InceptionResnetV1, extract_face
from sklearn import svm
from PIL import Image
from sklearn import metrics
from sklearn.metrics import accuracy_score

**Mounted Drive**

In [3]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [4]:
!unzip /content/drive/MyDrive/faceRecognition/dataset/FacenetDataset.zip -d dataset_new

Archive:  /content/drive/MyDrive/faceRecognition/dataset/FacenetDataset.zip
   creating: dataset_new/FacenetDataset/
  inflating: dataset_new/__MACOSX/._FacenetDataset  
  inflating: dataset_new/FacenetDataset/.DS_Store  
  inflating: dataset_new/__MACOSX/FacenetDataset/._.DS_Store  
   creating: dataset_new/FacenetDataset/Phong/
  inflating: dataset_new/__MACOSX/FacenetDataset/._Phong  
   creating: dataset_new/FacenetDataset/Hellas/
  inflating: dataset_new/__MACOSX/FacenetDataset/._Hellas  
   creating: dataset_new/FacenetDataset/Nguyen/
  inflating: dataset_new/__MACOSX/FacenetDataset/._Nguyen  
   creating: dataset_new/FacenetDataset/Huy/
  inflating: dataset_new/__MACOSX/FacenetDataset/._Huy  
   creating: dataset_new/FacenetDataset/Phu/
  inflating: dataset_new/__MACOSX/FacenetDataset/._Phu  
  inflating: dataset_new/FacenetDataset/Phong/8.jpg  
  inflating: dataset_new/FacenetDataset/Phong/9.jpg  
  inflating: dataset_new/FacenetDataset/Phong/14.jpg  
  inflating: dataset_new/F

In [5]:
!rm -r /content/dataset_new/__MACOSX
!rm -r /content/dataset_new/FacenetDataset/.DS_Store

In [6]:
import os

for path in os.listdir('/content/dataset_new/FacenetDataset'):
  for path_child in os.listdir('/content/dataset_new/FacenetDataset/' + path):
      if '.py' in path_child:
        os.remove(f'/content/dataset_new/FacenetDataset/{path}/{path_child}')

In [7]:
batch_size = 32
epochs = 20
workers = 0 if os.name == 'nt' else 8

**Facenet + SVM**

In [8]:
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

mtcnn = MTCNN(
    image_size=160, margin=0, min_face_size=20,
    thresholds=[0.6, 0.7, 0.7], factor=0.709, post_process=True,
    device=device
)

facenet = InceptionResnetV1(pretrained='vggface2').eval()
facenet = facenet.to(device)

  0%|          | 0.00/107M [00:00<?, ?B/s]

In [9]:
def whitens(img):
    mean = img.mean()
    std = img.std()
    std_adj = std.clamp(min=1.0 / (float(img.numel()) ** 0.5))
    y = (img - mean) / std_adj
    return y

def extract_features(mtcnn, facenet, img):
    img = img.to(device)
    img = transforms.ToPILImage()(img.squeeze_(0))
    bbs, _ = mtcnn.detect(img)
    if bbs is None:
        # if no face is detected
        return None, None

    faces = torch.stack([extract_face(img, bb) for bb in bbs])
    embeddings = facenet(whitens(faces)).detach().numpy()

    return bbs, embeddings

def dataset_to_embeddings(dataset, mtcnn, facenet):
    transform = transforms.Compose([
        transforms.Resize(160),
        transforms.ToTensor()
    ])

    embeddings = []
    labels = []
    for img_path, label in dataset.samples:
        print(img_path)

        _, embedding = extract_features(mtcnn, facenet, transform(Image.open(img_path).convert('RGB')).unsqueeze_(0))
        if embedding is None:
            print("Could not find face on {}".format(img_path))
            continue
        if embedding.shape[0] > 1:
            print("Multiple faces detected for {}, taking one with highest probability".format(img_path))
            embedding = embedding[0, :]
        embeddings.append(embedding.flatten())
        labels.append(label)

    return np.stack(embeddings), labels

def train(embeddings, labels):
    clf = svm.SVC(probability=True)
    clf.fit(embeddings, labels)
    return clf

**Create adapt dataset**

In [10]:
import shutil

In [11]:
!mkdir /content/dataset
!mkdir /content/dataset/train
!mkdir /content/dataset/val

!mkdir /content/dataset/train/Phu
!mkdir /content/dataset/train/Phong
!mkdir /content/dataset/train/Nguyen
!mkdir /content/dataset/train/Huy
!mkdir /content/dataset/train/Hellas

!mkdir /content/dataset/val/Phu
!mkdir /content/dataset/val/Phong
!mkdir /content/dataset/val/Nguyen
!mkdir /content/dataset/val/Huy
!mkdir /content/dataset/val/Hellas

In [12]:
for idx,path in enumerate(os.listdir('/content/dataset_new/FacenetDataset/Phu')):
    if idx < 5:
      shutil.move('/content/dataset_new/FacenetDataset/Phu/' + path, '/content/dataset/val/Phu')
    else:
      shutil.move('/content/dataset_new/FacenetDataset/Phu/' + path, '/content/dataset/train/Phu')

In [13]:
def move_file(name):
  for idx,path in enumerate(os.listdir(f'/content/dataset_new/FacenetDataset/{name}')):
    if idx < 5:
      shutil.move(f'/content/dataset_new/FacenetDataset/{name}/' + path, f'/content/dataset/val/{name}')
    else:
      shutil.move(f'/content/dataset_new/FacenetDataset/{name}/' + path, f'/content/dataset/train/{name}')
move_file('Hellas')
move_file('Phong')
move_file('Nguyen')
move_file('Huy')

In [14]:
!rm -r /content/dataset/train/.ipynb_checkpoints
!rm -r /content/dataset/val/.ipynb_checkpoints

rm: cannot remove '/content/dataset/train/.ipynb_checkpoints': No such file or directory
rm: cannot remove '/content/dataset/val/.ipynb_checkpoints': No such file or directory


In [15]:
dataset_train = datasets.ImageFolder(root="/content/dataset/train")
dataset_val = datasets.ImageFolder(root="/content/dataset/val")

In [16]:
X_train, y_train = dataset_to_embeddings(dataset_train, mtcnn, facenet)
X_test, y_test = dataset_to_embeddings(dataset_val, mtcnn, facenet)

X_train_class_idx = dataset_train.class_to_idx
X_test_class_idx = dataset_val.class_to_idx

embeddings, labels, class_to_idx = X_train, y_train, X_train_class_idx

/content/dataset/train/Hellas/z3340946320295_903d7490fc28499652fb1db63c8139ca.jpg
/content/dataset/train/Hellas/z3340946322989_55b6073bf74e82bd388e8cc5f61c4928.jpg
/content/dataset/train/Hellas/z3340946329487_0bc341657d4cd6e5f8c4a3cdced6a473.jpg
/content/dataset/train/Hellas/z3340946331843_f52d1fdf66443c20221366fe15e684de.jpg
/content/dataset/train/Hellas/z3340946337250_e2952e7c6f55d4130924103a4af7d69e.jpg
/content/dataset/train/Hellas/z3340946337997_7cc8336b72385b95bbbe45e76f3850b0.jpg
/content/dataset/train/Hellas/z3340946339595_6147f2270ceeab8fe6e450bc020eb428.jpg
/content/dataset/train/Hellas/z3340946345056_1dc53a628d2b5285b1e1706bce282eb0.jpg
/content/dataset/train/Hellas/z3340946351201_7b03fe2053ff3d4c519fd693e296431e.jpg
/content/dataset/train/Hellas/z3340946351946_71bf2fcba5ee8c5a340536307add03b3.jpg
/content/dataset/train/Hellas/z3340946352121_e7b6fa28181f9754e872351bad0612ce.jpg
/content/dataset/train/Hellas/z3340946357038_65e005498d95bed1ebf6f6923ebfc0a1.jpg
/content/dataset

In [None]:
clf = train(embeddings, labels)

In [None]:
idx_to_class = {v: k for k, v in class_to_idx.items()}
print(idx_to_class)

target_names = list(map(lambda i: i[1], sorted(idx_to_class.items(), key=lambda i: i[0])))
print(metrics.classification_report(labels, clf.predict(embeddings), target_names=target_names))

# Predict labels for validation set and calculate accuracy
y_val_pred = clf.predict(X_test)
accuracy = accuracy_score(y_test, y_val_pred)
print('Validation Accuracy: {:.2f}%'.format(accuracy*100))

{0: 'Hellas', 1: 'Huy', 2: 'Nguyen', 3: 'Phong', 4: 'Phu'}
              precision    recall  f1-score   support

      Hellas       1.00      1.00      1.00        25
         Huy       1.00      1.00      1.00        25
      Nguyen       1.00      1.00      1.00        25
       Phong       1.00      1.00      1.00        27
         Phu       1.00      1.00      1.00        25

    accuracy                           1.00       127
   macro avg       1.00      1.00      1.00       127
weighted avg       1.00      1.00      1.00       127

Validation Accuracy: 100.00%


**Try again facenet only**

In [None]:
resnet = InceptionResnetV1(
    classify=True,
    pretrained='vggface2',
    num_classes=len(idx_to_class)
).to(device)

In [None]:
data_dir = '/content/dataset/train'
dataset_train = datasets.ImageFolder(data_dir, transform=transforms.Resize((512,512)))
dataset_train.samples = [
    (p, p.replace(data_dir, data_dir + '_cropped'))
        for p, _ in dataset_train.samples
]

In [None]:
batch_size = 32
epochs = 20
workers = 0 if os.name == 'nt' else 8

In [None]:
train_loader = DataLoader(
    dataset_train,
    num_workers=workers,
    batch_size=batch_size,
    collate_fn=training.collate_pil
)

for i, (x, y) in enumerate(train_loader):
    mtcnn(x, save_path=y)
    print('\rBatch {} of {}'.format(i + 1, len(train_loader)), end='')



Batch 4 of 4

In [None]:
data_dir = '/content/dataset/val'
dataset_val = datasets.ImageFolder(data_dir, transform=transforms.Resize((512,512)))
dataset_val.samples = [
    (p, p.replace(data_dir, data_dir + '_cropped'))
        for p, _ in dataset_val.samples
]

val_loader = DataLoader(
    dataset_val,
    num_workers=workers,
    batch_size=batch_size,
    collate_fn=training.collate_pil
)

for i, (x, y) in enumerate(val_loader):
    mtcnn(x, save_path=y)
    print('\rBatch {} of {}'.format(i + 1, len(val_loader)), end='')

Batch 1 of 1

In [None]:
optimizer = optim.Adam(resnet.parameters(), lr=0.001)
scheduler = MultiStepLR(optimizer, [5, 10])

trans = transforms.Compose([
    np.float32,
    transforms.ToTensor(),
    fixed_image_standardization
])

train_dataset = datasets.ImageFolder('/content/dataset/train_cropped', transform=trans)
img_inds_train = np.arange(len(train_dataset))
np.random.shuffle(img_inds_train)

val_dataset = datasets.ImageFolder('/content/dataset/val_cropped', transform=trans)
img_inds_val = np.arange(len(val_dataset))
np.random.shuffle(img_inds_val)

train_loader = DataLoader(
    train_dataset,
    num_workers=workers,
    batch_size=batch_size,
    sampler=SubsetRandomSampler(img_inds_train)
)
val_loader = DataLoader(
    val_dataset,
    num_workers=workers,
    batch_size=batch_size,
    sampler=SubsetRandomSampler(img_inds_val)
)

**Training**

In [None]:
loss_fn = torch.nn.CrossEntropyLoss()
metrics = {
    'fps': training.BatchTimer(),
    'acc': training.accuracy
}

In [None]:
writer = SummaryWriter()
writer.iteration, writer.interval = 0, 10

print('\n\nInitial')
print('-' * 10)
resnet.eval()
training.pass_epoch(
    resnet, loss_fn, val_loader,
    batch_metrics=metrics, show_running=True, device=device,
    writer=writer
)

for epoch in range(epochs):
    print('\nEpoch {}/{}'.format(epoch + 1, epochs))
    print('-' * 10)

    resnet.train()
    training.pass_epoch(
        resnet, loss_fn, train_loader, optimizer, scheduler,
        batch_metrics=metrics, show_running=True, device=device,
        writer=writer
    )

    resnet.eval()
    training.pass_epoch(
        resnet, loss_fn, val_loader,
        batch_metrics=metrics, show_running=True, device=device,
        writer=writer
    )

writer.close()



Initial
----------
Valid |     1/1    | loss:    1.6447 | fps:    2.1037 | acc:    0.1600   

Epoch 1/20
----------
Train |     4/4    | loss:    0.6742 | fps:    4.4303 | acc:    0.7422   
Valid |     1/1    | loss:    7.9410 | fps:    9.3435 | acc:    0.3600   

Epoch 2/20
----------
Train |     4/4    | loss:    0.0415 | fps:    4.4672 | acc:    0.9922   
Valid |     1/1    | loss:   23.0485 | fps:    9.3879 | acc:    0.2000   

Epoch 3/20
----------
Train |     4/4    | loss:    0.0332 | fps:    4.3358 | acc:    0.9922   
Valid |     1/1    | loss:    8.2651 | fps:    9.2996 | acc:    0.2800   

Epoch 4/20
----------
Train |     4/4    | loss:    0.1698 | fps:    3.8115 | acc:    0.9531   
Valid |     1/1    | loss:    5.0914 | fps:    5.1374 | acc:    0.4800   

Epoch 5/20
----------
Train |     4/4    | loss:    0.0694 | fps:    3.8056 | acc:    0.9766   
Valid |     1/1    | loss:    0.5074 | fps:    5.0890 | acc:    0.8400   

Epoch 6/20
----------
Train |     4/4    | loss: 

In [None]:
resnet.eval()
predict = []
for x,y in val_loader:
  pred = resnet(x).detach().cpu()
  label = y

In [None]:
pred = np.argmax(pred,axis = 1)

In [None]:
from sklearn import metrics
from sklearn.metrics import accuracy_score

In [None]:
target_names = list(map(lambda i: i[1], sorted(idx_to_class.items(), key=lambda i: i[0])))
print(metrics.classification_report(label, pred, target_names=target_names))

              precision    recall  f1-score   support

      Hellas       1.00      0.80      0.89         5
         Huy       0.83      1.00      0.91         5
      Nguyen       1.00      1.00      1.00         5
       Phong       1.00      1.00      1.00         5
         Phu       1.00      1.00      1.00         5

    accuracy                           0.96        25
   macro avg       0.97      0.96      0.96        25
weighted avg       0.97      0.96      0.96        25



**AutoFaiss - CLIP + Faiss**

In [None]:
!pip install clip-retrieval autofaiss

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting clip-retrieval
  Downloading clip_retrieval-2.37.0-py3-none-any.whl (343 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m343.4/343.4 kB[0m [31m22.4 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting autofaiss
  Downloading autofaiss-2.15.8-py3-none-any.whl (70 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m70.1/70.1 kB[0m [31m10.3 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting img2dataset<2,>=1.25.5 (from clip-retrieval)
  Downloading img2dataset-1.41.0-py3-none-any.whl (40 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m40.6/40.6 kB[0m [31m6.3 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting clip-anytorch<3,>=2.5.0 (from clip-retrieval)
  Downloading clip_anytorch-2.5.2-py3-none-any.whl (1.4 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.4/1.4 MB[0m [31m35.8 MB/s[0m eta [36m0:00:00[0m
Collec

In [None]:
!clip-retrieval inference --input_dataset /content/dataset/train_cropped --output_folder /content/dataset/train_embedding

The number of samples has been estimated to be 54
Starting the worker
dataset is 30
Starting work on task 0
100%|████████████████████████████████████████| 354M/354M [00:02<00:00, 137MiB/s]
warming up with batch size 256 on cpu
done warming up in 182.14397382736206s
 sample_per_sec 4 ; sample_count 54 

In [None]:
!autofaiss build_index --embeddings="/content/dataset/train_embedding/img_emb" \
                    --index_path="/content/knn.index" \
                    --index_infos_path="/content/infos.json" \
                    --metric_type="ip" \
                    --max_index_query_time_ms=10 \
                    --max_index_memory_usage="4GB"

2023-06-18 19:46:52,435 [INFO]: Using 2 omp threads (processes), consider increasing --nb_cores if you have more
2023-06-18 19:46:52,436 [INFO]: Launching the whole pipeline 06/18/2023, 19:46:52
2023-06-18 19:46:52,436 [INFO]: Reading total number of vectors and dimension 06/18/2023, 19:46:52
  0% 0/1 [00:00<?, ?it/s]100% 1/1 [00:00<00:00, 15827.56it/s]
2023-06-18 19:46:52,465 [INFO]: There are 54 embeddings of dim 512
2023-06-18 19:46:52,465 [INFO]: >>> Finished "Reading total number of vectors and dimension" in 0.0284 secs
2023-06-18 19:46:52,465 [INFO]: 	Compute estimated construction time of the index 06/18/2023, 19:46:52
2023-06-18 19:46:52,465 [INFO]: 		-> Train: 16.7 minutes
2023-06-18 19:46:52,465 [INFO]: 		-> Add: 0.0 seconds
2023-06-18 19:46:52,465 [INFO]: 		Total: 16.7 minutes
2023-06-18 19:46:52,465 [INFO]: 	>>> Finished "Compute estimated construction time of the index" in 0.0004 secs
2023-06-18 19:46:52,465 [INFO]: 	Checking that your have enough memory available to cre

In [None]:
import faiss
import torch
import clip
import os
import pandas as pd

In [None]:
df = pd.read_parquet("/content/dataset/train_embedding/metadata/metadata_0.parquet")
image_list = df["image_path"].tolist()
ind = faiss.read_index("/content/knn.index")

In [None]:
device = "cuda" if torch.cuda.is_available() else "cpu"
model, preprocess = clip.load("ViT-B/32", device=device)

In [None]:
trans = transforms.Compose([
    np.float32,
    # transforms.ToTensor(),
    fixed_image_standardization
])

val_dataset = datasets.ImageFolder('/content/dataset/val_cropped')
test_class_idx = val_dataset.class_to_idx

In [None]:
idx_to_class = {v: k for k, v in test_class_idx.items()}
print(idx_to_class)

target_names = list(map(lambda i: i[1], sorted(idx_to_class.items(), key=lambda i: i[0])))
print(target_names)

{0: 'Hellas', 1: 'Huy', 2: 'Nguyen', 3: 'Phong', 4: 'Phu'}
['Hellas', 'Huy', 'Nguyen', 'Phong', 'Phu']


In [None]:
from PIL import Image
from collections import defaultdict

In [None]:
labels = []
preds = []
k = 5
for x,y in val_dataset:
    image_tensor = preprocess(x)
    image_features = model.encode_image(torch.unsqueeze(image_tensor.to(device), dim=0))
    image_features /= image_features.norm(dim=-1, keepdim=True)
    image_embeddings = image_features.cpu().detach().numpy().astype('float32')
    D, I = ind.search(image_embeddings, k)
    print(D,I)
    i_candidate = defaultdict(int)
    for D_ele,I_ele in zip(D[0],I[0]):
      if D_ele > 0.7:
        name = image_list[I_ele].split('/')[-2]
        i_candidate[test_class_idx[name]] += 1
    key_with_max_value = max(i_candidate, key=lambda k: i_candidate[k])

    preds.append(key_with_max_value)
    labels.append(y)

[[0.9814776 0.9811022 0.9768701 0.9695312 0.968061 ]] [[33 31 30 46 39]]
[[0.9602302  0.9559616  0.9500785  0.946908   0.94595134]] [[40 37 36 25  7]]
[[0.98577493 0.9793645  0.9723014  0.9697337  0.96467674]] [[35 39 36 33 40]]
[[0.9912002  0.98894477 0.98893046 0.98604625 0.9849465 ]] [[44 43 47 45 46]]
[[0.97098    0.95289326 0.9502202  0.9474362  0.94551396]] [[51 42 34 50 11]]
[[0.95729834 0.95626354 0.95396113 0.9523598  0.94224334]] [[21 14 22 10 13]]
[[0.92248875 0.91735256 0.9145962  0.91353023 0.9114625 ]] [[33 36 34 32 22]]
[[0.9472196  0.9436085  0.94320464 0.94121546 0.9362284 ]] [[ 2  8 26 18 19]]
[[0.93213534 0.92167145 0.9194058  0.91939265 0.9167016 ]] [[23 29 48 46 33]]
[[0.9298672  0.9270016  0.92661995 0.92609537 0.92108256]] [[43 13 19 44 23]]
[[0.9462588  0.94098294 0.9320184  0.9314344  0.9266585 ]] [[ 9  1 27 16 15]]
[[0.9419172  0.9397419  0.93809783 0.936774   0.9321478 ]] [[18  5 26 27 11]]
[[0.9620621  0.95556676 0.9519692  0.9475317  0.9452912 ]] [[18 16  2

In [None]:
from sklearn import metrics
from sklearn.metrics import accuracy_score

In [None]:
print(preds)
print(labels)

[0, 0, 0, 0, 0, 4, 0, 4, 0, 0, 4, 4, 4, 4, 4, 4, 3, 4, 0, 3, 4, 4, 4, 4, 4]
[0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4]


In [None]:
target_names = list(map(lambda i: i[1], sorted(idx_to_class.items(), key=lambda i: i[0])))
print(metrics.classification_report(labels, preds, target_names=target_names))

              precision    recall  f1-score   support

      Hellas       0.56      1.00      0.71         5
         Huy       0.00      0.00      0.00         5
      Nguyen       0.00      0.00      0.00         5
       Phong       1.00      0.40      0.57         5
         Phu       0.36      1.00      0.53         5

    accuracy                           0.48        25
   macro avg       0.38      0.48      0.36        25
weighted avg       0.38      0.48      0.36        25



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


**Facenet + Faiss**

In [None]:
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

mtcnn = MTCNN(
    image_size=160, margin=0, min_face_size=20,
    thresholds=[0.6, 0.7, 0.7], factor=0.709, post_process=True,
    device=device
)

facenet = InceptionResnetV1(pretrained='vggface2').eval()
facenet = facenet.to(device)

In [None]:
embeddings, labels, class_to_idx = X_train, y_train, X_train_class_idx

In [None]:
!rm -r facenet_emb
!mkdir facenet_emb

rm: cannot remove 'facenet_emb': No such file or directory


In [None]:
np.save('/content/facenet_emb/facenet.npy', embeddings)

In [None]:
!autofaiss build_index --embeddings="/content/facenet_emb" \
                    --index_path="/content/knn_facenet.index" \
                    --index_infos_path="/content/infos_facenet.json" \
                    --metric_type="ip" \
                    --max_index_query_time_ms=10 \
                    --max_index_memory_usage="4GB"

2023-06-18 19:54:18,417 [INFO]: Using 2 omp threads (processes), consider increasing --nb_cores if you have more
2023-06-18 19:54:18,417 [INFO]: Launching the whole pipeline 06/18/2023, 19:54:18
2023-06-18 19:54:18,417 [INFO]: Reading total number of vectors and dimension 06/18/2023, 19:54:18
  0% 0/1 [00:00<?, ?it/s]100% 1/1 [00:00<00:00, 13189.64it/s]
2023-06-18 19:54:18,482 [INFO]: There are 127 embeddings of dim 512
2023-06-18 19:54:18,482 [INFO]: >>> Finished "Reading total number of vectors and dimension" in 0.0650 secs
2023-06-18 19:54:18,482 [INFO]: 	Compute estimated construction time of the index 06/18/2023, 19:54:18
2023-06-18 19:54:18,482 [INFO]: 		-> Train: 16.7 minutes
2023-06-18 19:54:18,482 [INFO]: 		-> Add: 0.0 seconds
2023-06-18 19:54:18,482 [INFO]: 		Total: 16.7 minutes
2023-06-18 19:54:18,482 [INFO]: 	>>> Finished "Compute estimated construction time of the index" in 0.0002 secs
2023-06-18 19:54:18,483 [INFO]: 	Checking that your have enough memory available to cr

In [None]:
ind = faiss.read_index("/content/knn_facenet.index")

In [None]:
preds = []
k = 1
for image_emb in X_test:
    D, I = ind.search(image_emb[np.newaxis, :], k)
    print(D,I)
    i_candidate = defaultdict(int)
    for D_ele,I_ele in zip(D[0],I[0]):
      # if D_ele > 0.7:
        cls = labels[I_ele]
        i_candidate[cls] += 1
    # try:
    key_with_max_value = max(i_candidate, key=lambda k: i_candidate[k])
    # except:
    #   print("error")
    #   key_with_max_value = 0

    preds.append(key_with_max_value)

[[0.94171274]] [[2]]
[[0.93268883]] [[7]]
[[0.94907737]] [[6]]
[[0.98267686]] [[18]]
[[0.88385683]] [[21]]
[[0.72854424]] [[28]]
[[0.871317]] [[37]]
[[0.8147012]] [[29]]
[[0.7699375]] [[46]]
[[0.91810703]] [[46]]
[[0.90323716]] [[74]]
[[0.74952936]] [[71]]
[[0.9538892]] [[51]]
[[0.9163799]] [[72]]
[[0.9418812]] [[53]]
[[0.9404378]] [[83]]
[[0.9845713]] [[99]]
[[0.9340857]] [[96]]
[[0.86662614]] [[98]]
[[0.965695]] [[76]]
[[0.90524125]] [[108]]
[[0.9515755]] [[110]]
[[0.83670175]] [[109]]
[[0.9521544]] [[120]]
[[0.9633839]] [[122]]


In [None]:
target_names = list(map(lambda i: i[1], sorted(idx_to_class.items(), key=lambda i: i[0])))
print(metrics.classification_report(y_test, preds, target_names=target_names))

              precision    recall  f1-score   support

      Hellas       1.00      1.00      1.00         5
         Huy       1.00      1.00      1.00         5
      Nguyen       1.00      1.00      1.00         5
       Phong       1.00      1.00      1.00         5
         Phu       1.00      1.00      1.00         5

    accuracy                           1.00        25
   macro avg       1.00      1.00      1.00        25
weighted avg       1.00      1.00      1.00        25



**Alignment**

In [17]:
!pip install --upgrade imutils

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [18]:
!pip install dlib

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [19]:
from imutils import face_utils
import numpy as np
import argparse
import imutils
import dlib
import cv2
from google.colab.patches import cv2_imshow
from imutils.face_utils import FaceAligner
from imutils.face_utils import rect_to_bb
import math
import matplotlib.pyplot as plt

In [20]:
detector = dlib.get_frontal_face_detector()
predictor = dlib.shape_predictor('/content/drive/MyDrive/faceRecognition/dlib/shape_predictor_68_face_landmarks.dat')
predictor2 = dlib.shape_predictor('/content/drive/MyDrive/faceRecognition/dlib/shape_predictor_81_face_landmarks.dat')

In [21]:
def facial_landmarks(image):
    try:
        grayscale_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    except:
        grayscale_image = image

    # array of rectangles surrounding faces detected
    rectangles = detector(grayscale_image, 1)

    # If at least one face is detected, find its landmarks
    if len(rectangles) > 0:
        # Get 68 landmark points
        faceLandmarks = predictor(grayscale_image, rectangles[0])
        faceLandmarks = face_utils.shape_to_np(faceLandmarks)
        return faceLandmarks,rectangles
    else:
        return None

In [22]:
import cv2
import numpy as np
from skimage import transform as trans

__file__ = 'test'

# reference facial points, a list of coordinates (x,y)
REFERENCE_FACIAL_POINTS = [
    [30.29459953, 51.69630051],
    [65.53179932, 51.50139999],
    [48.02519989, 71.73660278],
    [33.54930115, 92.3655014],
    [62.72990036, 92.20410156]
]

DEFAULT_CROP_SIZE = (96, 112)


class FaceWarpException(Exception):
    def __str__(self):
        return 'In File {}:{}'.format(
            __file__, super.__str__(self))


def get_reference_facial_points(output_size=None,
                                inner_padding_factor=0.0,
                                outer_padding=(0, 0),
                                default_square=False):
    tmp_5pts = np.array(REFERENCE_FACIAL_POINTS)
    tmp_crop_size = np.array(DEFAULT_CROP_SIZE)

    # 0) make the inner region a square
    if default_square:
        size_diff = max(tmp_crop_size) - tmp_crop_size
        tmp_5pts += size_diff / 2
        tmp_crop_size += size_diff

    # print('---> default:')
    # print('              crop_size = ', tmp_crop_size)
    # print('              reference_5pts = ', tmp_5pts)

    if (output_size and
            output_size[0] == tmp_crop_size[0] and
            output_size[1] == tmp_crop_size[1]):
        print('output_size == DEFAULT_CROP_SIZE {}: return default reference points'.format(tmp_crop_size))
        return tmp_5pts

    if (inner_padding_factor == 0 and
            outer_padding == (0, 0)):
        if output_size is None:
            print('No paddings to do: return default reference points')
            return tmp_5pts
        else:
            raise FaceWarpException(
                'No paddings to do, output_size must be None or {}'.format(tmp_crop_size))

    # check output size
    if not (0 <= inner_padding_factor <= 1.0):
        raise FaceWarpException('Not (0 <= inner_padding_factor <= 1.0)')

    if ((inner_padding_factor > 0 or outer_padding[0] > 0 or outer_padding[1] > 0)
            and output_size is None):
        output_size = tmp_crop_size * \
                      (1 + inner_padding_factor * 2).astype(np.int32)
        output_size += np.array(outer_padding)
        print('              deduced from paddings, output_size = ', output_size)

    if not (outer_padding[0] < output_size[0]
            and outer_padding[1] < output_size[1]):
        raise FaceWarpException('Not (outer_padding[0] < output_size[0]'
                                'and outer_padding[1] < output_size[1])')

    # 1) pad the inner region according inner_padding_factor
    # print('---> STEP1: pad the inner region according inner_padding_factor')
    if inner_padding_factor > 0:
        size_diff = tmp_crop_size * inner_padding_factor * 2
        tmp_5pts += size_diff / 2
        tmp_crop_size += np.round(size_diff).astype(np.int32)

    # print('              crop_size = ', tmp_crop_size)
    # print('              reference_5pts = ', tmp_5pts)

    # 2) resize the padded inner region
    # print('---> STEP2: resize the padded inner region')
    size_bf_outer_pad = np.array(output_size) - np.array(outer_padding) * 2
    # print('              crop_size = ', tmp_crop_size)
    # print('              size_bf_outer_pad = ', size_bf_outer_pad)

    if size_bf_outer_pad[0] * tmp_crop_size[1] != size_bf_outer_pad[1] * tmp_crop_size[0]:
        raise FaceWarpException('Must have (output_size - outer_padding)'
                                '= some_scale * (crop_size * (1.0 + inner_padding_factor)')

    scale_factor = size_bf_outer_pad[0].astype(np.float32) / tmp_crop_size[0]
    # print('              resize scale_factor = ', scale_factor)
    tmp_5pts = tmp_5pts * scale_factor
    #    size_diff = tmp_crop_size * (scale_factor - min(scale_factor))
    #    tmp_5pts = tmp_5pts + size_diff / 2
    tmp_crop_size = size_bf_outer_pad
    # print('              crop_size = ', tmp_crop_size)
    # print('              reference_5pts = ', tmp_5pts)

    # 3) add outer_padding to make output_size
    reference_5point = tmp_5pts + np.array(outer_padding)
    tmp_crop_size = output_size
    # print('---> STEP3: add outer_padding to make output_size')
    # print('              crop_size = ', tmp_crop_size)
    # print('              reference_5pts = ', tmp_5pts)
    #
    # print('===> end get_reference_facial_points\n')

    return reference_5point


def get_affine_transform_matrix(src_pts, dst_pts):
    tfm = np.float32([[1, 0, 0], [0, 1, 0]])
    n_pts = src_pts.shape[0]
    ones = np.ones((n_pts, 1), src_pts.dtype)
    src_pts_ = np.hstack([src_pts, ones])
    dst_pts_ = np.hstack([dst_pts, ones])

    A, res, rank, s = np.linalg.lstsq(src_pts_, dst_pts_)

    if rank == 3:
        tfm = np.float32([
            [A[0, 0], A[1, 0], A[2, 0]],
            [A[0, 1], A[1, 1], A[2, 1]]
        ])
    elif rank == 2:
        tfm = np.float32([
            [A[0, 0], A[1, 0], 0],
            [A[0, 1], A[1, 1], 0]
        ])

    return tfm


def warp_and_crop_face(src_img,
                       facial_pts,
                       reference_pts=None,
                       crop_size=(96, 112),
                       align_type='smilarity'):
    if reference_pts is None:
        if crop_size[0] == 96 and crop_size[1] == 112:
            reference_pts = REFERENCE_FACIAL_POINTS
        else:
            default_square = False
            inner_padding_factor = 0
            outer_padding = (0, 0)
            output_size = crop_size

            reference_pts = get_reference_facial_points(output_size,
                                                        inner_padding_factor,
                                                        outer_padding,
                                                        default_square)

    ref_pts = np.float32(reference_pts)
    ref_pts_shp = ref_pts.shape
    if max(ref_pts_shp) < 3 or min(ref_pts_shp) != 2:
        raise FaceWarpException(
            'reference_pts.shape must be (K,2) or (2,K) and K>2')

    if ref_pts_shp[0] == 2:
        ref_pts = ref_pts.T

    src_pts = np.float32(facial_pts)
    src_pts_shp = src_pts.shape
    if max(src_pts_shp) < 3 or min(src_pts_shp) != 2:
        raise FaceWarpException(
            'facial_pts.shape must be (K,2) or (2,K) and K>2')

    if src_pts_shp[0] == 2:
        src_pts = src_pts.T

    if src_pts.shape != ref_pts.shape:
        raise FaceWarpException(
            'facial_pts and reference_pts must have the same shape')

    if align_type == 'cv2_affine':
        tfm = cv2.getAffineTransform(src_pts[0:3], ref_pts[0:3])
    #        print('cv2.getAffineTransform() returns tfm=\n' + str(tfm))
    elif align_type == 'affine':
        tfm = get_affine_transform_matrix(src_pts, ref_pts)
    #        print('get_affine_transform_matrix() returns tfm=\n' + str(tfm))
    else:
        # tfm = get_similarity_transform_for_cv2(src_pts, ref_pts)
        tform = trans.SimilarityTransform()
        tform.estimate(src_pts, ref_pts)
        tfm = tform.params[0:2, :]

    face_img = cv2.warpAffine(src_img, tfm, (crop_size[0], crop_size[1]))

    return face_img

In [23]:
!mkdir dataset/train_crop
!mkdir dataset/val_crop

In [24]:
for path in os.listdir('/content/dataset/train'):
  # try:
  #   os.chdir(f'/content/dataset/train_crop/{path}')
  # except:
  os.mkdir(f'/content/dataset/train_crop/{path}')
  os.chdir(f'/content/dataset/train_crop/{path}')
  for link in os.listdir('/content/dataset/train/' + path):
    try:
      originalImage = cv2.imread(f'/content/dataset/train/{path}/{link}')
      landmarks,rec = facial_landmarks(originalImage)
      (x, y, w, h) = face_utils.rect_to_bb(rec[0])
      a = [landmarks[17],landmarks[26],landmarks[33],landmarks[4],landmarks[12]]
      img = warp_and_crop_face(originalImage,a,reference_pts=None,crop_size=(96,112),align_type='')
      cv2.imwrite(f'{link}', img)
    except:
      continue

In [25]:
for path in os.listdir('/content/dataset/val'):
  # try:
  #   os.chdir(f'/content/dataset/train_crop/{path}')
  # except:
  os.mkdir(f'/content/dataset/val_crop/{path}')
  os.chdir(f'/content/dataset/val_crop/{path}')
  for link in os.listdir('/content/dataset/val/' + path):
    try:
      originalImage = cv2.imread(f'/content/dataset/val/{path}/{link}')
      landmarks,rec = facial_landmarks(originalImage)
      (x, y, w, h) = face_utils.rect_to_bb(rec[0])
      a = [landmarks[17],landmarks[26],landmarks[33],landmarks[4],landmarks[12]]
      img = warp_and_crop_face(originalImage,a,reference_pts=None,crop_size=(96,112),align_type='')
      cv2.imwrite(f'{link}', img)
    except:
      continue

In [26]:
dataset_train = datasets.ImageFolder(root="/content/dataset/train_crop")
dataset_val = datasets.ImageFolder(root="/content/dataset/val_crop")

X_train, y_train = dataset_to_embeddings(dataset_train, mtcnn, facenet)
X_test, y_test = dataset_to_embeddings(dataset_val, mtcnn, facenet)

X_train_class_idx = dataset_train.class_to_idx
X_test_class_idx = dataset_val.class_to_idx

embeddings, labels, class_to_idx = X_train, y_train, X_train_class_idx

/content/dataset/train_crop/Hellas/z3340946320295_903d7490fc28499652fb1db63c8139ca.jpg
/content/dataset/train_crop/Hellas/z3340946322989_55b6073bf74e82bd388e8cc5f61c4928.jpg
/content/dataset/train_crop/Hellas/z3340946329487_0bc341657d4cd6e5f8c4a3cdced6a473.jpg
/content/dataset/train_crop/Hellas/z3340946337250_e2952e7c6f55d4130924103a4af7d69e.jpg
/content/dataset/train_crop/Hellas/z3340946337997_7cc8336b72385b95bbbe45e76f3850b0.jpg
/content/dataset/train_crop/Hellas/z3340946351201_7b03fe2053ff3d4c519fd693e296431e.jpg
/content/dataset/train_crop/Hellas/z3340946352121_e7b6fa28181f9754e872351bad0612ce.jpg
/content/dataset/train_crop/Hellas/z3340946357038_65e005498d95bed1ebf6f6923ebfc0a1.jpg
/content/dataset/train_crop/Hellas/z3340946362560_837363a994db83ca8273f48b8171737f.jpg
/content/dataset/train_crop/Hellas/z3340946363323_952948f29ecda87e075534f1173d164a.jpg
/content/dataset/train_crop/Hellas/z3340946364863_64f4fa3b17ba38ca25eac6a04d0a4d76.jpg
/content/dataset/train_crop/Hellas/z3340946

In [27]:
!rm -r /content/facenet_emb
!mkdir /content/facenet_emb

rm: cannot remove '/content/facenet_emb': No such file or directory


In [28]:
np.save('/content/facenet_emb/facenet.npy', embeddings)

In [29]:
!pip install autofaiss

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting autofaiss
  Downloading autofaiss-2.15.8-py3-none-any.whl (70 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m70.1/70.1 kB[0m [31m2.9 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting fire<0.5.0,>=0.4.0 (from autofaiss)
  Downloading fire-0.4.0.tar.gz (87 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m87.7/87.7 kB[0m [31m9.0 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting embedding-reader<2,>=1.5.1 (from autofaiss)
  Downloading embedding_reader-1.5.1-py3-none-any.whl (18 kB)
Collecting faiss-cpu<2,>=1 (from autofaiss)
  Downloading faiss_cpu-1.7.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (17.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m17.6/17.6 MB[0m [31m58.3 MB/s[0m eta [36m0:00:00[0m
Building wheels for collected packages: fire
  Buil

In [30]:
!autofaiss build_index --embeddings="/content/facenet_emb" \
                    --index_path="/content/knn_facenet.index" \
                    --index_infos_path="/content/infos_facenet.json" \
                    --metric_type="ip" \
                    --max_index_query_time_ms=10 \
                    --max_index_memory_usage="4GB"

2023-06-19 09:30:18,395 [INFO]: Using 2 omp threads (processes), consider increasing --nb_cores if you have more
2023-06-19 09:30:18,395 [INFO]: Launching the whole pipeline 06/19/2023, 09:30:18
2023-06-19 09:30:18,395 [INFO]: Reading total number of vectors and dimension 06/19/2023, 09:30:18
  0% 0/1 [00:00<?, ?it/s]100% 1/1 [00:00<00:00, 10565.00it/s]
2023-06-19 09:30:18,465 [INFO]: There are 110 embeddings of dim 512
2023-06-19 09:30:18,466 [INFO]: >>> Finished "Reading total number of vectors and dimension" in 0.0704 secs
2023-06-19 09:30:18,466 [INFO]: 	Compute estimated construction time of the index 06/19/2023, 09:30:18
2023-06-19 09:30:18,466 [INFO]: 		-> Train: 16.7 minutes
2023-06-19 09:30:18,466 [INFO]: 		-> Add: 0.0 seconds
2023-06-19 09:30:18,466 [INFO]: 		Total: 16.7 minutes
2023-06-19 09:30:18,466 [INFO]: 	>>> Finished "Compute estimated construction time of the index" in 0.0002 secs
2023-06-19 09:30:18,466 [INFO]: 	Checking that your have enough memory available to cr

In [31]:
import faiss
import torch
import os
import pandas as pd
from collections import defaultdict

In [32]:
ind = faiss.read_index("/content/knn_facenet.index")

In [37]:
preds = []
k = 3
for image_emb in X_test:
    D, I = ind.search(image_emb[np.newaxis, :], k)
    print(D,I)
    i_candidate = defaultdict(int)
    for D_ele,I_ele in zip(D[0],I[0]):
      # if D_ele > 0.7:
        cls = labels[I_ele]
        i_candidate[cls] += 1
    # try:
    key_with_max_value = max(i_candidate, key=lambda k: i_candidate[k])
    # except:
    #   print("error")
    #   key_with_max_value = 0

    preds.append(key_with_max_value)

[[0.9244436  0.910566   0.90784436]] [[3 5 2]]
[[0.95380974 0.9394542  0.9340483 ]] [[11 13 14]]
[[0.81213397 0.81093276 0.80566835]] [[ 8 20 17]]
[[0.7024101  0.69576365 0.67275786]] [[82 96 44]]
[[0.8273859  0.8075385  0.77624667]] [[32 29 22]]
[[0.89393556 0.73590124 0.72063905]] [[25 33 36]]
[[0.79905057 0.78677183 0.7855057 ]] [[40 23 28]]
[[0.8883059 0.8100925 0.7761458]] [[40 23 28]]
[[0.9178004  0.89162153 0.8780757 ]] [[65 54 50]]
[[0.80750763 0.8058837  0.7815418 ]] [[60 62 54]]
[[0.89923334 0.8953867  0.8267541 ]] [[47 58 55]]
[[0.8883485 0.8808952 0.8447958]] [[63 54 50]]
[[0.884149   0.8391694  0.83591574]] [[46 48 45]]
[[0.9574333  0.95743054 0.9489771 ]] [[69 77 71]]
[[0.9649584  0.96062565 0.9598875 ]] [[85 86 76]]
[[0.90813273 0.8930341  0.8795388 ]] [[81 80 68]]
[[0.825261   0.8249568  0.75583285]] [[70 83 78]]
[[0.94777375 0.94063264 0.935001  ]] [[67 79 69]]
[[0.88629234 0.8796291  0.84141845]] [[90 93 94]]
[[0.95361906 0.9443839  0.90837586]] [[109  95  92]]
[[0.89

In [34]:
from sklearn import metrics
from sklearn.metrics import accuracy_score

In [38]:
test_class_idx = dataset_val.class_to_idx

idx_to_class = {v: k for k, v in test_class_idx.items()}
print(idx_to_class)

target_names = list(map(lambda i: i[1], sorted(idx_to_class.items(), key=lambda i: i[0])))
print(target_names)

{0: 'Hellas', 1: 'Huy', 2: 'Nguyen', 3: 'Phong', 4: 'Phu'}
['Hellas', 'Huy', 'Nguyen', 'Phong', 'Phu']


In [39]:
target_names = list(map(lambda i: i[1], sorted(idx_to_class.items(), key=lambda i: i[0])))
print(metrics.classification_report(y_test, preds, target_names=target_names))

              precision    recall  f1-score   support

      Hellas       1.00      1.00      1.00         3
         Huy       1.00      0.80      0.89         5
      Nguyen       1.00      1.00      1.00         5
       Phong       0.83      1.00      0.91         5
         Phu       1.00      1.00      1.00         5

    accuracy                           0.96        23
   macro avg       0.97      0.96      0.96        23
weighted avg       0.96      0.96      0.96        23

