In [1]:
!pip install facenet-pytorch

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting facenet-pytorch
  Downloading facenet_pytorch-2.5.3-py3-none-any.whl (1.9 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.9/1.9 MB[0m [31m64.1 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: facenet-pytorch
Successfully installed facenet-pytorch-2.5.3


In [2]:
from facenet_pytorch import MTCNN, InceptionResnetV1, fixed_image_standardization, training
import torch
from torch.utils.data import DataLoader, SubsetRandomSampler
from torch import optim
from torch.optim.lr_scheduler import MultiStepLR
from torch.utils.tensorboard import SummaryWriter
from torchvision import datasets, transforms
import numpy as np
import os
from facenet_pytorch import MTCNN, InceptionResnetV1, extract_face
from sklearn import svm
from PIL import Image
from sklearn import metrics
from sklearn.metrics import accuracy_score

**Mounted Drive**

In [3]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
!unzip /content/drive/MyDrive/faceRecognition/dataset/5_celeb.zip -d dataset

**Util Function**

In [5]:
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

mtcnn = MTCNN(
    image_size=160, margin=0, min_face_size=20,
    thresholds=[0.6, 0.7, 0.7], factor=0.709, post_process=True,
    device=device
)

facenet = InceptionResnetV1(pretrained='vggface2').eval()
facenet = facenet.to(device)

  0%|          | 0.00/107M [00:00<?, ?B/s]

In [6]:
def whitens(img):
    mean = img.mean()
    std = img.std()
    std_adj = std.clamp(min=1.0 / (float(img.numel()) ** 0.5))
    y = (img - mean) / std_adj
    return y

def extract_features(mtcnn, facenet, img):
    img = img.to(device)
    img = transforms.ToPILImage()(img.squeeze_(0))
    bbs, _ = mtcnn.detect(img)
    if bbs is None:
        # if no face is detected
        return None, None

    faces = torch.stack([extract_face(img, bb) for bb in bbs])
    embeddings = facenet(whitens(faces)).detach().numpy()

    return bbs, embeddings

def dataset_to_embeddings(dataset, mtcnn, facenet):
    transform = transforms.Compose([
        transforms.Resize(160),
        transforms.ToTensor()
    ])

    embeddings = []
    labels = []
    for img_path, label in dataset.samples:
        print(img_path)

        _, embedding = extract_features(mtcnn, facenet, transform(Image.open(img_path).convert('RGB')).unsqueeze_(0))
        if embedding is None:
            print("Could not find face on {}".format(img_path))
            continue
        if embedding.shape[0] > 1:
            print("Multiple faces detected for {}, taking one with highest probability".format(img_path))
            embedding = embedding[0, :]
        embeddings.append(embedding.flatten())
        labels.append(label)

    return np.stack(embeddings), labels

def train(embeddings, labels):
    clf = svm.SVC(probability=True)
    clf.fit(embeddings, labels)
    return clf

**Dataset**

In [7]:
# Create the ImageFolder dataset
dataset_train = datasets.ImageFolder(root="/content/dataset/train")
dataset_val = datasets.ImageFolder(root="/content/dataset/val")

In [8]:
transform = transforms.Compose([
    transforms.Resize(160),
    transforms.ToTensor()
])

test = extract_features(mtcnn, facenet, transform(dataset_train[30][0]))

In [9]:
test[1].shape

(1, 512)

In [None]:
import matplotlib.pyplot as plt
import matplotlib.patches as patches

transform = transforms.Compose([
    transforms.Resize(160),
    # transforms.ToTensor()
])

def plot_img_bbox(img,target):
    # plot the image and bboxes
    # Bounding boxes are defined as follows: x-min y-min width height
    fig, a = plt.subplots(1,1)
    fig.set_size_inches(5,5)
    a.imshow(img)
    for box in target:
        x, y, width, height  = box[0], box[1], box[2]-box[0], box[3]-box[1]
        rect = patches.Rectangle((x, y),
                                 width, height,
                                 linewidth = 2,
                                 edgecolor = 'r',
                                 facecolor = 'none')

        # Draw the bounding box on top of the image
        a.add_patch(rect)
    plt.show()

# plotting the image with bboxes. Feel free to change the index
img = transform(dataset_train[30][0])
target = test[0]
print(test[1])
plot_img_bbox(img,target)

**Facenet + SVM**

In [None]:
X_train, y_train = dataset_to_embeddings(dataset_train, mtcnn, facenet)
X_test, y_test = dataset_to_embeddings(dataset_val, mtcnn, facenet)

X_train_class_idx = dataset_train.class_to_idx
X_test_class_idx = dataset_val.class_to_idx

embeddings, labels, class_to_idx = X_train, y_train, X_train_class_idx

In [None]:
clf = train(embeddings, labels)

In [None]:
len(X_test)

25

In [None]:
idx_to_class = {v: k for k, v in class_to_idx.items()}
print(idx_to_class)

target_names = list(map(lambda i: i[1], sorted(idx_to_class.items(), key=lambda i: i[0])))
print(metrics.classification_report(labels, clf.predict(embeddings), target_names=target_names))

# Predict labels for validation set and calculate accuracy
y_val_pred = clf.predict(X_test)
accuracy = accuracy_score(y_test, y_val_pred)
print('Validation Accuracy: {:.2f}%'.format(accuracy*100))

{0: 'ben_afflek', 1: 'elton_john', 2: 'jerry_seinfeld', 3: 'madonna', 4: 'mindy_kaling'}
                precision    recall  f1-score   support

    ben_afflek       1.00      1.00      1.00        14
    elton_john       1.00      1.00      1.00        16
jerry_seinfeld       1.00      1.00      1.00        21
       madonna       1.00      1.00      1.00        19
  mindy_kaling       1.00      1.00      1.00        22

      accuracy                           1.00        92
     macro avg       1.00      1.00      1.00        92
  weighted avg       1.00      1.00      1.00        92

Validation Accuracy: 100.00%


#Facenet Only

In [None]:
print(len(idx_to_class))

5


In [None]:
resnet = InceptionResnetV1(
    classify=True,
    pretrained='vggface2',
    num_classes=len(idx_to_class)
).to(device)

**Train**

In [12]:
data_dir = '/content/dataset/train'
dataset_train = datasets.ImageFolder(data_dir, transform=transforms.Resize((512,512)))
dataset_train.samples = [
    (p, p.replace(data_dir, data_dir + '_cropped'))
        for p, _ in dataset_train.samples
]

In [13]:
batch_size = 32
epochs = 20
workers = 0 if os.name == 'nt' else 8

In [14]:
train_loader = DataLoader(
    dataset_train,
    num_workers=workers,
    batch_size=batch_size,
    collate_fn=training.collate_pil
)

for i, (x, y) in enumerate(train_loader):
    print(x)
    print(mtcnn(x))
    break
    # mtcnn(x, save_path=y)
    # print('\rBatch {} of {}'.format(i + 1, len(train_loader)), end='')



[<PIL.Image.Image image mode=RGB size=512x512 at 0x7F6F6DE9BA90>, <PIL.Image.Image image mode=RGB size=512x512 at 0x7F6F6DCEEE30>, <PIL.Image.Image image mode=RGB size=512x512 at 0x7F6F6DCEECE0>, <PIL.Image.Image image mode=RGB size=512x512 at 0x7F6F6DCEECB0>, <PIL.Image.Image image mode=RGB size=512x512 at 0x7F6F6DCEEDA0>, <PIL.Image.Image image mode=RGB size=512x512 at 0x7F6F6DCED270>, <PIL.Image.Image image mode=RGB size=512x512 at 0x7F6F6DCEEC50>, <PIL.Image.Image image mode=RGB size=512x512 at 0x7F6F6DCEED10>, <PIL.Image.Image image mode=RGB size=512x512 at 0x7F6F6DCEEE60>, <PIL.Image.Image image mode=RGB size=512x512 at 0x7F6F6DCEEE00>, <PIL.Image.Image image mode=RGB size=512x512 at 0x7F6F6DCEEDD0>, <PIL.Image.Image image mode=RGB size=512x512 at 0x7F6F6DCEED70>, <PIL.Image.Image image mode=RGB size=512x512 at 0x7F6F6DCEED40>, <PIL.Image.Image image mode=RGB size=512x512 at 0x7F6F6DCEEE90>, <PIL.Image.Image image mode=RGB size=512x512 at 0x7F6F6DCEEEC0>, <PIL.Image.Image image m

**Val**

In [None]:
data_dir = '/content/dataset/val'
dataset_val = datasets.ImageFolder(data_dir, transform=transforms.Resize((512,512)))
dataset_val.samples = [
    (p, p.replace(data_dir, data_dir + '_cropped'))
        for p, _ in dataset_val.samples
]

In [None]:
val_loader = DataLoader(
    dataset_val,
    num_workers=workers,
    batch_size=batch_size,
    collate_fn=training.collate_pil
)

for i, (x, y) in enumerate(val_loader):
    mtcnn(x, save_path=y)
    print('\rBatch {} of {}'.format(i + 1, len(val_loader)), end='')

NameError: ignored

**Adapt dataset**

In [None]:
optimizer = optim.Adam(resnet.parameters(), lr=0.001)
scheduler = MultiStepLR(optimizer, [5, 10])

trans = transforms.Compose([
    np.float32,
    transforms.ToTensor(),
    fixed_image_standardization
])

train_dataset = datasets.ImageFolder('/content/dataset/train_cropped', transform=trans)
img_inds_train = np.arange(len(train_dataset))
np.random.shuffle(img_inds_train)

val_dataset = datasets.ImageFolder('/content/dataset/val_cropped', transform=trans)
img_inds_val = np.arange(len(val_dataset))
np.random.shuffle(img_inds_val)

train_loader = DataLoader(
    train_dataset,
    num_workers=workers,
    batch_size=batch_size,
    sampler=SubsetRandomSampler(img_inds_train)
)
val_loader = DataLoader(
    val_dataset,
    num_workers=workers,
    batch_size=batch_size,
    sampler=SubsetRandomSampler(img_inds_val)
)

NameError: ignored

**Training**

In [None]:
loss_fn = torch.nn.CrossEntropyLoss()
metrics = {
    'fps': training.BatchTimer(),
    'acc': training.accuracy
}

In [None]:
writer = SummaryWriter()
writer.iteration, writer.interval = 0, 10

print('\n\nInitial')
print('-' * 10)
resnet.eval()
training.pass_epoch(
    resnet, loss_fn, val_loader,
    batch_metrics=metrics, show_running=True, device=device,
    writer=writer
)

for epoch in range(epochs):
    print('\nEpoch {}/{}'.format(epoch + 1, epochs))
    print('-' * 10)

    resnet.train()
    training.pass_epoch(
        resnet, loss_fn, train_loader, optimizer, scheduler,
        batch_metrics=metrics, show_running=True, device=device,
        writer=writer
    )

    resnet.eval()
    training.pass_epoch(
        resnet, loss_fn, val_loader,
        batch_metrics=metrics, show_running=True, device=device,
        writer=writer
    )

writer.close()



Initial
----------
Valid |     1/1    | loss:    1.0637 | fps:    0.6090 | acc:    0.7692   

Epoch 1/20
----------
Train |     3/3    | loss:    0.0054 | fps:    3.5250 | acc:    1.0000   
Valid |     1/1    | loss:    0.9858 | fps:    7.6993 | acc:    0.8462   

Epoch 2/20
----------
Train |     3/3    | loss:    0.0136 | fps:    3.5040 | acc:    1.0000   
Valid |     1/1    | loss:    0.9429 | fps:    7.5642 | acc:    0.8846   

Epoch 3/20
----------
Train |     3/3    | loss:    0.0113 | fps:    3.5109 | acc:    1.0000   
Valid |     1/1    | loss:    0.8892 | fps:    7.2661 | acc:    0.8846   

Epoch 4/20
----------
Train |     3/3    | loss:    0.0231 | fps:    3.5237 | acc:    0.9896   
Valid |     1/1    | loss:    0.8437 | fps:    6.6503 | acc:    0.8462   

Epoch 5/20
----------
Train |     3/3    | loss:    0.0087 | fps:    3.6195 | acc:    1.0000   
Valid |     1/1    | loss:    0.8212 | fps:    6.5171 | acc:    0.8462   

Epoch 6/20
----------
Train |     3/3    | loss: 

KeyboardInterrupt: ignored

In [None]:
resnet.eval()
predict = []
for x,y in val_loader:
  pred = resnet(x).detach().cpu()
  label = y



In [None]:
pred = np.argmax(pred,axis = 1)

In [None]:
from sklearn import metrics
from sklearn.metrics import accuracy_score

In [None]:
target_names = list(map(lambda i: i[1], sorted(idx_to_class.items(), key=lambda i: i[0])))
print(metrics.classification_report(label, pred, target_names=target_names))

                precision    recall  f1-score   support

    ben_afflek       0.80      0.80      0.80         5
    elton_john       0.57      1.00      0.73         4
jerry_seinfeld       1.00      0.71      0.83         7
       madonna       1.00      0.80      0.89         5
  mindy_kaling       1.00      1.00      1.00         5

      accuracy                           0.85        26
     macro avg       0.87      0.86      0.85        26
  weighted avg       0.90      0.85      0.85        26



#Faiss

**AutoFaiss - CLIP + FAISS**

In [None]:
!pip install clip-retrieval autofaiss

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting clip-retrieval
  Downloading clip_retrieval-2.37.0-py3-none-any.whl (343 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m343.4/343.4 kB[0m [31m6.2 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting autofaiss
  Downloading autofaiss-2.15.8-py3-none-any.whl (70 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m70.1/70.1 kB[0m [31m7.2 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting img2dataset<2,>=1.25.5 (from clip-retrieval)
  Downloading img2dataset-1.41.0-py3-none-any.whl (40 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m40.6/40.6 kB[0m [31m3.5 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting clip-anytorch<3,>=2.5.0 (from clip-retrieval)
  Downloading clip_anytorch-2.5.2-py3-none-any.whl (1.4 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.4/1.4 MB[0m [31m30.1 MB/s[0m eta [36m0:00:00[0m
Collect

In [None]:
!clip-retrieval inference --input_dataset /content/dataset/train_cropped --output_folder /content/dataset/train_embedding

The number of samples has been estimated to be 91
Starting the worker
dataset is 30
Starting work on task 0
100%|███████████████████████████████████████| 354M/354M [00:05<00:00, 59.4MiB/s]
warming up with batch size 256 on cpu
done warming up in 206.28352618217468s
 sample_per_sec 3 ; sample_count 91 

In [None]:
!autofaiss build_index --embeddings="/content/dataset/train_embedding/img_emb" \
                    --index_path="/content/knn.index" \
                    --index_infos_path="/content/infos.json" \
                    --metric_type="ip" \
                    --max_index_query_time_ms=10 \
                    --max_index_memory_usage="4GB"

2023-06-18 14:42:24,239 [INFO]: Using 2 omp threads (processes), consider increasing --nb_cores if you have more
2023-06-18 14:42:24,240 [INFO]: Launching the whole pipeline 06/18/2023, 14:42:24
2023-06-18 14:42:24,240 [INFO]: Reading total number of vectors and dimension 06/18/2023, 14:42:24
100% 1/1 [00:00<00:00, 9776.93it/s]
2023-06-18 14:42:24,569 [INFO]: There are 91 embeddings of dim 512
2023-06-18 14:42:24,575 [INFO]: >>> Finished "Reading total number of vectors and dimension" in 0.3298 secs
2023-06-18 14:42:24,576 [INFO]: 	Compute estimated construction time of the index 06/18/2023, 14:42:24
2023-06-18 14:42:24,577 [INFO]: 		-> Train: 16.7 minutes
2023-06-18 14:42:24,577 [INFO]: 		-> Add: 0.0 seconds
2023-06-18 14:42:24,577 [INFO]: 		Total: 16.7 minutes
2023-06-18 14:42:24,584 [INFO]: 	>>> Finished "Compute estimated construction time of the index" in 0.0007 secs
2023-06-18 14:42:24,585 [INFO]: 	Checking that your have enough memory available to create the index 06/18/2023, 14

**Search**

In [None]:
import faiss
import torch
import clip
import os
import pandas as pd

In [None]:
df = pd.read_parquet("/content/dataset/train_embedding/metadata/metadata_0.parquet")
image_list = df["image_path"].tolist()
ind = faiss.read_index("/content/knn.index")

In [None]:
device = "cuda" if torch.cuda.is_available() else "cpu"
model, preprocess = clip.load("ViT-B/32", device=device)

**Set up val dataset**

In [None]:
trans = transforms.Compose([
    np.float32,
    # transforms.ToTensor(),
    fixed_image_standardization
])

val_dataset = datasets.ImageFolder('/content/dataset/val_cropped')
test_class_idx = val_dataset.class_to_idx

In [None]:
idx_to_class = {v: k for k, v in test_class_idx.items()}
print(idx_to_class)

target_names = list(map(lambda i: i[1], sorted(idx_to_class.items(), key=lambda i: i[0])))
print(target_names)

{0: 'ben_afflek', 1: 'elton_john', 2: 'jerry_seinfeld', 3: 'madonna', 4: 'mindy_kaling'}
['ben_afflek', 'elton_john', 'jerry_seinfeld', 'madonna', 'mindy_kaling']


In [None]:
from PIL import Image
from collections import defaultdict

In [None]:
image_tensor = preprocess(Image.open('/content/dataset/val/ben_afflek/httpabsolumentgratuitfreefrimagesbenaffleckjpg.jpg'))
image_features = model.encode_image(torch.unsqueeze(image_tensor.to(device), dim=0))
image_features /= image_features.norm(dim=-1, keepdim=True)
image_embeddings = image_features.cpu().detach().numpy().astype('float32')
D, I = ind.search(image_embeddings, 5)

In [None]:
labels = []
preds = []
k = 3
for x,y in val_dataset:
    image_tensor = preprocess(x)
    image_features = model.encode_image(torch.unsqueeze(image_tensor.to(device), dim=0))
    image_features /= image_features.norm(dim=-1, keepdim=True)
    image_embeddings = image_features.cpu().detach().numpy().astype('float32')
    D, I = ind.search(image_embeddings, k)
    # print(D,I)
    i_candidate = defaultdict(int)
    for D_ele,I_ele in zip(D[0],I[0]):
      if D_ele > 0.7:
        name = image_list[I_ele].split('/')[-2]
        i_candidate[test_class_idx[name]] += 1
    key_with_max_value = max(i_candidate, key=lambda k: i_candidate[k])

    preds.append(key_with_max_value)
    labels.append(y)

In [None]:
from sklearn import metrics
from sklearn.metrics import accuracy_score

In [None]:
target_names = list(map(lambda i: i[1], sorted(idx_to_class.items(), key=lambda i: i[0])))
print(metrics.classification_report(labels, preds, target_names=target_names))

                precision    recall  f1-score   support

    ben_afflek       0.83      1.00      0.91         5
    elton_john       1.00      1.00      1.00         4
jerry_seinfeld       1.00      0.71      0.83         7
       madonna       1.00      1.00      1.00         5
  mindy_kaling       0.83      1.00      0.91         5

      accuracy                           0.92        26
     macro avg       0.93      0.94      0.93        26
  weighted avg       0.94      0.92      0.92        26



**Facenet + Faiss**

In [None]:
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

mtcnn = MTCNN(
    image_size=160, margin=0, min_face_size=20,
    thresholds=[0.6, 0.7, 0.7], factor=0.709, post_process=True,
    device=device
)

facenet = InceptionResnetV1(pretrained='vggface2').eval()
facenet = facenet.to(device)

In [None]:
# import numpy as np
# data = np.load('/content/dataset/train_embedding/img_emb/img_emb_0.npy')
#data.shape

In [None]:
embeddings, labels, class_to_idx = X_train, y_train, X_train_class_idx

In [None]:
!rm -r facenet_emb
!mkdir facenet_emb

rm: cannot remove 'facenet_emb': No such file or directory


In [None]:
np.save('/content/facenet_emb/facenet.npy', embeddings)

In [None]:
!autofaiss build_index --embeddings="/content/facenet_emb" \
                    --index_path="/content/knn_facenet.index" \
                    --index_infos_path="/content/infos_facenet.json" \
                    --metric_type="ip" \
                    --max_index_query_time_ms=10 \
                    --max_index_memory_usage="4GB"

2023-06-18 15:38:47,479 [INFO]: Using 2 omp threads (processes), consider increasing --nb_cores if you have more
2023-06-18 15:38:47,479 [INFO]: Launching the whole pipeline 06/18/2023, 15:38:47
2023-06-18 15:38:47,479 [INFO]: Reading total number of vectors and dimension 06/18/2023, 15:38:47
  0% 0/1 [00:00<?, ?it/s]100% 1/1 [00:00<00:00, 14122.24it/s]
2023-06-18 15:38:47,519 [INFO]: There are 93 embeddings of dim 512
2023-06-18 15:38:47,519 [INFO]: >>> Finished "Reading total number of vectors and dimension" in 0.0394 secs
2023-06-18 15:38:47,519 [INFO]: 	Compute estimated construction time of the index 06/18/2023, 15:38:47
2023-06-18 15:38:47,519 [INFO]: 		-> Train: 16.7 minutes
2023-06-18 15:38:47,519 [INFO]: 		-> Add: 0.0 seconds
2023-06-18 15:38:47,519 [INFO]: 		Total: 16.7 minutes
2023-06-18 15:38:47,519 [INFO]: 	>>> Finished "Compute estimated construction time of the index" in 0.0002 secs
2023-06-18 15:38:47,519 [INFO]: 	Checking that your have enough memory available to cre

In [None]:
ind = faiss.read_index("/content/knn_facenet.index")

In [None]:
print(labels)

[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4]


In [None]:
preds = []
k = 5
for image_emb in X_test:
    D, I = ind.search(image_emb[np.newaxis, :], k)
    print(D,I)
    i_candidate = defaultdict(int)
    for D_ele,I_ele in zip(D[0],I[0]):
      # if D_ele > 0.7:
        cls = labels[I_ele]
        i_candidate[cls] += 1
    # try:
    key_with_max_value = max(i_candidate, key=lambda k: i_candidate[k])
    # except:
    #   print("error")
    #   key_with_max_value = 0

    preds.append(key_with_max_value)

[[0.741638   0.6051879  0.5813914  0.5583176  0.55315816]] [[ 9  0  8  1 11]]
[[0.7357062  0.72427213 0.7227762  0.6088511  0.57897544]] [[ 4  1  7 13  6]]
[[0.9597626  0.5537939  0.5434317  0.5231418  0.50185114]] [[ 5  9  7  1 11]]
[[0.4220538  0.34983587 0.29222882 0.25649747 0.2458143 ]] [[ 0  3  9 23  5]]
[[0.6477177  0.5769872  0.5712532  0.53599465 0.5292058 ]] [[10  9  5 11  0]]
[[0.8913976  0.8131018  0.7927766  0.69853055 0.68443555]] [[21 19 28 15 16]]
[[0.54405385 0.5172003  0.5000552  0.49620724 0.4705905 ]] [[30 57 29 54 69]]
[[0.72608554 0.7127732  0.6990677  0.66552025 0.6223527 ]] [[19 21 28 29 17]]
[[0.80854964 0.7383498  0.7068225  0.69533753 0.64965105]] [[21 16 19 28 29]]
[[0.56915426 0.5687758  0.54122424 0.49597368 0.4886323 ]] [[28 16 27 21 19]]
[[0.77401054 0.76777744 0.76656365 0.7366657  0.72549856]] [[31 49 35 48 45]]
[[0.7221998 0.7129574 0.6962161 0.680742  0.6761063]] [[37 45 43 40 33]]
[[0.7972399  0.79268676 0.79146916 0.78686774 0.7819207 ]] [[49 40 31

In [None]:
preds

[0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4]

In [None]:
target_names = list(map(lambda i: i[1], sorted(idx_to_class.items(), key=lambda i: i[0])))
print(metrics.classification_report(y_test, preds, target_names=target_names))

                precision    recall  f1-score   support

    ben_afflek       1.00      1.00      1.00         5
    elton_john       1.00      0.80      0.89         5
jerry_seinfeld       1.00      1.00      1.00         5
       madonna       0.83      1.00      0.91         5
  mindy_kaling       1.00      1.00      1.00         5

      accuracy                           0.96        25
     macro avg       0.97      0.96      0.96        25
  weighted avg       0.97      0.96      0.96        25



In [None]:
preds

[0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4]

**Alignment**

In [15]:
!pip install --upgrade imutils

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [16]:
!pip install dlib

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [17]:
from imutils import face_utils
import numpy as np
import argparse
import imutils
import dlib
import cv2
from google.colab.patches import cv2_imshow
from imutils.face_utils import FaceAligner
from imutils.face_utils import rect_to_bb
import math
import matplotlib.pyplot as plt

In [18]:
detector = dlib.get_frontal_face_detector()
predictor = dlib.shape_predictor('/content/drive/MyDrive/faceRecognition/dlib/shape_predictor_68_face_landmarks.dat')
predictor2 = dlib.shape_predictor('/content/drive/MyDrive/faceRecognition/dlib/shape_predictor_81_face_landmarks.dat')

In [19]:
def facial_landmarks(image):
    try:
        grayscale_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    except:
        grayscale_image = image

    # array of rectangles surrounding faces detected
    rectangles = detector(grayscale_image, 1)

    # If at least one face is detected, find its landmarks
    if len(rectangles) > 0:
        # Get 68 landmark points
        faceLandmarks = predictor(grayscale_image, rectangles[0])
        faceLandmarks = face_utils.shape_to_np(faceLandmarks)
        return faceLandmarks,rectangles
    else:
        return None

In [20]:
import cv2
import numpy as np
from skimage import transform as trans

__file__ = 'test'

# reference facial points, a list of coordinates (x,y)
REFERENCE_FACIAL_POINTS = [
    [30.29459953, 51.69630051],
    [65.53179932, 51.50139999],
    [48.02519989, 71.73660278],
    [33.54930115, 92.3655014],
    [62.72990036, 92.20410156]
]

DEFAULT_CROP_SIZE = (96, 112)


class FaceWarpException(Exception):
    def __str__(self):
        return 'In File {}:{}'.format(
            __file__, super.__str__(self))


def get_reference_facial_points(output_size=None,
                                inner_padding_factor=0.0,
                                outer_padding=(0, 0),
                                default_square=False):
    tmp_5pts = np.array(REFERENCE_FACIAL_POINTS)
    tmp_crop_size = np.array(DEFAULT_CROP_SIZE)

    # 0) make the inner region a square
    if default_square:
        size_diff = max(tmp_crop_size) - tmp_crop_size
        tmp_5pts += size_diff / 2
        tmp_crop_size += size_diff

    # print('---> default:')
    # print('              crop_size = ', tmp_crop_size)
    # print('              reference_5pts = ', tmp_5pts)

    if (output_size and
            output_size[0] == tmp_crop_size[0] and
            output_size[1] == tmp_crop_size[1]):
        print('output_size == DEFAULT_CROP_SIZE {}: return default reference points'.format(tmp_crop_size))
        return tmp_5pts

    if (inner_padding_factor == 0 and
            outer_padding == (0, 0)):
        if output_size is None:
            print('No paddings to do: return default reference points')
            return tmp_5pts
        else:
            raise FaceWarpException(
                'No paddings to do, output_size must be None or {}'.format(tmp_crop_size))

    # check output size
    if not (0 <= inner_padding_factor <= 1.0):
        raise FaceWarpException('Not (0 <= inner_padding_factor <= 1.0)')

    if ((inner_padding_factor > 0 or outer_padding[0] > 0 or outer_padding[1] > 0)
            and output_size is None):
        output_size = tmp_crop_size * \
                      (1 + inner_padding_factor * 2).astype(np.int32)
        output_size += np.array(outer_padding)
        print('              deduced from paddings, output_size = ', output_size)

    if not (outer_padding[0] < output_size[0]
            and outer_padding[1] < output_size[1]):
        raise FaceWarpException('Not (outer_padding[0] < output_size[0]'
                                'and outer_padding[1] < output_size[1])')

    # 1) pad the inner region according inner_padding_factor
    # print('---> STEP1: pad the inner region according inner_padding_factor')
    if inner_padding_factor > 0:
        size_diff = tmp_crop_size * inner_padding_factor * 2
        tmp_5pts += size_diff / 2
        tmp_crop_size += np.round(size_diff).astype(np.int32)

    # print('              crop_size = ', tmp_crop_size)
    # print('              reference_5pts = ', tmp_5pts)

    # 2) resize the padded inner region
    # print('---> STEP2: resize the padded inner region')
    size_bf_outer_pad = np.array(output_size) - np.array(outer_padding) * 2
    # print('              crop_size = ', tmp_crop_size)
    # print('              size_bf_outer_pad = ', size_bf_outer_pad)

    if size_bf_outer_pad[0] * tmp_crop_size[1] != size_bf_outer_pad[1] * tmp_crop_size[0]:
        raise FaceWarpException('Must have (output_size - outer_padding)'
                                '= some_scale * (crop_size * (1.0 + inner_padding_factor)')

    scale_factor = size_bf_outer_pad[0].astype(np.float32) / tmp_crop_size[0]
    # print('              resize scale_factor = ', scale_factor)
    tmp_5pts = tmp_5pts * scale_factor
    #    size_diff = tmp_crop_size * (scale_factor - min(scale_factor))
    #    tmp_5pts = tmp_5pts + size_diff / 2
    tmp_crop_size = size_bf_outer_pad
    # print('              crop_size = ', tmp_crop_size)
    # print('              reference_5pts = ', tmp_5pts)

    # 3) add outer_padding to make output_size
    reference_5point = tmp_5pts + np.array(outer_padding)
    tmp_crop_size = output_size
    # print('---> STEP3: add outer_padding to make output_size')
    # print('              crop_size = ', tmp_crop_size)
    # print('              reference_5pts = ', tmp_5pts)
    #
    # print('===> end get_reference_facial_points\n')

    return reference_5point


def get_affine_transform_matrix(src_pts, dst_pts):
    tfm = np.float32([[1, 0, 0], [0, 1, 0]])
    n_pts = src_pts.shape[0]
    ones = np.ones((n_pts, 1), src_pts.dtype)
    src_pts_ = np.hstack([src_pts, ones])
    dst_pts_ = np.hstack([dst_pts, ones])

    A, res, rank, s = np.linalg.lstsq(src_pts_, dst_pts_)

    if rank == 3:
        tfm = np.float32([
            [A[0, 0], A[1, 0], A[2, 0]],
            [A[0, 1], A[1, 1], A[2, 1]]
        ])
    elif rank == 2:
        tfm = np.float32([
            [A[0, 0], A[1, 0], 0],
            [A[0, 1], A[1, 1], 0]
        ])

    return tfm


def warp_and_crop_face(src_img,
                       facial_pts,
                       reference_pts=None,
                       crop_size=(96, 112),
                       align_type='smilarity'):
    if reference_pts is None:
        if crop_size[0] == 96 and crop_size[1] == 112:
            reference_pts = REFERENCE_FACIAL_POINTS
        else:
            default_square = False
            inner_padding_factor = 0
            outer_padding = (0, 0)
            output_size = crop_size

            reference_pts = get_reference_facial_points(output_size,
                                                        inner_padding_factor,
                                                        outer_padding,
                                                        default_square)

    ref_pts = np.float32(reference_pts)
    ref_pts_shp = ref_pts.shape
    if max(ref_pts_shp) < 3 or min(ref_pts_shp) != 2:
        raise FaceWarpException(
            'reference_pts.shape must be (K,2) or (2,K) and K>2')

    if ref_pts_shp[0] == 2:
        ref_pts = ref_pts.T

    src_pts = np.float32(facial_pts)
    src_pts_shp = src_pts.shape
    if max(src_pts_shp) < 3 or min(src_pts_shp) != 2:
        raise FaceWarpException(
            'facial_pts.shape must be (K,2) or (2,K) and K>2')

    if src_pts_shp[0] == 2:
        src_pts = src_pts.T

    if src_pts.shape != ref_pts.shape:
        raise FaceWarpException(
            'facial_pts and reference_pts must have the same shape')

    if align_type == 'cv2_affine':
        tfm = cv2.getAffineTransform(src_pts[0:3], ref_pts[0:3])
    #        print('cv2.getAffineTransform() returns tfm=\n' + str(tfm))
    elif align_type == 'affine':
        tfm = get_affine_transform_matrix(src_pts, ref_pts)
    #        print('get_affine_transform_matrix() returns tfm=\n' + str(tfm))
    else:
        # tfm = get_similarity_transform_for_cv2(src_pts, ref_pts)
        tform = trans.SimilarityTransform()
        tform.estimate(src_pts, ref_pts)
        tfm = tform.params[0:2, :]

    face_img = cv2.warpAffine(src_img, tfm, (crop_size[0], crop_size[1]))

    return face_img

In [24]:
!mkdir dataset/train_crop
!mkdir dataset/val_crop

In [32]:
for path in os.listdir('/content/dataset/train'):
  # try:
  #   os.chdir(f'/content/dataset/train_crop/{path}')
  # except:
  os.mkdir(f'/content/dataset/train_crop/{path}')
  os.chdir(f'/content/dataset/train_crop/{path}')
  for link in os.listdir('/content/dataset/train/' + path):
    try:
      originalImage = cv2.imread(f'/content/dataset/train/{path}/{link}')
      landmarks,rec = facial_landmarks(originalImage)
      (x, y, w, h) = face_utils.rect_to_bb(rec[0])
      a = [landmarks[17],landmarks[26],landmarks[33],landmarks[4],landmarks[12]]
      img = warp_and_crop_face(originalImage,a,reference_pts=None,crop_size=(96,112),align_type='')
      cv2.imwrite(f'{link}', img)
    except:
      continue

In [33]:
for path in os.listdir('/content/dataset/val'):
  # try:
  #   os.chdir(f'/content/dataset/train_crop/{path}')
  # except:
  os.mkdir(f'/content/dataset/val_crop/{path}')
  os.chdir(f'/content/dataset/val_crop/{path}')
  for link in os.listdir('/content/dataset/val/' + path):
    try:
      originalImage = cv2.imread(f'/content/dataset/val/{path}/{link}')
      landmarks,rec = facial_landmarks(originalImage)
      (x, y, w, h) = face_utils.rect_to_bb(rec[0])
      a = [landmarks[17],landmarks[26],landmarks[33],landmarks[4],landmarks[12]]
      img = warp_and_crop_face(originalImage,a,reference_pts=None,crop_size=(96,112),align_type='')
      cv2.imwrite(f'{link}', img)
    except:
      continue

In [34]:
dataset_train = datasets.ImageFolder(root="/content/dataset/train_crop")
dataset_val = datasets.ImageFolder(root="/content/dataset/val_crop")

X_train, y_train = dataset_to_embeddings(dataset_train, mtcnn, facenet)
X_test, y_test = dataset_to_embeddings(dataset_val, mtcnn, facenet)

X_train_class_idx = dataset_train.class_to_idx
X_test_class_idx = dataset_val.class_to_idx

embeddings, labels, class_to_idx = X_train, y_train, X_train_class_idx

/content/dataset/train_crop/ben_afflek/httpcsvkmeuaeccjpg.jpg
/content/dataset/train_crop/ben_afflek/httpimagesfandangocomrImageRendererredesignstaticimgnoxportraitjpgpcpcpcimagesmasterrepositoryperformerimagespjpg.jpg
/content/dataset/train_crop/ben_afflek/httpssmediacacheakpinimgcomxdbbdbbbececacdecdcdfjpg.jpg
/content/dataset/train_crop/ben_afflek/httpssmediacacheakpinimgcomxdfdfadcfeabjpg.jpg
/content/dataset/train_crop/ben_afflek/httpssmediacacheakpinimgcomxedaedabcbefbcbabbjpg.jpg
/content/dataset/train_crop/ben_afflek/httpssmediacacheakpinimgcomxeebdfdbaaajpg.jpg
/content/dataset/train_crop/ben_afflek/httpsuploadwikimediaorgwikipediacommonsthumbddBenAffleckbyGageSkidmorejpgpxBenAffleckbyGageSkidmorejpg.jpg
/content/dataset/train_crop/ben_afflek/httptrwebimgacstanetcxbdddmediasnmediajpg.jpg
/content/dataset/train_crop/ben_afflek/httpwwwaceshowbizcomimagesphotobenaffleckjpg.jpg
/content/dataset/train_crop/ben_afflek/httpwwwallposterscomimagesPostersPFjpg.jpg
/content/dataset/train

In [42]:
!rm -r /content/facenet_emb
!mkdir /content/facenet_emb

rm: cannot remove '/content/facenet_emb': No such file or directory


In [43]:
np.save('/content/facenet_emb/facenet.npy', embeddings)

In [45]:
!pip install autofaiss

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting autofaiss
  Downloading autofaiss-2.15.8-py3-none-any.whl (70 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m70.1/70.1 kB[0m [31m5.3 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting fire<0.5.0,>=0.4.0 (from autofaiss)
  Downloading fire-0.4.0.tar.gz (87 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m87.7/87.7 kB[0m [31m6.8 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting embedding-reader<2,>=1.5.1 (from autofaiss)
  Downloading embedding_reader-1.5.1-py3-none-any.whl (18 kB)
Collecting faiss-cpu<2,>=1 (from autofaiss)
  Downloading faiss_cpu-1.7.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (17.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m17.6/17.6 MB[0m [31m60.8 MB/s[0m eta [36m0:00:00[0m
Building wheels for collected packages: fire
  Buil

In [46]:
!autofaiss build_index --embeddings="/content/facenet_emb" \
                    --index_path="/content/knn_facenet.index" \
                    --index_infos_path="/content/infos_facenet.json" \
                    --metric_type="ip" \
                    --max_index_query_time_ms=10 \
                    --max_index_memory_usage="4GB"

2023-06-19 08:59:51,070 [INFO]: Using 2 omp threads (processes), consider increasing --nb_cores if you have more
2023-06-19 08:59:51,075 [INFO]: Launching the whole pipeline 06/19/2023, 08:59:51
2023-06-19 08:59:51,075 [INFO]: Reading total number of vectors and dimension 06/19/2023, 08:59:51
  0% 0/1 [00:00<?, ?it/s]100% 1/1 [00:00<00:00, 16644.06it/s]
2023-06-19 08:59:51,118 [INFO]: There are 88 embeddings of dim 512
2023-06-19 08:59:51,118 [INFO]: >>> Finished "Reading total number of vectors and dimension" in 0.0435 secs
2023-06-19 08:59:51,119 [INFO]: 	Compute estimated construction time of the index 06/19/2023, 08:59:51
2023-06-19 08:59:51,119 [INFO]: 		-> Train: 16.7 minutes
2023-06-19 08:59:51,120 [INFO]: 		-> Add: 0.0 seconds
2023-06-19 08:59:51,120 [INFO]: 		Total: 16.7 minutes
2023-06-19 08:59:51,120 [INFO]: 	>>> Finished "Compute estimated construction time of the index" in 0.0003 secs
2023-06-19 08:59:51,120 [INFO]: 	Checking that your have enough memory available to cre

In [52]:
import faiss
import torch
import os
import pandas as pd
from collections import defaultdict

In [49]:
ind = faiss.read_index("/content/knn_facenet.index")

In [50]:
print(labels)

[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4]


In [67]:
preds = []
k = 5
for image_emb in X_test:
    D, I = ind.search(image_emb[np.newaxis, :], k)
    print(D,I)
    i_candidate = defaultdict(int)
    for D_ele,I_ele in zip(D[0],I[0]):
      # if D_ele > 0.7:
        cls = labels[I_ele]
        i_candidate[cls] += 1
    # try:
    key_with_max_value = max(i_candidate, key=lambda k: i_candidate[k])
    # except:
    #   print("error")
    #   key_with_max_value = 0

    preds.append(key_with_max_value)

[[0.63068545 0.6127332  0.5875063  0.57932496 0.51305753]] [[8 9 0 5 1]]
[[0.73820114 0.69468373 0.6858582  0.6767702  0.65058136]] [[ 6  1  9 12  4]]
[[0.8709558  0.61281395 0.6028794  0.57177866 0.5668622 ]] [[ 5  1  8  7 11]]
[[0.6204376  0.61376035 0.56622636 0.53639716 0.510901  ]] [[ 5  9  8 12  7]]
[[0.77670765 0.7425232  0.73400223 0.6831244  0.6380301 ]] [[19 18 14 25 16]]
[[0.6597472  0.62895    0.6266669  0.62017846 0.61965   ]] [[27 26 17 25 18]]
[[0.82673585 0.80561006 0.7178046  0.7118243  0.6581554 ]] [[18 19 25 26 14]]
[[0.7062645  0.6421448  0.60937154 0.60728216 0.5656978 ]] [[25 15 14 19 18]]
[[0.69252634 0.64726985 0.6102964  0.6094415  0.53007907]] [[18 19 14 25 15]]
[[0.76592195 0.7546424  0.7489149  0.7452918  0.6926686 ]] [[32 46 28 45 30]]
[[0.70690143 0.6419416  0.6360741  0.62480783 0.6215017 ]] [[30 42 28 44 40]]
[[0.8042834  0.79666233 0.77691996 0.7667087  0.7624382 ]] [[30 33 40 32 42]]
[[0.80708575 0.7890631  0.7653747  0.73917973 0.72221076]] [[28 31 36

In [55]:
from sklearn import metrics
from sklearn.metrics import accuracy_score

In [63]:
test_class_idx = dataset_val.class_to_idx

idx_to_class = {v: k for k, v in test_class_idx.items()}
print(idx_to_class)

target_names = list(map(lambda i: i[1], sorted(idx_to_class.items(), key=lambda i: i[0])))
print(target_names)

{0: 'ben_afflek', 1: 'elton_john', 2: 'jerry_seinfeld', 3: 'madonna', 4: 'mindy_kaling'}
['ben_afflek', 'elton_john', 'jerry_seinfeld', 'madonna', 'mindy_kaling']


In [68]:
target_names = list(map(lambda i: i[1], sorted(idx_to_class.items(), key=lambda i: i[0])))
print(metrics.classification_report(y_test, preds, target_names=target_names))

                precision    recall  f1-score   support

    ben_afflek       1.00      1.00      1.00         4
    elton_john       0.83      1.00      0.91         5
jerry_seinfeld       1.00      1.00      1.00         5
       madonna       1.00      0.80      0.89         5
  mindy_kaling       1.00      1.00      1.00         5

      accuracy                           0.96        24
     macro avg       0.97      0.96      0.96        24
  weighted avg       0.97      0.96      0.96        24

