# Facial image clustering

In [102]:
import torch
import pandas as pd
import matplotlib.pyplot as plt

from pprint import pprint
from glob import glob
from tqdm import tqdm
from PIL import Image

from facenet_pytorch import InceptionResnetV1, MTCNN

from torchvision import transforms, datasets
from torch.utils.data import DataLoader, Dataset

from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
from sklearn.cluster import DBSCAN

In [103]:
class GlobImageDataset(Dataset):
    def __init__(self, glob_pattern, recursive=False, transform=None):
        self.files = glob(glob_pattern, recursive=recursive)
        self.transform = transform

    def __len__(self):
        return len(self.files)

    def __getitem__(self, idx):
        with open(self.files[idx], 'rb') as f:
            img = Image.open(f)
            if self.transform is not None:
                img = self.transform(img)

        return img, self.files[idx]

Load pretrained facial recognision model (InceptionResnetV1) trained on the vggface2 dataset

In [104]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(f'Running on device: {device}')


mtcnn = MTCNN(
    image_size=160, margin=0, min_face_size=20,
    thresholds=[0.6, 0.7, 0.7], factor=0.709, post_process=True,
    device=device)

resnet = InceptionResnetV1(pretrained='vggface2', device=device).eval()

Running on device: cpu


In [105]:
dataset = GlobImageDataset('../../datasets/fr/**/*.jpg',  recursive=True, transform=transforms.ToTensor())
dataloader = DataLoader(dataset)

In [106]:
embeddings = []
for img, path in tqdm(dataloader):
    with torch.no_grad():
        pprint(img.size())
        pprint(img.unsqueeze(0).size())
        img = img.to(device)
        # img_cropped = mtcnn(img)
        img_cropped = img
        img_embedding = resnet(img_cropped.unsqueeze(0))
        embeddings.append(img_embedding)

  0%|          | 0/2680 [00:00<?, ?it/s]torch.Size([1, 3, 512, 512])
torch.Size([1, 1, 3, 512, 512])



RuntimeError: Expected 4-dimensional input for 4-dimensional weight [32, 3, 3, 3], but got 5-dimensional input of size [1, 1, 3, 512, 512] instead

In [None]:
tf_img = lambda i: ToTensor()(i).unsqueeze(0)

list_embs = []
for index, (img, path) in tqdm(enumerate(dataloader)):
    if index >= 30: break

    pprint(path)

    img = tf_img(Image.open(path[0])).to(device)
    # img_cropped = mtcnn(img)
    img_embedding = resnet(img_cropped.unsqueeze(0))
    list_embs.append(img_embedding)

In [None]:
def scatter_thumbnails(data, images, zoom=0.12, colors=None):
    assert len(data) == len(images)

    # reduce embedding dimentions to 2
    x = PCA(n_components=2).fit_transform(data) if len(data[0]) > 2 else data

    # create a scatter plot.
    f = plt.figure(figsize=(22, 15))
    ax = plt.subplot(aspect='equal')
    sc = ax.scatter(x[:,0], x[:,1], s=4)
    _ = ax.axis('off')
    _ = ax.axis('tight')

    # add thumbnails :)
    from matplotlib.offsetbox import OffsetImage, AnnotationBbox
    for i in range(len(images)):
        image = plt.imread(images[i])
        im = OffsetImage(image, zoom=zoom)
        bboxprops = dict(edgecolor=colors[i]) if colors is not None else None
        ab = AnnotationBbox(im, x[i], xycoords='data',
                            frameon=(bboxprops is not None),
                            pad=0.02,
                            bboxprops=bboxprops)
        ax.add_artist(ab)
    return ax

In [None]:
# x = PCA(n_components=50).fit_transform(list_embs)
x = TSNE(perplexity=50, n_components=3).fit_transform(list_embs)

_ = scatter_thumbnails(x, face_files, zoom=0.2)
plt.title('3D t-Distributed Stochastic Neighbor Embedding')
plt.show()

In [None]:
import os
cwd = os.getcwd()
print(cwd)