In [None]:
from utils import TripletImageLoader
from torchvision import datasets, transforms, models
from torch.autograd import Variable
import torch.nn as nn
import torch.backends.cudnn as cudnn
import torch
import dlib
import numpy as np
from utils import prepareOpenFace
from utils import send_query, send_large_query

import time
import datetime
from IPython.display import display
from IPython.display import Image as im
from PIL import Image

In [None]:
import os
# Expose only GPU 0 to CUDA. The first visible `.cuda()` call happens in a later cell.
os.environ['CUDA_VISIBLE_DEVICES'] = '0'
# Switch on cuDNN's benchmark mode (see the torch.backends.cudnn documentation).
cudnn.benchmark = True

## Upload filenames to the database

In [None]:
def mkstamp(date):
    """Turn a dot-separated timestamp string into whole milliseconds.

    Two layouts are handled:

    * exactly four dot-separated fields: the third field is read as whole
      seconds and the first three characters of the fourth as milliseconds;
    * anything else is parsed with ``%Y-%m-%d.%H-%M-%S.%f`` and converted
      through ``time.mktime`` (i.e. interpreted as local time).

    Args:
        date: the timestamp text.

    Returns:
        int: the timestamp in milliseconds, truncated to an integer.

    Raises:
        ValueError: if the text has the wrong layout for the branch taken.
    """
    fields = date.split('.')
    if len(fields) != 4:
        parsed = datetime.datetime.strptime(date, "%Y-%m-%d.%H-%M-%S.%f")
        millis = time.mktime(parsed.timetuple()) + (parsed.microsecond / 1000000.0)
        millis *= 1000
    else:
        seconds_field, fraction_field = fields[-2], fields[-1]
        millis = int(seconds_field) * 1000 + int(fraction_field[:3])

    return int(millis)

def get_ts(path):
    """Derive the millisecond timestamp encoded in an image file name.

    The stamp is the part of the base name that starts at the first
    occurrence of ``'2018-'`` and stops before the last five characters; it
    is passed to ``mkstamp`` for conversion.

    NOTE(review): the ``-5`` cut presumably strips the file extension. With a
    four-character ``.jpg`` suffix it also drops the final digit of the
    fractional part; confirm against the real file names.

    Args:
        path: path of the image file.

    Returns:
        Whatever ``mkstamp`` returns for the extracted stamp.
    """
    filename = os.path.basename(path)
    stamp_start = filename.find('2018-')
    return mkstamp(filename[stamp_start:-5])

# Insert every path listed in aligned.txt, together with its timestamp, into
# the ALIGNED table, sending `batch_size` INSERT statements per round trip.
batch_size = 4500
# Context manager so the file handle is closed as soon as the list is read.
with open('aligned.txt') as paths_file:
    paths = paths_file.read().splitlines()
for idx in range(0, len(paths), batch_size):
    # NOTE(review): the path is interpolated straight into the SQL text, so a
    # path containing a double quote would break the statement. Switch to bound
    # parameters if send_query supports them.
    FULL_QUERY = ''.join(
        'INSERT INTO ALIGNED(PATH, TIMESTAMP) VALUES("%s", %d); ' % (path, get_ts(path))
        for path in paths[idx:idx+batch_size]
    )
    send_query(FULL_QUERY, verbose=False)
    # Progress: rows sent so far out of the total.
    print('[%5d : %5d]'%(min(idx+batch_size, len(paths)), len(paths)))


## Download complete, sorted path database

In [None]:
# Fetch every stored path, ordered by aligned_ID, then keep just the path
# strings in that order.
query_result = send_large_query(
    'SELECT path FROM aligned ORDER BY aligned_ID',
    batch_size=50000,
    verbose=False,
)
database_paths = [row['path'] for row in query_result]

## Define a batch image loader that _FETCHES_ the pre-aligned faces

In [None]:
class customDataset(torch.utils.data.Dataset):
    """Dataset that loads images from a list of file paths.

    Args:
        paths: sequence of image file paths; item ``i`` is loaded from
            ``paths[i]``.
        transform: optional callable applied to the loaded PIL image.
    """

    def __init__(self, paths, transform=None):
        super().__init__()
        self.paths = paths
        self.transform = transform

    def __len__(self):
        """Return the number of paths."""
        return len(self.paths)

    def __getitem__(self, idx):
        """Load image ``idx`` as RGB and return it, transformed if a transform is set."""
        # The context manager closes the file handle promptly. convert()
        # returns a loaded copy, so the image stays usable afterwards.
        # Forcing RGB keeps the channel count uniform across a batch
        # (presumably the network expects 3-channel input; TODO confirm).
        with Image.open(self.paths[idx]) as source:
            img = source.convert('RGB')
        if self.transform is not None:
            img = self.transform(img)

        return img
    
# Dataset over every database path: resize to 96, centre-crop to 96x96,
# convert to a tensor. Batches are served in database order (no shuffling).
dataset = customDataset(
    database_paths,
    transforms.Compose([transforms.Resize(96), transforms.CenterCrop(96), transforms.ToTensor()]),
)
dataloader = torch.utils.data.DataLoader(
    dataset,
    batch_size=1024,
    shuffle=False,
    num_workers=10,
)

## Load the face-embedding network
### make sure that it will be optimized for inference

In [None]:
# Build the network with the project helper and load the saved weights into it.
net = prepareOpenFace()
net.load_state_dict(torch.load('weights/openface.pth'))
# Inference setup (assuming `net` is a torch.nn.Module): evaluation mode, move
# to the GPU, and mark every parameter as not requiring gradients.
net = net.eval()
net.cuda()
for p in net.parameters():
    p.requires_grad = False
# Same flag as set in the configuration cell; repeated here.
cudnn.benchmark = True

In [None]:
# Embed every batch from the dataloader and collect the first network output.
emb_chunks = []
for batch_idx, imgs in enumerate(dataloader, 1):
    #torch.cuda.empty_cache()
    X = Variable(imgs, volatile=True, requires_grad=False).cuda()
    emb_chunks.append(net(X)[0])
    print('[%5d|%5d]'%(batch_idx, len(dataloader)))

# Concatenate once at the end: calling torch.cat inside the loop re-copied all
# previously collected embeddings on every batch. `embs` stays None when the
# loader yields nothing, as before.
embs = torch.cat(emb_chunks) if emb_chunks else None
# Drop the per-batch references so only the concatenated result is kept alive.
del emb_chunks
    

## Save the embeddings database

In [None]:
# Bundle the dataset paths with the embeddings (moved to the CPU) and write
# them to disk in a single file.
embedding_database = dict(paths=dataset.paths, embeddings=embs.cpu())
torch.save(embedding_database, 'ALIGNED_EMBEDDING_DATABASE.pth')

In [None]:
# NOTE(review): `thumb_iter` is not defined in any visible cell. Presumably an
# iterator over image batches left over from a removed cell; confirm before running.
X = torch.autograd.Variable(next(thumb_iter).cuda(), volatile=True, requires_grad=False)
# The network output is unpacked into two values. The names suggest 128- and
# 736-dimensional embeddings; TODO confirm against prepareOpenFace.
embeddings_128, embeddings_736 = net(X)

In [None]:
# Use the first network output as the `embeddings` read by the plotting helpers.
embeddings = embeddings_128

In [None]:
def getThumb(x):
    """Convert an image tensor into a PIL image for display.

    Args:
        x: Variable/tensor exposing ``.data``. Its first axis is moved last by
            the transpose, i.e. it is treated as (channel, row, column).
            Values are presumably in [0, 1] (e.g. from ``ToTensor``); TODO confirm.

    Returns:
        The PIL image built from the rescaled uint8 array.
    """
    pixels = 255 * x.data.cpu().numpy().transpose(1, 2, 0)
    # Round and clip before the cast: a bare uint8 cast truncates (a value
    # just below an integer loses a whole level) and wraps out-of-range values.
    thumb = np.clip(np.rint(pixels), 0, 255).astype('uint8')
    return Image.fromarray(thumb)

In [None]:
def plotwithMargin(anchor_idx, margin=0.1, only_correct=True):
    """Print each sample's distance to an anchor embedding and display thumbnails.

    Reads the notebook-level ``embeddings`` and ``X``; sample ``i`` of ``X`` is
    paired with row ``i`` of ``embeddings``.

    Args:
        anchor_idx: row of ``embeddings`` used as the anchor.
        margin: threshold on the mean squared difference to the anchor.
        only_correct: when True, display only the thumbnails whose distance is
            below ``margin``; when False, display every thumbnail.
    """
    anchor_embedding = embeddings[anchor_idx].expand_as(embeddings)
    distance = ((embeddings-anchor_embedding)**2).mean(-1)
    for i, (x, d) in enumerate(zip(X, distance)):
        value = d.data[0]
        within_margin = value < margin
        print(i, value, within_margin)
        # Previously nothing was displayed at all when only_correct was False.
        if within_margin or not only_correct:
            display(getThumb(x))

In [None]:
# Scratch expression: subtracts a (1, 128) tensor of ones (moved to the GPU)
# from the raw embedding data. The result is only displayed, not stored.
embs.data - torch.ones(1, 128).cuda()

In [None]:
# Inspect the samples around anchor 250 with a distance margin of 0.01.
plotwithMargin(250, 0.01)

In [None]:
def plotKclosest(anchor_idx, k):
    """Print and display the ``k`` samples closest to an anchor embedding.

    Reads the notebook-level ``embeddings`` and ``X``. Closeness is the mean
    squared difference to the anchor row; results are listed nearest first.

    Args:
        anchor_idx: row of ``embeddings`` used as the anchor.
        k: maximum number of neighbours to show. Fewer are shown when fewer
            embeddings exist.
    """
    anchor_embedding = embeddings[anchor_idx].expand_as(embeddings)
    distance = ((embeddings-anchor_embedding)**2).mean(-1)
    idxs = torch.sort(distance)[1][:k]
    # Iterate over what the slice actually returned: range(k) ran past the end
    # (IndexError) whenever k exceeded the number of embeddings.
    for i in range(idxs.size(0)):
        nearest = idxs[i]
        sample_index = nearest.data[0]
        print(i, sample_index, distance[nearest].data[0])
        display(getThumb(X[sample_index]))

In [None]:
# Show the 100 nearest neighbours of index 117000.
# NOTE(review): plotKclosest is defined twice in this notebook, so which
# definition and embedding set this call uses depends on cell execution order; confirm.
plotKclosest(117000, 100)

In [None]:
# Re-assert cuDNN benchmark mode and ask PyTorch to release its cached CUDA memory.
cudnn.benchmark = True
torch.cuda.empty_cache()

In [None]:
# Drop the reference to the current embeddings.
embs = None

In [None]:
from utils import TripletImageLoader
from torchvision import datasets, transforms, models
from torch.autograd import Variable
import torch.nn as nn
import torch.backends.cudnn as cudnn
import torch
import dlib
import numpy as np

from IPython.display import display
from IPython.display import Image as im
from PIL import Image

In [None]:
# Reload the database file written with torch.save earlier in this notebook.
# NOTE(review): torch.load unpickles the file, so only load files from a trusted source.
embedding_database = torch.load('ALIGNED_EMBEDDING_DATABASE.pth')

In [None]:
# Unpack the two entries stored in the database dict.
embs = embedding_database['embeddings']
paths = embedding_database['paths']

In [None]:
def plotKclosest(anchor_idx, k):
    """Print and display the ``k`` database images closest to an anchor embedding.

    Reads the notebook-level ``embs`` and ``dataset.paths``; row ``i`` of
    ``embs`` is displayed by opening ``dataset.paths[i]``. Closeness is the
    mean squared difference to the anchor row; results are listed nearest first.

    NOTE(review): this redefines an earlier function of the same name, and it
    needs ``dataset`` to exist even though the paths are also available from
    the reloaded database; confirm which path list is intended.

    Args:
        anchor_idx: row of ``embs`` used as the anchor.
        k: maximum number of neighbours to show. Fewer are shown when fewer
            embeddings exist.
    """
    anchor_embedding = embs[anchor_idx].expand_as(embs)
    distance = ((embs-anchor_embedding)**2).mean(-1)
    idxs = torch.sort(distance)[1][:k]
    # Iterate over what the slice actually returned: range(k) ran past the end
    # (IndexError) whenever k exceeded the number of embeddings.
    for i in range(idxs.size(0)):
        nearest = idxs[i]
        sample_index = nearest.data[0]
        print(i, sample_index, distance[nearest].data[0])

        display(Image.open(dataset.paths[sample_index]))

In [None]:
# Look up the position of one specific image in the dataset's path list.
# NOTE(review): hard-coded absolute path, so this only works on the machine that has it.
dataset.paths.index('/home/csbotos/video/stable_synced/stable_recordings/camdir-0/2018-04-10/2018-04-10.16/2018-04-10.16-15/aligned/hasface-2018-04-10.16-15-54.000441.jpg')

In [None]:
from utils import send_query, send_large_query

# Offsets subtracted from / added to each log timestamp to form the matching
# window (presumably milliseconds, like the ALIGNED timestamps; TODO confirm).
query_params = dict(minusoffset=2000, plusoffset=2000)

# Join aligned images to log entries of one gate whose window contains the
# image timestamp. The offsets are filled in right away.
SQL_QUERY = '''
    SELECT aligned_ID, path, name FROM aligned JOIN (
        SELECT name, timestamp-{minusoffset} as start, timestamp+{plusoffset} as end 
        FROM Mandacsko_log WHERE gate = "Forgóvilla jobb (kintről) BE") 
    ON aligned.timestamp BETWEEN start AND end;
'''.format(**query_params)

aligned_ID_name_path = send_query(SQL_QUERY)


In [None]:
# Build two lookups from the query rows:
#   aligned_ID_names: aligned_ID (int) -> list of every name matched to it
#   path_aligned_ID:  path -> aligned_ID (int)
# A path can be matched to several users, hence the list of names.
aligned_ID_names = {}
path_aligned_ID = {}
for q in aligned_ID_name_path:
    # Normalise the ID once and use it for both lookup and storage. The old
    # code looked up the raw value but stored under int(...), so with non-int
    # IDs the lookup always missed and earlier names were overwritten.
    row_id = int(q['aligned_ID'])
    aligned_ID_names.setdefault(row_id, []).append(q['name'])
    path_aligned_ID[q['path']] = row_id

In [None]:
# Display the aligned_ID -> names mapping.
aligned_ID_names

In [None]:
# Point the existing dataloader's dataset at the matched paths only. This
# rebinds the dataset's `paths` attribute; the new sample order is the dict's key order.
dataloader.dataset.paths = list(path_aligned_ID.keys())

In [None]:
# Clear the previous embeddings before recomputing them.
embs = None

In [None]:
# Embed every batch from the dataloader and collect the first network output.
emb_chunks = []
for batch_idx, imgs in enumerate(dataloader, 1):
    #torch.cuda.empty_cache()
    X = Variable(imgs, volatile=True, requires_grad=False).cuda()
    emb_chunks.append(net(X)[0])
    print('[%5d|%5d]'%(batch_idx, len(dataloader)))

# Concatenate once at the end: calling torch.cat inside the loop re-copied all
# previously collected embeddings on every batch. Assigning here also makes the
# cell safe to re-run, since it no longer appends to a stale `embs`.
embs = torch.cat(emb_chunks) if emb_chunks else None
# Drop the per-batch references so only the concatenated result is kept alive.
del emb_chunks
    

In [None]:
# Collect, in dataset order, the list of names registered for each path and
# report any path that has no aligned_ID (that would leave `names` shorter
# than the embeddings).
names = []
for path in dataloader.dataset.paths:
    aligned_id = path_aligned_ID.get(path)
    if aligned_id is not None:
        names.append(aligned_ID_names[aligned_id])
    else:
        # State which path failed so the mismatch can be traced.
        print('No aligned_ID registered for path: %s' % path)

In [None]:
# names[i] is the first name registered for the i-th path of path_aligned_ID,
# i.e. the same order the dataset paths were taken from.
names = [aligned_ID_names[path_aligned_ID[path]][0] for path in path_aligned_ID]

In [None]:
# Display the name list.
names

In [None]:
# Pair the name list with the current embeddings.
registered_embeddings = dict(names=names, embeddings=embs)

In [None]:
# Write the names/embeddings pair to disk.
torch.save(registered_embeddings, 'registered_embeddings.tar')

In [None]:
# Number of embeddings.
len(embs)

In [None]:
# Count the aligned IDs that have more than one name attached.
len([x for x in aligned_ID_names.values() if len(x) > 1])

In [None]:
# NOTE(review): `embedded_ID_names` is not defined in any visible cell; presumably
# left over from a removed cell. Confirm or replace before running.
len(embedded_ID_names), len(dataloader.dataset.paths)

In [None]:
# NOTE(review): `embedded_ID_names` is not defined in any visible cell; confirm before running.
embedded_ID_names[117000]

In [None]:
# NOTE(review): `embedded_ID_names` is not defined in any visible cell; confirm before running.
embedded_ID_names

In [None]:
# FIXME(review): unfinished cell. The last entry has a key but no value, so this
# does not parse, and `embedded_ID_aligned_ID` is not defined in any visible
# cell. The intended contents cannot be determined from here.
name_vector_db = {
    'embedded_ID_aligned_ID': embedded_ID_aligned_ID,
    'aligned'
}

In [None]:
# Distance from every embedding to the embedding at index 48666.
anchor_embedding = embs[48666].expand_as(embs)
# Create the metric in this cell: it was previously only defined in a later
# cell, so running the notebook top to bottom raised a NameError here.
pdist = nn.PairwiseDistance(p=2)
distance = pdist(embs, anchor_embedding)

In [None]:
# Print and display every image whose distance to the anchor is below 0.2.
for i, d in enumerate(distance):
    value = d.data[0]
    if value < 0.2:
        print(value, value<0.2)
        display(Image.open(dataset.paths[i]))

In [None]:
# Take embeddings 48637..48644 and build an L2 pairwise-distance module.
anchor_embedding = embs[48637:48645]
pdist = nn.PairwiseDistance(p=2)
# NOTE(review): both arguments are the same tensor, so each row is compared
# with itself. Presumably a placeholder; an all-pairs matrix is computed
# separately in a later cell.
distance = pdist(anchor_embedding, anchor_embedding)

In [None]:
# Display the images at dataset positions 48637..48644, the same index range
# as the embedding slice taken in the previous cell.
for i in range(48637, 48645):
    display(Image.open(dataset.paths[i]))

In [None]:
# All-pairs squared Euclidean distances: the inserted axes broadcast every row
# against every other row, then the squared differences are summed over the last axis.
dmatrix = torch.sum((anchor_embedding[:, None, :] - anchor_embedding[None, :, :]) ** 2, dim=-1)

In [None]:
import matplotlib.pyplot as plt

In [None]:
# Render the distance matrix as an image with a colour scale.
plt.imshow(dmatrix.data)
plt.colorbar()

In [None]:
%%timeit
# NOTE(review): `loader` is not defined in any visible cell; presumably an
# earlier DataLoader. Each timed run builds a fresh iterator and fetches one item.
next(iter(loader))

In [None]:
# Instantiate PairwiseDistance with its default arguments; the cell only displays the result.
torch.nn.PairwiseDistance()

In [None]:
from utils import TripletImageLoader
from torchvision import datasets, transforms
from torch.autograd import Variable
import torch

In [None]:
# Batched loader over the project's TripletImageLoader: each image is
# centre-cropped to 480 and converted to a tensor.
dl = torch.utils.data.DataLoader(
    TripletImageLoader(
        'name_photoPaths_database.csv',
        transform=transforms.Compose([transforms.CenterCrop(480), transforms.ToTensor()]),
    ),
    num_workers=4,
    batch_size=16,
)

In [None]:
%%time
i = 0
for anchor_batch, distant_batch, similar_batch in dl:
    print(anchor_batch.size(), flush=True)
    i += 1
    if i>15: break

In [None]:
import numpy as np
import random

In [None]:
# 100000 copies of the same short string, used as a list to sample from.
tester = ["asdasd"] * 100000

In [None]:
# Time picking a random element by drawing an index with np.random.choice.
%timeit tester[np.random.choice(len(tester))]

In [None]:
# Time the same pick using random.choice from the standard library.
%timeit random.choice(tester)

In [None]:
%%timeit
# Time fetching one item from a fresh iterator over `dl`, stacking its parts
# and wrapping the result in a Variable.
x = Variable(torch.stack(next(iter(dl))))

In [None]:
%%timeit
# Repeat measurement: fetch one item from a fresh iterator over `dl`, stack
# its parts and wrap the result in a Variable.
x = Variable(torch.stack(next(iter(dl))))

In [None]:
# NOTE(review): `x` is only assigned inside %%timeit cells in the visible
# notebook, and those typically do not leave variables in the notebook
# namespace. Confirm that `x` exists before relying on this.
x.shape