In [None]:
from utils import TripletImageLoader
from torchvision import datasets, transforms, models
from torch.autograd import Variable
import torch.nn as nn
import torch.backends.cudnn as cudnn
import torch
import dlib
import numpy as np

from IPython.display import display
from IPython.display import Image as im
from PIL import Image

In [None]:
import os
os.environ['CUDA_VISIBLE_DEVICES'] = '0'

In [None]:
class customDataset(torch.utils.data.Dataset):
    """Dataset that yields one image per entry of ``paths``.

    Args:
        paths: sequence of image file paths; item ``idx`` is read from ``paths[idx]``.
        transform: optional callable applied to the loaded PIL image before it is returned.
    """

    def __init__(self, paths, transform=None):
        super(customDataset, self).__init__()
        self.paths = paths
        self.transform = transform

    def __len__(self):
        """Return the number of image paths."""
        return len(self.paths)

    def __getitem__(self, idx):
        """Load image ``idx`` as 3-channel RGB and apply ``transform`` if one was given."""
        # Convert inside the ``with`` so the pixels are decoded before the file is
        # closed. Forcing RGB keeps the channel count uniform across grayscale/RGBA
        # files, which batching and the 3-channel network input both depend on.
        with Image.open(self.paths[idx]) as handle:
            img = handle.convert('RGB')
        if self.transform is not None:
            img = self.transform(img)
        return img
    
# Read the list of aligned image paths (one per line); the context manager closes
# the file deterministically instead of leaving the handle to the garbage collector.
with open('aligned_list.txt') as list_file:
    image_paths = list_file.read().splitlines()

# Every image is resized and centre-cropped to 96x96 before being turned into a tensor.
dataset = customDataset(
    paths=image_paths,
    transform=transforms.Compose([
        transforms.Resize(96),
        transforms.CenterCrop(96),
        transforms.ToTensor(),
    ]),
)
# shuffle=False keeps batches in the same order as `dataset.paths`, which later
# cells rely on when pairing paths with embeddings.
dataloader = torch.utils.data.DataLoader(dataset, shuffle=False, batch_size=1024, num_workers=20)

In [None]:
# One shared iterator over `dataloader`; each `next(thumb_iter)` pulls the following batch.
thumb_iter = iter(dataloader)

In [None]:
class Net(nn.Module):
    """SqueezeNet 1.1 convolutional features followed by a two-layer head giving 128-d embeddings."""

    def __init__(self):
        super().__init__()
        backbone = models.squeezenet1_1()
        self.features = backbone.features
        # Layer order is kept exactly so the Sequential indices (0 and 3 hold the
        # Linear weights) stay the same in the module's state dict.
        head_layers = [
            nn.Linear(2048, 512),
            nn.Dropout(),
            nn.ReLU(),
            nn.Linear(512, 128),
        ]
        self.embedding = nn.Sequential(*head_layers)

    def forward(self, x):
        """Embed a batch of images; returns a tensor of shape (batch, 128)."""
        feature_map = self.features(x)
        # Max-pool to a fixed 2x2 grid so the flattened size does not depend on input
        # resolution (2048 inputs to the head implies 512 channels x 2 x 2).
        pooled = nn.functional.adaptive_max_pool2d(feature_map, 2)
        flattened = pooled.view(pooled.size(0), -1)
        return self.embedding(flattened)

# Row-wise L2 (p=2) distance between two equally shaped batches of vectors.
pdist = nn.PairwiseDistance(p=2)

In [None]:
def normalize(x, eps=1e-12):
    """Scale every row of ``x`` to unit L2 norm.

    Args:
        x: 2-D tensor; the norm is taken along ``dim=1``.
        eps: lower bound applied to each row norm before dividing, so an all-zero
            row yields zeros instead of NaN.

    Returns:
        Tensor of the same shape as ``x`` with each row divided by its (clamped) norm.
    """
    row_norms = x.norm(2, dim=1, keepdim=True)
    return x / row_norms.clamp(min=eps)

In [None]:
# Build the network on the GPU and restore the best saved checkpoint.
net = Net().cuda()
# NOTE: torch.load unpickles the file, so only load checkpoints from a trusted source.
best_checkpoint = torch.load('runs/TripletNet/model_best.pth.tar')
print(best_checkpoint['best_prec1'], 'Epoch', best_checkpoint['epoch'])
net.load_state_dict(best_checkpoint['state_dict'])
# Inference mode: eval() switches Dropout off, and the parameters are frozen below.
net = net.eval()
for p in net.parameters():
    p.requires_grad = False

In [None]:
# Pull one batch from the shared iterator, move it to the GPU and embed it.
# NOTE(review): `volatile=True` is the pre-0.4 PyTorch way of disabling autograd; on newer
# releases this presumably needs `torch.no_grad()` instead — confirm against the installed version.
X = torch.autograd.Variable(next(thumb_iter).cuda(), volatile=True)
embeddings = normalize(net(X))

In [None]:
# Use the first embedding of the batch as the anchor and measure every row against it.
anchor_embedding = embeddings[0].expand_as(embeddings)
distance = pdist(embeddings, anchor_embedding)

In [None]:
def getThumb(x):
    """Convert a channel-first image tensor into a PIL image.

    Args:
        x: object exposing ``.data`` as a (C, H, W) tensor; values are expected to
            lie in [0, 1] (as produced by ``ToTensor``).

    Returns:
        The image built by ``Image.fromarray`` from the (H, W, C) uint8 array.
    """
    chw = x.data.cpu().numpy()
    hwc = chw.transpose(1, 2, 0)
    # Round to the nearest level and clip before the uint8 cast: a bare cast truncates
    # (0.999 * 255 -> 254) and wraps around for values slightly outside [0, 1].
    pixels = np.clip(np.rint(255 * hwc), 0, 255).astype('uint8')
    return Image.fromarray(pixels)

In [None]:
# Print every distance to the anchor and show the thumbnails that fall under 0.5.
for x, s in zip(X, distance):
    dist_value = s.data[0]
    is_match = dist_value < 0.5
    print(dist_value, is_match)
    if is_match:
        display(getThumb(x))

In [None]:
# Let cuDNN auto-tune its kernels and release cached GPU memory before the full-dataset pass.
cudnn.benchmark = True
torch.cuda.empty_cache()

In [None]:
# Reset the embedding accumulator before the full-dataset pass.
embs = None

In [None]:
# Embed the whole dataset batch by batch. The per-batch results are collected in a
# list and concatenated once at the end: concatenating inside the loop re-copies
# everything accumulated so far on every batch. Starting from a fresh list also
# makes this cell safe to re-run without resetting `embs` first.
emb_chunks = []
num_batches = len(dataloader)
for batch_idx, imgs in enumerate(dataloader, 1):
    X = Variable(imgs, volatile=True).cuda()
    emb_chunks.append(normalize(net(X)))
    print('[%5d|%5d]' % (batch_idx, num_batches))

# Keep the previous convention of `embs is None` when there was nothing to embed.
embs = torch.cat(emb_chunks) if emb_chunks else None
    

In [None]:
len(dataset)

In [None]:
embs

In [None]:
# Map every image path to its embedding row (stored on the CPU).
# zip() would silently drop the tail on a length mismatch, which would leave the
# database incomplete without any error, so fail loudly instead.
if len(dataset.paths) != len(embs):
    raise ValueError(
        'number of paths (%d) does not match number of embeddings (%d)'
        % (len(dataset.paths), len(embs))
    )

# One device-to-host transfer for the whole matrix instead of one per row;
# clone() gives each entry its own storage rather than a view into the full matrix.
embs_cpu = embs.cpu()
embedding_database = {path: row.clone() for path, row in zip(dataset.paths, embs_cpu)}

In [None]:
# Persist the path -> embedding dict with torch.save.
# NOTE(review): despite the ".csv" suffix this is a torch-serialized file, not CSV text;
# it is read back with torch.load later in this notebook.
torch.save(embedding_database, 'EMBEDDING_DATABASE_251961.csv')

In [None]:
from utils import TripletImageLoader
from torchvision import datasets, transforms, models
from torch.autograd import Variable
import torch.nn as nn
import torch.backends.cudnn as cudnn
import torch
import dlib
import numpy as np

from IPython.display import display
from IPython.display import Image as im
from PIL import Image

In [None]:
# Reload the path -> embedding dict written by torch.save (not CSV, despite the suffix).
# NOTE: torch.load unpickles the file, so only load databases from a trusted source.
embedding_database = torch.load('EMBEDDING_DATABASE_251961.csv')

In [None]:
# Stack the stored embeddings into one matrix; row i corresponds to the i-th key of
# `embedding_database` in the dict's iteration order.
embs = torch.stack(list(embedding_database.values()))

In [None]:
embs.size()

In [None]:
# Use row 48666 as the anchor and compute its L2 distance to every stored embedding.
anchor_embedding = embs[48666].expand_as(embs)
distance = pdist(embs, anchor_embedding)

In [None]:
# Rows of `embs` follow the key order of `embedding_database`, so resolve row indices
# through the database keys. `dataset.paths` only lines up with those rows when every
# path is unique, and it does not exist when the database was loaded from disk.
db_paths = list(embedding_database)
for i, d in enumerate(distance):
    dist_value = d.data[0]
    if dist_value < 0.2:
        print(dist_value, dist_value < 0.2)
        display(Image.open(db_paths[i]))

In [None]:
# Take eight consecutive embeddings (rows 48637..48644).
anchor_embedding = embs[48637:48645]
pdist = nn.PairwiseDistance(p=2)
# NOTE(review): both arguments are the same tensor, so this measures each row against
# itself rather than against the other rows.
distance = pdist(anchor_embedding, anchor_embedding)

In [None]:
# Show the images behind rows 48637..48644 of `embs`. Paths are resolved through the
# database keys because the rows of `embs` follow that order; `dataset.paths` only
# matches when every path is unique and is undefined after loading from disk.
db_paths = list(embedding_database)
for i in range(48637, 48645):
    display(Image.open(db_paths[i]))

In [None]:
# All-pairs squared Euclidean distances via broadcasting: entry (i, j) is the sum of
# squared differences between rows i and j (no square root is taken).
dmatrix = torch.sum((anchor_embedding[:, None, :] - anchor_embedding[None, :, :]) ** 2, dim=-1)

In [None]:
import matplotlib.pyplot as plt

In [None]:
# Render the pairwise squared-distance matrix as a heat map with a colour scale.
plt.imshow(dmatrix.data)
plt.colorbar()

In [None]:
%%timeit
next(iter(loader))

In [None]:
torch.nn.PairwiseDistance()

In [None]:
from utils import TripletImageLoader
from torchvision import datasets, transforms
from torch.autograd import Variable
import torch

In [None]:
# Loader over the triplet dataset read from 'name_photoPaths_database.csv'; every image is
# centre-cropped to 480x480 and converted to a tensor, 16 items per batch.
dl = torch.utils.data.DataLoader(
    TripletImageLoader(
        'name_photoPaths_database.csv', 
        transform=transforms.Compose([
            transforms.CenterCrop(480),
            transforms.ToTensor(),
        ])),
    batch_size=16, num_workers=4)

In [None]:
%%time
i = 0
for anchor_batch, distant_batch, similar_batch in dl:
    print(anchor_batch.size(), flush=True)
    i += 1
    if i>15: break

In [None]:
import numpy as np
import random

In [None]:
# 100000 identical strings, used as the population for the random-choice timings.
tester = ["asdasd"] * 100000

In [None]:
# Time picking a random element by drawing an index with NumPy.
%timeit tester[np.random.choice(len(tester))]

In [None]:
# Time picking a random element with the standard library, for comparison.
%timeit random.choice(tester)

In [None]:
%%timeit
# Time fetching the first triplet batch from a fresh iterator and stacking its tensors.
x = Variable(torch.stack(next(iter(dl))))

In [None]:
%%timeit
# Same measurement as the previous timing cell: fetch the first triplet batch and stack it.
x = Variable(torch.stack(next(iter(dl))))

In [None]:
# `%%timeit` runs its cell body in a private scope, so the `x` assigned in the timing
# cells never reaches the notebook namespace. Build the stacked triplet batch here
# explicitly, so the shape shown belongs to it and not to a stale `x` from an earlier cell.
x = Variable(torch.stack(next(iter(dl))))
x.shape