In [1]:
import os
from os import path
import random
import cv2
from tqdm import tqdm
import pickle

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.image import imread
from sklearn.neighbors import NearestNeighbors
from skimage import util

import torch
import torchvision
import torch.nn as nn
import torchvision.transforms as transforms
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torchvision.datasets import ImageFolder
from torch.autograd import Variable

import face_recognition as fr


# https://hackernoon.com/how-to-autoencode-your-pok%C3%A9mon-6b0f5c7b7d97
# https://www.cs.toronto.edu/~lczhang/360/lec/w05/autoencoder.html
# https://www.youtube.com/watch?v=IKOHHItzukk&list=PLZbbT5o_s2xrfNyHZsM6ufI0iZENK9xgG&index=18
# https://discuss.pytorch.org/t/layer-reshape-issue/18938/4
# https://towardsdatascience.com/find-similar-images-using-autoencoders-315f374029ea
# https://towardsdatascience.com/build-a-simple-image-retrieval-system-with-an-autoencoder-673a262b7921

# I need to test how multiple faces works with face_recognition

In [2]:
FOLDER = '/People Pictures/'
IMG_SIZE = 50
CURRENT_DIR = path.abspath(path.curdir)
BATCH_SIZE = 32
NUM_EPOCHS = 25
LR = 0.001


img_transform = transforms.Compose([
#     transforms.ColorJitter(brightness=0.8, contrast=0, saturation=0, hue=0),
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])

In [3]:
files = os.listdir(CURRENT_DIR + FOLDER)
images = []


for file in tqdm(files):
    
    image = cv2.imread(os.path.join(CURRENT_DIR + FOLDER,file),0)
    face_locations = fr.face_locations(image)

    try:
        image = image[face_locations[0][0]:face_locations[0][2], face_locations[0][3]:face_locations[0][1]]
        image = cv2.resize(image,(IMG_SIZE,IMG_SIZE))
        images.append(image)

    except:
        pass

print(len(images))

data_set = pd.DataFrame({'image':images}).sample(frac=1).reset_index(drop=True)

train_set = data_set.sample(frac=0.75, random_state=0)
test_set = data_set.drop(train_set.index)

100%|██████████| 13898/13898 [08:55<00:00, 25.95it/s]

10780





In [65]:
def increase_brightness(img, value):

    img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
    hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
    h, s, v = cv2.split(hsv)

    if value>0:
        lim = 255 - value
        v[v > lim] = 255
        v[v <= lim] += value
    elif value<0:
        lim = -1*value
        v[v<lim] = 0
        v[v>=lim] -= lim

    final_hsv = cv2.merge((h, s, v))
    img = cv2.cvtColor(final_hsv, cv2.COLOR_HSV2BGR)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    return img

In [124]:
plt.imshow(images[0],cmap='gray')
plt.show()

In [17]:
# noise = np.random.normal(0, 10, images[0].shape)
image_with_noise = util.random_noise(images[0],var=0.001)
image_with_noise.shape
# image_with_noise = increase_brightness(image_with_noise, np.random.randint(-50,51))
# image_with_noise = torch.flip(torch.tensor(image_with_noise), dims=(1,)) 
# plt.imshow(image_with_noise,cmap='gray')
# plt.show()

(1, 50, 50)

In [8]:
class ImageDataset(Dataset):

    def __init__(self,data,transform=img_transform):
        self.data = data
        self.transform = transform
        
    def __len__(self):
        return len(self.data)
        
    def __getitem__(self,index):
        if torch.is_tensor(index):
            index = index.tolist()

        item = self.data.iloc[index]
        image = item[0]/255
        
        if self.transform:
            sample = self.transform(image)
        
        return image.reshape(1,IMG_SIZE,IMG_SIZE)
    
dataset = ImageDataset(train_set)

In [9]:
dataloader = DataLoader(
    dataset, 
    batch_size=BATCH_SIZE,
    shuffle=True
    )

In [10]:
sample = next(iter(dataloader))
images = sample
images.shape    

torch.Size([32, 1, 50, 50])

In [11]:
class Autoencoder(nn.Module):
    def __init__(self):
        super(Autoencoder, self).__init__()
        
        self.Conv1 = nn.Conv2d(1,16,3)
        self.Conv2 = nn.Conv2d(16,32,3)
        self.Conv3 = nn.Conv2d(32,64,7)

        
        self.pool = nn.MaxPool2d(2,2,return_indices=True)
        

        self.ConvTrans3 = nn.ConvTranspose2d(64,32,7)
        self.ConvTrans2 = nn.ConvTranspose2d(32,16,3)
        self.ConvTrans1 = nn.ConvTranspose2d(16,1,3)
        
        self.unpool = nn.MaxUnpool2d(2,2)
        self.sig = nn.Sigmoid()
        
        

    def forward(self, x):
        
        x = self.Conv1(x)
        x = F.relu(x)
        x, indices1 = self.pool(x)
        x = self.Conv2(x)
        x = F.relu(x)
        x, indices2 = self.pool(x)
        x = self.Conv3(x)
        
        x = self.ConvTrans3(x)
        x = self.unpool(x,indices2)
        x = F.relu(x)
        x = self.ConvTrans2(x)
        x = self.unpool(x,indices1)
        x = F.relu(x)
        x = self.ConvTrans1(x)
        x = self.sig(x)
        
        return x
    
    def encode(self,x):
        
        x = self.Conv1(x)
        x = F.relu(x)
        x, indices1 = self.pool(x)
        x = self.Conv2(x)
        x = F.relu(x)
        x, indices2 = self.pool(x)
        x = self.Conv3(x)
        
        return x

In [12]:
model = Autoencoder().float()
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=LR,
                             weight_decay=1e-5)

In [13]:
sample = next(iter(dataloader))
images = sample
images = images.float()
print(images.shape)
with torch.no_grad():
    x = model(images)
print(x.shape)

torch.Size([32, 1, 50, 50])
torch.Size([32, 1, 50, 50])


In [79]:
def train(model, num_epochs, criterion, optimizer, dataloader):
    torch.manual_seed(42)

    outputs = []
    for epoch in range(num_epochs):
        for data in dataloader:
 
            img = data
            if np.random.randint(0,2)==0:
                img = torch.flip(img, dims=(1,))              
            noisy_img = util.random_noise(img,var=0.001)
            for img in noisy_img:
#                 img = img * 255
                print(img.squeeze().shape)
                img = img.squeeze()
                img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
#                 img = increase_brightness(img,np.random.randint(-50,51))
#             noisy_img = increase_brightness(noisy_img, np.random.randint(-50,51))
            
            
            recon = model(torch.tensor(noisy_img).float())
            loss = criterion(recon.double(), img.double())
            loss.backward()
            optimizer.step()
            optimizer.zero_grad()
            break
        break

        print(f'Epoch:{epoch+1}, Loss:{float(loss)}')
        outputs.append((epoch, img, recon),)
    return outputs

In [80]:
test = train(model, NUM_EPOCHS, criterion, optimizer, dataloader)

(50, 50)


error: OpenCV(4.2.0) d:\bld\libopencv_1578930274633\work\modules\imgproc\src\color.simd_helpers.hpp:94: error: (-2:Unspecified error) in function '__cdecl cv::impl::`anonymous-namespace'::CvtHelper<struct cv::impl::`anonymous namespace'::Set<1,-1,-1>,struct cv::impl::A0x8568db86::Set<3,4,-1>,struct cv::impl::A0x8568db86::Set<0,2,5>,2>::CvtHelper(const class cv::_InputArray &,const class cv::_OutputArray &,int)'
> Unsupported depth of input image:
>     'VDepth::contains(depth)'
> where
>     'depth' is 6 (CV_64F)


In [80]:
torch.save(model.state_dict(),CURRENT_DIR + 'FaceModel')

In [81]:
model = Autoencoder().float()
model.load_state_dict(torch.load(CURRENT_DIR + 'FaceModel'))
model.eval()

Autoencoder(
  (Conv1): Conv2d(1, 16, kernel_size=(3, 3), stride=(1, 1))
  (Conv2): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1))
  (Conv3): Conv2d(32, 64, kernel_size=(7, 7), stride=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (ConvTrans3): ConvTranspose2d(64, 32, kernel_size=(7, 7), stride=(1, 1))
  (ConvTrans2): ConvTranspose2d(32, 16, kernel_size=(3, 3), stride=(1, 1))
  (ConvTrans1): ConvTranspose2d(16, 1, kernel_size=(3, 3), stride=(1, 1))
  (unpool): MaxUnpool2d(kernel_size=(2, 2), stride=(2, 2), padding=(0, 0))
  (sig): Sigmoid()
)

In [38]:
testset = ImageDataset(test_set)
testloader = dataloader = DataLoader(
    testset, 
    batch_size=BATCH_SIZE,
    shuffle=True
    )

In [125]:
sample = next(iter(testloader))
images = sample
grid = torchvision.utils.make_grid(images,nrow=8)
plt.figure(figsize=(50,50))
plt.imshow(grid.numpy().transpose(1,2,0))

In [126]:
with torch.no_grad():
    test_images = model(images.float())
test_grid = torchvision.utils.make_grid(test_images,nrow=8)
plt.figure(figsize=(50,50))
plt.imshow(test_grid.numpy().transpose(1,2,0))

In [41]:
finalset = ImageDataset(data_set)
finalloader = dataloader = DataLoader(
    finalset, 
    batch_size=len(data_set),
    shuffle=True
    )
og_images = next(iter(dataloader))


with torch.no_grad():
    encodings = model.encode(og_images.float())
encodings = encodings.reshape((-1, np.prod(1600)))
encodings = encodings.numpy()
# og_images = og_images.numpy().transpose(3,1,2,0)


In [42]:
encodings.shape

(10780, 1600)

In [43]:
knn = NearestNeighbors(n_neighbors=5, metric="cosine")
knn.fit(np.asarray(encodings))

NearestNeighbors(algorithm='auto', leaf_size=30, metric='cosine',
                 metric_params=None, n_jobs=None, n_neighbors=5, p=2,
                 radius=1.0)

In [44]:
distances, indices = knn.kneighbors(encodings[16].reshape(1,-1))

In [127]:
plt.imshow(og_images[16].permute(1,2,0).squeeze(),cmap='gray')

In [47]:
closest_images = og_images[indices]

In [128]:
closest_grid = torchvision.utils.make_grid(closest_images,nrow=5)
plt.figure(figsize=(50,50))
plt.imshow(closest_grid.numpy().transpose(1,2,0).squeeze())

In [129]:
CELEB_FOLDER = '/Celeb Pictures/'
celeb_files = os.listdir(CURRENT_DIR + CELEB_FOLDER)
celeb_images = []
celeb_pokemons = []

for file in tqdm(celeb_files):
    
    celeb_image = cv2.imread(os.path.join(CURRENT_DIR + CELEB_FOLDER,file),0)
    face_locations = fr.face_locations(celeb_image)
    celeb_pokemons.append(file.split('.')[0])

    
    celeb_image = celeb_image[face_locations[0][0]:face_locations[0][2], face_locations[0][3]:face_locations[0][1]]
    celeb_image = cv2.resize(celeb_image,(IMG_SIZE,IMG_SIZE))
    celeb_images.append(celeb_image)

    
celebset = ImageDataset(pd.DataFrame({'image':celeb_images}).sample(frac=1).reset_index(drop=True))
celebloader = DataLoader(
    celebset, 
    batch_size=len(celeb_images),
    shuffle=True
    )
c_images = next(iter(celebloader))
plt.imshow(celeb_images[3],cmap='gray')

In [130]:
new_image = celeb_images[3]
print(new_image.shape)
print(new_image.dtype)
new_image = increase_brightness(new_image,np.random.randint(-100,101))
plt.imshow(new_image,cmap='gray')

In [141]:
celeb_pokemons

['Dugtrio', 'Jinx', 'Rapidash', 'Regirock']

In [92]:
with torch.no_grad():
    encodings = model.encode(c_images.float())
encodings = encodings.reshape((-1, np.prod(1600)))
encodings = encodings.numpy()
encodings.shape

(4, 1600)

In [63]:
og_images.shape

torch.Size([10780, 1, 50, 50])

In [95]:
distances, indices = knn.kneighbors(encodings[2].reshape(1,-1))

In [133]:
plt.imshow(c_images[3].permute(1,2,0).squeeze(),cmap='gray')

In [134]:
distances, indices = knn.kneighbors(encodings[0].reshape(1,-1))
closest_images = og_images[indices]
closest_grid = torchvision.utils.make_grid(closest_images,nrow=5)
plt.figure(figsize=(50,50))
plt.imshow(closest_grid.numpy().transpose(1,2,0).squeeze())

In [123]:
for data in dataloader:
    imgs = data
    
    noisy_imgs = util.random_noise(imgs,var=0.001)
    

    noisy_imgs = noisy_imgs.squeeze()
    noisy_imgs = noisy_imgs * 255
    noisy_imgs = noisy_imgs.astype('uint8')
    for i in range(len(noisy_imgs)):
        noisy_imgs[i] = increase_brightness(noisy_imgs[i],np.random.randint(-50,50))

    noisy_imgs = noisy_imgs.astype('float')
    noisy_imgs = noisy_imgs/255
    print(noisy_imgs[0])

    
    
    break

[[0.27843137 0.41176471 0.30980392 ... 0.17254902 0.43921569 0.4       ]
 [0.31764706 0.27843137 0.25882353 ... 0.25490196 0.3372549  0.30588235]
 [0.18431373 0.28235294 0.30980392 ... 0.18431373 0.31764706 0.34117647]
 ...
 [0.82352941 0.79607843 0.6627451  ... 0.14901961 0.23529412 0.15294118]
 [0.74901961 0.53333333 0.17647059 ... 0.19215686 0.21960784 0.19215686]
 [0.38823529 0.14901961 0.12941176 ... 0.24313725 0.19607843 0.14509804]]
