In [1]:
import torch
from pathlib import Path
import numpy as np
import cv2

class CubiCasa(torch.utils.data.Dataset):
	"""Dataset of grayscale images read from the files directly inside a directory.

	Each item is a pair ``(image, 0)``. The ``0`` is a constant placeholder
	label so the dataset can be used with code that expects ``(input, target)``
	tuples.
	"""

	def __init__(self, data_root, transform=None):
		"""Index the files found directly under ``data_root``.

		Args:
			data_root: Directory containing the image files (not searched
				recursively).
			transform: Optional callable applied to each loaded image.
		"""
		super().__init__()
		# iterdir() yields entries in arbitrary, filesystem-dependent order;
		# sorting keeps every index pointing at the same file across runs.
		# Sub-directories are skipped because cv2.imread cannot read them.
		self.data = sorted(str(p) for p in Path(data_root).iterdir() if p.is_file())
		self.transform = transform

	def __getitem__(self, idx):
		"""Load image ``idx`` as single-channel grayscale.

		Returns:
			``(image, 0)`` where ``image`` is the (optionally transformed) image.

		Raises:
			OSError: If OpenCV cannot read or decode the file.
		"""
		path = self.data[idx]
		img = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
		# cv2.imread signals failure by returning None instead of raising.
		if img is None:
			raise OSError(f"cv2 could not read image: {path}")

		if self.transform is not None:
			img = self.transform(img)

		return img, 0

	def __len__(self):
		"""Return the number of indexed files."""
		return len(self.data)

In [2]:
__author__ = 'SherlockLiao'

import os
import datetime
import torch
import torchvision
from torch import nn
from torch.autograd import Variable
from torch.utils.data import DataLoader
from torchvision import transforms
from torchvision.datasets import MNIST
from torchvision.utils import save_image

# if not os.path.exists('./mlp_img'):
#     os.mkdir('./mlp_img')

# Side length, in pixels, of the square images handled by to_img.
img_size = 256


def to_img(x):
    """Map a batch from the [-1, 1] range back to [0, 1] single-channel images.

    Values are shifted/scaled with ``0.5 * (x + 1)``, clamped to [0, 1], and
    viewed as ``(batch, 1, img_size, img_size)``.
    """
    batch = x.size(0)
    rescaled = (0.5 * (x + 1)).clamp(0, 1)
    return rescaled.view(batch, 1, img_size, img_size)


# Training hyper-parameters.
num_epochs = 150
batch_size = 128
learning_rate = 1e-3

# ToTensor converts the loaded image to a tensor; Normalize then applies
# (x - 0.5) / 0.5 to the single channel.
img_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5], std=[0.5]),
])

dataset = CubiCasa(data_root='data/transformed', transform=img_transform)
# dataset = MNIST('./data', transform=img_transform, download=True)

# Batches are drawn in a new random order on every pass over the loader.
dataloader = DataLoader(dataset=dataset, batch_size=batch_size, shuffle=True)

class autoencoder(nn.Module):
    """Small convolutional autoencoder for single-channel images.

    The shape comments below are for a ``(b, 1, 256, 256)`` input, which the
    network maps back to ``(b, 1, 256, 256)``. The final ``Tanh`` keeps the
    output within [-1, 1].
    """

    def __init__(self):
        super().__init__()

        encoder_layers = [
            nn.Conv2d(in_channels=1, out_channels=16, kernel_size=3, stride=3, padding=1),  # b, 16, 86, 86
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),  # b, 16, 43, 43
            nn.Conv2d(in_channels=16, out_channels=8, kernel_size=3, stride=2, padding=1),  # b, 8, 22, 22
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=1),  # b, 8, 21, 21
        ]
        decoder_layers = [
            nn.ConvTranspose2d(in_channels=8, out_channels=16, kernel_size=3, stride=2),  # b, 16, 43, 43
            nn.ReLU(inplace=True),
            nn.ConvTranspose2d(in_channels=16, out_channels=8, kernel_size=5, stride=3, padding=1),  # b, 8, 129, 129
            nn.ReLU(inplace=True),
            nn.ConvTranspose2d(in_channels=8, out_channels=1, kernel_size=2, stride=2, padding=1),  # b, 1, 256, 256
            nn.Tanh(),
        ]

        self.encoder = nn.Sequential(*encoder_layers)
        self.decoder = nn.Sequential(*decoder_layers)

    def forward(self, x):
        """Encode ``x`` to the bottleneck representation and decode it again."""
        return self.decoder(self.encoder(x))

# Each run writes into its own timestamped directory under ./trained.
# The timestamp avoids ':' so the directory name is valid on every platform
# and safe to pass to shell tools.
time = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
out = str(Path.cwd() / "trained" / time)
# makedirs also creates ./trained itself on the first run.
os.makedirs(out, exist_ok=True)
print(out)

# Train on the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = autoencoder().to(device)
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate,
                             weight_decay=1e-5)

for epoch in range(num_epochs):
    total_loss = 0.0
    for img, _ in dataloader:
        img = img.to(device)
        # ===================forward=====================
        output = model(img)
        # The autoencoder is trained to reproduce its own input.
        loss = criterion(output, img)
        # ===================backward====================
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        # .item() yields a plain Python float, so the running sum does not
        # stay on the device or hold on to tensors.
        total_loss += loss.item()
    # ===================log========================
    # Reported value is the mean of the per-batch losses for this epoch.
    print('epoch [{}/{}], loss:{:.4f}'
          .format(epoch + 1, num_epochs, total_loss / len(dataloader)))
    if epoch % 10 == 0:
        # Save the reconstructions of the last batch of the epoch.
        pic = to_img(output.detach().cpu())
        save_image(pic, f'{out}/image_{epoch}.png')

# Save the weights first so a failed notebook copy cannot lose the model.
torch.save(model.state_dict(), f'{out}/sim_autoencoder.pth')

# Keep a copy of the notebook next to the weights it produced, if it is
# present in the working directory.
notebook = Path('playground.ipynb')
if notebook.is_file():
    Path(out, notebook.name).write_bytes(notebook.read_bytes())

/home/duonghaidang/Developer/CubiCasa5k/trained/20211016_17:0852
epoch [1/150], loss:0.3029
epoch [2/150], loss:0.1196


KeyboardInterrupt: 

In [None]:
from sklearn.neighbors import NearestNeighbors

# Read the dataset in its own order. The training loader shuffles, which
# would give the rows of X / enc a different meaning on every run.
eval_loader = DataLoader(dataset, batch_size=batch_size, shuffle=False)
# Run the encoder on whatever device the model already lives on.
device = next(model.parameters()).device

model.eval()
image_batches, code_batches = [], []
with torch.no_grad():
	# Encode batch by batch so the whole dataset never has to fit on the
	# device at once.
	for batch, _ in eval_loader:
		image_batches.append(batch)
		# One flat feature vector per image.
		code_batches.append(model.encoder(batch.to(device)).flatten(start_dim=1).cpu())

# X holds the images and enc the matching flat encoder features, both on the
# CPU; row i of each corresponds to dataset[i].
X = torch.cat(image_batches)
enc = torch.cat(code_batches)

knn = NearestNeighbors()
knn.fit(enc.numpy())

In [None]:
# Query the fitted index for the 10 nearest neighbours of sample 1's encoding.
# kneighbors expects a 2-D array, hence the reshape to a single row.
query = enc[1].cpu().reshape(1, -1)
knn.kneighbors(query, n_neighbors=10)

In [None]:
import matplotlib.pyplot as plt


# Show sample 2247 in grayscale; squeeze drops the size-1 dimensions.
# NOTE(review): presumably one of the neighbours found for sample 1 - verify.
preview = X[2247].cpu().squeeze()
plt.imshow(preview, cmap='gray')

In [None]:
# Show sample 1 in grayscale; squeeze drops the size-1 dimensions.
# Index 1 is the same index used for the nearest-neighbour query.
preview = X[1].cpu().squeeze()
plt.imshow(preview, cmap='gray')

In [None]:
# Show sample 1614 in grayscale; squeeze drops the size-1 dimensions.
# NOTE(review): presumably one of the neighbours found for sample 1 - verify.
preview = X[1614].cpu().squeeze()
plt.imshow(preview, cmap='gray')