# CAE - Sketch simplification

An attempt at sketch simplification using a convolutional autoencoder (CAE).

### Libraries

In [1]:
# Pytorch libraries
import torch
from torch import nn, optim
from torch.utils.data import DataLoader
from torch.utils.data import Dataset
import torch.nn.functional as F

# Torchvision libraries
import torchvision.transforms as TF

# open cv
import cv2

# pandas library
import pandas as pd

# Matplotlib
import matplotlib.pyplot as plt
%matplotlib inline

# os library
import os



## Dataset 

In [2]:
class Sketches(Dataset):
    """Image dataset driven by a table whose first column holds file names.

    Each item is the pair ``(image, label)``: ``image`` is the file read with
    ``cv2.imread`` (optionally passed through ``transform``) and ``label`` is
    the file name taken from column 0 of the table (optionally passed through
    ``target_transform``).
    """

    def __init__(self, annotations_file, img_dir, mode = 'train', transform = None, target_transform = None):
        """Store the configuration; no files are read here.

        Args:
            annotations_file: table indexed with ``.iloc``; column 0 holds the
                image file names (the notebook passes a pandas DataFrame).
            img_dir: directory the file names are relative to.
            mode: free-form tag stored on the instance; not used by this class.
            transform: optional callable applied to the loaded image.
            target_transform: optional callable applied to the label.
        """
        self.img_labels = annotations_file
        self.img_dir = img_dir
        self.transform = transform
        self.target_transform = target_transform
        self.mode = mode

    def __len__(self):
        """Return the number of rows in the annotation table."""
        return len(self.img_labels)

    def __getitem__(self, idx):
        """Load and return ``(image, label)`` for row ``idx``.

        Raises:
            FileNotFoundError: the listed file does not exist (or is not a
                regular file) under ``img_dir``.
            ValueError: the file exists but ``cv2.imread`` could not decode it.
        """
        # The file name doubles as the label, so look it up only once.
        label = self.img_labels.iloc[idx, 0]
        img_path = os.path.join(self.img_dir, label)

        # cv2.imread signals failure by returning None instead of raising, so
        # check explicitly and report the offending path rather than letting a
        # later transform/collate step fail with an unrelated message.
        if not os.path.isfile(img_path):
            raise FileNotFoundError(f'Image file not found: {img_path}')
        image = cv2.imread(img_path)
        if image is None:
            raise ValueError(f'Could not decode image: {img_path}')

        # Compare against None: a transform object may define __len__/__bool__
        # and must not be skipped just because it evaluates as falsy.
        if self.transform is not None:
            image = self.transform(image)
        if self.target_transform is not None:
            label = self.target_transform(label)
        return image, label

## Hyperparameters

In [3]:
# Train dataset directory
train_directory = r'/kaggle/input/realsketches'
print(f'Train dataset directory: {train_directory}')

# Test dataset directory
test_directory = r'/kaggle/input/unclean-sketches'
print(f'Test dataset directory: {test_directory}')

# Batch size
batch_size = 8
print(f'Batch size: {batch_size}')

# Annotation tables: one row per directory entry, file name in column 0.
# os.listdir returns entries in arbitrary order, so sort them to keep the row
# order (and therefore the first, unshuffled test batch) the same on every run.
imgs = sorted(os.listdir(train_directory))
train_frame = pd.DataFrame(imgs)

imgs = sorted(os.listdir(test_directory))
test_frame = pd.DataFrame(imgs)

# Image transform: only conversion to a tensor is active; the resize/crop
# variants are kept disabled below.
img_transform = TF.Compose([
            TF.ToTensor(),
            #TF.Resize((256, 256)),
            #TF.RandomResizedCrop(256)
        ])
print(f'Transform : {img_transform}')

# Device: use the GPU when one is available, otherwise the CPU
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(f'Device: {device}')

# Learning rate
lr = 3e-4
print(f'learning rate: {lr}')

# Weight Decay
wd = 1e-7
print(f'Weight Decay: {wd}')

# Bottleneck size
bottleneck_size = 16
print(f'bottleneck width: {bottleneck_size}')

# number of workers
n_cpu = 2
print(f'No. of workers: {n_cpu}')

# No. of epochs
epochs = 80
print(f'No. of epochs: {epochs}')

Train dataset directory: /kaggle/input/realsketches
Test dataset directory: /kaggle/input/unclean-sketches
Batch size: 8
Transform : Compose(
    ToTensor()
)
Device: cuda
learning rate: 0.0003
Weight Decay: 1e-07
bottleneck width: 16
No. of workers: 2
No. of epochs: 80


In [4]:
# Training set: files listed in train_frame, read from train_directory.
train_data = Sketches(train_frame, train_directory, mode = 'train', transform = img_transform)

# Test set: files listed in test_frame, read from test_directory.
test_data = Sketches(test_frame, test_directory, mode = 'test', transform = img_transform)

## Train and Test Dataloader

In [5]:
# Shuffled mini-batches over the training set, loaded by n_cpu worker processes.
train_loader = DataLoader(train_data, batch_size = batch_size, shuffle = True, num_workers = n_cpu)

## Additional Hyperparameters

In [6]:
# Number of mini-batches the training loader yields per epoch
size = len(train_loader)
print(f'No. of batches: {size}')

# Number of entries in the training directory
count = len(os.listdir(train_directory))
print(f'No. of images: {count}')

# Peek at the first batch only, to read the tensor dimensions off its shape.
# The batch is indexed as (sample, channel, row, column).
for X, _ in train_loader:
    # Spatial size of a single channel plane
    img_size = X.shape[2:]
    print(f'image size: {img_size}')

    # Channel count of a single sample
    channels = X.shape[1]
    print(f'No. of channels: {channels}')
    break

No. of batches: 79
No. of images: 632
image size: torch.Size([512, 512])
No. of channels: 3


In [7]:
# Unshuffled mini-batches over the test set, loaded by a single worker process.
test_loader = DataLoader(test_data, batch_size = batch_size, num_workers = 1)

## Model

In [8]:
# The input dimensions can be replaced with the dimensions of the image.
class Net(torch.nn.Module):
    """Fully convolutional encoder-decoder.

    Three stages that each open with a stride-2 convolution are followed by a
    stack of stride-1 convolutions and then by three stages that each open with
    a stride-2 transposed convolution. Every convolution except the very last
    one is followed by batch normalisation and ReLU; the last convolution maps
    back to ``channels`` output channels with no activation.
    """

    @staticmethod
    def _conv_bn_relu(conv_type, c_in, c_out, kernel, stride, pad):
        """Build one (convolution, batch-norm, ReLU) triplet, in that order."""
        return (
            conv_type(c_in, c_out, kernel, stride, pad),
            torch.nn.BatchNorm2d(c_out),
            torch.nn.ReLU(),
        )

    def __init__(self, channels = channels):
        """Create all stages; ``channels`` is both the input and output depth."""
        super().__init__()
        conv = torch.nn.Conv2d
        deconv = torch.nn.ConvTranspose2d
        unit = self._conv_bn_relu

        # Downsampling stages (first layer of each uses stride 2)
        self.downconv1 = torch.nn.Sequential(
            *unit(conv, channels, 48, 5, 2, 2),
            *unit(conv, 48, 128, 3, 1, 1),
            *unit(conv, 128, 128, 3, 1, 1),
        )
        self.downconv2 = torch.nn.Sequential(
            *unit(conv, 128, 256, 3, 2, 1),
            *unit(conv, 256, 256, 3, 1, 1),
            *unit(conv, 256, 256, 3, 1, 1),
        )
        self.downconv3 = torch.nn.Sequential(
            *unit(conv, 256, 256, 3, 2, 1),
            *unit(conv, 256, 512, 3, 1, 1),
            *unit(conv, 512, 1024, 3, 1, 1),
        )

        # Resolution-preserving middle section (all stride 1)
        self.flat = torch.nn.Sequential(
            *unit(conv, 1024, 1024, 3, 1, 1),
            *unit(conv, 1024, 1024, 3, 1, 1),
            *unit(conv, 1024, 1024, 3, 1, 1),
            *unit(conv, 1024, 512, 3, 1, 1),
            *unit(conv, 512, 256, 3, 1, 1),
        )

        # Upsampling stages (first layer of each is a stride-2 transposed conv)
        self.upconv1 = torch.nn.Sequential(
            *unit(deconv, 256, 256, 4, 2, 1),
            *unit(conv, 256, 256, 3, 1, 1),
            *unit(conv, 256, 128, 3, 1, 1),
        )
        self.upconv2 = torch.nn.Sequential(
            *unit(deconv, 128, 128, 4, 2, 1),
            *unit(conv, 128, 128, 3, 1, 1),
            *unit(conv, 128, 48, 3, 1, 1),
        )
        self.upconv3 = torch.nn.Sequential(
            *unit(deconv, 48, 48, 4, 2, 1),
            *unit(conv, 48, 24, 3, 1, 1),
            # Final projection back to the input depth: no norm, no activation
            conv(24, channels, 3, 1, 1),
        )

    def forward(self, x):
        """Pass ``x`` through every stage in order and return the final output."""
        stages = (
            self.downconv1,
            self.downconv2,
            self.downconv3,
            self.flat,
            self.upconv1,
            self.upconv2,
            self.upconv3,
        )
        out = x
        for stage in stages:
            out = stage(out)
        return out

In [9]:
# Instantiate the network with its default channel count and print its layer structure
model = Net()
print(model)

Net(
  (downconv1): Sequential(
    (0): Conv2d(3, 48, kernel_size=(5, 5), stride=(2, 2), padding=(2, 2))
    (1): BatchNorm2d(48, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): Conv2d(48, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (4): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (5): ReLU()
    (6): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (8): ReLU()
  )
  (downconv2): Sequential(
    (0): Conv2d(128, 256, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
    (1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (4): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (5): ReLU()
    (6): Conv2d(256, 256, k

In [10]:
# Loss: binary cross-entropy computed on the raw (pre-sigmoid) network output
loss_fn = nn.BCEWithLogitsLoss()
print(loss_fn)

# Optimiser: Adam over all model parameters with the configured lr / weight decay
optimizer = optim.Adam(params = model.parameters(), lr = lr, weight_decay = wd)
print(optimizer)

BCEWithLogitsLoss()
Adam (
Parameter Group 0
    amsgrad: False
    betas: (0.9, 0.999)
    capturable: False
    differentiable: False
    eps: 1e-08
    foreach: None
    fused: False
    lr: 0.0003
    maximize: False
    weight_decay: 1e-07
)


## Training

In [11]:
def train(dataloader, model, loss_fn, optimizer):
    """Run one training epoch, reconstructing each input batch from itself.

    Args:
        dataloader: DataLoader yielding ``(X, _)`` pairs; only ``X`` is used,
            as both the model input and the loss target. Its ``dataset``
            attribute supplies the total shown in the progress line.
        model: network mapping ``X`` to an output of the same shape.
        loss_fn: callable ``loss_fn(prediction, target)`` returning a scalar
            tensor that supports ``backward()``.
        optimizer: optimiser bound to ``model``'s parameters.

    Prints the loss and a running sample count every fourth batch. Uses the
    module-level ``device``.
    """
    # Total comes from the loader itself, so the function also reports
    # correctly for loaders other than the training one.
    total = len(dataloader.dataset)

    # Module.to() moves parameters in place; doing it once is enough.
    model = model.to(device)
    model.train()

    seen = 0  # samples processed so far; stays exact when the last batch is short
    for batch, (X, _) in enumerate(dataloader):
        X = X.to(device)

        # Forwardprop
        X_reconstructed = model(X)

        # Calculating loss (the input is its own target)
        loss = loss_fn(X_reconstructed, X)

        # Backpropagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        seen += len(X)

        # Progress report
        if batch % 4 == 0:
            print(f"loss: {loss.item():>7f}  [{seen:>5d}/{total:>5d}]")

## Prediction

In [12]:
def Prediction(model, dataloader, epoch, out_dir = '/kaggle/working'):
    """Save reconstructions of the first batch of ``dataloader`` as PNG files.

    Args:
        model: network whose output is treated as logits (a sigmoid is applied
            before saving).
        dataloader: iterable yielding ``(X, _)`` pairs; only the first batch
            is used.
        epoch: value embedded in the output file names (number or string).
        out_dir: directory the images are written to; defaults to the
            location the notebook has always used.

    Channel 0 of every image in the batch is written as a grayscale file named
    ``Epoch {epoch} Reconstructed {j} sketch.png``, where ``j`` is the image's
    index within the batch. Uses the module-level ``device``.
    """
    model = model.to(device)
    model.eval()

    with torch.no_grad():
        X, _ = next(iter(dataloader))
        X_reconstructed = model(X.to(device))
        # Logits -> [0, 1]; keep channel 0 only and hand matplotlib a NumPy array
        images = torch.sigmoid(X_reconstructed[:, 0]).cpu().numpy()

    # Iterate over what the batch actually contains: index 0 is no longer
    # skipped, and a batch shorter than the configured batch size no longer
    # raises IndexError.
    for j, img in enumerate(images):
        out_path = os.path.join(out_dir, f'Epoch {epoch} Reconstructed {j} sketch.png')
        plt.imsave(out_path, img, cmap = 'gray')
            

## Iteration

In [None]:
# Epochs are numbered from 1 so the printed header and the saved file names agree.
for epoch in range(1, epochs + 1):
    print(f'Epoch: {epoch} -----------------------')
    train(train_loader, model, loss_fn, optimizer)
    # Save sample reconstructions after every fifth epoch
    if epoch % 5 == 0:
        Prediction(model, test_loader, epoch)
print('Done!')

Epoch: 1 -----------------------
loss: 0.696778  [    8/  632]
loss: 0.594334  [   40/  632]
loss: 0.534555  [   72/  632]
loss: 0.497206  [  104/  632]
loss: 0.459505  [  136/  632]
loss: 0.424979  [  168/  632]
loss: 0.398229  [  200/  632]
loss: 0.367586  [  232/  632]
loss: 0.344488  [  264/  632]
loss: 0.320195  [  296/  632]
loss: 0.293789  [  328/  632]
loss: 0.261584  [  360/  632]
loss: 0.245224  [  392/  632]
loss: 0.227931  [  424/  632]
loss: 0.204851  [  456/  632]
loss: 0.193476  [  488/  632]
loss: 0.180564  [  520/  632]
loss: 0.170256  [  552/  632]
loss: 0.156621  [  584/  632]
loss: 0.144217  [  616/  632]
Epoch: 2 -----------------------
loss: 0.144435  [    8/  632]
loss: 0.142909  [   40/  632]
loss: 0.123221  [   72/  632]
loss: 0.120119  [  104/  632]
loss: 0.113414  [  136/  632]
loss: 0.128983  [  168/  632]
loss: 0.127224  [  200/  632]
loss: 0.113000  [  232/  632]
loss: 0.102027  [  264/  632]
loss: 0.091174  [  296/  632]
loss: 0.100461  [  328/  632]
loss

In [None]:
# Save reconstructions of the first test batch once more, tagged 'final' in the file names
Prediction(model, test_loader, 'final')