# Assignment 3

This assignment is on neural rendering and shape processing in computer graphics. We provide you with a dataset of 2D icons and their corresponding vector graphics. It stems from a line of work on translating low-resolution icons into visually appealing vector forms and was kindly provided by Sheffer et al. for the purpose of this assignment. Detailed assignment instructions are given in the accompanying PDF file.

# Setting up the environment

In [None]:
# import standard PyTorch modules
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms

# import plotting utilities
import matplotlib.pyplot as plt

# define constants
import math # needed for math.pi
eps = 0.00001
# fall back to the CPU when no CUDA device is available, so the notebook still runs there
device = "cuda" if torch.cuda.is_available() else "cpu"

In [None]:
# download the icon dataset from the course web page, unless a local copy already exists
dataset_file_name = "ImagerIcon_subset.hdf5"
import os.path
# a bare `import urllib` does not load the `request` submodule
import urllib.request
if not os.path.exists(dataset_file_name):
    print("Downloading dataset")
    # download to a temporary name first: an interrupted transfer must not leave
    # a truncated file behind that the existence check above would accept next time
    partial_file_name = dataset_file_name + ".part"
    urllib.request.urlretrieve("https://www.cs.ubc.ca/~rhodin/20_CPSC_532R_533R/assignments/"+dataset_file_name, partial_file_name)
    os.replace(partial_file_name, dataset_file_name)
    print("Done downloading")
else:
    print("Dataset already present, nothing to be done")

In [None]:
# loading of icon images and vectors
import h5py
import os

# loading of the icon dataset 
class IconDataset(torch.utils.data.Dataset):
    """In-memory dataset pairing icon images with their polygon outlines.

    The HDF5 datasets 'polygon' and 'img' of `data_file` are read completely
    into tensors when the object is constructed.
    """

    def __init__(self, data_file):
        # note: `super(IconDataset)` without `self` only builds an unbound proxy,
        # so the base-class initializer would never run
        super().__init__()
        print("Loading dataset to memory, can take some seconds")
        with h5py.File(data_file, 'r') as hf:
            self.polygon = torch.from_numpy(hf['polygon'][...])
            self.imgs  = torch.from_numpy(hf['img'][...])
        print(".. done loading")

    def __len__(self):
        """Return the number of examples (first dimension of the polygon tensor)."""
        return self.polygon.shape[0]

    def __getitem__(self, idx):
        """Return example `idx` as a dict with the keys 'img' and 'polygon'.

        The image is converted to float and divided by 255. The polygon is
        transposed (the comments here indicate N x 2 -> 2 x N) and its second
        row is negated.
        """
        # transpose to bring the point dimension in the first place; clone into
        # contiguous memory so the stored data is never modified and callers can
        # safely use .view() on a single sample
        poly = self.polygon[idx].T.clone(memory_format=torch.contiguous_format)
        # negate the second coordinate to show icons upright
        poly[1,:] *= -1
        sample = {'img': self.imgs[idx].float()/255, # shape 3 x H x W
                  'polygon': poly, # shape 2 x N
                  }
        return sample

In [None]:
# read the whole dataset, then hold out its leading fraction for validation
icon = IconDataset(dataset_file_name)
print("Number of examples in dataset: {}".format(len(icon)))

# the first `val_size` examples form the validation set, the rest is used for training
val_ratio = 0.05
val_size = int(val_ratio * len(icon))
indices_val = list(range(val_size))
indices_train = list(range(val_size, len(icon)))

train_set = torch.utils.data.Subset(icon, indices_train)
val_set = torch.utils.data.Subset(icon, indices_val)

In [None]:
# show one training example: first the raster icon, then its polygon outline
i = 2
sample = train_set[i]
to_pil = torchvision.transforms.ToPILImage()
img_pil = to_pil(sample['img'])
plt.imshow(img_pil)
plt.show()
# the rows of the polygon tensor are passed to plot() as separate arguments
plt.plot(*sample['polygon'])
plt.show()
# size of the last polygon dimension; used below to size the generator network
num_points = sample['polygon'].shape[-1]

In [None]:
# a helper function to map between devices (GPU and CPU)
def dict_to_device(dictionary, device):
    """Move every value of `dictionary` to `device`.

    The dictionary is updated in place: each value is replaced by the result of
    its `.to(device)` call. The same dictionary object is returned.
    """
    # iterate over a snapshot of the keys because the entries are reassigned
    for name in list(dictionary):
        dictionary[name] = dictionary[name].to(device)
    return dictionary

# Task I: Neural Rendering

The first task is about generating an image from a given vector form.

In [None]:
# generator network: polygon vertices -> 32 x 32 image
class IconGenerator(nn.Module):
    """Render an image from a polygon with an MLP followed by an upsampling CNN.

    Args:
        num_points: number of polygon vertices N; the input polygon has
            shape (batch dim) x 2 x N.
        channels: number of feature channels used inside the CNN.
        out_channels: number of channels of the rendered image.
    """
    def __init__(self, num_points, channels=32, out_channels=3):
        super(IconGenerator, self).__init__()

        # maps the input points of size (batch dim) x 2 x N
        # to a feature map (batch dim) x (#channels) x 2 x 2 
        self.MLP = nn.Sequential(
            nn.Linear(in_features=num_points*2, out_features=channels * 2*2),
            nn.ReLU(True),
        )

        def upsample_block(in_ch, out_ch):
            # doubles the spatial resolution, then refines the features
            return [
                nn.Upsample(scale_factor=2, mode='nearest'),
                nn.Conv2d(in_ch, out_ch, kernel_size=3, padding=1),
                nn.BatchNorm2d(out_ch),
                nn.ReLU(True),
            ]

        # four x2 upsampling steps map the 2 x 2 features to a 32 x 32 image
        self.main = nn.Sequential(
            # input size: (batch dim) x (#channels) x 2 x 2
            *upsample_block(channels, channels),  # -> 4 x 4
            *upsample_block(channels, channels),  # -> 8 x 8
            *upsample_block(channels, channels),  # -> 16 x 16
            nn.Upsample(scale_factor=2, mode='nearest'),  # -> 32 x 32
            nn.Conv2d(channels, out_channels, kernel_size=3, padding=1),
            # squash to [0, 1]
            nn.Sigmoid(),
            # output size: (batch dim) x (#out_channels=3) x 32 x 32
        )

    def forward(self, input_dict):
        """Map `input_dict['polygon']` to `{'img': rendered image}`."""
        poly = input_dict['polygon']
        batch_size = poly.shape[0]
        # reshape (not view) so that non-contiguous polygons are accepted as well
        img_init = self.MLP(poly.reshape(batch_size, -1))
        img = self.main(img_init.view([batch_size,-1,2,2]))
        return {'img': img}
# place the network on the same device that dict_to_device moves the batches to
network_gen = IconGenerator(num_points).to(device)

In [None]:
from IPython import display
losses = []
train_loader = torch.utils.data.DataLoader(train_set, batch_size = 4, shuffle=True)

key = "img"
# mean squared error between the rendered and the ground-truth pixels
loss_fn = nn.MSELoss()
fig=plt.figure(figsize=(20, 5), dpi= 80, facecolor='w', edgecolor='k')
axes=fig.subplots(1,4)
optimizer = optim.Adam(network_gen.parameters(), lr=0.001)
for epoch in range(50):
    # one pass over the training set, one optimizer step per mini-batch
    for batch in train_loader:
        batch_size = batch[key].shape[0]
        dict_to_device(batch, device)

        preds = network_gen(batch)

        loss = loss_fn(preds[key], batch[key])
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        losses.append(loss.item())

    # render the first image of the last batch after each epoch
    for ax in axes:
        ax.cla()
    bi = 0 #epoch % batch_size

    points_gt = batch['polygon'][bi].cpu()
    axes[0].fill(*points_gt, edgecolor='k', fill=True) # this command closes the loop
    axes[0].plot(*points_gt,'.') # mark the individual vertices
    axes[0].set_title('Input polygon')

    # detach: the prediction is still attached to the autograd graph
    p_img_pil = torchvision.transforms.ToPILImage()(preds['img'][bi].detach().cpu())
    axes[1].imshow(p_img_pil)
    axes[1].set_title('Rendered image')

    l_img_pil = torchvision.transforms.ToPILImage()(batch['img'][bi].cpu())
    axes[2].imshow(l_img_pil)
    axes[2].set_title('Ground truth image')

    axes[3].plot(losses)
    axes[3].set_yscale('log')
    axes[3].set_title('Training loss')
    axes[3].set_xlabel("Gradient iterations")

    # replace the previous cell output with the refreshed figure
    display.clear_output(wait=True)
    display.display(plt.gcf())
    print("Plot after epoch {} (iteration {})".format(epoch, len(losses)))
display.clear_output(wait=True)

# Task II: A simple autoencoder, preliminaries

In [None]:
# simple (but inefficient) polygon autoencoder using fully-connected layers
class AE(nn.Module):
    """Fully-connected polygon autoencoder.

    Args:
        num_points: number of polygon vertices N; polygons have shape
            (batch dim) x 2 x N.
        bottleneck_width: size of the latent code produced by `encode`.
    """
    def __init__(self, num_points, bottleneck_width):
        super(AE, self).__init__()
        max_channels = 128

        # encoder: flattened polygon -> hidden layer -> latent code
        self.fc1a = nn.Linear(2*num_points, max_channels)
        self.fc1c = nn.Linear(max_channels, bottleneck_width)

        # decoder: latent code -> hidden layer -> flattened polygon
        self.fc2a = nn.Linear(bottleneck_width, max_channels)
        self.fc2c = nn.Linear(max_channels, 2*num_points)

    def encode(self, dictionary):
        """Return the latent code, shape (batch dim) x bottleneck_width, of `dictionary['polygon']`."""
        x = dictionary['polygon']
        batch_size = x.shape[0]
        # reshape (not view): polygons may be non-contiguous, e.g. transposed tensors
        x = x.reshape(batch_size, -1)
        h1 = torch.relu(self.fc1a(x))
        return self.fc1c(h1)

    def decode(self, z):
        """Map latent codes `z` to `{'polygon': tensor of shape (batch dim) x 2 x N}`."""
        batch_size = z.shape[0]
        h2 = torch.relu(self.fc2a(z))
        h2 = self.fc2c(h2)

        y_NCW = h2.view([batch_size,2,-1])
        return {'polygon': y_NCW}

    def forward(self, dictionary):
        """Encode and then decode `dictionary['polygon']`."""
        z = self.encode(dictionary)
        poly_dict = self.decode(z)
        return poly_dict

In [None]:
# place the network on the same device that dict_to_device moves the batches to
net_simple = AE(num_points=96, bottleneck_width=10).to(device)
num_training_epochs = 350

In [None]:
from IPython import display
losses = []
train_loader = torch.utils.data.DataLoader(train_set, batch_size = 8, shuffle=True, drop_last=False)
print(len(train_loader))
key = "polygon"
loss_fn = torch.nn.MSELoss()
fig=plt.figure(figsize=(20, 5), dpi= 80, facecolor='w', edgecolor='k')
axes=fig.subplots(1,4)
optimizer = optim.Adam(net_simple.parameters(), lr=0.001)
for epoch in range(num_training_epochs):
    # one pass over the training set, one optimizer step per mini-batch
    for batch in train_loader:
        batch_size = batch[key].shape[0]
        dict_to_device(batch, device)
        preds = net_simple(batch)

        loss = loss_fn(preds[key], batch[key])
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        losses.append(loss.item())

    # the figure is only refreshed every 10th epoch
    if epoch % 10 != 0:
        continue

    bi = 0 #epoch % batch_size
    for ax in axes:
        ax.cla()
    # first sample of the last mini-batch of this epoch and its reconstruction
    points_gt = batch['polygon'][bi].cpu()
    points_pred = preds['polygon'][bi].detach().cpu()

    axes[0].plot(*points_gt,'.') # ground-truth vertices
    axes[0].plot(*points_gt[:,0],'.',ms=10,color='red') # mark the first vertex to identify issues
    axes[0].set_title('Input polygon')

    axes[1].plot(*points_pred[:,0],'.',ms=10,color='red') # mark the first vertex to identify issues
    axes[1].fill(*points_pred, edgecolor='k', fill=True) # fill() closes the loop
    axes[1].set_title('Output polygon')

    axes[2].fill(*points_gt, edgecolor='gray', fill=False) # fill() closes the loop
    axes[2].plot(*points_pred,".")
    axes[2].set_title('Output pointcloud (GT in gray)')

    axes[3].plot(losses)
    axes[3].set_yscale('log')
    axes[3].set_xlabel("Gradient iterations")
    axes[3].set_title('Training loss')

    # replace the previous cell output with the refreshed figure
    display.clear_output(wait=True)
    display.display(plt.gcf())
    print("Plot after epoch {} (iteration {})".format(epoch, len(losses)))
display.display(plt.gcf())

# Task III: A simple autoencoder

In [None]:
# two-sided loss on the distance to the nearest neighbor
def chamfer_distance(pred, label):
    """Two-sided nearest-neighbour loss between predicted and label points.

    Both inputs are viewed as (batch) x 2 x (points). For every predicted point
    the squared distance to its nearest label point is taken, and vice versa;
    the returned scalar is the sum of the means of these two sets of distances.
    """
    batch_size = label.shape[0]
    # predictions run along the last axis, labels along the second-to-last one
    pred_cols = pred.view(batch_size, 2, 1, -1)
    label_rows = label.view(batch_size, 2, -1, 1)
    # squared Euclidean distance of every (label point, predicted point) pair
    pairwise_sq = ((pred_cols - label_rows) ** 2).sum(dim=1)
    nearest_to_pred = pairwise_sq.min(dim=-2).values   # one entry per predicted point
    nearest_to_label = pairwise_sq.min(dim=-1).values  # one entry per label point
    return nearest_to_pred.mean() + nearest_to_label.mean()

# functions to roll a tensor along dimension 1 and 2 by n places
def roll_1(x, n=1):
    """Cyclically shift `x` by `n` places along dimension 1.

    Elements pushed past the end re-enter at the start. `n` may be zero,
    negative (shift in the other direction) or larger than the dimension size,
    in which case it wraps around.
    """
    return torch.roll(x, shifts=n, dims=1)
def roll_2(x, n=1):
    """Cyclically shift `x` by `n` places along dimension 2.

    Elements pushed past the end re-enter at the start. `n` may be zero,
    negative (shift in the other direction) or larger than the dimension size,
    in which case it wraps around.
    """
    return torch.roll(x, shifts=n, dims=2)

# a function that takes two polygons as input and returns the minimum MSE over all possible starting point rotations
def roll_invariant_MSE(pred, label):
    """Mean squared error that ignores which vertex a polygon starts with.

    For every sample, the MSE between `label` and each cyclic shift of `pred`
    along the last (point) dimension is computed and the smallest one is kept.
    The result is the mean of these per-sample minima.

    Args:
        pred: predicted polygons, shape (batch) x 2 x N, or 2 x N for a single polygon.
        label: target polygons with the same shape as `pred`.

    Returns:
        A scalar tensor that supports `backward()`.
    """
    if pred.dim() == 2:
        # a single polygon: add the batch dimension expected below
        pred, label = pred.unsqueeze(0), label.unsqueeze(0)
    num_points = label.shape[-1]
    # per_shift[s, b]: MSE of sample b when its prediction is shifted by s places
    per_shift = torch.stack([
        ((torch.roll(pred, shifts=shift, dims=-1) - label) ** 2).flatten(start_dim=1).mean(dim=1)
        for shift in range(num_points)
    ])
    # best shift per sample, then the average over the batch
    return per_shift.min(dim=0).values.mean()

In [None]:
# train this new network, net_simple2, with roll_invariant_MSE to be able to compare results to the MSE training
# (placed on the same device that dict_to_device moves the batches to)
net_simple2 = AE(num_points=96, bottleneck_width=10).to(device)
num_training_epochs = 350

In [None]:
from IPython import display
losses = []
train_loader = torch.utils.data.DataLoader(train_set, batch_size = 8, shuffle=True, drop_last=False)

key = "polygon"
loss_fn = chamfer_distance
# TODO: switch to loss_fn = roll_invariant_MSE once it is implemented
fig=plt.figure(figsize=(20, 5), dpi= 80, facecolor='w', edgecolor='k')
axes=fig.subplots(1,4)
optimizer = optim.Adam(net_simple2.parameters(), lr=0.001)
for epoch in range(num_training_epochs):
    # one pass over the training set, one optimizer step per mini-batch
    for batch in train_loader:
        batch_size = batch[key].shape[0]
        dict_to_device(batch, device)
        preds = net_simple2(batch)

        loss = loss_fn(preds[key], batch[key])
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        losses.append(loss.item())

    # the figure is only refreshed every 20th epoch
    if epoch % 20 != 0:
        continue

    for ax in axes:
        ax.cla()
    bi = 0
    # first sample of the last mini-batch of this epoch and its reconstruction
    points_gt = batch['polygon'][bi].cpu()
    points_pred = preds['polygon'][bi].detach().cpu()

    axes[0].plot(*points_gt,'.') # ground-truth vertices
    axes[0].plot(*points_gt[:,0],'.',ms=10,color='red') # mark the first vertex to identify issues
    axes[0].set_title('Input polygon')

    axes[1].plot(*points_pred[:,0],'.',ms=10,color='red') # mark the first vertex to identify issues
    axes[1].fill(*points_pred, edgecolor='k', fill=True) # fill() closes the loop
    axes[1].set_title('Output polygon')

    axes[2].fill(*points_gt, edgecolor='gray', fill=False) # fill() closes the loop
    axes[2].plot(*points_pred,".")
    axes[2].set_title('Output pointcloud (GT in gray)')

    axes[3].plot(losses)
    axes[3].set_yscale('log')
    axes[3].set_xlabel("Gradient iterations")
    axes[3].set_title('Training loss')

    # replace the previous cell output with the refreshed figure
    display.clear_output(wait=True)
    display.display(plt.gcf())
    print("Plot after epoch {} (iteration {})".format(epoch, len(losses)))
display.display(plt.gcf())

# Task III: Improving the NN architecture

In [None]:
# an improved autoencoder that uses *no* fully-connected layer
# padding_mode='circular' be careful using it
class PolygonAE(nn.Module):
    """Polygon autoencoder whose encoder contains no fully-connected layer.

    The encoder applies 1D convolutions with circular padding along the point
    dimension and then averages over all points. Since a polygon is a closed
    loop, this makes the latent code independent of which vertex is stored first.

    Args:
        num_points: number of polygon vertices N; polygons have shape
            (batch dim) x 2 x N.
        bottleneck_width: size of the latent code produced by `encode`.
    """
    def __init__(self, num_points, bottleneck_width):
        super(PolygonAE, self).__init__()
        channels_encoder = 64
        channels_decoder = 128

        # kernel size 3 with growing dilation widens the receptive field
        # without any striding, which keeps the encoder shift-equivariant
        self.convs = nn.ModuleList([
            nn.Conv1d(2, channels_encoder, kernel_size=3, dilation=1),
            nn.Conv1d(channels_encoder, channels_encoder, kernel_size=3, dilation=2),
            nn.Conv1d(channels_encoder, channels_encoder, kernel_size=3, dilation=4),
        ])
        # per-point projection to the bottleneck width (a 1x1 convolution)
        self.to_code = nn.Conv1d(channels_encoder, bottleneck_width, kernel_size=1)

        # It is OK to maintain the following decoder
        self.fc2a = nn.Linear(bottleneck_width, channels_decoder)
        self.fc2c = nn.Linear(channels_decoder, 2*num_points)

    def encode(self, dictionary):
        """Return the latent code, shape (batch dim) x bottleneck_width, of `dictionary['polygon']`."""
        x_NCW = dictionary['polygon']

        h = x_NCW
        for conv in self.convs:
            # for kernel size 3, padding by the dilation on both sides keeps the
            # length; the padding is applied explicitly and wraps around the loop
            pad = conv.dilation[0]
            h = nn.functional.pad(h, (pad, pad), mode='circular')
            h = torch.relu(conv(h))
        # global average over the point dimension removes the remaining
        # dependence on the starting vertex
        return self.to_code(h).mean(dim=-1)

    def decode(self, z):
        """Map latent codes `z` to `{'polygon': tensor of shape (batch dim) x 2 x N}`."""
        batch_size = z.shape[0]
        h2 = torch.relu(self.fc2a(z))
        h2 = self.fc2c(h2)
        y_NCW = h2.view([batch_size,2,-1])
        return {'polygon': y_NCW}

    def forward(self, dictionary):
        """Encode and then decode `dictionary['polygon']`."""
        z = self.encode(dictionary)
        out_dict = self.decode(z)
        return out_dict


In [None]:
# train a new network to be able to compare results to the initial training
# (placed on the same device that dict_to_device moves the batches to)
#net_graph = AE(num_points=96, bottleneck_width=10).to(device) # TODO: try this one first by commenting out PolygonAE.
net_graph = PolygonAE(num_points=96, bottleneck_width=10).to(device) # TODO: use this line to replace the simple AE with your dedicated one
num_training_epochs = 2000

In [None]:
from IPython import display
losses = []
train_loader = torch.utils.data.DataLoader(train_set, batch_size = 16, shuffle=True, drop_last=True)

def augment_polygon(poly):
    """Return `poly` rolled along dimension 2 by a random number of places.

    A single offset in [0, size of the last dimension) is drawn per call, so
    the same shift is applied to the whole tensor.
    """
    # shift starting point
    num_points = poly.shape[-1]
    shift = torch.LongTensor(1).random_(0, num_points).item()
    return roll_2(poly, n=shift)

key = "polygon"
loss_fn = roll_invariant_MSE
fig=plt.figure(figsize=(20, 5), dpi= 80, facecolor='w', edgecolor='k')
axes=fig.subplots(1,4)
optimizer = optim.Adam(net_graph.parameters(), lr=0.001)
for epoch in range(num_training_epochs):
    # one pass over the training set, one optimizer step per mini-batch
    for batch in train_loader:
        # randomize the starting vertex before the batch is moved to the device
        batch['polygon'] = augment_polygon(batch['polygon'])

        batch_size = batch[key].shape[0]
        dict_to_device(batch, device)
        preds = net_graph(batch)

        loss = loss_fn(preds[key], batch[key])
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        losses.append(loss.item())

    # the figure is only refreshed every 20th epoch
    if epoch % 20 != 0:
        continue

    bi = 0
    for ax in axes:
        ax.cla()

    # first sample of the last mini-batch of this epoch and its reconstruction
    points_gt = batch['polygon'][bi].cpu()
    points_pred = preds['polygon'][bi].detach().cpu()

    axes[0].plot(*points_gt,'.') # ground-truth vertices
    axes[0].plot(*points_gt[:,0],'.',ms=10,color='red') # mark the first vertex to identify issues
    axes[0].set_title('Input polygon')

    axes[1].plot(*points_pred[:,0],'.',ms=10,color='red') # mark the first vertex to identify issues
    axes[1].fill(*points_pred, edgecolor='k', fill=True) # fill() closes the loop
    axes[1].set_title('Output polygon')

    axes[2].fill(*points_gt, edgecolor='gray', fill=False) # fill() closes the loop
    axes[2].plot(*points_pred,".")
    axes[2].set_title('Output pointcloud (GT in gray)')

    axes[3].plot(losses)
    axes[3].set_yscale('log')
    axes[3].set_xlabel("Gradient iterations")
    axes[3].set_title('Training loss')

    # replace the previous cell output with the refreshed figure
    display.clear_output(wait=True)
    display.display(plt.gcf())
    print("Plot after epoch {} (iteration {})".format(epoch, len(losses)))
display.display(plt.gcf())

# Task IV: Shape space interpolation

In [None]:
# TODO implement task IV

TODO: Explain your findings here.