In [1]:
# Step up to the parent directory so the kernel runs from there.
# NOTE(review): presumably this makes the `helpers` and `model` packages imported below resolvable — confirm.
# Not idempotent: re-running this cell moves up one more level each time.
%cd ..

/home/cole/Documents/dl-final-project


In [2]:
import os
import random

import numpy as np
import pandas as pd

import matplotlib
import matplotlib.pyplot as plt
# Default every figure to a 5x5 canvas rendered at 200 dpi.
matplotlib.rcParams.update({
    'figure.figsize': [5, 5],
    'figure.dpi': 200,
})

import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision

from helpers.data_helper import UnlabeledDataset, LabeledDataset
from helpers.helper import collate_fn, draw_box

In [3]:
# One shared seed for every random number generator used in this notebook.
SEED = 0

random.seed(SEED)
np.random.seed(SEED)
# Trailing semicolon hides the Generator object that manual_seed returns.
torch.manual_seed(SEED);

In [4]:
# Dataset root, relative to the kernel's current working directory.
image_folder = '../data'
# The annotation file sits directly inside the dataset root.
annotation_csv = f'{image_folder}/annotation.csv'

In [5]:
# The unlabeled_scene_index below is fixed and should be left as is:
# scenes 0-105 (the first 106) are unlabeled.
unlabeled_scene_index = np.arange(0, 106)
# Scenes 106-133 (28 in total) are labeled.
# Divide labeled_scene_index into two subsets (training and validation).
labeled_scene_index = np.arange(106, 106 + 28)

In [6]:
# Convert every loaded image to a tensor; the labeled dataset reuses this transform.
transform = torchvision.transforms.ToTensor()

# Dataset over the unlabeled scenes.
unlabeled_trainset = UnlabeledDataset(
    image_folder=image_folder,
    scene_index=unlabeled_scene_index,
    first_dim='image',  # alternative value noted by the author: 'sample'
    transform=transform,
)

# Shuffled batches of 100 items, loaded by two worker processes.
unlabeled_trainloader = torch.utils.data.DataLoader(
    dataset=unlabeled_trainset,
    batch_size=100,
    shuffle=True,
    num_workers=2,
)

In [7]:
# The labeled dataset can only be retrieved sample by sample.
# Everything it returns is a tuple of tensors, because the bounding boxes may differ in size.
# extra_info controls whether the loader also returns the optional extra information;
# using it is not required.
labeled_trainset = LabeledDataset(
    image_folder=image_folder,
    annotation_file=annotation_csv,
    scene_index=labeled_scene_index,
    transform=transform,
    extra_info=True,
)

# Shuffled batches of 2 samples; collate_fn is the project helper used to assemble each batch.
labeled_trainloader = torch.utils.data.DataLoader(
    dataset=labeled_trainset,
    batch_size=2,
    shuffle=True,
    num_workers=2,
    collate_fn=collate_fn,
)

In [8]:
# Labeled-loader equivalent, kept for reference:
# sample, target, road_image, extra = next(iter(labeled_trainloader))
# print(torch.stack(sample).shape)

# Pull a single batch from the unlabeled loader to sanity-check its output shape.
# The built-in next() is used because DataLoader iterators implement __next__;
# the Python-2-style .next() alias is missing in recent PyTorch releases and
# raises AttributeError there.
image, camera_index = next(iter(unlabeled_trainloader))
print(image.shape)

torch.Size([100, 3, 256, 306])


# Model

In [10]:
# All tensors and the model are placed on the CPU.
device = torch.device(type='cpu')
# Bare name as the last expression so the notebook echoes the chosen device.
device

device(type='cpu')

### Hyperparameters

In [10]:
from model.vae import VAE, vae_loss_function

# Number of passes over the unlabeled loader.
epochs = 50
# Sizes handed to the VAE constructor as h_dim and z_dim respectively.
hidden_size, latent_size = 1024, 512

# Build the VAE, then move it to the selected device.
model = VAE(h_dim=hidden_size, z_dim=latent_size)
model = model.to(device)

# Adam over all model parameters with a learning rate of 1e-4.
optimizer = torch.optim.Adam(params=model.parameters(), lr=1e-4)
# Loss callable used by the training loop.
criterion = vae_loss_function

In [None]:
# Put the model in training mode before optimizing.
model.train()

for epoch in range(epochs):
    # Running sums of the per-batch loss terms, reported as means once the epoch ends.
    epoch_loss, epoch_bce, epoch_kld = 0.0, 0.0, 0.0

    max_batches = len(unlabeled_trainloader)
    for idx, (images, camera_index) in enumerate(unlabeled_trainloader):
        images = images.to(device)
        recon_images, mu, logvar = model(images)

        # criterion returns the total loss followed by two component terms
        # (presumably reconstruction BCE and KL divergence — confirm in model.vae).
        loss, bce, kld = criterion(recon_images, images, mu, logvar)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # .item() gives plain Python numbers, so the sums do not hold on to tensors.
        epoch_loss += loss.item()
        epoch_bce += bce.item()
        epoch_kld += kld.item()

        # Per-batch progress line; when an epoch has fewer than 1000 batches
        # this only fires for idx 0.
        if idx % 1000 == 0:
            print('[', epoch, '|', idx ,'/', max_batches, ']', 
                  'loss:', loss.item(), 'bce:', bce.item(), 'kld:', kld.item())

    # Epoch summary so progress is visible even when the batch log fires only once.
    if max_batches > 0:
        print('[', epoch, '| epoch mean ]',
              'loss:', epoch_loss / max_batches,
              'bce:', epoch_bce / max_batches,
              'kld:', epoch_kld / max_batches)

    # One checkpoint per epoch, written to the current working directory.
    torch.save(model.state_dict(), 'vae-epoch-' + str(epoch) + '.torch')

[ 0 | 0 / 212 ] loss: 16860848.0 bce: 16831960.0 kld: 28887.875
