In [1]:
import torch
import torch.nn as nn
from torch.nn import init
import functools
from torch.optim import lr_scheduler

if torch.cuda.is_available():
    # Set Device
    torch.cuda.set_device(1)
    print("Cuda enabled on device: {}".format(torch.cuda.current_device()))



Cuda enabled on device: 1


In [2]:
from depthnet.model import DepthNet
from collections import OrderedDict


class DepthNetWithHints(nn.Module):
    def __init__(self, depthnet, hist_len, num_hints_layers):
        """Takes an existing DepthNet, along with the size of the histogram and the size of its bins"""
        super(DepthNetWithHints, self).__init__()
        self.input_nc = depthnet.input_nc
        self.output_nc = depthnet.output_nc
        self.depthnet = depthnet
        self.hist_len = hist_len
        self.num_hints_layers = num_hints_layers
        
        # Create hints network
        assert num_hints_layers > 0
        # Extract number of out channels of conv4
        hints_output = depthnet.model4[0].out_channels
        hints = OrderedDict([("hints_conv_0", nn.Conv2d(self.hist_len, hints_output, kernel_size=1))])
        hints.update({"hints_relu0_1": nn.ReLU(True)})
        j = 2
        for _ in range(num_hints_layers-1):
            hints.update({"hints_conv_{}".format(j): nn.Conv2d(hints_output, hints_output, kernel_size=1)})
            j += 1
            hints.update({"hints_relu_{}".format(j): nn.ReLU(True)})
            j += 1
        
        self.global_hints = nn.Sequential(hints)
        
    def forward(self, input_A, hist):
        # |hist| should be a (1, hist_len, 1, 1) tensor
        # First 4 layers of regular depthnet
        conv1_2 = self.depthnet.model1(input_A)
        conv2_2 = self.depthnet.model2(conv1_2[:,:,::2,::2]) # downsample
        conv3_3 = self.depthnet.model3(conv2_2[:,:,::2,::2]) # downsample
        conv4_3 = self.depthnet.model4(conv3_3[:,:,::2,::2]) # downsample
        
        # Global hints network
        hints_out = self.global_hints(hist)
        # Replicate and add to output of conv4 (broadcasting takes care of this)
        conv5_3 = self.depthnet.model5(conv4_3 + hints_out)
        
        # Finish doing the rest of the depthnet
        conv6_3 = self.depthnet.model6(conv5_3)
        conv7_3 = self.depthnet.model7(conv6_3)
        conv8_up = self.depthnet.model8up(conv7_3) + self.depthnet.model3short8(conv3_3) # Shortcut
        conv8_3 = self.depthnet.model8(conv8_up)
        conv9_up = self.depthnet.model9up(conv8_3) + self.depthnet.model2short9(conv2_2) # Shortcut
        conv9_3 = self.depthnet.model9(conv9_up)
        conv10_up = self.depthnet.model10up(conv9_3) + self.depthnet.model1short10(conv1_2) # Shortcut
        conv10_2 = self.depthnet.model10(conv10_up)
        out_reg = self.depthnet.model_out(conv10_2)
        return out_reg
        

In [3]:
import torchvision.transforms as transforms
import numpy as np
from PIL import Image
import os
from depthnet.dataset import DepthDataset, ToFloat, CenterCrop, RandomCrop, AddDepthHist, ToTensor
    
    
# TODO: Do this the right way:

def get_global_stats(self, outFile=None, writeFile=False):
    """Calculate mean and variance of each rgb channel.

    Optionally caches the result of this calculation in outfile so it doesn't need to be done each
    time the dataset is loaded.
    """
    S = np.zeros(3)
    S_sq = np.zeros(3)
    npixels = 0.
    for depthFile, rgbFile in self.data:
        rgbImg = Image.open(os.path.join(self.dataDir, rgbFile))
        rgbImg = np.asarray(rgbImg, dtype=np.uint16)
#             print(rgbImg[0:10, 0:10, :])

        npixels += rgbImg.shape[0]*rgbImg.shape[1]
        for channel in range(rgbImg.shape[2]):
            S[channel] += np.sum(rgbImg[:,:,channel])
            S_sq[channel] += np.sum((rgbImg[:,:,channel])**2)
    mean = S/npixels
    var = S_sq/npixels - mean**2

    # Load full dataset (memory-intensive)
#         full = []
#         for depthFile, rgbFile in self.data:
#             rgbImg = Image.open(os.path.join(self.dataDir, rgbFile))
#             rgbImg = np.asarray(rgbImg, dtype=np.uint16)
#             full.append(rgbImg)

#         a = np.array(full)
#         mean_true = np.mean(a, axis=(0, 1, 2))
#         var_true = np.var(a, axis=(0, 1, 2))
#         print("actual mean and variance: {} {}".format(mean_true, var_true))
#         print(a.shape)
    return mean, var

DepthDataset.get_global_stats = get_global_stats
class NormalizeRGB(object):
    def __init__(self, mean, var):
        """
        mean - np.array of size 3 - the means of the three color channels over the whole (training) dataset
        var - np.array of size 3 - the variances of the three color channels over the whole (training) dataset
        """
        self.mean = mean
        self.var = var
    def __call__(self, sample):
        sample["rgb"] -= self.mean
        sample["rgb"] /= np.sqrt(self.var)
#         print(sample["rgb"][0:10, 0:10, 0])
        return sample
# Load training data
train_txt = "data/sunrgbd_nyu/train.txt"
trainDir = "data/sunrgbd_nyu"
train = DepthDataset(train_txt, trainDir)
mean, var = train.get_global_stats()
train.transform = transforms.Compose([ToFloat(),
                                      RandomCrop((320, 400)),
                                      AddDepthHist(bins=800//3, range=(0,8)),
                                      NormalizeRGB(mean, var),
                                      ToTensor(),
                                     ])
#                      transform=transforms.Compose([ToFloat(), Crop_8(), ToFloat(), ToTensor()])
#                      transform=transforms.Compose([ToFloat(), Crop_small(), ToFloat(), ToTensor()])
                    
# for i in range(len(train)):
#     if np.any((train[i]["depth"] < 0).numpy()):
#         print("yo wat")
#         break
print("Loaded training dataset from {} with size {}.".format(train_txt, len(train)))

dev_txt = "data/sunrgbd_nyu/dev.txt"
devDir = "data/sunrgbd_nyu"
dev = DepthDataset(dev_txt, devDir, 
                     transform = transforms.Compose([ToFloat(),
                                                     CenterCrop((400, 320)),
                                                     AddDepthHist(bins=800//3, range=(0,8)),
                                                     NormalizeRGB(mean, var),
                                                     ToTensor(),
                                                    ])
#                      transform=transforms.Compose([ToFloat(), Crop_8(), ToTensor()])
#                      transform=transforms.Compose([ToFloat(), Crop_small(), ToTensor()])
                    )
print("Loaded dev dataset from {} with size {}.".format(dev_txt, len(dev)))



Loaded training dataset from data/sunrgbd_nyu/train.txt with size 1159.
Loaded dev dataset from data/sunrgbd_nyu/dev.txt with size 145.


In [None]:
# Set up training.
import torch.optim as optim
from tensorboardX import SummaryWriter
import torchvision.utils as vutils
from torch.utils.data import DataLoader
from depthnet.utils import save_checkpoint, validate

def validate(loss, model, val_loader):
    """Computes the validation error of the model on the validation set.
    val_loader should be a DataLoader.
    
    Returns an ordinary number (i.e. not a tensor)
    
    """
    
    it = None
    losses = []
    for it, data in enumerate(val_loader):
        depth = data["depth"].float()
        rgb = data["rgb"].float()
        if torch.cuda.is_available():
            depth = depth.cuda()
            rgb = rgb.cuda()
        if "hist" in data:
#             print(data)
            hist = data["hist"].float()
            if torch.cuda.is_available():
                hist = hist.cuda()
#             print(hist)
            output = model(rgb, hist)
        else:
            output = model(rgb)
        losses.append(loss(output, depth).item())
    nbatches = it+1
    return sum(losses)/nbatches

checkpointfile = "checkpoints_hints/checkpoint_epoch_49.pth.tar"
# lam = 1e-8 # Weight decay parameter for L2 regularization
learning_rate = 1e-6
num_epochs = 50
batch_size = 10
val_batch_size=5

# Build model and loss
# Hyperparameters
input_nc = 3
output_nc = 1
nbins=800//3

dn = DepthNet(input_nc, output_nc)
model = DepthNetWithHints(dn, hist_len=nbins, num_hints_layers=4)

# Tensorboardx
writer = SummaryWriter(comment="with_hints")
# data_trainloss = "data/trainloss"
# data_valloss = "data/valloss"
# Image = "Image"

#################
# Loss function #
#################

def berhu_loss(prediction, target):
    diff = prediction - target
    threshold = 0.2*torch.max(torch.abs(prediction - target))
    c = threshold.detach()
    l2_part = torch.sum((diff**2 + c**2))/(2*c)
    l1_part = torch.sum(torch.abs(diff))
    return l1_part+l2_part

# loss = nn.SmoothL1Loss()
# loss = berhu_loss
loss = nn.MSELoss()

if torch.cuda.is_available():
    model.cuda()
    loss.cuda()

# Checkpointing
if checkpointfile is not None:
    if torch.cuda.is_available():
        checkpoint = torch.load(checkpointfile)
    else:
        # Load GPU model on CPU
        checkpoint = torch.load(checkpointfile,
                                map_location=lambda storage,
                                loc: storage)
    start_epoch = checkpoint['epoch'] + 1
    best_loss = checkpoint['best_loss']
    model.load_state_dict(checkpoint['state_dict'])
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    optimizer.load_state_dict(checkpoint['optim_state_dict'])
    trainlosses = checkpoint['trainlosses']
    vallosses = checkpoint['vallosses']
    for i, trainloss in enumerate(trainlosses): # For tensorboardx
        writer.add_scalar("data/trainloss", trainloss, i)
    vallosses = checkpoint['vallosses']
    for i, valloss in enumerate(vallosses): # For tensorboardx
        writer.add_scalar("data/valloss", valloss, i)
    global_it = len(trainlosses)
    print("=> loaded checkpoint '{}' (trained for {} epochs)".format(checkpointfile, checkpoint['epoch']))
else:
    start_epoch = 0
    global_it = 0 # Track global iterations
    best_loss = torch.FloatTensor([float('inf')])
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    trainlosses = []
    vallosses = []
    # Initialize weights:
    for name, param in model.named_parameters():
        if "conv" in name and "weight" in name:
#             print(name)
            nn.init.xavier_normal_(param)
        if "norm" in name and "weight" in name:
#             print(name)
            nn.init.constant_(param, 1)
        elif "bias" in name:
            nn.init.constant_(param, 0)
            
# Scheduler
scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=[], gamma=0.1)

# Print summary of setup:
print("loaded checkpointfile: {}".format(checkpointfile))
print("start_epoch: {}".format(start_epoch))
print("global_it: {}".format(global_it))
print("optimizer: {}".format(optimizer))
print("batch_size: {}".format(batch_size))
print("num_epochs: {}".format(num_epochs))
print("learning rate (initial): {}".format(learning_rate))
print("scheduler: {}".format(scheduler.state_dict()))


=> loaded checkpoint 'checkpoints_hints/checkpoint_epoch_49.pth.tar' (trained for 49 epochs)
loaded checkpointfile: checkpoints_hints/checkpoint_epoch_49.pth.tar
start_epoch: 50
global_it: 0
optimizer: Adam (
Parameter Group 0
    amsgrad: False
    betas: (0.9, 0.999)
    eps: 1e-08
    initial_lr: 1e-05
    lr: 1e-05
    weight_decay: 0
)
batch_size: 10
num_epochs: 50
learning rate (initial): 1e-06
scheduler: {'milestones': [], 'gamma': 0.1, 'base_lrs': [1e-05], 'last_epoch': -1}


In [None]:
####################
# Run the training #
####################
train_loader = DataLoader(train, batch_size=batch_size, shuffle=True, num_workers=4, pin_memory=True)
val_loader = DataLoader(dev, batch_size=val_batch_size, shuffle=True, num_workers=4, pin_memory=True)


# Normalize rgb input

for epoch in range(start_epoch, start_epoch + num_epochs):
    print("epoch: {}".format(epoch))
    data = None
    output = None
    for it, data in enumerate(train_loader):
        depth = data["depth"].float()
        rgb = data["rgb"].float()
        hist = data["hist"].float()

#         print(rgb.shape)
#         print(depth.shape)
#         print(hist.shape)
        if torch.cuda.is_available():
            depth = depth.cuda()
            rgb = rgb.cuda()
            hist = hist.cuda()
        # New batch
#         print(rgb.dtype)
        scheduler.optimizer.zero_grad()

        # Normalize rgb input
        output = model(rgb, hist)
        
        # Save the first batch output of every epoch
        
#         a = list(model.parameters())[0].clone()

        trainloss = loss(output, depth)
        trainloss.backward()
#         print(list(model.parameters())[0].grad)
        scheduler.optimizer.step()
#         print(depth)
#         print(output)
#         b = list(model.parameters())[0].clone()

        if not (it % 10):
            print("\titeration: {}\ttrain loss: {}".format(it, trainloss.item()))
        trainlosses.append(trainloss.item())
#         print(trainloss.item())
#         print(writer)
        writer.add_scalar("data/trainloss", trainloss.item(), global_it)
        
        # TESTING:
#         if not ((it + 1) % 5):
#             # Stop after 5 batches
#             break

#         print(torch.equal(a.data, b.data))
        global_it += 1
    # Checkpointing
    # Get bool not ByteTensor"
    valloss = validate(loss, model, val_loader)
    print("End epoch {}\tval loss: {}".format(epoch, valloss))
    vallosses.append(valloss)
    writer.add_scalar("data/valloss", valloss, epoch)

    # Save the last batch output of every epoch
    rgb_input = vutils.make_grid(data["rgb"], nrow=batch_size, normalize=True, scale_each=True)
    writer.add_image('image/rgb_input', rgb_input, epoch)
    
    depth_truth = vutils.make_grid(data["depth"], nrow=batch_size, normalize=True, scale_each=True)
    writer.add_image('image/depth_truth', depth_truth, epoch)
    
    depth_output = vutils.make_grid(output, nrow=batch_size, normalize=True, scale_each=True)
    writer.add_image('image/depth_output', depth_output, epoch)
    
    for name, param in model.named_parameters():
        writer.add_histogram(name, param.clone().cpu().data.numpy(), global_it)
#     save_images(data["rgb"], data["depth"], output, outputDir="images", filename="epoch_{}".format(epoch))
    
    is_best = bool(trainloss.data.cpu().numpy() < best_loss.numpy())
    # Get greater Tensor to keep track best acc
    best_loss = torch.FloatTensor(min(trainloss.data.cpu().numpy(), best_loss.numpy()))
    # Save checkpoint
    save_checkpoint({
        'epoch': epoch,
        'global_it' : global_it,
        'state_dict': model.state_dict(),
        'best_loss': best_loss,
        'optim_state_dict': optimizer.state_dict(),
        'trainlosses': trainlosses,
        'vallosses': vallosses
    }, is_best, filename="checkpoints_hints/checkpoint_epoch_{}.pth.tar".format(epoch), always_save=True)

# Close tensorboardX    
# writer.export_scalars_to_json("./all_scalars.json") # for other processing
writer.close()

epoch: 50
	iteration: 0	train loss: 0.42086899280548096
	iteration: 10	train loss: 0.6157236695289612
	iteration: 20	train loss: 0.7417676448822021
	iteration: 30	train loss: 0.39305663108825684
	iteration: 40	train loss: 0.48142266273498535
	iteration: 50	train loss: 0.5271826982498169
	iteration: 60	train loss: 0.45935332775115967
	iteration: 70	train loss: 0.46585604548454285
	iteration: 80	train loss: 0.5897435545921326
	iteration: 90	train loss: 0.6782640814781189
	iteration: 100	train loss: 0.3469370901584625
	iteration: 110	train loss: 0.5648832321166992
End epoch 50	val loss: 0.7640022193563396
=> Saving checkpoint to: checkpoints_hints/checkpoint_epoch_50.pth.tar
epoch: 51
	iteration: 0	train loss: 0.4101755917072296
	iteration: 10	train loss: 0.5434679985046387
	iteration: 20	train loss: 0.5831940174102783
	iteration: 30	train loss: 0.5146448612213135
	iteration: 40	train loss: 0.4713344871997833
	iteration: 50	train loss: 0.6203109622001648
	iteration: 60	train loss: 0.74479