In [1]:

from SILog import SILogLoss

import os
import tqdm
from PIL import Image
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms
from matplotlib import pyplot as plt
import inference as I
from torchvision import models
import torch.nn.functional as F

OUT_SIZE = (2200, 1550)
in_transform = transforms.Compose([
    transforms.Resize(OUT_SIZE),
    transforms.ToTensor()
])

out_transform = transforms.Compose([
    transforms.Resize(OUT_SIZE),
    transforms.ToTensor()
])
class MonocularDepthDataset(Dataset):
    def __init__(self, df, in_transform=None,out_transform = None ):
        self.df = df
        self.in_transform = in_transform
        self.out_transform = out_transform

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        image_path = self.df[idx][0]
        depth_path = self.df[idx][1]

        image = Image.open(image_path)
        depth = Image.open(depth_path)

        if self.in_transform:
            image = self.in_transform(image)
        if self.out_transform:
            depth = self.out_transform(depth)
        #print(depth_path,depth.shape)
        return image, depth

In [2]:
def conv_relu_block(in_channel,out_channel,kernel,padding):
    return nn.Sequential(
            nn.Conv2d(in_channel,out_channel, kernel_size = kernel, padding=padding),
            nn.ReLU()) #nn.ReLU(inplace=True) #nn.Ge
class vanilla_unet_full_nearest(nn.Module):
    def __init__(self, n_class):
        super().__init__()
        self.input_1 = conv_relu_block(3,3,3,1) ##grayscale inputs
        #self.input_2 = conv_relu_block(64, 64, 3, 1) #no extra channels

        self.base_model = models.resnet18(pretrained=True)
        self.base_layers = list(self.base_model.children())

        self.l0 = nn.Sequential(*self.base_layers[:3])
        self.U0_conv = conv_relu_block(64, 64, 1, 0)
        self.conv_up0 = conv_relu_block(64 + 256, 128, 3, 0)

        self.l1 = nn.Sequential(*self.base_layers[3:5])
        self.U1_conv = conv_relu_block(64, 64, 1, 0)
        self.conv_up1 = conv_relu_block(64 + 256, 256, 3, 1)

        self.l2 = self.base_layers[5]
        self.U2_conv = conv_relu_block(128, 128, 1, 0)
        self.conv_up2 = conv_relu_block(128 + 512, 256, 3, 1)

        self.l3 = self.base_layers[6]
        self.U3_conv = conv_relu_block(256, 256, 1, 0)
        self.conv_up3 = conv_relu_block(256 + 512, 512, 3, 1)

        self.l4 = self.base_layers[7]
        self.U4_conv = conv_relu_block(512, 512, 1, 0)

        self.conv_up4 = conv_relu_block(64 + 128, 64, 3, 1)

        self.out4 = nn.Conv2d(128, n_class, 1)

        self.upsample = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True)

    def forward(self, x):
        x = torch.cat([x,x,x], axis = 1)
        x = self.input_1(x)
        
        #print(x.shape,'x')
         #concat on channel
        #x_one = self.input_2(x_one)
        block0 = self.l0(x)
        block1 = self.l1(block0)
        block2 = self.l2(block1)
        block3 = self.l3(block2)
        block4 = self.l4(block3)

        block4 = self.U4_conv(block4)
        #print(block4.shape)
        x = nn.Upsample(size = (138,97), mode='bilinear', align_corners=True)(block4)
        block3 = self.U3_conv(block3)
        
        x = torch.cat([x, block3], axis=1)
        x = self.conv_up3(x)
        
        x = nn.Upsample(size = (275,194), mode='bilinear', align_corners=True)(x)
        
        block2 = self.U2_conv(block2)
        
        #print(x.shape, block2.shape)
        x = torch.cat([x, block2], axis=1)
        
        x = self.conv_up2(x)

        x = nn.Upsample(size = (550,388), mode='bilinear', align_corners=True)(x)
        block1 = self.U1_conv(block1)
        #print(x.shape, block1.shape)
        
        x = torch.cat([x, block1], axis=1)
        x = self.conv_up1(x)

        x = nn.Upsample(size = (1100, 775), mode='bilinear', align_corners=True)(x) 
        block0 = self.U0_conv(block0)

        #print(x.shape, block0.shape)
        x = torch.cat([x, block0], axis=1)
        x = self.conv_up0(x)
        out4 = self.out4(x)

        #out4_upsampled = F.interpolate(out4, size=OUT_SIZE, mode='nearest', align_corners=True)
        out4_upsampled = F.interpolate(out4, size=OUT_SIZE, mode='nearest')
        
        out = F.relu(out4_upsampled)
        
        
        return out

In [3]:
PATH = 'unet_full_relu_nearest.pt'
device = torch.device("cuda")
model = vanilla_unet_full_nearest(n_class=1)
model = nn.DataParallel(model)
model.load_state_dict(torch.load(PATH, map_location="cuda:0"))
model.to(device)

in_path = "inputs/"
y_path = 'depth_annotations/'
dir_list = os.listdir(in_path)
d_paths = [(in_path+v,y_path+v) for v in dir_list]
val_dataset = MonocularDepthDataset(d_paths, in_transform = in_transform, out_transform = out_transform)
val_dataloader = DataLoader(val_dataset, batch_size=4, num_workers=4)


criterion = SILogLoss()
with torch.no_grad():
    running_loss = 0.0
    for images, depths in tqdm.tqdm_notebook(val_dataloader):
        images = images.to(device)
        depths = depths.to(device)
        outputs = model(images)
        #print('out shape',outputs.shape)
        loss = criterion(outputs, depths)
        running_loss += loss.item()
print(running_loss/len(val_dataset))

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  for images, depths in tqdm.tqdm_notebook(val_dataloader):


  0%|          | 0/68 [00:00<?, ?it/s]

21.990882124724212
