In [1]:
import torch
from PIL import Image
import torch.nn as nn
import torchvision.models as models
from torchvision.transforms import transforms
from torchvision.transforms import ToTensor, Lambda
from torch.utils.data import Dataset, DataLoader
import torch.nn.functional as F
from sklearn.metrics import classification_report
import matplotlib.pyplot as plt
import pandas as pd
import cv2
import numpy as np
from tqdm import tqdm
import pytorch_lightning as pl
from pytorch_lightning import Trainer
import torchmetrics
import os

torch.cuda.empty_cache()

device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(f'[INFO] Using {device} for inference')



[INFO] Using cuda for inference


## Create custom dataset

In [2]:
class CustomImageDataset(Dataset):
    def __init__(self, annotations_file, base_path):
        self.img_labels = pd.read_csv(annotations_file)
        self.base_path = base_path

    def __len__(self):
        return len(self.img_labels)

    def __getitem__(self, idx):
        print(self.base_path)
        img_path = os.path.join(self.base_path, self.img_labels.iloc[idx, 0])
        label_path = os.path.join(self.base_path, self.img_labels.iloc[idx, 1])
        print(img_path)
        image = cv2.imread(img_path)
        label = cv2.imread(label_path)

        image = cv2.resize(image,(640, 480))
        label = cv2.resize(label, (320, 240))

        temp_transform = transforms.ToTensor()
        img_tr = temp_transform(image)
        label_tr = temp_transform(label)

        # calculate mean and std
        mean, std = img_tr.mean([1,2]), img_tr.std([1,2])
        transform_norm_image = transforms.Compose([transforms.ToTensor(), transforms.Normalize(mean, std)])

        mean, std = label_tr.mean([1,2]), label_tr.std([1,2])
        transform_norm_label = transforms.Compose([transforms.ToTensor(), transforms.Normalize(mean, std), transforms.Grayscale()])

        transform_flip = transforms.RandomHorizontalFlip(p=0.5)

        image = transform_norm_image(image)
        label = transform_norm_label(label)

        transform_flip(image)
        transform_flip(label)

        return image, label, img_path, label_path

## Define paths

In [3]:
test_file_name = "nyu2_test.csv"
train_file_name = "nyu2_train.csv"
model_name = "unet.pt"

batch_size = 5

# os.chdir("..")
# base_path = os.path.abspath(os.curdir)
base_path = "/home/ameya/Documents/Deep Learning/Depth_estimation/"
img_dir = base_path + 'data/'
print(base_path)

train_file = base_path + 'csv/' + train_file_name
test_file = base_path + 'csv/' + test_file_name
model_path = base_path + 'models/' + model_name

train_dataset = CustomImageDataset(train_file, base_path)
test_dataset = CustomImageDataset(test_file, base_path)

train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_dataloader = DataLoader(test_dataset, batch_size=batch_size, shuffle=True)

print("Train dataloader size: ", len(train_dataloader), " ", "Batch size: ", batch_size)
print("Train dataloader size: ", len(test_dataloader), " ", "Batch size: ", batch_size)

/home/ameya/Documents/Deep Learning/Depth estimation/


FileNotFoundError: [Errno 2] No such file or directory: '/home/ameya/Documents/Deep Learning/Depth estimation/csv/nyu2_train.csv'

## Visualize data before proceeding

In [17]:
_, _, img, label = train_dataset[0]

img = plt.imread(base_path+'/'+img)
plt.imshow(img, "gray")
plt.show()

label = plt.imread(base_path+'/'+label)
plt.imshow(label, "gray")
plt.show()

/home/ameya/Documents/Deep Learning/Depth estimation/
/home/ameya/Documents/Deep Learning/Depth estimation/data/nyu2_train/living_room_0038_out/115.jpg


FileNotFoundError: [Errno 2] No such file or directory: '/home/ameya/Documents/Deep Learning/Depth estimation///home/ameya/Documents/Deep Learning/Depth estimation/data/nyu2_train/living_room_0038_out/115.jpg'

## Define Encoder
### Used **Densenet169** as the encoder

In [None]:
model = torch.hub.load('pytorch/vision:v0.10.0', 'densenet169', pretrained=True)
model.eval()

## Custom Unet with Densenet169 as the encoder

In [None]:
class UNet(nn.Module):
    def __init__(self, model):
        super(UNet, self).__init__()
        self.Densenet = model
        self.upsample = nn.Upsample(scale_factor=2, mode='bilinear')
        self.bneck_conv = nn.Conv2d(1000, 1664, kernel_size = (1,1), padding='same')
        self.output_conv = nn.Conv2d(model.features.pool0.shape[0], 1, kernel_size = (3,3), padding='same')

    def upsampling(self, input_tensor, n_filters, concat_layer):
        x = self.upsample(input_tensor)
        x = torch.cat((x, concat_layer), 1)
        x = nn.Conv2d(n_filters, 1, kernel_size = (3,3), padding='same')
        x = nn.BatchNorm2d(x)
        x = nn.Conv2d(n_filters, 1, kernel_size = (3,3), padding='same')
        x = nn.BatchNorm2d(x)

    def forward(self, images):    
        dense_op = self.Densenet(images)   
        bneck = self.bneck_conv(dense_op)     
        x = nn.LeakyReLU(bneck)
        x = self.upsampling(bneck, 832, self.Densenet.transition3.pool)
        x = nn.LeakyReLU(x)
        x = self.upsampling(x, 416, self.Densenet.transition2.pool)
        x = nn.LeakyReLU(x)
        x = self.upsampling(x, 208, self.Densenet.transition1.pool)
        x = nn.LeakyReLU(x)
        x = self.upsampling(x, 208, self.Densenet.pool0)
        x = self.output_conv(x)
        return x    

In [None]:
img, label, _, _, = train_dataset[0]

print(img.shape)
print(label.shape)