In [1]:
!nvidia-smi

Sun May  3 00:56:23 2020       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 440.64.00    Driver Version: 418.67       CUDA Version: 10.1     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|   0  Tesla P100-PCIE...  Off  | 00000000:00:04.0 Off |                    0 |
| N/A   36C    P0    38W / 250W |      0MiB / 16280MiB |      0%      Default |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Processes:                                                       GPU Memory |
|  GPU       PID   Type   Process name                             Usage      |
|  No ru

In [2]:
from google.colab import files
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [0]:
import os
import numpy as np
from PIL import Image
import glob
import tqdm

import torch
import torchvision   
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
import torch.utils.data as data
import warnings
warnings.filterwarnings("ignore")

In [0]:
# !cp /content/drive/My\ Drive/Inria_data/data.zip .

In [0]:
# !unzip -q data.zip

In [0]:
data_transforms = {
    'train': transforms.Compose([
        transforms.RandomResizedCrop(size=(160, 384), scale=(0.8, 1.0)),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
    ]),
    'test': transforms.Compose([
        transforms.Resize(size=(160, 384)),
        transforms.ToTensor(),
    ]),
}

In [0]:
folder_path = "./data/"

left_files = sorted(glob.glob(os.path.join(folder_path,'left','*.jpg')))
right_files = sorted(glob.glob(os.path.join(folder_path,'right','*.jpg')))
depth_files = sorted(glob.glob(os.path.join(folder_path,'monocular_depth','*_depth.png')))
mask_files = sorted(glob.glob(os.path.join(folder_path,'rcnn_mask','*.pt')))

idxs = np.arange(len(left_files))
np.random.shuffle(idxs)

train_idxs = idxs[:int(0.8*len(left_files))]
val_idxs = idxs[int(0.8*len(left_files)):]

train_lfiles = [left_files[idx] for idx in train_idxs]
train_rfiles = [right_files[idx] for idx in train_idxs]
train_dfiles = [depth_files[idx] for idx in train_idxs]
train_mfiles = [mask_files[idx] for idx in train_idxs]

val_lfiles = [left_files[idx] for idx in val_idxs]
val_rfiles = [right_files[idx] for idx in val_idxs]
val_dfiles = [depth_files[idx] for idx in val_idxs]
val_mfiles = [mask_files[idx] for idx in val_idxs]

In [0]:
class Inria(data.Dataset):
    def __init__(self, lfiles, rfiles, dfiles, mfiles, transform=None):
        super(Inria, self).__init__()
        self.left_files = lfiles
        self.right_files = rfiles
        self.depth_files = dfiles
        self.mask_files = mfiles

        self.transform = transform

    def __getitem__(self, index):

            left_img_path = self.left_files[index]
            right_img_path = self.right_files[index]
            depth_img_path = self.depth_files[index]
            mask_img_path = self.mask_files[index]

            left = Image.open(left_img_path)
            right = Image.open(right_img_path)
            depth = Image.open(depth_img_path)
            mask = torch.load(mask_img_path)

            if self.transform:
              left = self.transform(left)
              right = self.transform(right)
              depth = self.transform(depth)

            return left, right, depth, mask


    def __len__(self):
        return len(self.left_files)


train_dataset = Inria(train_lfiles, train_rfiles, train_dfiles, train_mfiles , data_transforms['train'])
train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=32, shuffle=True, num_workers=8)

val_dataset = Inria(val_lfiles, val_rfiles, val_dfiles, val_mfiles, data_transforms['test'])
val_dataloader = torch.utils.data.DataLoader(val_dataset, batch_size=32, shuffle=False, num_workers=8)

In [0]:
class Deep3D(nn.Module):
    def __init__(self):
        super(Deep3D, self).__init__()
        
        self.conv1_1 = nn.Conv2d(in_channels=3, out_channels=64, kernel_size=3, padding=1)
        self.relu1_1 = nn.ReLU()
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)

        self.conv2_1 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, padding=1)
        self.relu2_1 = nn.ReLU()
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)

        self.conv3_1 = nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, padding=1)
        self.relu3_1 = nn.ReLU()
        self.conv3_2 = nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, padding=1)
        self.relu3_2 = nn.ReLU()
        self.pool3 = nn.MaxPool2d(kernel_size=2, stride=2)

        self.conv4_1 = nn.Conv2d(in_channels=256, out_channels=512, kernel_size=3, padding=1)
        self.relu4_1 = nn.ReLU()
        self.conv4_2 = nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, padding=1)
        self.relu4_2 = nn.ReLU()
        self.pool4 = nn.MaxPool2d(kernel_size=2, stride=2)

        self.conv5_1 = nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, padding=1)
        self.relu5_1 = nn.ReLU()
        self.conv5_2 = nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, padding=1)
        self.relu5_2 = nn.ReLU()
        self.pool5 = nn.MaxPool2d(kernel_size=2, stride=2)

        self.flatten = nn.Flatten()
        self.fc6 = nn.Linear(30720, 512)  
        self.relu6 = nn.ReLU()
        self.drop6 = nn.Dropout(p=0.5)

        self.fc7 = nn.Linear(512, 512)  
        self.relu7 = nn.ReLU()
        self.drop7 = nn.Dropout(p=0.5)

        self.fc8 = nn.Linear(512, 33*12*5)
        # rehape in forward to get pred5

        self.bn_pool4 = nn.BatchNorm2d(512)
        self.pred4 = nn.Conv2d(in_channels=512 ,out_channels=33, kernel_size=3 ,padding=1)
        self.bn_pool3 = nn.BatchNorm2d(256)
        self.pred3 = nn.Conv2d(in_channels=256 ,out_channels=33, kernel_size=3 ,padding=1)
        self.bn_pool2 = nn.BatchNorm2d(128)
        self.pred2 = nn.Conv2d(in_channels=128 ,out_channels=33, kernel_size=3 ,padding=1)
        self.bn_pool1 = nn.BatchNorm2d(64)
        self.pred1 = nn.Conv2d(in_channels=64 ,out_channels=33, kernel_size=3 ,padding=1)
        
        workspace = 0
        scale = 1

        self.relu = nn.ReLU()

        self.deconv_pred1 = nn.ConvTranspose2d(in_channels=33, out_channels=33, kernel_size=1, padding=0, stride=1)
        scale *= 2

        self.deconv_pred2 = nn.ConvTranspose2d(in_channels=33, out_channels=33, kernel_size=2*scale, padding=scale//2, stride=scale)
        scale *= 2

        self.deconv_pred3 = nn.ConvTranspose2d(in_channels=33, out_channels=33, kernel_size=2*scale, padding=scale//2, stride=scale)
        scale *= 2

        self.deconv_pred4 = nn.ConvTranspose2d(in_channels=33, out_channels=33, kernel_size=2*scale, padding=scale//2, stride=scale)
        scale *= 2

        self.deconv_pred5 = nn.ConvTranspose2d(in_channels=33, out_channels=33, kernel_size=2*scale, padding=scale//2, stride=scale)
        self.relu1 =  nn.ReLU()
        scale = 2
        self.deconv_predup = nn.ConvTranspose2d(in_channels=33, out_channels=33, kernel_size=2*scale, padding=scale//2, stride=scale)
        self.relu2 = nn.ReLU()
        self.convolution0 = nn.Conv2d(in_channels=33 ,out_channels=33, kernel_size=3 ,padding=1)

        self.softmax = nn.Softmax()

    def forward(self, x):
        out = x
        
        out = self.relu1_1(self.conv1_1(out))
        pool1 = self.pool1(out)

        out = self.relu2_1(self.conv2_1(pool1))
        pool2 = self.pool2(out)

        out = self.relu3_1(self.conv3_1(pool2))
        out = self.relu3_2(self.conv3_2(out))
        pool3 = self.pool3(out)

        out = self.relu4_1(self.conv4_1(pool3))
        out = self.relu4_2(self.conv4_2(out))
        pool4 = self.pool4(out)

        out = self.relu5_1(self.conv5_1(pool4))
        out = self.relu5_2(self.conv5_2(out))
        pool5 = self.pool5(out)

        out = self.flatten(pool5)

        out = self.drop6(self.relu6(self.fc6(out)))
        out = self.drop7(self.relu7(self.fc7(out)))

        out = self.fc8(out)

        pred5 = torch.reshape(out, (out.shape[0], 33, 5, 12))

        pred4 = self.bn_pool4(pool4)
        pred4 = self.pred4(pred4)
        pred3 = self.bn_pool3(pool3)
        pred3 = self.pred3(pred3)
        pred2 = self.bn_pool2(pool2)
        pred2 = self.pred2(pred2)
        pred1 = self.bn_pool1(pool1)
        pred1 = self.pred1(pred1)

        pred1 = self.deconv_pred1(self.relu(pred1))
        pred2 = self.deconv_pred2(self.relu(pred2))
        pred3 = self.deconv_pred3(self.relu(pred3))
        pred4 = self.deconv_pred4(self.relu(pred4))
        pred5 = self.deconv_pred5(self.relu(pred5))

        pred = pred1 + pred2 + pred3 + pred4 + pred5
        pred = self.relu(pred)

        pred = self.convolution0(self.relu(self.deconv_predup(pred)))

        mask = self.softmax(pred)

        return mask

In [0]:
class DepthRCNN(nn.Module):
    def __init__(self):
        super(DepthRCNN, self).__init__()
        
        self.conv1_1 = nn.Conv2d(in_channels=2, out_channels=64, kernel_size=3, padding=1)
        self.relu1_1 = nn.ReLU()
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)

        self.conv2_1 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, padding=1)
        self.relu2_1 = nn.ReLU()
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)

        self.conv3_1 = nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, padding=1)
        self.relu3_1 = nn.ReLU()
        self.conv3_2 = nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, padding=1)
        self.relu3_2 = nn.ReLU()
        self.pool3 = nn.MaxPool2d(kernel_size=2, stride=2)

        self.conv4_1 = nn.Conv2d(in_channels=256, out_channels=512, kernel_size=3, padding=1)
        self.relu4_1 = nn.ReLU()
        self.conv4_2 = nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, padding=1)
        self.relu4_2 = nn.ReLU()
        self.pool4 = nn.MaxPool2d(kernel_size=2, stride=2)

        self.conv5_1 = nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, padding=1)
        self.relu5_1 = nn.ReLU()
        self.conv5_2 = nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, padding=1)
        self.relu5_2 = nn.ReLU()
        self.pool5 = nn.MaxPool2d(kernel_size=2, stride=2)

        self.flatten = nn.Flatten()
        self.fc6 = nn.Linear(30720, 512)  
        self.relu6 = nn.ReLU()
        self.drop6 = nn.Dropout(p=0.5)

        self.fc7 = nn.Linear(512, 512)  
        self.relu7 = nn.ReLU()
        self.drop7 = nn.Dropout(p=0.5)

        self.fc8 = nn.Linear(512, 33*12*5)
        # rehape in forward to get pred5

        self.bn_pool4 = nn.BatchNorm2d(512)
        self.pred4 = nn.Conv2d(in_channels=512 ,out_channels=33, kernel_size=3 ,padding=1)
        self.bn_pool3 = nn.BatchNorm2d(256)
        self.pred3 = nn.Conv2d(in_channels=256 ,out_channels=33, kernel_size=3 ,padding=1)
        self.bn_pool2 = nn.BatchNorm2d(128)
        self.pred2 = nn.Conv2d(in_channels=128 ,out_channels=33, kernel_size=3 ,padding=1)
        self.bn_pool1 = nn.BatchNorm2d(64)
        self.pred1 = nn.Conv2d(in_channels=64 ,out_channels=33, kernel_size=3 ,padding=1)
        
        workspace = 0
        scale = 1

        self.relu = nn.ReLU()

        self.deconv_pred1 = nn.ConvTranspose2d(in_channels=33, out_channels=33, kernel_size=1, padding=0, stride=1)
        scale *= 2

        self.deconv_pred2 = nn.ConvTranspose2d(in_channels=33, out_channels=33, kernel_size=2*scale, padding=scale//2, stride=scale)
        scale *= 2

        self.deconv_pred3 = nn.ConvTranspose2d(in_channels=33, out_channels=33, kernel_size=2*scale, padding=scale//2, stride=scale)
        scale *= 2

        self.deconv_pred4 = nn.ConvTranspose2d(in_channels=33, out_channels=33, kernel_size=2*scale, padding=scale//2, stride=scale)
        scale *= 2

        self.deconv_pred5 = nn.ConvTranspose2d(in_channels=33, out_channels=33, kernel_size=2*scale, padding=scale//2, stride=scale)
        self.relu1 =  nn.ReLU()
        scale = 2
        self.deconv_predup = nn.ConvTranspose2d(in_channels=33, out_channels=33, kernel_size=2*scale, padding=scale//2, stride=scale)
        self.relu2 = nn.ReLU()
        self.convolution0 = nn.Conv2d(in_channels=33 ,out_channels=33, kernel_size=3 ,padding=1)

        self.softmax = nn.Softmax()

    def forward(self, x):
        out = x
        
        out = self.relu1_1(self.conv1_1(out))
        pool1 = self.pool1(out)

        out = self.relu2_1(self.conv2_1(pool1))
        pool2 = self.pool2(out)

        out = self.relu3_1(self.conv3_1(pool2))
        out = self.relu3_2(self.conv3_2(out))
        pool3 = self.pool3(out)

        out = self.relu4_1(self.conv4_1(pool3))
        out = self.relu4_2(self.conv4_2(out))
        pool4 = self.pool4(out)

        out = self.relu5_1(self.conv5_1(pool4))
        out = self.relu5_2(self.conv5_2(out))
        pool5 = self.pool5(out)

        out = self.flatten(pool5)

        out = self.drop6(self.relu6(self.fc6(out)))
        out = self.drop7(self.relu7(self.fc7(out)))

        out = self.fc8(out)

        pred5 = torch.reshape(out, (out.shape[0], 33, 5, 12))

        pred4 = self.bn_pool4(pool4)
        pred4 = self.pred4(pred4)
        pred3 = self.bn_pool3(pool3)
        pred3 = self.pred3(pred3)
        pred2 = self.bn_pool2(pool2)
        pred2 = self.pred2(pred2)
        pred1 = self.bn_pool1(pool1)
        pred1 = self.pred1(pred1)

        pred1 = self.deconv_pred1(self.relu(pred1))
        pred2 = self.deconv_pred2(self.relu(pred2))
        pred3 = self.deconv_pred3(self.relu(pred3))
        pred4 = self.deconv_pred4(self.relu(pred4))
        pred5 = self.deconv_pred5(self.relu(pred5))

        pred = pred1 + pred2 + pred3 + pred4 + pred5
        pred = self.relu(pred)

        pred = self.convolution0(self.relu(self.deconv_predup(pred)))

        mask = self.softmax(pred)

        return mask

In [0]:
def selection_layer(masks, left_image, left_shift=16):

    p2d = (left_shift-1, left_shift, 0, 0)
    padded_img = F.pad(left_image, p2d, 'constant')

    depth = masks.shape[1]
    width = left_image.shape[3]
    layers = []
    layers.append(torch.zeros(padded_img[:,:,:,:width].shape).to(device))
    for d in range(depth-2,-1,-1):
        layers.append(padded_img[:,:,:,d:d+width])
    layers = torch.stack(layers, axis=1)
    disparity_image =  layers * masks.unsqueeze(2)

    return torch.sum(disparity_image, axis=1)

In [12]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# device='cpu'
print(device)

net1 = Deep3D()
net2 = DepthRCNN()
net1 = net1.to(device)
net2 = net2.to(device)

cuda


In [13]:
checkpoint = torch.load("/content/drive/My Drive/11785/Project/pre_trained_weights.pt")
net1.load_state_dict(checkpoint["model_state_dict"])

<All keys matched successfully>

In [0]:
def train(model1, model2, data_loader, n_epochs, criterion, scheduler):
    model1.train()
    model2.train()
    for epoch in range(n_epochs):
        avg_loss = 0.0
        num_correct = 0.0
        total = len(data_loader)

        outer = tqdm.notebook.tqdm(total=len(data_loader), desc='training')

        for batch_num, (left_img, right_img, depth_img, rcnn_img) in enumerate(data_loader):
            outer.update(1)
            left_img, right_img, depth_img, rcnn_img = left_img.to(device), right_img.to(device), depth_img.type(torch.FloatTensor).to(device), rcnn_img.to(device) 
            optimizer.zero_grad()

            newImg = torch.cat([depth_img, rcnn_img], dim=1)
            outputs1 = model1(left_img)
            outputs2 = model2(newImg)
            outputs1 = selection_layer(outputs1, left_img)
            outputs2 = selection_layer(outputs2, left_img)

            outputs = outputs1 + outputs2

            loss = criterion(outputs, right_img)
            loss.backward()
            optimizer.step()
            
            avg_loss += loss.item()
            
            torch.cuda.empty_cache()
            del left_img
            del right_img
            del depth_img
            del rcnn_img
            del newImg
            del loss
        scheduler.step(avg_loss)
        print('Epoch: {}\tAvg-Loss: {:.4f}'.format(epoch+1, avg_loss/total))
        # torch.save({
        # 'model_state_dict': model.state_dict(),
        # 'optimizer_label_state_dict': optimizer.state_dict(),
        # }, '/content/drive/My Drive/11785/Project/' + 'weight' + '_' + str(epoch) + '.pt')

In [27]:
criterion = nn.L1Loss()
optimizer = torch.optim.Adam(list(net1.parameters()) + list(net2.parameters()), lr=1e-3)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=0, verbose=True)

train(net1, net2,  train_dataloader, 10, criterion, scheduler)

HBox(children=(IntProgress(value=0, description='training', max=81, style=ProgressStyle(description_width='ini…

Epoch: 1	Avg-Loss: 0.1093


HBox(children=(IntProgress(value=0, description='training', max=81, style=ProgressStyle(description_width='ini…

Epoch     2: reducing learning rate of group 0 to 1.0000e-04.
Epoch: 2	Avg-Loss: 0.1115


HBox(children=(IntProgress(value=0, description='training', max=81, style=ProgressStyle(description_width='ini…

Epoch     3: reducing learning rate of group 0 to 1.0000e-05.
Epoch: 3	Avg-Loss: 0.1112


HBox(children=(IntProgress(value=0, description='training', max=81, style=ProgressStyle(description_width='ini…

Epoch: 4	Avg-Loss: 0.1087


HBox(children=(IntProgress(value=0, description='training', max=81, style=ProgressStyle(description_width='ini…

Epoch: 5	Avg-Loss: 0.1083


HBox(children=(IntProgress(value=0, description='training', max=81, style=ProgressStyle(description_width='ini…

Epoch     6: reducing learning rate of group 0 to 1.0000e-06.
Epoch: 6	Avg-Loss: 0.1104


HBox(children=(IntProgress(value=0, description='training', max=81, style=ProgressStyle(description_width='ini…

Epoch     7: reducing learning rate of group 0 to 1.0000e-07.
Epoch: 7	Avg-Loss: 0.1092


HBox(children=(IntProgress(value=0, description='training', max=81, style=ProgressStyle(description_width='ini…

Epoch     8: reducing learning rate of group 0 to 1.0000e-08.
Epoch: 8	Avg-Loss: 0.1096


HBox(children=(IntProgress(value=0, description='training', max=81, style=ProgressStyle(description_width='ini…

Epoch: 9	Avg-Loss: 0.1097


HBox(children=(IntProgress(value=0, description='training', max=81, style=ProgressStyle(description_width='ini…

Epoch: 10	Avg-Loss: 0.1094


In [0]:
def test(model1, model2, data_loader, criterion):
    model1.eval()
    model2.eval()
    with torch.no_grad():
        avg_loss = 0.0
        total = len(data_loader)

        outer = tqdm.notebook.tqdm(total=len(data_loader), desc='testing')

        for batch_num, (left_img, right_img, depth_img, rcnn_img) in enumerate(data_loader):
            outer.update(1)
            left_img, right_img, depth_img, rcnn_img = left_img.to(device), right_img.to(device), depth_img.type(torch.FloatTensor).to(device), rcnn_img.to(device) 

            newImg = torch.cat([depth_img, rcnn_img], dim=1)
            outputs1 = model1(left_img)
            outputs2 = model2(newImg)
            outputs1 = selection_layer(outputs1, left_img)
            outputs2 = selection_layer(outputs2, left_img)

            outputs = outputs1 + outputs2

            loss = criterion(outputs, right_img)
            
            avg_loss += loss.item()            
            torch.cuda.empty_cache()
            del left_img
            del right_img
            del depth_img
            del rcnn_img
            del newImg
            del loss
        print('Avg-Loss: {:.4f}'.format(avg_loss/total))

In [29]:
test(net1, net2, val_dataloader, criterion)

HBox(children=(IntProgress(value=0, description='testing', max=21, style=ProgressStyle(description_width='init…

Avg-Loss: 0.0463
