In [1]:
import os
import pandas as pd 
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import transforms
import torchvision.transforms.functional as TF
from torch.utils.data import Dataset
from PIL import Image
import glob

from torchvision import models
import tqdm

import time
from torch.autograd import Variable
import torch.nn.functional as F
from torchvision.transforms import Resize, Compose, ToPILImage, ToTensor
import pickle
import math

#from efficientnet_pytorch import EfficientNet

#from kornia.filters import SpatialGradient

import random
from torchvision.transforms import RandomCrop

In [11]:
patch_size = (512, 512)

In [12]:
class MonocularDepthDataset(Dataset):
    def __init__(self, df, transform=None, crop_size=patch_size):
        self.df = df
        self.transform = transform
        self.crop_size = crop_size
        

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        image_path = self.df.iloc[idx]['image']
        depth_path = self.df.iloc[idx]['depth']

        image = Image.open(image_path) ##no rgb, takes grayscale
        depth = Image.open(depth_path)

        # randomly crop image and depth
        i, j, h, w = RandomCrop.get_params(image, output_size=(self.crop_size[0], self.crop_size[1]))
        image = TF.crop(image, i, j, h, w)
        depth = TF.crop(depth, i, j, h, w)

        if self.transform:
            image = self.transform(image)
            depth = self.transform(depth)

        return image, depth

In [13]:


def gradient_loss_fn(gen_frames, gt_frames, alpha=1):
    def gradient(x):
        # idea from tf.image.image_gradients(image)
        # https://github.com/tensorflow/tensorflow/blob/r2.1/tensorflow/python/ops/image_ops_impl.py#L3441-L3512
        # x: (b,c,h,w), float32 or float64
        # dx, dy: (b,c,h,w)

        h_x = x.size()[-2]
        w_x = x.size()[-1]
        # gradient step=1
        left = x
        right = F.pad(x, [0, 1, 0, 0])[:, :, :, 1:]
        top = x
        bottom = F.pad(x, [0, 0, 0, 1])[:, :, 1:, :]

        # dx, dy = torch.abs(right - left), torch.abs(bottom - top)
        dx, dy = right - left, bottom - top 
        # dx will always have zeros in the last column, right-left
        # dy will always have zeros in the last row,    bottom-top
        dx[:, :, :, -1] = 0
        dy[:, :, -1, :] = 0

        return dx, dy

    # gradient
    gen_dx, gen_dy = gradient(gen_frames)
    gt_dx, gt_dy = gradient(gt_frames)
    #
    grad_diff_x = torch.abs(gt_dx - gen_dx)
    grad_diff_y = torch.abs(gt_dy - gen_dy)

    # condense into one tensor and avg
    return torch.mean(grad_diff_x ** alpha + grad_diff_y ** alpha)

class DepthEstimationLoss(nn.Module):
    def __init__(self, alpha=0.5):
        super(DepthEstimationLoss, self).__init__()
        self.alpha = alpha


    def forward(self, pred_depth, true_depth):
        pred_depth = torch.clamp(pred_depth, min=1e-8)
        true_depth = torch.clamp(true_depth, min=1e-8)

        # Scale-invariant MSE loss
        diff = torch.log(pred_depth) - torch.log(true_depth)
        mse_loss = torch.mean(diff**2)
        #scale_invariant_mse_loss = mse_loss - (self.alpha * (torch.sum(diff)**2)) / (true_depth.numel()**2)

    

        #gradient_loss = gradient_loss_fn(pred_depth,true_depth,alpha=self.alpha)

        #total_loss = (scale_invariant_mse_loss + gradient_loss)/2

        return (torch.sum((pred_depth - true_depth)**2))**0.5#scale_invariant_mse_loss#total_loss

In [14]:
def conv_relu_block(in_channel,out_channel,kernel,padding):
    return nn.Sequential(
            nn.Conv2d(in_channel,out_channel, kernel_size = kernel, padding=padding),
            nn.ReLU()) #nn.ReLU(inplace=True) #nn.Ge

In [15]:
class vanilla_unet(nn.Module):
    def __init__(self, n_class):
        super().__init__()
        self.input_1 = conv_relu_block(3,3,3,1) ##grayscale inputs
        #self.input_2 = conv_relu_block(64, 64, 3, 1) #no extra channels

        self.base_model = models.resnet18(pretrained=True)
        self.base_layers = list(self.base_model.children())

        self.l0 = nn.Sequential(*self.base_layers[:3])
        self.U0_conv = conv_relu_block(64, 64, 1, 0)
        self.conv_up0 = conv_relu_block(64 + 256, 128, 3, 1)

        self.l1 = nn.Sequential(*self.base_layers[3:5])
        self.U1_conv = conv_relu_block(64, 64, 1, 0)
        self.conv_up1 = conv_relu_block(64 + 256, 256, 3, 1)

        self.l2 = self.base_layers[5]
        self.U2_conv = conv_relu_block(128, 128, 1, 0)
        self.conv_up2 = conv_relu_block(128 + 512, 256, 3, 1)

        self.l3 = self.base_layers[6]
        self.U3_conv = conv_relu_block(256, 256, 1, 0)
        self.conv_up3 = conv_relu_block(256 + 512, 512, 3, 1)

        self.l4 = self.base_layers[7]
        self.U4_conv = conv_relu_block(512, 512, 1, 0)

        self.conv_up4 = conv_relu_block(64 + 128, 64, 3, 1)

        self.out4 = nn.Conv2d(128, n_class, 1)

        self.upsample = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True)

    def forward(self, x):
        x = torch.cat([x,x,x], axis = 1)
        x = self.input_1(x)
        
        #print(x.shape,'x')
         #concat on channel
        #x_one = self.input_2(x_one)
        block0 = self.l0(x)
        block1 = self.l1(block0)
        block2 = self.l2(block1)
        block3 = self.l3(block2)
        block4 = self.l4(block3)
        #print('b0: ', block0.shape)
        #print('b1: ', block1.shape)
        #print('b2: ', block2.shape)
        #print('b3: ', block3.shape)
        #print('b4: ', block4.shape)
        

        block4 = self.U4_conv(block4)
        x = self.upsample(block4)

        #print(block0.shape, block1.shape, block2.shape,block3.shape,block4.shape)
        block3 = self.U3_conv(block3)
        #print(x.shape, block3.shape)
        x = torch.cat([x, block3], axis=1)
        x = self.conv_up3(x)

        x = self.upsample(x)
        block2 = self.U2_conv(block2)
        #print('x shape: ', x.shape)
        #print('block2 precat: ', block2.shape)
        x = torch.cat([x, block2], axis=1)
        x = self.conv_up2(x)

        x = self.upsample(x)
        block1 = self.U1_conv(block1)
        x = torch.cat([x, block1], axis=1)
        x = self.conv_up1(x)

        x = self.upsample(x)
        block0 = self.U0_conv(block0)
        x = torch.cat([x, block0], axis=1)
        x = self.conv_up0(x)
        out4 = self.out4(x)

        out4_upsampled = F.interpolate(out4, scale_factor=2, mode='bilinear', align_corners=True)
        
        #relu = nn.ReLU()
        out = out4_upsampled#relu(out4_upsampled)
        
        
        return out



In [70]:
v = vanilla_unet(5)

In [71]:
x = torch.ones((1,1,patch_size[0],patch_size[1]))
#print(x.shape)
v.forward(x)

torch.Size([1, 1, 1024, 1024])
b0:  torch.Size([1, 64, 512, 512])
b1:  torch.Size([1, 64, 256, 256])
b2:  torch.Size([1, 128, 128, 128])
b3:  torch.Size([1, 256, 64, 64])
b4:  torch.Size([1, 512, 32, 32])
x shape:  torch.Size([1, 512, 128, 128])
block2 precat:  torch.Size([1, 128, 128, 128])


tensor([[[[ 0.2324,  0.2753,  0.3181,  ...,  0.2258,  0.1913,  0.1567],
          [ 0.1522,  0.1755,  0.1987,  ...,  0.1424,  0.1067,  0.0710],
          [ 0.0720,  0.0757,  0.0793,  ...,  0.0590,  0.0221, -0.0148],
          ...,
          [ 0.1539,  0.1555,  0.1572,  ...,  0.1608,  0.1143,  0.0678],
          [ 0.1300,  0.1384,  0.1468,  ...,  0.1555,  0.1270,  0.0986],
          [ 0.1061,  0.1213,  0.1364,  ...,  0.1502,  0.1398,  0.1293]],

         [[ 0.0386,  0.0779,  0.1171,  ...,  0.1012,  0.0768,  0.0524],
          [ 0.0123,  0.0582,  0.1040,  ...,  0.0794,  0.0827,  0.0860],
          [-0.0140,  0.0384,  0.0909,  ...,  0.0577,  0.0886,  0.1196],
          ...,
          [ 0.0289,  0.0508,  0.0728,  ...,  0.0127,  0.0350,  0.0573],
          [ 0.0424,  0.0579,  0.0733,  ...,  0.0497,  0.0531,  0.0564],
          [ 0.0560,  0.0649,  0.0737,  ...,  0.0867,  0.0711,  0.0555]],

         [[-0.0288,  0.0238,  0.0764,  ...,  0.0560,  0.0364,  0.0169],
          [-0.0580,  0.0121,  

In [16]:
#model = depth_model(num_classes=1).to('cuda')
#model = resunet(n_class=1).to('cuda')
model = vanilla_unet(n_class=1).to('cuda')

#model = effunet(n_class=1).to('cuda')



In [9]:
cd ../../krishna/project

/projectnb/cs585bp/krishna/project


In [18]:
# Set hyperparameters, dataset paths, and other configurations
batch_size = 8
learning_rate = 0.0005
num_epochs = 1
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

transform = transforms.Compose([
    transforms.Resize(patch_size),
    transforms.ToTensor()
])

df = pd.read_csv('train.csv')
train_dataset = MonocularDepthDataset(df, transform = transform)
#val_dataset = MonocularDepthDataset(val_image_paths, val_depth_paths, transform)

train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=12)
#val_dataloader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)


criterion = DepthEstimationLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

for epoch in tqdm.tqdm_notebook(range(num_epochs)):
    #train_loss = train(model, train_dataloader, optimizer, criterion, device)
    
    model.train()
    running_loss = 0.0
    
    for images, depths in tqdm.tqdm_notebook(train_dataloader):
        images = images.to(device)
        depths = depths.to(device)
        
        mask = depths == 0
        f_img = (depths-1)/128.0
        f_img[mask] = 0
        depths = f_img.float()
        
        optimizer.zero_grad()

        outputs = model(images)
        
        loss = criterion(outputs[-1].float(), depths.float())
        loss.backward()

        optimizer.step()
        running_loss += loss.item()
    train_loss = running_loss / len(train_dataloader)
    print('Training loss: ', train_loss)
    print(outputs)
    #val_loss = validate(model, val_dataloader, criterion, device)

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  for epoch in tqdm.tqdm_notebook(range(num_epochs)):


  0%|          | 0/1 [00:00<?, ?it/s]

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  for images, depths in tqdm.tqdm_notebook(train_dataloader):


  0%|          | 0/224 [00:00<?, ?it/s]

Training loss:  51412.20681544713
tensor([[[[-16.1730, -21.3217, -26.4704,  ..., -30.5780, -24.8909, -19.2038],
          [-20.7305, -27.4752, -34.2199,  ..., -39.8494, -32.4405, -25.0315],
          [-25.2879, -33.6286, -41.9694,  ..., -49.1208, -39.9900, -30.8592],
          ...,
          [-27.5497, -36.9395, -46.3293,  ..., -52.5706, -42.7802, -32.9897],
          [-22.1383, -29.6559, -37.1735,  ..., -42.1398, -34.3145, -26.4892],
          [-16.7270, -22.3723, -28.0176,  ..., -31.7091, -25.8489, -19.9887]]],


        [[[-15.6412, -20.4956, -25.3500,  ..., -39.2741, -31.8625, -24.4510],
          [-19.9818, -26.3617, -32.7417,  ..., -51.6387, -41.8941, -32.1496],
          [-24.3224, -32.2279, -40.1334,  ..., -64.0034, -51.9258, -39.8481],
          ...,
          [-28.8640, -38.7735, -48.6830,  ..., -37.0291, -30.2994, -23.5696],
          [-23.1309, -31.0549, -38.9789,  ..., -29.8096, -24.4076, -19.0056],
          [-17.3979, -23.3363, -29.2747,  ..., -22.5901, -18.5158, -14.441

In [None]:
cd ../../nkono/IVC_MDE

In [None]:
torch.save(model.state_dict(), 'good_small_model.pt')

In [None]:
pwd