## Monocular Depth Estimation Project

This notebook walks through the full training and evaluation process, reports results across experiments, and visualizes the predicted depth maps.

In [1]:
from torch.optim import Adam
from DPT.dpt.models import DPTDepthModel
from torch.utils.data import Dataset, DataLoader
from dataloaders.nyu_data import NyuDepthV2
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.optim import Adam, Optimizer
from losses.mde_losses import ScaleAndShiftInvariantLoss
from losses.LMR import LMRLoss
from LNRegularizer.LNR import LNR
from torchvision.transforms import v2
from typing import Dict
from tqdm.notebook import tqdm
# functions for displaying results
import matplotlib.pyplot as plt
from typing import List


In [2]:
# load datasets
NYU_DATA_PATH = "data/nyu_data/nyu_depth_v2_labeled.mat"

# download from http://horatio.cs.nyu.edu/mit/silberman/indoor_seg_sup/splits.mat
NYU_SPLIT_PATH = "data/nyu_data/splits.mat"

# batch size shared by the train and test loaders
BATCH_SIZE = 12

nyu_test_ds = NyuDepthV2(NYU_DATA_PATH, NYU_SPLIT_PATH, split="test")
nyu_train_ds = NyuDepthV2(NYU_DATA_PATH, NYU_SPLIT_PATH, split="train")

# NOTE(review): the training loader iterates in dataset order (DataLoader's
# default is shuffle=False) — confirm this is intended before comparing runs.
nyu_train_dataloader = DataLoader(nyu_train_ds, batch_size=BATCH_SIZE)

# The test loader must wrap the held-out split; it previously wrapped
# nyu_train_ds, so every evaluation was reporting error on the training data.
nyu_test_dataloader = DataLoader(nyu_test_ds, batch_size=BATCH_SIZE)

## Training Functions

Keeping everything in a single notebook avoids kernel-state issues and makes it easier to modify portions of the models.

In [None]:
def train_simple(
    model: nn.Module,
    loader: DataLoader,
    optim: Optimizer,
    epochs: int = 50,
    print_every: int = 1,
) -> None:
    """
    Train depth head on NYU dataset with no additional regularization.

    The objective is a weighted sum of the scale-and-shift-invariant loss,
    the MSE and the smooth-L1 loss between prediction and ground truth.

    Args:
        model: network to train; batches are moved to the device of its
            first parameter.
        loader: yields dict batches with "image", "depth" and "mask" entries.
        optim: optimizer that already wraps the parameters to update.
        epochs: number of full passes over ``loader``.
        print_every: refresh the progress-bar statistics every this many
            steps; values <= 0 disable the statistics.
    """
    model.train()
    # follow the model instead of hard-coding a device name
    device = next(model.parameters()).device
    # loss function
    loss = ScaleAndShiftInvariantLoss()

    step = 0
    with tqdm(total=epochs * len(loader), desc="Training MDE:") as pbar:
        # training loop
        for _ in range(epochs):
            for batch in loader:
                X = batch["image"].float().to(device)
                y = batch["depth"].float().to(device)
                mask = batch["mask"].float().to(device)

                # move the last axis to position 1
                # (assumes images arrive channels-last — TODO confirm)
                X = X.permute(0, 3, 1, 2)

                # calculate depth
                prediction = model(X)

                # calculate losses
                # NOTE(review): only the invariant loss receives the mask; the
                # MSE and smooth-L1 terms are computed over every pixel —
                # confirm that is intended.
                err = loss(prediction, y, mask)
                mse_loss = F.mse_loss(prediction, y)
                l1_loss = F.smooth_l1_loss(prediction, y)

                composite_loss = (2 * err) + (0.5 * mse_loss) + (0.1 * l1_loss)

                # process optimizer
                optim.zero_grad()
                composite_loss.backward()  # back-prop losses
                optim.step()

                if print_every > 0 and step % print_every == 0:
                    # Gradient norms are debugging output only, so they are
                    # gathered just on the steps that display them (each
                    # .item() forces a device sync).
                    grads = torch.Tensor(
                        [
                            p.grad.norm().item()
                            for p in model.parameters()
                            if p.grad is not None
                        ]
                    )
                    pbar.set_postfix_str(
                        f"train_loss: {err:.2f} | mse_loss: {mse_loss:.2f} | "
                        f"l1_loss: {l1_loss:.2f} | "
                        f"composite loss: {composite_loss:.2f} | "
                        f"Average gradient: {grads.mean()} | "
                        f"min grad: {grads.min()} | "
                        f"max grad {grads.max().item():.2f} | "
                        f"min pred. depth: {prediction.min().item():.2f} | "
                        f"max pred. depth: {prediction.max().item():.2f}"
                    )

                # advance once per step so the bar stays in sync with `total`
                # regardless of print_every
                pbar.update(1)
                step += 1


def train_with_lmr(
    model: nn.Module,
    loader: DataLoader,
    optim: Optimizer,
    epochs: int = 50,
    print_every: int = 1,
) -> None:
    """
    Train depth head on NYU dataset with lmr regularizer.

    The objective is the scale-and-shift-invariant loss plus the LMR loss
    computed from the output of ``LNR`` on the input batch.

    Args:
        model: network to train; batches are moved to the device of its
            first parameter.
        loader: yields dict batches with "image", "depth" and "mask" entries.
        optim: optimizer that already wraps the parameters to update.
        epochs: number of full passes over ``loader``.
        print_every: refresh the progress-bar statistics every this many
            steps; values <= 0 disable the statistics.
    """
    model.train()
    # follow the model instead of hard-coding a device name
    device = next(model.parameters()).device
    # loss functions
    loss = ScaleAndShiftInvariantLoss()
    lmr_loss = LMRLoss()

    step = 0
    with tqdm(total=epochs * len(loader), desc="Training MDE:") as pbar:
        # training loop
        for _ in range(epochs):
            for batch in loader:
                X = batch["image"].float().to(device)
                y = batch["depth"].float().to(device)
                mask = batch["mask"].float().to(device)

                # move the last axis to position 1
                # (assumes images arrive channels-last — TODO confirm)
                X = X.permute(0, 3, 1, 2)

                # pass input through LMR model
                # NOTE(review): LNR is called directly on the batch here. If
                # LNR is a module class this constructs a new object rather
                # than running a forward pass — confirm against LNRegularizer.
                output_mask = LNR(X)

                # calculate depth
                prediction = model(X)

                # calculate losses
                err = loss(prediction, y, mask)
                # The prediction is detached, so the LMR term sends no gradient
                # into `model` through depth_hat. Whether it reaches any
                # parameter held by `optim` depends on LNR — TODO confirm.
                lmr_mask_loss = lmr_loss(
                    net_mask=output_mask,
                    depth_hat=prediction.detach(),
                    depth=y,
                    k=100,
                )

                err = err + lmr_mask_loss  # combine losses

                # process optimizer
                optim.zero_grad()
                err.backward()  # back-prop losses
                optim.step()

                if print_every > 0 and step % print_every == 0:
                    with torch.no_grad():
                        mse_loss = F.mse_loss(prediction, y)
                    pbar.set_postfix_str(
                        f"train_loss: {err:.2f} | mse_loss: {mse_loss:.2f}"
                    )

                # advance once per step so the bar stays in sync with `total`
                # regardless of print_every
                pbar.update(1)
                step += 1


def _cutmix_dense(images, depths, masks, alpha: float = 1.0):
    """
    Apply CutMix to a batch of images together with their dense targets.

    One random box is drawn for the whole batch and the content of that box
    is replaced, in every sample, by the same region of another sample chosen
    by a random permutation. The identical box and permutation are applied to
    the images, depth maps and masks so the targets stay pixel-aligned.

    Args:
        images: batch whose last two axes are the spatial axes.
        depths: depth targets with the same trailing spatial size as images.
        masks: masks with the same trailing spatial size as images.
        alpha: concentration of the Beta(alpha, alpha) distribution from
            which the mixing ratio is sampled.

    Returns:
        Tuple ``(images, depths, masks)`` of mixed copies; inputs are not
        modified.

    Raises:
        ValueError: if the trailing spatial sizes do not agree.
    """
    height, width = images.shape[-2:]
    for name, tensor in (("depth", depths), ("mask", masks)):
        if tuple(tensor.shape[-2:]) != (height, width):
            raise ValueError(
                f"{name} spatial size {tuple(tensor.shape[-2:])} does not match "
                f"image spatial size {(height, width)}"
            )

    # fraction of the area kept from the original sample
    lam = torch.distributions.Beta(alpha, alpha).sample().item()
    cut_ratio = (1.0 - lam) ** 0.5
    cut_h = int(height * cut_ratio)
    cut_w = int(width * cut_ratio)

    # box centre is uniform over the image; the box is clipped at the border
    center_y = int(torch.randint(height, (1,)).item())
    center_x = int(torch.randint(width, (1,)).item())
    top = max(center_y - cut_h // 2, 0)
    bottom = min(center_y + cut_h // 2, height)
    left = max(center_x - cut_w // 2, 0)
    right = min(center_x + cut_w // 2, width)

    # permutation is drawn on the CPU and moved, to avoid depending on
    # backend support for randperm
    perm = torch.randperm(images.shape[0]).to(images.device)

    mixed = []
    for tensor in (images, depths, masks):
        out = tensor.clone()
        out[..., top:bottom, left:right] = tensor[perm][..., top:bottom, left:right]
        mixed.append(out)
    return tuple(mixed)


def train_with_cutmix(
    model: nn.Module,
    loader: DataLoader,
    optim: Optimizer,
    epochs: int = 50,
    print_every: int = 1,
) -> None:
    """
    Train depth head on NYU dataset using cutmix regularization.

    torchvision's ``v2.CutMix`` is a classification transform: it only
    accepts class-label targets and raises on a dense depth map. CutMix is
    therefore applied here with a dense variant that pastes the same box into
    the image, the depth target and the mask.

    Args:
        model: network to train; batches are moved to the device of its
            first parameter.
        loader: yields dict batches with "image", "depth" and "mask" entries.
        optim: optimizer that already wraps the parameters to update.
        epochs: number of full passes over ``loader``.
        print_every: refresh the progress-bar statistics every this many
            steps; values <= 0 disable the statistics.
    """
    model.train()
    # follow the model instead of hard-coding a device name
    device = next(model.parameters()).device
    # loss function
    loss = ScaleAndShiftInvariantLoss()

    step = 0
    with tqdm(total=epochs * len(loader), desc="Training MDE:") as pbar:
        # training loop
        for _ in range(epochs):
            for batch in loader:
                X = batch["image"].float().to(device)
                y = batch["depth"].float().to(device)
                mask = batch["mask"].float().to(device)

                # move the last axis to position 1
                # (assumes images arrive channels-last — TODO confirm)
                X = X.permute(0, 3, 1, 2)

                # apply cutmix to image, target and mask with one shared box
                X, y, mask = _cutmix_dense(X, y, mask)

                # calculate depth
                prediction = model(X)

                # calculate losses
                err = loss(prediction, y, mask)

                # process optimizer
                optim.zero_grad()
                err.backward()  # back-prop losses
                optim.step()

                if print_every > 0 and step % print_every == 0:
                    with torch.no_grad():
                        mse_loss = F.mse_loss(prediction, y)
                    pbar.set_postfix_str(
                        f"train_loss: {err:.2f} | mse_loss: {mse_loss:.2f}"
                    )

                # advance once per step so the bar stays in sync with `total`
                # regardless of print_every
                pbar.update(1)
                step += 1


def eval(model: nn.Module, loader: DataLoader) -> Dict:
    """
    Assess the accuracy of the model with the mean squared error.

    The error is accumulated as Python floats and weighted by the number of
    target elements in each batch, so a shorter final batch does not skew the
    average and no per-batch tensors are kept alive on the device.

    Note: this function shadows the built-in ``eval`` inside the notebook;
    the name is kept because the experiment cells call it.

    Args:
        model: network to evaluate; switched to eval mode. Batches are moved
            to the device of its first parameter.
        loader: yields dict batches with "image" and "depth" entries.

    Returns:
        ``{"mse_avg": float}`` — the MSE over all evaluated elements.

    Raises:
        ValueError: if ``loader`` yields no data.
    """
    model.eval()
    # follow the model instead of hard-coding a device name
    device = next(model.parameters()).device

    squared_error_sum = 0.0
    n_elements = 0
    with tqdm(total=len(loader), desc="Evaluating MDE:") as pbar:
        for batch in loader:
            X = batch["image"].float().to(device)
            y = batch["depth"].float().to(device)
            with torch.no_grad():
                # move the last axis to position 1
                # (assumes images arrive channels-last — TODO confirm)
                X = X.permute(0, 3, 1, 2)
                prediction = model(X)
                # when looking at error for evaluation use MSE loss
                err = F.mse_loss(prediction, y).item()

            # undo the per-batch mean so batches of different size are
            # weighted by how many elements they contain
            squared_error_sum += err * y.numel()
            n_elements += y.numel()

            pbar.set_postfix_str(f"mse_loss: {err:.2f}")
            pbar.update(1)

    if n_elements == 0:
        raise ValueError("eval() received an empty loader; nothing to evaluate")

    mse_avg = squared_error_sum / n_elements
    print(f"Average MSE Loss: {mse_avg}")
    return {"mse_avg": mse_avg}


In [4]:
def plot_test_frames(model: nn.Module, dataset: Dataset, indices: List[int]) -> None:
    """
    Generate a plot of depth images at specific indices.

    For every index one figure with three panels is shown: the input image,
    the ground-truth depth and the depth predicted by ``model``.

    Args:
        model: network used for the prediction; run in eval mode and put back
            into its previous mode afterwards. Inputs are moved to the device
            of its first parameter.
        dataset: indexable dataset whose items are dicts with "image" and
            "depth" entries.
        indices: dataset indices to visualise.
    """
    # follow the model instead of hard-coding a device name
    device = next(model.parameters()).device

    # inference should not depend on whichever mode the last cell left behind
    was_training = model.training
    model.eval()
    try:
        for i in indices:
            datapoint = dataset[i]
            X = datapoint["image"]
            y = datapoint["depth"]

            fig, (ax1, ax2, ax3) = plt.subplots(1, 3, figsize=(10, 8))
            ax1.imshow(X)
            ax1.set_title("Image")
            ax2.imshow(y)
            ax2.set_title("Truth depth")

            # add a batch axis, then move the last axis to position 1
            # (assumes the image is stored channels-last — TODO confirm)
            X = torch.Tensor(X).to(device).unsqueeze(0).permute(0, 3, 1, 2)
            with torch.no_grad():
                # assumes the model returns (1, H, W) for a single image, as
                # the original permute(1, 2, 0) did — TODO confirm
                prediction = model(X).squeeze(0).cpu().numpy()
            ax3.imshow(prediction, cmap="viridis")
            ax3.set_title("Predicted depth")
            plt.show()
            # release the figure so long index lists do not accumulate memory
            plt.close(fig)
    finally:
        model.train(was_training)

## Experiments

Each experiment will use a separate model so that the results can be compared directly in later sections.

In [5]:
# baseline experiment: DPT model trained without any regularization
simple_model = (
    DPTDepthModel(
        scale=0.000305,
        shift=0.1378,
        invert=True,
        backbone="vitb_rn50_384",
        non_negative=True,
        enable_attention_hooks=False,
    )
    .float()
    .to("mps")
)
# Hand the optimizer the parameter tensors themselves. named_parameters()
# yields (name, tensor) tuples, which is inconsistent with the other
# experiment cells and is not accepted by every PyTorch release.
optim = Adam(simple_model.parameters(), lr=1e-6)

# standard training - no regularization at all
train_simple(
    model=simple_model,
    loader=nyu_train_dataloader,
    optim=optim,
    epochs=1,
)
simple_res = eval(simple_model, nyu_test_dataloader)

  model = create_fn(


Training MDE::   0%|          | 0/67 [00:00<?, ?it/s]

composite_loss requires_grad: True
composite_loss requires_grad: True
composite_loss requires_grad: True
composite_loss requires_grad: True
composite_loss requires_grad: True
composite_loss requires_grad: True


KeyboardInterrupt: 

In [None]:
# visualise one held-out frame for the baseline model
plot_test_frames(model=simple_model, dataset=nyu_test_ds, indices=[5])

In [None]:
# cutmix experiment: a fresh DPT model with its own optimizer
# NOTE(review): this cell builds the model with default arguments and uses
# lr=0.01, whereas the baseline cell passes explicit scale/shift/invert and
# lr=1e-6 — confirm the runs are meant to differ before comparing results.
cutmix_model = DPTDepthModel()
cutmix_model = cutmix_model.float().to("mps")
optim = Adam(params=cutmix_model.parameters(), lr=0.01)

# train with cutmix, then score on the test loader
train_with_cutmix(cutmix_model, nyu_train_dataloader, optim, epochs=1)
cutmix_res = eval(model=cutmix_model, loader=nyu_test_dataloader)

In [None]:
# train model with LMR regularization
lmr_model = DPTDepthModel().float().to("mps")
optim = Adam(cutmix_model.parameters(), lr=0.01)

# training of cutmix model
train_with_lmr(model=lmr_model, loader=nyu_train_dataloader, optim=optim, epochs=1)
lmr_res = eval(lmr_model, nyu_test_dataloader)