In [1]:
import os

while "src" not in os.listdir():
    assert "/" != os.getcwd(), "src directory not found"
    os.chdir("..")

import sys
sys.path.append('simclr-pytorch')

import numpy as np
import matplotlib.pyplot as plt

from collections import defaultdict

import torch
from torch.utils.data import DataLoader, random_split

from src.lib.nyu_dataset import NYUDataset, transform
from src.lib.util import convert_depth_to_m

In [2]:
def calc_loss(loader, label, loss_fn):
    for name in models_map:
        model = models_map[name]["model"].to(device)
        model.eval()
        with torch.no_grad():
            # calculate validation loss
            loss = 0
            for batch in loader:
                x, y = batch
                x, y = x.to(device), y.to(device)
                y_pred = model(x)
                loss += loss_fn(y_pred, y).item()
            loss /= len(loader)
            del x, y, y_pred
        models_map[name][label] = loss
    

def get_absolute_errors(loader, label):
    for name in models_map:
        model = models_map[name]["model"].to(device)
        model.eval()
        with torch.no_grad():
            errors = []
            for batch in loader:
                x, y = batch
                x, y = x.to(device), y.to(device)
                y_pred = model(x)
                abs_err = torch.abs(y_pred - y).to("cpu").numpy()
                # flatten
                abs_err = abs_err.reshape(-1)
                errors.append(abs_err)
            errors = np.concatenate(errors)
            del x, y, y_pred
        models_map[name][label] = errors




In [3]:
DATA_DIR = "data"
DATASET_FILE = "nyu_depth_v2_labeled.mat"

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [4]:
dataset = NYUDataset(os.path.join(DATA_DIR, DATASET_FILE), transform=transform)

n_train, n_val = int(0.8 * len(dataset)), int(0.1 * len(dataset))
n_test = len(dataset) - n_train - n_val

train_set, val_set, test_set = random_split(dataset, [n_train, n_val, n_test], generator=torch.Generator().manual_seed(0))

train_loader = DataLoader(train_set, batch_size=64, shuffle=True, num_workers=8)
val_loader = DataLoader(val_set, batch_size=64, shuffle=True, num_workers=8)
test_loader = DataLoader(test_set, batch_size=64, shuffle=True, num_workers=8)

In [5]:
experiments_dir = "experiments"
experiment_names = os.listdir(experiments_dir)

models_map = {}
for name in experiment_names:
    # base_name = name[:-2]
    model_path = os.path.join(experiments_dir, name, "model.pth")
    models_map[name] = {"model": torch.load(model_path).to("cpu")}
    

In [6]:
def get_stats(models_map, loader, label):
    """ modifies models_map
        models_map[name][label_errors] = array of all errors for all pixels
        models_map[name][label_loss] = MSE in meters over all data samples

    Args:
        loader (_type_): _description_
        label (_type_): _description_
    """
    stats = defaultdict(dict)
    for name in models_map:
        model = models_map[name]["model"].to(device)
        model.eval()
        with torch.no_grad():
            errors = []
            rel = []
            loss = 0
            for batch in loader:
                x, y = batch
                x, y = x.to(device), y.to(device)
                y_pred = model(x)
                y_pred = convert_depth_to_m(y_pred)
                y = convert_depth_to_m(y)

                loss += torch.nn.functional.mse_loss(y_pred, y).item()

                y_pred = y_pred.to("cpu").numpy()
                y = y.to("cpu").numpy()

                ratio = y / y_pred
                ratio = ratio.reshape(-1)
                errors.append(np.maximum(ratio, 1 / ratio))

                rel_error = np.abs(y_pred - y) / y
                rel_error = rel_error.reshape(-1)
                rel.append(rel_error)


                
            errors = np.concatenate(errors)
            rel = np.concatenate(rel)
            del x, y, y_pred
        stats[name][label + "_errors"] = errors
        stats[name][label + "_rel"] = rel
        stats[name][label + "_loss"] = loss / len(loader)
    return stats

stats = get_stats(models_map, test_loader, "test")

In [7]:

thresholds = [1.25, 1.25**2, 1.25**3]

mean_stats = defaultdict(lambda : defaultdict(list))
for name in stats:
    base_name = " ".join(name.split("_")[:2])
    mean_stats[base_name]["test_loss"].append(stats[name]["test_loss"])
    mean_stats[base_name]["test_rel"].append(np.mean(stats[name]["test_rel"]))

    errors = stats[name]["test_errors"]
    for threshold in thresholds:
        mean_stats[base_name][f"test_acc_{threshold}"].append(np.mean(errors < threshold))
    
    

for name in mean_stats:
    mean_stats[name]["test_errors"] = np.mean(mean_stats[name]["test_errors"])
    mean_stats[name]["test_rel"] = np.mean(mean_stats[name]["test_rel"])
    mean_stats[name]["test_loss"] = np.mean(mean_stats[name]["test_loss"])
    for threshold in thresholds:
        mean_stats[name][f"test_acc_{threshold}"] = np.mean(mean_stats[name][f"test_acc_{threshold}"])


for name in sorted(mean_stats):
    print(f"test_loss\t{name}:\t{mean_stats[name]['test_loss']:.4f}")
    print(f"test_rel\t{name}:\t{mean_stats[name]['test_rel']:.4f}")
    for threshold in thresholds:
        key = f"test_acc_{threshold}"
        print(f"{key}\t{name}:\t{mean_stats[name][key]:.4f}")

test_loss	classification finetuning:	0.4121
test_rel	classification finetuning:	0.1775
test_acc_1.25	classification finetuning:	0.7442
test_acc_1.5625	classification finetuning:	0.9482
test_acc_1.953125	classification finetuning:	0.9886
test_loss	classification probing:	0.8462
test_rel	classification probing:	0.2453
test_acc_1.25	classification probing:	0.5536
test_acc_1.5625	classification probing:	0.8546
test_acc_1.953125	classification probing:	0.9593
test_loss	classification switch:	0.4599
test_rel	classification switch:	0.1832
test_acc_1.25	classification switch:	0.7135
test_acc_1.5625	classification switch:	0.9406
test_acc_1.953125	classification switch:	0.9868
test_loss	contrastive finetuning:	0.4786
test_rel	contrastive finetuning:	0.1895
test_acc_1.25	contrastive finetuning:	0.7037
test_acc_1.5625	contrastive finetuning:	0.9346
test_acc_1.953125	contrastive finetuning:	0.9866
test_loss	contrastive probing:	0.8140
test_rel	contrastive probing:	0.2655
test_acc_1.25	contrastive p

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


In [8]:
import pandas as pd 
df = pd.DataFrame(mean_stats).T
# drop col test_errors
df = df.drop("test_errors", axis=1)
df.columns = ["loss", "rel", "delta < 1.25", "delta < 1.25^2", "delta < 1.25^3"]
# sort by rel desc
df = df.sort_values("rel", ascending=False)
df.to_csv("results/metrics.csv")
# print in latex with 3 decimal places
print(df.to_latex(index=True, float_format="%.3f"))

\begin{tabular}{lrrrrr}
\toprule
{} &  loss &   rel &  delta < 1.25 &  delta < 1.25\textasciicircum 2 &  delta < 1.25\textasciicircum 3 \\
\midrule
contrastive probing       & 0.814 & 0.266 &         0.566 &           0.856 &           0.954 \\
classification probing    & 0.846 & 0.245 &         0.554 &           0.855 &           0.959 \\
contrastive switch        & 0.570 & 0.210 &         0.663 &           0.914 &           0.979 \\
contrastive finetuning    & 0.479 & 0.190 &         0.704 &           0.935 &           0.987 \\
classification switch     & 0.460 & 0.183 &         0.713 &           0.941 &           0.987 \\
classification finetuning & 0.412 & 0.178 &         0.744 &           0.948 &           0.989 \\
\bottomrule
\end{tabular}



  print(df.to_latex(index=True, float_format="%.3f"))
