In [20]:
import importlib
import os
import time

import numpy as np
import pandas as pd
import torch
import torchvision.transforms as TT
from torch.utils.data import DataLoader

In [2]:
# Notebook-wide configuration. Only "img_res", "depth_img_res", "n_workers",
# "batch_size" and "batch_size_eval" are read by the cells of this notebook.
param = dict(
    seed=4242,
    # Indices 1 and 2 are used as the Resize target (height, width);
    # index 0 is presumably the channel count.
    img_res=(3, 256, 256),
    depth_img_res=(1, 64, 64),
    # Forwarded to DataLoader(num_workers=...)
    n_workers=2,

    # Batch sizes of the train and test loaders
    batch_size=64,
    batch_size_eval=1,
    lr=1e-3,
    lr_patience=15,
    e_stop_epochs=30,
    epochs=120,
)

# Root folder of the dataset; the trailing slash is required because paths
# are built by plain string concatenation.
dataset_root = './data/NYUv2/'

# Utils

In [3]:
def hardware_check(gpu=False):
    """
    Pick the torch device string to run on.

    Args:
        gpu: when truthy, require CUDA and select the first CUDA device.

    Returns:
        "cuda:0" if `gpu` is truthy and CUDA is available, otherwise "cpu".

    Raises:
        RuntimeError: if `gpu` is truthy but `torch.cuda.is_available()` is False.
    """
    if not gpu:
        return "cpu"
    if not torch.cuda.is_available():
        # Raising a plain string is itself a TypeError in Python 3, which hid
        # the intended message; raise a real exception instead.
        raise RuntimeError("GPU not present")
    return "cuda:0"

# Dataset

In [4]:
class NYU2_Dataset:
    """
    NYUv2 dataset wrapper yielding (rgb, depth) tensor pairs.

      * Indoor img (480, 640, 3) depth (480, 640, 1) -> range between 0.5 to 10 meters
      * 654 Test and 50688 Train images

    On-disk layout read by this class (paths are built by string
    concatenation, so `path` must end with a '/'):
      * <path>test/eigen_test_rgb.npy and <path>test/eigen_test_depth.npy
      * <path>train/<scene>/*.jpg (RGB) and <path>train/<scene>/*.png (depth)

    NOTE(review): `Image` (presumably PIL.Image) and `augmentation2D` are used
    by `__getitem__` but are not imported/defined in the visible notebook
    cells; they must be in scope before indexing the train split.
    """

    def __init__(self, path, dts_type, aug, rgb_h_res, d_h_res, dts_size=0, scenarios='indoor'):
        """
        Index the requested split.

        Args:
            path: dataset root, ending with '/'.
            dts_type: 'test' (arrays loaded in memory) or 'train' (file names listed).
            aug: when truthy, `augmentation2D` is applied in `__getitem__`.
            rgb_h_res, d_h_res: stored on the instance; the output resolution
                actually applied in `__getitem__` comes from the global `param`.
            dts_size: keep only the first `dts_size` samples; 0 keeps everything.
            scenarios: stored on the instance, not used by this class.

        Raises:
            SystemError: unknown `dts_type`, a train file that is neither jpg
                nor png, or a different number of RGB and depth train files.
        """
        self.dataset = path
        self.x = []
        self.y = []
        self.info = 0
        self.dts_type = dts_type
        self.aug = aug
        self.rgb_h_res = rgb_h_res
        self.d_h_res = d_h_res
        self.scenarios = scenarios

        if self.dts_type == 'test':
            # Whole test split is stored as two .npy arrays
            img_path = self.dataset + self.dts_type + '/eigen_test_rgb.npy'
            depth_path = self.dataset + self.dts_type + '/eigen_test_depth.npy'

            rgb = np.load(img_path)
            depth = np.load(depth_path)

            if dts_size != 0:
                rgb = rgb[:dts_size]
                depth = depth[:dts_size]

            self.x = rgb
            self.y = depth
            self.info = len(self.x)

        elif self.dts_type == 'train':
            split_dir = self.dataset + self.dts_type + '/'
            # Stored names are relative to the dataset root: train/<scene>/<file>
            for scene in os.listdir(split_dir):
                for el in os.listdir(split_dir + scene):
                    rel_path = self.dts_type + '/' + scene + '/' + el
                    if 'jpg' in el:
                        self.x.append(rel_path)
                    elif 'png' in el:
                        self.y.append(rel_path)
                    else:
                        raise SystemError('Type image error (train)')

            if len(self.x) != len(self.y):
                raise SystemError('Problem with Img and Gt, no same train_size')

            # Sorting both lists is what pairs each RGB file with its depth map
            self.x.sort()
            self.y.sort()

            if dts_size != 0:
                self.x = self.x[:dts_size]
                self.y = self.y[:dts_size]

            self.info = len(self.x)

        else:
            raise SystemError('Problem in the path')

    def __len__(self):
        """Number of samples kept for this split."""
        return self.info

    def __getitem__(self, index=None, print_info_aug=False):
        """
        Load, optionally augment, resize and convert one sample.

        Args:
            index: sample position; a random one is drawn when None.
            print_info_aug: forwarded to `augmentation2D`.

        Returns:
            (img, depth) float tensors; img is resized to param['img_res'][1:]
            and normalized with ImageNet statistics, depth is resized to
            param['depth_img_res'][1:].

        Raises:
            OSError: the RGB file of a train sample cannot be opened or decoded.
        """
        if index is None:
            index = np.random.randint(0, self.info)

        if self.dts_type == 'test':
            img = self.x[index]
            depth = np.expand_dims(self.y[index] * 100, axis=-1)
        else:
            img_name = self.dataset + self.x[index]
            try:
                with Image.open(img_name) as raw_img:
                    img = np.array(raw_img.convert('RGB'))
            except OSError as err:
                # Raise instead of exit(): terminating the interpreter from a
                # dataset (possibly inside a DataLoader worker) hides the real
                # cause, and the former bare except also swallowed NameError.
                raise OSError(f"Failed opening {img_name}") from err

            # Context manager so the depth file handle is released as well
            with Image.open(self.dataset + self.y[index]) as raw_depth:
                depth = np.array(raw_depth) / 255
            # Scale to [0, 1000] and clamp to [50, 1000]
            # (presumably centimetres, given the 0.5-10 m range above)
            depth = np.clip(depth * 1000, 50, 1000)
            depth = np.expand_dims(depth, axis=-1)

        # Augmentation
        if self.aug:
            img, depth = augmentation2D(img, depth, print_info_aug)

        img_post_processing = TT.Compose([
            TT.ToTensor(),
            TT.Resize((param['img_res'][1], param['img_res'][2]), antialias=True),
            TT.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) # Imagenet
        ])
        depth_post_processing = TT.Compose([
            TT.ToTensor(),
            TT.Resize((param['depth_img_res'][1], param['depth_img_res'][2]), antialias=True),
        ])

        # Divide by 255 first: the array becomes float, so pixel scaling is done here
        img = img_post_processing(img/255)
        depth = depth_post_processing(depth)

        return img.float(), depth.float()

In [5]:
def init_train_test_loader(dts_root_path, rgb_h_res, d_h_res, bs_train, bs_eval, num_workers, size_train=0, size_test=0):
    """
    Build the NYUv2 datasets and their DataLoaders.

    Args:
        dts_root_path: dataset root handed to `NYU2_Dataset`.
        rgb_h_res, d_h_res: resolutions handed to `NYU2_Dataset`.
        bs_train, bs_eval: batch sizes of the train and test loaders.
        num_workers: worker count used by both loaders.
        size_train, size_test: optional cap on samples per split (0 = all).

    Returns:
        (training_DataLoader, test_DataLoader, training_Dataset, test_Dataset)
    """
    dataset_common = dict(path=dts_root_path, rgb_h_res=rgb_h_res, d_h_res=d_h_res)
    loader_common = dict(num_workers=num_workers, pin_memory=True)

    # Datasets: test split (no augmentation) is built first, then the augmented train split
    test_Dataset = NYU2_Dataset(dts_type='test', aug=False, dts_size=size_test, **dataset_common)
    training_Dataset = NYU2_Dataset(dts_type='train', aug=True, dts_size=size_train, **dataset_common)

    # Loaders: only the training loader shuffles
    training_DataLoader = DataLoader(training_Dataset, batch_size=bs_train, shuffle=True, **loader_common)
    test_DataLoader = DataLoader(test_Dataset, batch_size=bs_eval, shuffle=False, **loader_common)

    return training_DataLoader, test_DataLoader, training_Dataset, test_Dataset

# Evaluation

In [22]:
def compute_evaluation(test_dataloader, model, name, gpu=False):
    """
    Measure the per-batch forward-pass latency of `model`.

    Args:
        test_dataloader: iterable with a length, yielding (inputs, depths) batches.
        model: module called as `model(inputs)`; switched to eval mode here.
        name: label copied into the returned row.
        gpu: when truthy, inputs are moved with `.cuda()` and the CUDA device
            is synchronized around the timed region; otherwise `.cpu()` is used.

    Returns:
        [name, gpu, average, median, p90], timings in seconds per batch.
    """
    times = np.zeros(len(test_dataloader))

    model.eval()

    # Depth maps are not needed to time inference, so they are not transferred
    for i, (inputs, _) in enumerate(test_dataloader):
        inputs = inputs.cuda() if gpu else inputs.cpu()

        if gpu:
            # Do not bill pending work from earlier steps to this forward pass
            torch.cuda.synchronize()
        start = time.perf_counter()
        with torch.no_grad():
            model(inputs)
        if gpu:
            # CUDA kernels run asynchronously: wait for them before stopping
            # the clock, otherwise only the launch overhead is measured.
            torch.cuda.synchronize()
        times[i] = time.perf_counter() - start

    average = np.average(times)
    median = np.percentile(times, 50)
    p90 = np.percentile(times, 90)

    return [name, gpu, average, median, p90]

In [18]:
# Only the test loader (second element of the returned tuple) is benchmarked.
# Indexing avoids binding `_`, `__` and `___`, which IPython uses for its
# output history.
test_dataloader = init_train_test_loader(
    dts_root_path=dataset_root,
    rgb_h_res=param['img_res'][1],
    d_h_res=param['depth_img_res'][1],
    bs_train=param['batch_size'],
    bs_eval=param['batch_size_eval'],
    num_workers=param['n_workers'],
)[1]

# len(loader) counts batches; the dataset length is the number of samples
print(f"{len(test_dataloader.dataset)} samples")

654 samples


In [36]:
# One row per (model, device) benchmark; columns follow the order of the list
# returned by compute_evaluation.
results = pd.DataFrame(
    columns=["name", "gpu", "average", "median", "p90"],
)

# Row label of the next entry written to `results`
index = 0
def evaluate_model(build_model, gpu, test_name, device, arch_type=None):
    """
    Build one model, time it on the test loader and append the row to `results`.

    Args:
        build_model: factory called as `build_model(device=...)`, plus
            `arch_type=...` when `arch_type` is truthy; its result must
            provide `.to(device=...)`.
        gpu: forwarded to `compute_evaluation`.
        test_name: label stored in the result row.
        device: device string given to the factory and to `.to`.
        arch_type: optional architecture variant forwarded to the factory.

    Side effects:
        Writes the row at `results.loc[index]` and increments the global `index`.
    """
    global index

    build_kwargs = {"device": device}
    if arch_type:
        # Forward the caller's value; it used to be ignored in favour of a hard-coded 's'
        build_kwargs["arch_type"] = arch_type
    model = build_model(**build_kwargs).to(device=device)

    evaluation = compute_evaluation(test_dataloader, model, test_name, gpu)
    results.loc[index] = evaluation
    index += 1

# Training notebooks have been exported into Python scripts so that they can be imported.
# Each entry: (module exposing `build_model`, label written to the CSV, arch_type or None)
models_to_benchmark = [
    ("original", "METER original", 's'),
    ("efficient_vit", "EfficientVit", None),
    ("mob_eff", "Mobile + EfficientVitBlock", None),
    ("meta_meter", "Meta METER", None),
    ("meta_eff", "Meta EfficientVit", None),
]

# GPU pass first, then CPU; hardware_check raises if the GPU pass cannot run
for gpu in [True, False]:
    device = hardware_check(gpu)

    for module_name, test_name, arch_type in models_to_benchmark:
        build_model = importlib.import_module(module_name).build_model
        evaluate_model(build_model, gpu, test_name, device, arch_type)

results.to_csv("results_time.csv", index=False)