In [1]:
import argparse
import os

In [2]:
def get_args_parser():
    """Build the base command-line parser for training and evaluation.

    The parser is created with ``add_help=False`` so that it can be handed to
    another ``argparse.ArgumentParser`` through ``parents=[...]`` without
    registering ``-h/--help`` twice.

    Returns:
        argparse.ArgumentParser: parser with every option registered. Nothing
        is parsed here; call ``parse_args`` on the result (or on a parser that
        uses it as a parent).
    """
    parser = argparse.ArgumentParser(
        'EfficientFormer training and evaluation script', add_help=False)
    parser.add_argument('--batch-size', default=8, type=int)
    parser.add_argument('--epochs', default=50, type=int)

    # Model parameters
    parser.add_argument('--model', default='S_EleViTDecoder', type=str, metavar='MODEL',
                        help='Name of model to train')
    # nargs is required for the command line to be able to produce a list
    # like the default; without it a single int would replace the list.
    parser.add_argument('--input-size', default=[256, 512], nargs='+',
                        type=int, help='images input size')

    # Optimizer parameters
    parser.add_argument('--opt', default='AdamW', type=str, metavar='OPTIMIZER',
                        help='Optimizer (default: "AdamW")')

    # Learning rate schedule parameters
    parser.add_argument('--sched', default='CosineAnnealingLR', type=str, metavar='SCHEDULER',
                        help='LR scheduler (default: "CosineAnnealingLR")')
    parser.add_argument('--lr', type=float, default=1e-3, metavar='LR',
                        help='learning rate (default: 1e-3)')

    # Loss parameters
    parser.add_argument('--loss', default='DepthL1Loss', choices=['DepthL1Loss', 'FullLoss'],
                        type=str, help='Loss function (default: "DepthL1Loss")')
    # Semantic loss parameters
    parser.add_argument('--se_loss', default='CrossEntropyLoss', choices=['CrossEntropyLoss', 'FullLoss'],
                        type=str, help='Semantic loss function (default: "CrossEntropyLoss")')

    # Dataset parameters
    parser.add_argument('--validation_split', default=0.15,
                        type=float, help='validation split (default: 0.15)')
    parser.add_argument('--metric_scale', default=512.0,
                        type=float, help='metric scale (default: 512.0)')
    parser.add_argument('--dataset', default='s3d', choices=['s3d', 'ade20k'],
                        type=str, help='Dataset name (default: "s3d")')
    # Folder entries are paths, so they must be parsed as strings; the former
    # type=int rejected every value supplied on the command line.
    parser.add_argument('--folders', default=['Data/full'], type=str,
                        nargs="*", help='Dataset folder path(s)')

    parser.add_argument('--resume', default='', help='resume from checkpoint')
    parser.add_argument('--start_epoch', default=0, type=int, metavar='N',
                        help='start epoch')
    parser.add_argument('--output_dir', default='check_points',
                        help='path where to save, empty for no saving')
    parser.add_argument('--eval', default='',
                        help='path from where to load')
    parser.add_argument('--device', default='cuda',
                        help='device to use for training / testing')
    parser.add_argument('--seed', default=2023, type=int)
    parser.add_argument('--dist-eval', action='store_true',
                        default=False, help='Enabling distributed evaluation')
    parser.add_argument('--num_workers', default=10, type=int)
    parser.add_argument('--option', default='all', type=str,
                        choices=['all', 'semantic', 'depth', 'alb_shading'])
    parser.add_argument('--outsize', default=1, type=int)

    # --pin-mem / --no-pin-mem share the ``pin_mem`` destination; the
    # set_defaults call below makes pinning the default when neither is given.
    parser.add_argument('--pin-mem', action='store_true',
                        help='Pin CPU memory in DataLoader for more efficient (sometimes) transfer to GPU.')
    parser.add_argument('--no-pin-mem', action='store_false', dest='pin_mem',
                        help='Do not pin CPU memory in DataLoader.')
    parser.set_defaults(pin_mem=True)

    # distributed training parameters
    parser.add_argument('--world_size', default=1, type=int,
                        help='number of distributed processes')
    parser.add_argument('--dist_url', default='env://',
                        help='url used to set up distributed training')

    return parser
# Top-level parser: inherits every option from the base parser (which was built
# with add_help=False) and contributes the -h/--help option itself.
parser = argparse.ArgumentParser(parents=[get_args_parser()])

In [3]:
# Parse an explicit empty argv to obtain a Namespace filled with every CLI
# default. Previously ``args`` was the ArgumentParser object itself, so options
# that were not overridden below did not exist as attributes. The empty list is
# required: inside a Jupyter kernel ``sys.argv`` carries the kernel's own
# launch flags, which this parser would reject.
args = parser.parse_args([])

# Notebook-specific overrides of the defaults.
args.input_size = [256, 512]
args.outsize = 1
args.dataset = 's3d'
args.folders = ["../Data/full"]
args.validation_split = 0.1
args.option = "all"

In [13]:
from datasets import build_dataset
import numpy as np
# Build the two dataset splits from the configuration held in `args`
# (build_dataset is project code imported from the local `datasets` module).
train_dataset, test_dataset = build_dataset(args)

In [14]:
from torch.utils.data import DataLoader

In [15]:
# Wrap the training split in a DataLoader that yields batches of 4 samples;
# every other DataLoader option is left at its default.
train_loader = DataLoader(train_dataset, batch_size=4)

In [16]:
import numpy as np
from skimage import filters
def reconstruct(d_batch):
    """
    Compute equirectangular normal maps from a batch of equirectangular depth maps.

    Each depth map is treated as a surface P(theta, phi) = d(theta, phi) * r(theta, phi),
    where r is the unit viewing ray of a pixel. The normal at a pixel is the
    normalised cross product of the two per-pixel tangent vectors
    dP/d(column) and dP/d(row).

    Parameters:
    d_batch (torch.Tensor or numpy.ndarray): Batch of depth maps (equirectangular projection).
                            Shape: (batch_size, 1, height, width) or (batch_size, height, width).
                            Both tangent vectors are linear in depth, so a uniform
                            rescaling of the depth does not change the normals.

    Returns:
    n_hat_batch (numpy.ndarray): Batch of equirectangular unit normal maps (float64).
                                Shape: (batch_size, height, width, 3).
                                Pixels whose cross product is exactly zero are
                                returned as zero vectors.

    Raises:
    ValueError: If d_batch has neither of the supported shapes.
    """
    # Torch tensors expose ``detach``; move them to host memory as NumPy data
    # so that all of the arithmetic below is plain NumPy broadcasting.
    if hasattr(d_batch, "detach"):
        d_batch = d_batch.detach().cpu().numpy()
    depth = np.asarray(d_batch, dtype=np.float64)
    given_shape = tuple(depth.shape)

    # Drop the singleton channel axis. Leaving it in place made the two
    # halves of each tangent vector differ in rank.
    if depth.ndim == 4 and depth.shape[1] == 1:
        depth = depth[:, 0]
    if depth.ndim != 3:
        raise ValueError(
            "d_batch must have shape (B, 1, H, W) or (B, H, W), got "
            f"{given_shape}"
        )

    batch_size, height, width = depth.shape
    if batch_size == 0:
        return np.zeros((0, height, width, 3), dtype=np.float64)

    # Per-image depth gradients from Scharr filters: scharr_h for the row
    # (theta) direction, scharr_v for the column (phi) direction.
    # NOTE(review): the formulas below expect per-pixel derivatives; confirm
    # that skimage's Scharr response to a unit ramp has that scale and sign.
    d_theta = np.stack([filters.scharr_h(d) for d in depth])
    d_phi = np.stack([filters.scharr_v(d) for d in depth])

    # Radians covered by one pixel step along rows (k_u) and columns (k_v).
    k_u = np.pi / height
    k_v = 2.0 * np.pi / width

    # Pixel-centre angles: theta spans (0, pi) down the rows, phi spans
    # (-pi, pi) across the columns.
    theta, phi = np.meshgrid(k_u * (np.arange(height) + 0.5),
                             k_v * (np.arange(width) + 0.5) - np.pi,
                             indexing='ij')
    sin_t, cos_t = np.sin(theta), np.cos(theta)
    sin_p, cos_p = np.sin(phi), np.cos(phi)

    # Unit viewing ray and its partial derivatives, each of shape (H, W, 3).
    r = np.stack((sin_t * cos_p, cos_t, -sin_t * sin_p), axis=-1)
    r_theta = np.stack((cos_t * cos_p, -sin_t, -cos_t * sin_p), axis=-1)
    # d r / d phi carries a sin(theta) factor; without it the horizontal
    # tangent is mis-scaled away from the equator.
    r_phi = np.stack((-sin_t * sin_p, np.zeros_like(phi), -sin_t * cos_p), axis=-1)

    # Product rule: dP/dx = (dd/dx) * r + d * (dr/dx). The two terms are
    # summed into a single 3-vector per pixel, giving shape (B, H, W, 3).
    d = depth[..., np.newaxis]
    p_t = d_theta[..., np.newaxis] * r + k_u * d * r_theta
    p_p = d_phi[..., np.newaxis] * r + k_v * d * r_phi

    # 3-D cross product over the last axis gives the (unnormalised) normal.
    n_hat = np.cross(p_p, p_t)

    # Normalise, dividing by 1 where the norm is zero to avoid NaNs.
    n_hat_norm = np.linalg.norm(n_hat, axis=-1, keepdims=True)
    n_hat = n_hat / np.where(n_hat_norm > 0, n_hat_norm, 1)

    return n_hat

In [17]:
# Pull the first batch from the training loader for inspection.
data = next(iter(train_loader))

In [18]:
# Run `reconstruct` on the batch's 'depth' entry and keep the result in `out`.
out = reconstruct(data['depth'])

NameError: name 'filters' is not defined

In [None]:
# Display the shape of the batch's 'depth' entry.
data['depth'].shape