In [59]:
cd ..

/home/av/gitprojects


In [60]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [33]:
import argparse
import copy
import os
import os.path as osp
import time

import mmcv
import torch
from mmcv.runner import init_dist
from mmcv.utils import Config, DictAction, get_git_hash

from mmseg import __version__
from mmseg.apis import set_random_seed, train_segmentor
from mmseg.datasets import build_dataset
from mmseg.models import build_segmentor
from mmseg.utils import collect_env, get_root_logger


In [34]:
def parse_args():
    parser = argparse.ArgumentParser(description='Train a segmentor')
    parser.add_argument('config', help='train config file path')
    parser.add_argument('--work-dir', help='the dir to save logs and models')
    parser.add_argument(
        '--load-from', help='the checkpoint file to load weights from')
    parser.add_argument(
        '--resume-from', help='the checkpoint file to resume from')
    parser.add_argument(
        '--no-validate',
        action='store_true',
        help='whether not to evaluate the checkpoint during training')
    group_gpus = parser.add_mutually_exclusive_group()
    group_gpus.add_argument(
        '--gpus',
        type=int,
        help='number of gpus to use '
        '(only applicable to non-distributed training)')
    group_gpus.add_argument(
        '--gpu-ids',
        type=int,
        nargs='+',
        help='ids of gpus to use '
        '(only applicable to non-distributed training)')
    parser.add_argument('--seed', type=int, default=None, help='random seed')
    parser.add_argument(
        '--deterministic',
        action='store_true',
        help='whether to set deterministic options for CUDNN backend.')
    parser.add_argument(
        '--options', nargs='+', action=DictAction, help='custom options')
    parser.add_argument(
        '--launcher',
        choices=['none', 'pytorch', 'slurm', 'mpi'],
        default='none',
        help='job launcher')
    parser.add_argument('--local_rank', type=int, default=0)
    return parser

parser = parse_args()
args = parser.parse_known_args()[0]

In [35]:
cfg = Config.fromfile('configs/depth_estimation/swin_depth_v1.py')# args

In [36]:
if args.options is not None:
    cfg.merge_from_dict(args.options)
# set cudnn_benchmark
if cfg.get('cudnn_benchmark', False):
    torch.backends.cudnn.benchmark = True

# work_dir is determined in this priority: CLI > segment in file > filename
if args.work_dir is not None:
    # update configs according to CLI args if args.work_dir is not None
    cfg.work_dir = args.work_dir
elif cfg.get('work_dir', None) is None:
    # use config filename as default work_dir if cfg.work_dir is None
    cfg.work_dir = osp.join('./work_dirs',
                            osp.splitext(osp.basename(args.config))[0])
if args.load_from is not None:
    cfg.load_from = args.load_from
if args.resume_from is not None:
    cfg.resume_from = args.resume_from
if args.gpu_ids is not None:
    cfg.gpu_ids = args.gpu_ids
else:
    cfg.gpu_ids = range(1) if args.gpus is None else range(args.gpus)

# init distributed env first, since logger depends on the dist info.
if args.launcher == 'none':
    distributed = False
else:
    distributed = True
    init_dist(args.launcher, **cfg.dist_params)

In [37]:
# create work_dir
mmcv.mkdir_or_exist(osp.abspath(cfg.work_dir))
# dump config
cfg.dump(osp.join(cfg.work_dir, osp.basename(args.config)))
# init the logger before other steps
timestamp = time.strftime('%Y%m%d_%H%M%S', time.localtime())
log_file = osp.join(cfg.work_dir, f'{timestamp}.log')
logger = get_root_logger(log_file=log_file, log_level=cfg.log_level)

# init the meta dict to record some important information such as
# environment info and seed, which will be logged
meta = dict()
# log env info
env_info_dict = collect_env()
env_info = '\n'.join([f'{k}: {v}' for k, v in env_info_dict.items()])
dash_line = '-' * 60 + '\n'
logger.info('Environment info:\n' + dash_line + env_info + '\n' +
            dash_line)
meta['env_info'] = env_info

# log some basic info
# logger.info(f'Distributed training: {distributed}')
# logger.info(f'Config:\n{cfg.pretty_text}')

# set random seeds
if args.seed is not None:
    logger.info(f'Set random seed to {args.seed}, deterministic: '
                f'{args.deterministic}')
    set_random_seed(args.seed, deterministic=args.deterministic)
cfg.seed = args.seed
meta['seed'] = args.seed
meta['exp_name'] = osp.basename(args.config)

model = build_segmentor(
    cfg.model,
    train_cfg=cfg.get('train_cfg'),
    test_cfg=cfg.get('test_cfg'))

2021-09-27 15:11:41,712 - mmseg - INFO - Environment info:
------------------------------------------------------------
sys.platform: linux
Python: 3.8.11 (default, Aug  3 2021, 15:09:35) [GCC 7.5.0]
CUDA available: True
GPU 0,1,2,3,4,5,6,7: Tesla V100-SXM2-32GB
CUDA_HOME: /usr/local/cuda
NVCC: Cuda compilation tools, release 10.1, V10.1.243
GCC: gcc (Ubuntu 5.4.0-6ubuntu1~16.04.12) 5.4.0 20160609
PyTorch: 1.9.1
PyTorch compiling details: PyTorch built with:
  - GCC 7.3
  - C++ Version: 201402
  - Intel(R) oneAPI Math Kernel Library Version 2021.3-Product Build 20210617 for Intel(R) 64 architecture applications
  - Intel(R) MKL-DNN v2.1.2 (Git Hash 98be7e8afa711dc9b66c8ff3504129cb82013cdb)
  - OpenMP 201511 (a.k.a. OpenMP 4.5)
  - NNPACK is enabled
  - CPU capability usage: AVX2
  - CUDA Runtime 10.2
  - NVCC architecture flags: -gencode;arch=compute_37,code=sm_37;-gencode;arch=compute_50,code=sm_50;-gencode;arch=compute_60,code=sm_60;-gencode;arch=compute_61,code=sm_61;-gencode;arch=c

# Define dataset

In [38]:
from mmseg.datasets.custom import *

@DATASETS.register_module(force=True)
class WaymoDepthDataset(CustomDataset):
    def __init__(self, *args,  depth_dir=None, depth_map_suffix='.npy',**kwargs):
        self.depth_dir = depth_dir
        self.depth_map_suffix = depth_map_suffix
        super(WaymoDepthDataset, self).__init__(*args,**kwargs)
    
    
    def pre_pipeline(self, results):
        super().pre_pipeline(results)
        results['depth_prefix'] = self.depth_dir
        results['flip'] = False
        results['flip_direction'] = None
        
    def load_annotations(self, img_dir, img_suffix, ann_dir, seg_map_suffix,
                         split):
        """Load annotation from directory.

        Args:
            img_dir (str): Path to image directory
            img_suffix (str): Suffix of images.
            ann_dir (str|None): Path to annotation directory.
            seg_map_suffix (str|None): Suffix of segmentation maps.
            split (str|None): Split txt file. If split is specified, only file
                with suffix in the splits will be loaded. Otherwise, all images
                in img_dir/ann_dir will be loaded. Default: None

        Returns:
            list[dict]: All image info of dataset.
        """
        depth_dir = self.depth_dir
        depth_map_suffix = self.depth_map_suffix
        img_infos = []
        if split is not None:
            with open(split) as f:
                for line in f:
                    img_name = line.strip()
                    img_info = dict(filename=img_name + img_suffix)
                    if ann_dir is not None:
                        seg_map = img_name + seg_map_suffix
                        img_info['ann'] = dict(seg_map=seg_map)
                    img_infos.append(img_info)
        else:
            for img in mmcv.scandir(img_dir, img_suffix, recursive=True):
                img_info = dict(filename=img)
                if ann_dir is not None:
                    seg_map = img.replace(img_suffix, seg_map_suffix)
                    img_info['ann'] = dict(seg_map=seg_map)
                if depth_dir is not None:
                    depth_map = img.replace(img_suffix, depth_map_suffix)
                    img_info['ann'] = dict(depth_map=depth_map)
                img_infos.append(img_info)
        print_log(f'Loaded {len(img_infos)} images', logger=get_root_logger())
        return img_infos

## Load Depth anno

In [39]:
from mmseg.datasets.pipelines.loading import LoadAnnotations, PIPELINES

@PIPELINES.register_module(force=True)
class LoadDepthAnnotations(LoadAnnotations):
    def __call__(self, results):
        """Call function to load multiple types annotations.

            # constuct depth map from npy format
        """
        print(results.keys())
        if self.file_client is None:
            self.file_client = mmcv.FileClient(**self.file_client_args)
        
        if results.get('depth_prefix', None) is not None:
            filename = osp.join(results['depth_prefix'],
                                results['ann_info']['depth_map'])
        else:
            filename = results['ann_info']['depth_map']

        height, width = results['ori_shape'][:2]

        data = np.load(filename)
        in_side_img = (data[:,:2]<np.array([[width, height]])).sum(1)==2

        depth = np.zeros([height, width], np.float32)
        mask = np.zeros([height, width], bool)
        
        x = data[:,0].astype(int)[in_side_img]
        y = data[:,1].astype(int)[in_side_img]
        v = data[:,2][in_side_img]

        depth[y, x] = v
        mask[y,x] = True
        
        results['gt_depth'] = depth
        results['depth_mask'] = (y,x)
#         results['seg_fields'].append('gt_semantic_seg')
        return results


# Build DS

In [40]:
train_cfg = {'type': 'WaymoDepthDataset',
 'data_root': 'data/waymo-depth/',
 'img_dir': 'val/image',
 'depth_dir': 'data/waymo-depth/val/depth',
 'pipeline': [{'type': 'LoadImageFromFile'},
  {'type': 'LoadDepthAnnotations', 'reduce_zero_label': True},
#   {'type': 'Resize', 'img_scale': (2048, 512), 'ratio_range': (0.5, 2.0)},
#   {'type': 'RandomCrop', 'crop_size': (512, 512), 'cat_max_ratio': 0.75},
#   {'type': 'RandomFlip', 'prob': 0.5},
  {'type': 'PhotoMetricDistortion'},
  {'type': 'Normalize',
   'mean': [123.675, 116.28, 103.53],
   'std': [58.395, 57.12, 57.375],
   'to_rgb': True},
#   {'type': 'Pad', 'size': (512, 512), 'pad_val': 0, 'seg_pad_val': 255},
  {'type': 'DefaultFormatBundle'},
  {'type': 'Collect', 'keys': ['img', 'gt_depth', 'depth_mask']}]}

In [41]:
train_ds = build_dataset(train_cfg)

2021-09-27 15:11:46,355 - mmseg - INFO - Loaded 39791 images


In [42]:
item = train_ds[0]

LoadImageFromFile(to_float32=False,color_type='color',imdecode_backend='cv2')
dict_keys(['img_info', 'ann_info', 'seg_fields', 'img_prefix', 'seg_prefix', 'depth_prefix', 'flip', 'flip_direction', 'filename', 'ori_filename', 'img', 'img_shape', 'ori_shape', 'pad_shape', 'scale_factor', 'img_norm_cfg'])
LoadDepthAnnotations(reduce_zero_label=True,imdecode_backend='pillow')
PhotoMetricDistortion(brightness_delta=32, contrast_range=(0.5, 1.5), saturation_range=(0.5, 1.5), hue_delta=18)
Normalize(mean=[123.675 116.28  103.53 ], std=[58.395 57.12  57.375], to_rgb=True)
DefaultFormatBundle
Collect(keys=['img', 'gt_depth', 'depth_mask'], meta_keys=('filename', 'ori_filename', 'ori_shape', 'img_shape', 'pad_shape', 'scale_factor', 'flip', 'flip_direction', 'img_norm_cfg'))


In [43]:
# item['depth_mask']

# Model 

In [44]:
cfg.model

{'type': 'EncoderDecoder',
 'pretrained': None,
 'backbone': {'type': 'SwinTransformer',
  'embed_dim': 96,
  'depths': [2, 2, 6, 2],
  'num_heads': [3, 6, 12, 24],
  'window_size': 7,
  'mlp_ratio': 4.0,
  'qkv_bias': True,
  'qk_scale': None,
  'drop_rate': 0.0,
  'attn_drop_rate': 0.0,
  'drop_path_rate': 0.3,
  'ape': False,
  'patch_norm': True,
  'out_indices': (0, 1, 2, 3),
  'use_checkpoint': False},
 'decode_head': {'type': 'UPerHead',
  'in_channels': [96, 192, 384, 768],
  'in_index': [0, 1, 2, 3],
  'pool_scales': (1, 2, 3, 6),
  'channels': 512,
  'dropout_ratio': 0.1,
  'num_classes': 1,
  'norm_cfg': {'type': 'SyncBN', 'requires_grad': True},
  'align_corners': False,
  'loss_decode': {'type': 'CrossEntropyLoss',
   'use_sigmoid': False,
   'loss_weight': 1.0}},
 'auxiliary_head': None,
 'train_cfg': {},
 'test_cfg': {'mode': 'whole'}}

In [56]:
model = build_segmentor(
{'type': 'EncoderDecoder',
 'pretrained': None,
 'backbone': {'type': 'SwinTransformer',
  'embed_dim': 96,
  'depths': [2, 2, 6, 2],
  'num_heads': [3, 6, 12, 24],
  'window_size': 7,
  'mlp_ratio': 4.0,
  'qkv_bias': True,
  'qk_scale': None,
  'drop_rate': 0.0,
  'attn_drop_rate': 0.0,
  'drop_path_rate': 0.3,
  'ape': False,
  'patch_norm': True,
  'out_indices': (0, 1, 2, 3),
  'use_checkpoint': False},
 'decode_head': {'type': 'UPerHead',
  'in_channels': [96, 192, 384, 768],
  'in_index': [0, 1, 2, 3],
  'pool_scales': (1, 2, 3, 6),
  'channels': 512,
  'dropout_ratio': 0.1,
  'num_classes': 1,
  'norm_cfg': {'type': 'BN', 'requires_grad': True},
  'align_corners': False,
  'loss_decode': {'type': 'CrossEntropyLoss',
   'use_sigmoid': False,
   'loss_weight': 1.0}},
 'auxiliary_head': None,
 'train_cfg': {},
 'test_cfg': {'mode': 'whole'}}
).cuda()

In [57]:
with torch.no_grad():
    dummy_inputs = torch.randn(2,3,1280,1920).cuda()
    out = model.forward_dummy(dummy_inputs)

In [62]:
# out.shape

In [52]:
# type(model)

mmseg.models.segmentors.encoder_decoder.EncoderDecoder

In [61]:
# model(dummy_inputs, {})

TypeError: forward_train() missing 1 required positional argument: 'gt_semantic_seg'