In [1]:
import os
# Report where the kernel started, then switch the working directory to the
# parent of the hard-coded `src` folder (presumably so the `src.*` imports
# below resolve from the repository root -- confirm).
print(f'current directory {os.getcwd()}')
abspath = os.path.abspath('/project/CollabRoboGroup/mi8uu/repo/MAME/src')
dir_name = os.path.dirname(abspath)
os.chdir(dir_name)
print('cwd change to:', os.getcwd())

from argparse import ArgumentParser
from datetime import datetime
import torch
import wandb
from pytorch_lightning import loggers, Trainer, seed_everything
from sklearn.model_selection import LeaveOneOut

from src.models.mog_model_trainer import *
from src.models.base_model_trainer import *
from src.datasets.data_module import *
from src.utils.model_saving import *
from src.utils.debug_utils import *
from src.utils.log import TextLogger
from src.configs import config

from collections import defaultdict
import numpy as np
import torch
import json
import random

current directory /home/mi8uu
cwd change to: /project/CollabRoboGroup/mi8uu/repo/MAME


In [2]:
# Argument parser holding every hyper-parameter of the run. In this notebook
# it is later fed an explicit list of strings rather than sys.argv.
parser = ArgumentParser()

# Hardware / execution backend
parser.add_argument("-compute_mode", "--compute_mode", help="compute_mode",
                    default='gpu')
parser.add_argument("--fast_dev_run", help="fast_dev_run",
                    action="store_true", default=False)
parser.add_argument("-num_nodes", "--num_nodes", help="num_nodes",
                    type=int, default=1)
parser.add_argument("--strategy", help="strategy",
                    default='ddp_spawn')
# Kept as a string (not int) because the help allows either a count or a list.
parser.add_argument("--gpus", help="number of gpus or gpus list",
                    default="-1")
parser.add_argument("--float_precision", help="float precision",
                    type=int, default=32)

# Dataset selection
parser.add_argument("--dataset_name", help="dataset_name",
                    default=None)
# Help text previously read "dataset_name" (copy-paste from the option above).
parser.add_argument("--dataset_filename", help="dataset_filename",
                    default='train.csv')

# Windowing, batching and basic training schedule
parser.add_argument("-ws", "--window_size", help="windows size",
                    type=int, default=5)
parser.add_argument("-wst", "--window_stride", help="windows stride",
                    type=int, default=5)
parser.add_argument("-bs", "--batch_size", help="batch size",
                    type=int, default=2)
parser.add_argument("-nw", "--num_workers", help="num_workers",
                    type=int, default=2)
parser.add_argument("-ep", "--epochs", help="epoch per validation cycle",
                    type=int, default=200)
parser.add_argument("-lr", "--learning_rate", help="learning rate",
                    type=float, default=3e-4)
parser.add_argument("-sml", "--seq_max_len", help="maximum sequence length",
                    type=int, default=200)

# Checkpoint resume behaviour and data location
parser.add_argument("-rt", "--resume_training", help="resume training",
                    action="store_true", default=False)
parser.add_argument("-sl", "--strict_load", help="partially or strictly load the saved model",
                    action="store_true", default=False)
parser.add_argument("-dfp", "--data_file_dir_base_path", help="data_file_dir_base_path",
                    default=None)

# LSTM encoder options (per the help strings).
parser.add_argument("-enl", "--encoder_num_layers", type=int, default=2,
                    help="LSTM encoder layer")
parser.add_argument("-lstm_bi", "--lstm_bidirectional", action="store_true", default=False,
                    help="LSTM bidirectional [True/False]")

# Checkpoint file naming: a prefix, an explicit filename for the new
# checkpoint, and the filename of a checkpoint to resume from.
parser.add_argument("-mcp", "--model_checkpoint_prefix", default='uva_dar',
                    help="model checkpoint filename prefix")
parser.add_argument("-mcf", "--model_checkpoint_filename", default=None,
                    help="model checkpoint filename")
parser.add_argument("-rcf", "--resume_checkpoint_filename", default=None,
                    help="resume checkpoint filename")

# Text execution log location.
parser.add_argument("-logf", "--log_filename", default='exe_uva_dar.log',
                    help="execution log filename")
parser.add_argument("-logbd", "--log_base_dir", default='log/uva_dar',
                    help="execution log base dir")

# Experiment-tracker naming (TensorBoard / Weights & Biases).
parser.add_argument("-tb_wn", "--tb_writer_name", default=None,
                    help="tensorboard writer name")
parser.add_argument("-wdbln", "--wandb_log_name", default=None,
                    help="wandb_log_name")
parser.add_argument("--wandb_entity", default='crg',
                    help="wandb_entity")
parser.add_argument("--wandb_project_name", default='MAME',
                    help="wandb_project_name")

# Boolean debugging switches; both are off unless the flag is passed.
parser.add_argument("--log_model_archi", action="store_true", default=False,
                    help="log model")
parser.add_argument("--print_data_id", action="store_true", default=False,
                    help="print_data_id")

# Pretrained feature-extractor options.
parser.add_argument("-ipf", "--is_pretrained_fe", action="store_true", default=False,
                    help="is_pretrained_fe")
parser.add_argument("-edbp", "--embed_dir_base_path", default=None,
                    help="embed_dir_base_path")
parser.add_argument("--pt_vis_encoder_archi_type", default='resnet18',
                    help="pt_vis_encoder_archi_type")

# Where trained models are saved and which execution mode to run.
parser.add_argument("-msbd", "--model_save_base_dir", default="trained_model",
                    help="model_save_base_dir")
parser.add_argument("-exe_mode", "--exe_mode", default='train',
                    help="exe_mode[dl_test/train]")

# Fractions / counts limiting how much of each split is used per run.
parser.add_argument("--train_percent_check", type=float, default=1.0,
                    help="train_percent_check")
parser.add_argument("--num_sanity_val_steps", type=int, default=5,
                    help="num_sanity_val_steps")
parser.add_argument("--val_percent_check", type=float, default=1.0,
                    help="val_percent_check")
parser.add_argument("--limit_test_batches", type=float, default=1.0,
                    help="limit_test_batches")
parser.add_argument("--no_validation", action="store_true", default=False,
                    help="no_validation")

# Scheduler job id, stored as a plain string when given.
parser.add_argument("--slurm_job_id", default=None,
                    help="slurm_job_id")

# Model
parser.add_argument("--model_name", default=None,
                    help="model_name")

# The remaining model options are all boolean switches (False unless passed)
# whose help text equals the option name, so they are registered in a loop
# in the same order as before.
for _model_flag in ("is_decoders",
                    "is_bbox_embed",
                    "is_bbox_cord_embed",
                    "is_bbox_image_mask_encode",
                    "combine_view_context_bbox",
                    "is_only_target_box"):
    parser.add_argument(f"--{_model_flag}", help=_model_flag,
                        action="store_true", default=False)
# Do not leave the loop variable behind in the notebook namespace.
del _model_flag

# Dataset Config
# `setting_names` and `view_modalities` arrive as single strings; a later
# cell splits them on commas.
parser.add_argument("--setting_names", default=None,
                    help="setting_names")
parser.add_argument("--view_modalities", default=None,
                    help="view_modalities")
parser.add_argument("--random_contrastive_data", action="store_true", default=False,
                    help="random_contrastive_data")
parser.add_argument("--indi_modality_embedding_size", type=int, default=None,
                    help="indi_modality_embedding_size")

# Verbal Instruction Encoder Prop
parser.add_argument("--tokenizer_name", help="tokenizer_name",
                    default='bert-base-uncased')

# View Encoders Prop
# Help text previously read "tokenizer_name" (copy-paste from the option
# above), which mislabelled this option in `--help` output.
parser.add_argument("--view_encoder_name", help="view_encoder_name",
                    default='resnet34')

# Multimodal Fusion Prop
parser.add_argument("--fusion_model_name", default='concat',
                    help="fusion_model_name")
parser.add_argument("--fusion_model_nhead", type=int, default=4,
                    help="fusion_model_nhead")
parser.add_argument("--fusion_model_dropout", type=float, default=0.1,
                    help="fusion_model_dropout")

# Guided Projection Prop
parser.add_argument("--guided_projection_nhead", type=int, default=1,
                    help="guided_projection_nhead")
parser.add_argument("--guided_projection_dropout", type=float, default=0.1,
                    help="guided_projection_dropout")

# Guided Fusion Prop
parser.add_argument("--guided_fusion_nhead", type=int, default=1,
                    help="guided_fusion_nhead")
parser.add_argument("--guided_fusion_dropout", type=float, default=0.1,
                    help="guided_fusion_dropout")

# Multi-task Config
# `task_list` arrives as one comma-separated string; a later cell splits it.
parser.add_argument("--task_list", help="task_list",
                    default=None)
parser.add_argument("--bbox_format", help="bbox_format",
                    default='xyxy')
parser.add_argument("--multitask_modal_nhead", help="multitask_modal_nhead",
                    type=int, default=1)
parser.add_argument("--multitask_modal_dropout", help="multitask_modal_dropout",
                    type=float, default=0.1)
parser.add_argument("--instruction_template", help="instruction_template",
                    default=None)
# Help text previously duplicated "instruction_template", making the two
# options indistinguishable in `--help` output.
parser.add_argument("--restrict_instruction_template", help="restrict_instruction_template",
                    default=None)
# possible value for instruction_template
# ['template_null', 'template_1_1', 'ego_template_2_2', 'ego_template_3_1',
# 'template_1_2', 'exo_template_2_1', 'exo_template_3_2',
# 'ego_template_3_2', 'exo_template_2_2',
# 'ego_template_2_1', 'exo_template_3_1']

# Data preprocessing
# Split configuration.
parser.add_argument("--data_split_type", default=None,
                    help="data_split_type")
parser.add_argument("--valid_split_pct", type=float, default=0.15,
                    help="valid_split_pct")
parser.add_argument("--test_split_pct", type=float, default=0.2,
                    help="test_split_pct")
parser.add_argument("--share_train_dataset", action="store_true", default=False,
                    help="share_train_dataset")
parser.add_argument("--skip_frame_len", type=int, default=1,
                    help="skip_frame_len")

# Image geometry; resize and crop defaults both come from the project config.
parser.add_argument("-rimg_w", "--resize_image_width", type=int, default=config.image_width,
                    help="resize to image width")
parser.add_argument("-rimg_h", "--resize_image_height", type=int, default=config.image_height,
                    help="resize to image height")
parser.add_argument("-cimg_w", "--crop_image_width", type=int, default=config.image_width,
                    help="crop to image width")
parser.add_argument("-cimg_h", "--crop_image_height", type=int, default=config.image_height,
                    help="crop to image height")
parser.add_argument("--image_scale", type=float, default=0.35,
                    help="image scale ration [0.1-1.0]")

# Optimization
parser.add_argument("--lr_find", help="learning rate finder",
                    action="store_true", default=False)
parser.add_argument("--lr_scheduler", help="lr_scheduler",
                    default=None)
parser.add_argument("-cl", "--cycle_length", help="total number of executed iteration",
                    type=int, default=100)
# Help text was a verbatim copy of the `--cycle_length` help; it now follows
# the file's "help = option name" convention.
# NOTE(review): replace with a real description once the scheduler's use of
# this value is confirmed.
parser.add_argument("-cm", "--cycle_mul", help="cycle_mul",
                    type=int, default=2)
parser.add_argument("--is_random_seed", help="is_random_seed",
                    action="store_true", default=False)

# Losses
# Per-term weights for the individual loss components.
parser.add_argument("--loss_align_weight", type=float, default=0.3,
                    help="loss_align_weight")
parser.add_argument("--loss_diff_weight", type=float, default=0.3,
                    help="loss_diff_weight")
parser.add_argument("--loss_decoder_weight", type=float, default=0.3,
                    help="loss_decoder_weight")
parser.add_argument("--loss_multitask_weight", type=float, default=1.0,
                    help="loss_multitask_weight")
parser.add_argument("--bbox_loss_type", default=None,
                    help="bbox_loss_type")

# Testing Config
# Both list-like options are stored as single comma-separated strings.
parser.add_argument("--test_models", default='valid_loss,valid_accuracy,train_loss',
                    help="test_models")
parser.add_argument("--test_metrics", default='loss,accuracy,f1_scores,precision,recall_scores',
                    help="test_metrics")
parser.add_argument("--is_test", action="store_true", default=False,
                    help="evaluate on test dataset")
# Last expression of the cell: the returned action object is what the
# notebook displays as the cell output.
parser.add_argument("--only_testing", action="store_true", default=False,
                    help="Perform only test on the pretrained model")


_StoreTrueAction(option_strings=['--only_testing'], dest='only_testing', nargs=0, const=True, default=False, type=None, choices=None, help='Perform only test on the pretrained model', metavar=None)

In [3]:
# without noise
# Notebook stand-in for the command line: the options are passed to the
# parser as an explicit list, one option (and its value) per line.
args = parser.parse_args(args=[
    # data and task selection
    '--batch_size', '1',
    '--dataset_name', 'mog',
    '--dataset_filename', 'none',
    '--valid_split_pct', '0.2',
    '--view_modalities', 'exo_view_image,ego_view_image,top_view_image',
    '--setting_names', 'both_gaze_gesture,wrong_gaze_gesture',
    '--task_list', 'is_contrastive',
    # model
    '--model_name', 'base_model_vl_clip',
    '--fusion_model_name', 'concat',
    # execution
    '--exe_mode', 'train',
    '--val_percent_check', '1',
    '--num_sanity_val_steps', '0',
    '--limit_test_batches', '1.0',
    '--train_percent_check', '1',
    '--compute_mode', 'gpu',
    '--strategy', 'ddp_spawn',
    '--float_precision', '32',
    '--num_workers', '8',
    '--gpus', '-1',
    # vision / bounding-box options
    '--image_scale', '0.5',
    '--view_encoder_name', 'resnet50',
    '--indi_modality_embedding_size', '512',
    '--bbox_format', 'xyxy',
    '--is_only_target_box',
    '--bbox_loss_type', 'l1_loss',
    '--restrict_instruction_template', 'template_null',
    # paths, checkpoints and logging
    '-dfp', '/project/CollabRoboGroup/datasets/official_data',
    '-msbd', 'trained_model/vl',
    '-mcp', 'test_run_base',
    # earlier checkpoints that were tried:
    # '-rcf', 'best_epoch_train_loss_test_run_base_1652583629.837785.pth',
    # '-rcf', 'best_epoch_train_loss_test_run_base_1652647041.889894.pth',
    '-rcf', 'best_epoch_train_loss_base_model_vl_1654001226.307563.pth',
    '-logbd', 'log/base/test_run',
    '-logf', 'testing.log',
    '--log_model_archi',
])

In [4]:
# Project text logger, created with console printing enabled so the messages
# also show up in the cell output.
txt_logger = TextLogger(args.log_base_dir,
                        args.log_filename,
                        print_console=True)

# Default checkpoint name: "<prefix>_<POSIX timestamp>.pth".
# `datetime.now().timestamp()` yields the real epoch time. The previous
# `datetime.utcnow().timestamp()` built a naive UTC datetime that
# `.timestamp()` interprets as local time, skewing the value by the machine's
# UTC offset (and `utcnow()` is deprecated in recent Python versions).
if args.model_checkpoint_filename is None:
    args.model_checkpoint_filename = f'{args.model_checkpoint_prefix}_{datetime.now().timestamp()}.pth'

txt_logger.log(f'model_checkpoint_prefix:{args.model_checkpoint_prefix}\n')
txt_logger.log(f'model_checkpoint_filename:{args.model_checkpoint_filename}, resume_checkpoint_filename:{args.resume_checkpoint_filename}\n')

# Pick CUDA when available; this value is only logged in this cell.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
txt_logger.log(f'pytorch version: {torch.__version__}\n')
txt_logger.log(f'GPU Availability: {device}, gpus: {args.gpus}\n')

# Set dataloader class and prop
def _split_csv_arg(value):
    """Turn a comma-separated option string into a list of items.

    A value that is already a list/tuple is returned as a list unchanged, so
    re-running this cell does not fail with ``'list' object has no attribute
    'strip'``. Any other value is handled exactly as before
    (``value.strip().split(',')``), so an unset (None) option still raises.
    """
    if isinstance(value, (list, tuple)):
        return list(value)
    return value.strip().split(',')


args.setting_names = _split_csv_arg(args.setting_names)
args.view_modalities = _split_csv_arg(args.view_modalities)
args.task_list = _split_csv_arg(args.task_list)

if args.dataset_name=='mog':

    if args.resume_checkpoint_filename is not None:
        # Resolve the checkpoint inside the model save directory and only
        # enable resuming when the file is actually present.
        args.resume_checkpoint_filepath = f'{args.model_save_base_dir}/{args.resume_checkpoint_filename}'
        if os.path.exists(args.resume_checkpoint_filepath):
            args.resume_training = True
        else:
            txt_logger.log(f'Checkpoint is not exists: {args.resume_checkpoint_filename}\n')
            args.resume_training = False

model_checkpoint_prefix:test_run_base

model_checkpoint_filename:test_run_base_1655369731.464758.pth, resume_checkpoint_filename:best_epoch_train_loss_base_model_vl_1654001226.307563.pth

pytorch version: 1.9.1+cu102

GPU Availability: cuda, gpus: -1



In [5]:
# Build the 'test' dataset directly from the parsed arguments.
dataset = MOG_Dataset_VL(hparams=args, dataset_type='test')

# Collator configured from the model name, the selected views/tasks and the
# bounding-box related switches (positional order as the class expects).
collate_fn = MOG_Collator_VL(
    args.model_name,
    args.view_modalities,
    args.task_list,
    args.is_bbox_embed,
    args.is_bbox_cord_embed,
    args.is_bbox_image_mask_encode,
    args.combine_view_context_bbox,
)

# Deterministic iteration order (no shuffling); an incomplete final batch is
# dropped.
data_loader = DataLoader(
    dataset,
    batch_size=args.batch_size,
    shuffle=False,
    drop_last=True,
    num_workers=args.num_workers,
    collate_fn=collate_fn,
)

In [6]:
# Pull the first collated batch from the loader as a smoke test.
dl_iter = iter(data_loader)
sample_data = next(dl_iter)
# Last expression of the cell: displays the keys present in the batch.
sample_data.keys()

dict_keys(['exo_view_image_context', 'ego_view_image_context', 'top_view_image_context', 'bboxes_mask', 'is_contrastive_labels', 'task_ids', 'exo_view_image_processed_input', 'ego_view_image_processed_input', 'top_view_image_processed_input', 'scores', 'box_labels'])

In [7]:
# Recompute the checkpoint path from the save directory and the resume
# filename, then report whether that file exists on disk.
args.resume_checkpoint_filepath = f'{args.model_save_base_dir}/{args.resume_checkpoint_filename}'
print('checkpoint is found'
      if os.path.exists(args.resume_checkpoint_filepath)
      else 'checkpoint is not found')

checkpoint is found


In [8]:
# Data module built from the parsed arguments; it is handed to
# `trainer.test` in the last cell.
dataModule = MOGDataModule(args)

In [9]:
# Choose the trainer class from the model name: the exact name 'mog_model'
# maps to the MOG trainer, any name containing 'base' maps to the base trainer.
if args.model_name == 'mog_model':
    ModelTrainer = MOG_Model_Trainer
elif args.model_name is not None and 'base' in args.model_name:
    ModelTrainer = Base_Model_Trainer
else:
    # Without this branch an unknown name left `ModelTrainer` undefined on a
    # fresh kernel, or silently reused a stale class from an earlier run.
    raise ValueError(f"Unsupported model_name: {args.model_name!r}")
model = ModelTrainer(hparams=args)

In [10]:
# Replace the freshly constructed model with the weights stored in the resume
# checkpoint, passing the current arguments as hparams.
model = ModelTrainer.load_from_checkpoint(
    args.resume_checkpoint_filepath,
    hparams=args,
)
txt_logger.log(
    f'Reload model from chekpoint: {args.resume_checkpoint_filename}\n'
    f' model_checkpoint_filename: {args.model_checkpoint_filename}\n'
)

Reload model from chekpoint: best_epoch_train_loss_base_model_vl_1654001226.307563.pth
 model_checkpoint_filename: test_run_base_1655369731.464758.pth



In [11]:
# device = "cpu"
# model = model.to(device)
# _ = model.eval()
# next(model.parameters()).is_cuda

In [12]:
# Display the configured compute mode (inspection only; nothing is assigned).
args.compute_mode

'gpu'

In [13]:
# NOTE(review): the accelerator is hard-coded to CPU even though
# `args.compute_mode` is 'gpu' and a GPU is reported as available; this looks
# like a deliberate debugging choice -- confirm before relying on timings.
trainer = Trainer(accelerator="cpu")

  rank_zero_warn(
GPU available: True, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(


In [None]:
# Run the test loop of the checkpoint-restored model on the data module.
trainer.test(model, datamodule=dataModule)

train dataset len: 257880

valid dataset len: 28080

test dataset len: 27822



Testing: 0it [00:00, ?it/s]