In [1]:
# Enable IPython's autoreload extension; mode 2 picks up edits to imported
# modules without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
from alfred.data.zoo.alfred import AlfredDataset

In [3]:
class ARGS():
    """Attribute container with the settings handed to `AlfredDataset` in this notebook.

    Scalar settings are plain class attributes. The dict-valued settings
    (`data`, `lr`, `encoder_lang`, `decoder_lang`) are additionally copied onto
    every instance by `__init__`, so editing one instance's dict does not
    change the class-level defaults or any other instance.
    """
    name = 'default'
    # model to use
    model = 'transformer'
    # which device to use
    device = 'cuda'
    # number of data loading workers or evaluation processes (0 for main thread)
    num_workers = 0
    # we can fine-tune a pre-trained model
    pretrained_path = None
    # run the code on a small chunk of data
    fast_epoch = False

    # DATA SETTINGS
    data = {
        # dataset name(s) for training and validation
        'train': None,
        # additional dataset name(s) can be specified for validation only
        'valid': '',
        # specify the length of each dataset
        'length': 800,
        # what to use as annotations: {'lang', 'lang_frames', 'frames'}
        'ann_type': 'lang',
    }

    seed = 14
    # load a checkpoint from a previous epoch (if available)
    resume = True
    # whether to print execution time for different parts of the code
    profile = False

    # HYPER PARAMETERS
    # batch size
    batch = 8
    # number of epochs
    epochs = 200
    # optimizer type, must be in ('adam', 'adamw')
    optimizer = 'adamw'
    # L2 regularization weight
    weight_decay = 0.33
    # learning rate settings
    lr = {
        # learning rate initial value
        'init': 1e-3,
        # lr scheduler type: {'linear', 'cosine', 'triangular', 'triangular2'}
        'profile': 'linear',
        # (LINEAR PROFILE) num epoch to adjust learning rate
        'decay_epoch': 10,
        # (LINEAR PROFILE) scaling multiplier at each milestone
        'decay_scale': 0.8,
        # (COSINE & TRIANGULAR PROFILE) learning rate final value
        'final': 1e-5,
        # (TRIANGULAR PROFILE) period of the cycle to increase the learning rate
        'cycle_epoch_up': 0,
        # (TRIANGULAR PROFILE) period of the cycle to decrease the learning rate
        'cycle_epoch_down': 0,
        # warm up period length in epochs
        'warmup_epoch': 0,
        # initial learning rate will be divided by this value
        'warmup_scale': 1,
    }
    # weight of action loss
    action_loss_wt = 1.
    # weight of object loss
    object_loss_wt = 1.
    # weight of subgoal completion predictor
    subgoal_aux_loss_wt = 0.1
    # weight of progress monitor
    progress_aux_loss_wt = 0.1
    # maximizing entropy loss (by default it is off)
    entropy_wt = 0.0

    # TRANSFORMER settings
    # size of transformer embeddings
    demb = 768
    # number of heads in multi-head attention
    encoder_heads = 12
    # number of layers in transformer encoder
    encoder_layers = 2
    # how many previous actions to use as input
    num_input_actions = 1
    # which encoder to use for language encoder (by default no encoder)
    encoder_lang = {
        'shared': True,
        'layers': 2,
        'pos_enc': True,
        'instr_enc': False,
    }
    # which decoder to use for the speaker model
    decoder_lang = {
        'layers': 2,
        'heads': 12,
        'demb': 768,
        'dropout': 0.1,
        'pos_enc': True,
    }
    # do not propagate gradients to the look-up table and the language encoder
    detach_lang_emb = False

    def __init__(self):
        """Give this instance its own copy of every dict-valued setting.

        Without this, the dicts above are shared class attributes and a change
        such as `args.lr['init'] = ...` would leak into the class defaults and
        into every other `ARGS` instance.
        """
        cls = type(self)
        # The dicts only hold scalars, so a shallow copy fully detaches them.
        for setting in ('data', 'lr', 'encoder_lang', 'decoder_lang'):
            setattr(self, setting, dict(getattr(cls, setting)))


In [4]:
# Instantiate the settings object and build the dataset from it.
# NOTE(review): the positional arguments look like (dataset name, partition,
# settings, annotation type) -- confirm against AlfredDataset's signature.
args = ARGS()
dataset = AlfredDataset(
    'lmdb_human',
    'train',
    args,
    'lang',
)
    


The dataset was recorded using model at /home/georgiynefedov/RP//logs/pretrained/fasterrcnn_model.pth
train dataset size = 1068826


For now, this behavior is kept to avoid breaking backwards compatibility when padding/encoding with `truncation is True`.
- Be aware that you SHOULD NOT rely on t5-base automatically truncating your input to 512 when padding/encoding.
- If you want to encode/pad to sequences longer than 512 you can either instantiate this tokenizer with `model_max_length` or pass `max_length` when encoding/padding.


In [5]:
# Display the dataset's `counts` attribute; the output below is a
# defaultdict(int) mapping action-name strings to integer counts.
dataset.counts


defaultdict(int,
            {'LookDown_15': 63619,
             'MoveAhead_25': 624869,
             'RotateLeft_90': 89704,
             'LookUp_15': 39997,
             'PickupObject': 39427,
             'RotateRight_90': 94301,
             'ToggleObjectOn': 8013,
             '<<stop>>': 20958,
             'OpenObject': 20922,
             'CloseObject': 21148,
             'PutObject': 37064,
             'SliceObject': 3042,
             'ToggleObjectOff': 5762})

In [6]:
# Display the first 100 entries of `sampler_weights`.
# NOTE(review): the values shown equal 1/count for entries of `dataset.counts`
# (e.g. 1/63619 ~= 1.5719e-05), so this looks like inverse-frequency
# weighting -- confirm in AlfredDataset.
dataset.sampler_weights[:100]

[1.5718574639651676e-05,
 1.6003354303061922e-06,
 1.6003354303061922e-06,
 1.1147774904129135e-05,
 2.5001875140635547e-05,
 2.5363329697922742e-05,
 1.5718574639651676e-05,
 1.1147774904129135e-05,
 1.6003354303061922e-06,
 1.0604341417376274e-05,
 2.5001875140635547e-05,
 0.00012479720454261825,
 4.7714476572192006e-05,
 1.5718574639651676e-05,
 1.6003354303061922e-06,
 1.6003354303061922e-06,
 1.1147774904129135e-05,
 2.5001875140635547e-05,
 2.5363329697922742e-05,
 1.5718574639651676e-05,
 1.1147774904129135e-05,
 1.6003354303061922e-06,
 1.0604341417376274e-05,
 2.5001875140635547e-05,
 0.00012479720454261825,
 4.7714476572192006e-05,
 1.5718574639651676e-05,
 1.6003354303061922e-06,
 1.6003354303061922e-06,
 1.1147774904129135e-05,
 2.5001875140635547e-05,
 2.5363329697922742e-05,
 1.5718574639651676e-05,
 1.1147774904129135e-05,
 1.6003354303061922e-06,
 1.0604341417376274e-05,
 2.5001875140635547e-05,
 0.00012479720454261825,
 4.7714476572192006e-05,
 1.5718574639651676e-05,
