In [13]:
import os
import json
import yaml
import errno
import argparse
import numpy as np

import torch
import torch.nn as nn

import data
from config import cfg

from tensorboardX import SummaryWriter
# magic keyword for tensorboard
%load_ext tensorboard

The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard


## Configuration

In [14]:
# Load the experiment YAML and echo every top-level section under a 90-dash banner.
config_path = "/root/TMLGA_refactoring/experiments/charades_sta_train.yaml"
with open(config_path) as config_stream:
    # NOTE(review): full_load can build more than plain data types; acceptable
    # only because this is a trusted, local configuration file.
    config_file = yaml.full_load(config_stream)
for item, doc in config_file.items():
    print("-" * 90)
    print("{} : {}".format(item, doc))

------------------------------------------------------------------------------------------
ENGINE_STAGE : TRAINER
------------------------------------------------------------------------------------------
SENTENCE : {'MIN_COUNT': 1, 'TRAIN_MAX_LENGTH': 30, 'TEST_MAX_LENGTH': 30}
------------------------------------------------------------------------------------------
DYNAMIC_FILTER : {'MODEL': 'GRU', 'POOLING': 'MeanPoolingLayer', 'HEAD_MODEL': 'MLP', 'TAIL_MODEL': 'GRU', 'GRU': {'NUM_LAYERS': 1, 'HIDDEN_SIZE': 256, 'BIAS': False, 'BIDIRECTIONAL': True, 'BATCH_FIRST': True, 'DROPOUT': 0.0}, 'MLP': {'INPUT_DIM': 512, 'OUTPUT_DIM': 512}}
------------------------------------------------------------------------------------------
REDUCTION : {'INPUT_SIZE': 1024, 'OUTPUT_SIZE': 512}
------------------------------------------------------------------------------------------
LOCALIZATION : {'INPUT_SIZE': 512, 'HIDDEN_SIZE': 256, 'NUM_LAYERS': 2, 'BIAS': False, 'DROPOUT': 0.5, 'BIDIRECTIONAL': 

## Miscellaneous

In [15]:
def mkdir(path):
    """Create ``path`` together with any missing parent directories.

    An ``OSError`` whose errno is ``EEXIST`` (the path is already there) is
    swallowed; every other ``OSError`` propagates to the caller unchanged.
    """
    try:
        os.makedirs(path)
    except OSError as exc:
        already_exists = exc.errno == errno.EEXIST
        if not already_exists:
            raise

## Model building

In [16]:
from utils.rnns import feed_forward_rnn
from utils.gru import GRU
from utils.mlp import MLP
from utils.pooling import MeanPoolingLayer

In [17]:
class DynamicFilter(nn.Module):
    """Query encoder: GRU "tail" -> MeanPoolingLayer -> MLP "head".

    The three stages are built from the project's ``GRU``, ``MeanPoolingLayer``
    and ``MLP`` helpers; ``cfg`` is forwarded to the GRU and MLP constructors.
    """

    def __init__(self, cfg):
        super().__init__()
        self.cfg = cfg

        # Keep the registration order (tail, pooling, head) so parameter
        # names and initialisation order are unchanged.
        self.tail_df = GRU(cfg)
        self.pooling_layer = MeanPoolingLayer()
        self.head_df = MLP(cfg)

    def forward(self, sequences, lengths=None):
        """Encode ``sequences`` and return ``(filter, lengths)``.

        ``lengths`` is handed to both the GRU and the pooling layer and is
        returned untouched alongside the MLP output.
        """
        encoded, _ = self.tail_df(sequences, lengths)
        pooled = self.pooling_layer(encoded, lengths)
        return self.head_df(pooled), lengths

In [18]:
class NLVL(nn.Module):
    """Temporal localisation model: query-guided attention over video features
    followed by a GRU and two per-position heads (segment start / segment end).

    ``forward`` returns the training loss together with the predicted start/end
    distributions and the attention weights.
    """

    def __init__(self, cfg):
        super(NLVL, self).__init__()
        self.cfg = cfg
        self.batch_size = cfg.BATCH_SIZE_TRAIN
        self.model_df  = DynamicFilter(cfg)

        self.reduction  = nn.Linear(cfg.REDUCTION.INPUT_SIZE, cfg.REDUCTION.OUTPUT_SIZE)
        # NOTE(review): multimodal_fc1/fc2 (and self.pooling below) are not used
        # by forward(). They are kept so parameter names, initialisation order
        # and previously pickled models stay compatible.
        self.multimodal_fc1 = nn.Linear(512*2, 1)
        self.multimodal_fc2 = nn.Linear(512, 1)

        self.rnn_localization = nn.GRU(input_size   = cfg.LOCALIZATION.INPUT_SIZE,
                                        hidden_size  = cfg.LOCALIZATION.HIDDEN_SIZE,
                                        num_layers   = cfg.LOCALIZATION.NUM_LAYERS,
                                        bias         = cfg.LOCALIZATION.BIAS,
                                        dropout      = cfg.LOCALIZATION.DROPOUT,
                                        bidirectional= cfg.LOCALIZATION.BIDIRECTIONAL,
                                        batch_first = cfg.LOCALIZATION.BATCH_FIRST)

        self.pooling = MeanPoolingLayer()

        self.starting = nn.Linear(cfg.CLASSIFICATION.INPUT_SIZE, cfg.CLASSIFICATION.OUTPUT_SIZE) # 512 -> 1
        self.ending = nn.Linear(cfg.CLASSIFICATION.INPUT_SIZE, cfg.CLASSIFICATION.OUTPUT_SIZE)   # 512 -> 1

    def attention(self, videoFeat, filter, lengths):
        """Dot product of every video position with the query filter.

        ``videoFeat`` is (B, T, D) and ``filter`` is (B, D); the result is (B, T).
        ``lengths`` is accepted for interface compatibility and not used.
        """
        # Squeeze only the trailing singleton axis: a bare squeeze() would also
        # drop the batch axis when B == 1 (or the time axis when T == 1).
        return torch.bmm(videoFeat, filter.unsqueeze(2)).squeeze(2)

    def get_mask_from_sequence_lengths(self, sequence_lengths: torch.Tensor, max_length: int):
        """Return a (B, max_length) 0/1 mask with ones on the first ``length`` positions."""
        ones = sequence_lengths.new_ones(sequence_lengths.size(0), max_length)
        range_tensor = ones.cumsum(dim=1)
        return (sequence_lengths.unsqueeze(1) >= range_tensor).long()

    def masked_softmax(self, vector: torch.Tensor, mask: torch.Tensor, dim: int = -1, memory_efficient: bool = False, mask_fill_value: float = -1e32):
        """Softmax over ``dim`` restricted to positions where ``mask`` is 1.

        With ``mask=None`` this is a plain softmax. A constant ``1e-13`` is added
        to the result in every case, so callers can take its logarithm safely
        (the output therefore sums to slightly more than 1).
        """
        if mask is None:
            result = torch.nn.functional.softmax(vector, dim=dim)
        else:
            mask = mask.float()
            while mask.dim() < vector.dim():
                mask = mask.unsqueeze(1)
            if not memory_efficient:
                # To limit numerical errors from large vector elements outside the mask, we zero these out.
                result = torch.nn.functional.softmax(vector * mask, dim=dim)
                result = result * mask
                result = result / (result.sum(dim=dim, keepdim=True) + 1e-13)
            else:
                # A comparison yields the mask dtype masked_fill expects on the
                # running PyTorch version (a uint8 cast is deprecated/rejected
                # by newer releases).
                masked_vector = vector.masked_fill(mask == 0, mask_fill_value)
                result = torch.nn.functional.softmax(masked_vector, dim=dim)

        return result + 1e-13

    def mask_softmax(self, feat, mask):
        """Shortcut for ``masked_softmax`` with ``memory_efficient=False``."""
        return self.masked_softmax(feat, mask, memory_efficient=False)

    def kl_div(self, p, gt, length):
        """Batch mean of ``-sum(gt * log(p / gt))`` over the first ``length[i]`` positions.

        ``p`` and ``gt`` are (B, T); ``length`` holds the valid length of each row.
        """
        eps = 1e-13
        individual_loss = []
        for i in range(length.size(0)):
            vlength = int(length[i])
            target = gt[i][:vlength]
            # Clamp only the denominator: with an exact zero in gt the original
            # expression evaluates 0 * log(p / 0) = NaN (and NaN gradients),
            # whereas such a position should contribute nothing.
            ret = target * torch.log(p[i][:vlength] / target.clamp(min=eps))
            individual_loss.append(-torch.sum(ret))
        individual_loss = torch.stack(individual_loss)
        return torch.mean(individual_loss)

    def forward(self, videoFeat, videoFeat_lengths, tokens, tokens_lengths, start, end, localiz):
        """Run the model and compute the loss.

        Returns ``(total_loss, pred_start, pred_end, attention, atten_loss)``
        where ``total_loss = start_loss + end_loss + atten_loss``.
        """
        mask = self.get_mask_from_sequence_lengths(videoFeat_lengths, int(videoFeat.shape[1]))

        filter_start, lengths = self.model_df(tokens, tokens_lengths)

        videoFeat = self.reduction(videoFeat)

        attention = self.attention(videoFeat, filter_start, lengths)
        # Scale each row by 1/sqrt(query length); broadcasting over the time
        # axis replaces the explicit repeat().
        attention = attention * torch.rsqrt(lengths.float()).unsqueeze(1)

        attention = self.mask_softmax(attention, mask)
        # Re-weight every video position by its attention (broadcast over features).
        videoFeat_hat = attention.unsqueeze(2) * videoFeat
        output, _ = feed_forward_rnn(self.rnn_localization, videoFeat_hat, lengths=videoFeat_lengths)

        flat_output = output.view(-1, output.size(2))

        # squeeze(2) keeps the batch axis even for a batch of one.
        pred_start = self.starting(flat_output).view(-1, output.size(1), 1).squeeze(2)
        pred_start = self.mask_softmax(pred_start, mask)

        pred_end = self.ending(flat_output).view(-1, output.size(1), 1).squeeze(2)
        pred_end = self.mask_softmax(pred_end, mask)

        start_loss = self.kl_div(pred_start, start, videoFeat_lengths)
        end_loss   = self.kl_div(pred_end, end, videoFeat_lengths)

        # Penalise attention placed on positions where localiz is 0.
        atten_loss = torch.sum(-( (1-localiz) * torch.log((1-attention) + 1E-12)), dim=1)
        atten_loss = torch.mean(atten_loss)

        total_loss = start_loss + end_loss + atten_loss

        return total_loss, pred_start, pred_end, attention, atten_loss

In [19]:
# Optimizer
def make_optimizer(cfg, model):
    """Build the optimizer selected by ``cfg.SOLVER.TYPE`` ("SGD" or "ADAM").

    Every trainable parameter gets its own parameter group. Parameters whose
    name contains "bias" use ``BASE_LR * BIAS_LR_FACTOR`` and
    ``WEIGHT_DECAY_BIAS``; all others use ``BASE_LR`` and ``WEIGHT_DECAY``.

    Raises:
        ValueError: if ``cfg.SOLVER.TYPE`` is not a supported optimizer name.
    """
    base_lr = cfg.SOLVER.BASE_LR
    params = []
    for key, value in model.named_parameters():
        if not value.requires_grad:
            continue
        lr = base_lr
        weight_decay = cfg.SOLVER.WEIGHT_DECAY
        if "bias" in key:
            lr = base_lr * cfg.SOLVER.BIAS_LR_FACTOR
            weight_decay = cfg.SOLVER.WEIGHT_DECAY_BIAS
        params.append({"params": [value], "lr": lr, "weight_decay": weight_decay})

    # The optimizer-level default is always BASE_LR. Previously it was whatever
    # `lr` the last loop iteration left behind (typically the bias LR), and it
    # was undefined when the model had no trainable parameters.
    if cfg.SOLVER.TYPE == "SGD":
        return torch.optim.SGD(params, base_lr, momentum=cfg.SOLVER.MOMENTUM, weight_decay=cfg.SOLVER.WEIGHT_DECAY)
    if cfg.SOLVER.TYPE == "ADAM":
        return torch.optim.Adam(params, base_lr, eps=cfg.SOLVER.EPSILON, weight_decay=cfg.SOLVER.WEIGHT_DECAY)

    raise ValueError("Unsupported SOLVER.TYPE: {!r} (expected 'SGD' or 'ADAM')".format(cfg.SOLVER.TYPE))

## Metric tIoU class

In [20]:
class MetricIoU(object):
    """Accumulates per-batch losses and temporal IoU scores for one data split."""

    def __init__(self, cfg, dataset_size, is_train=True):
        """Create the accumulators and make sure the visualisation directory exists.

        ``dataset_size`` is accepted for interface compatibility; the recall
        denominators come from ``cfg.DATASETS.TRAIN_SAMPLES`` / ``TEST_SAMPLES``.
        """
        self.loss = []
        self.IoU  = []
        self.mIoU = []
        self.aux_mIoU = []
        self.vis_dir = "{}{}".format(cfg.VISUALIZATION_DIRECTORY, cfg.EXPERIMENT_NAME)
        mkdir(self.vis_dir)
        self.cfg = cfg

        self.state = "training" if is_train else "testing"

    def tIoU(self, start, end, pred_start, pred_end):
        """Element-wise temporal IoU between ``[start, end]`` and ``[pred_start, pred_end]``.

        Pairs whose union is not strictly positive (zero-length or inverted
        segments) score 0 instead of NaN, inf or a negative value.
        """
        start = np.asarray(start, dtype=float)
        end = np.asarray(end, dtype=float)
        pred_start = np.asarray(pred_start, dtype=float)
        pred_end = np.asarray(pred_end, dtype=float)

        # Intersection, clipped so non-overlapping segments contribute 0.
        segments_intersection = (np.minimum(end, pred_end) - np.maximum(start, pred_start)).clip(0)
        segments_union = (pred_end - pred_start) + (end - start) - segments_intersection

        # Divide only where the union is positive; everything else stays 0.
        tIoU = np.zeros_like(segments_union)
        np.divide(segments_intersection, segments_union, out=tIoU, where=segments_union > 0)
        return tIoU

    def run(self, indexes, pred_start, pred_end, start, end, loss, time_starts, time_ends, factors, fps):
        """Record the loss and the IoU of one batch.

        The predicted boundaries are the argmax positions of ``pred_start`` /
        ``pred_end`` rescaled as ``factors * index / fps`` (the end uses
        ``index + 1``) and compared with ``time_starts`` / ``time_ends``.
        ``indexes``, ``start`` and ``end`` are not used here.
        """
        self.loss.append(loss.detach().item())

        startings = np.argmax(pred_start.detach().cpu().numpy(), axis=1)
        endings   = np.argmax(pred_end.detach().cpu().numpy(), axis=1)

        # Work on plain float arrays so tensors, lists and arrays all behave alike.
        factors = np.asarray(factors, dtype=float)
        fps = np.asarray(fps, dtype=float)

        startings = factors * startings / fps
        endings = factors * (endings + 1) / fps

        gt_start = np.asarray(time_starts, dtype=float)
        gt_end   = np.asarray(time_ends, dtype=float)

        iou = self.tIoU(gt_start, gt_end, startings, endings)
        self.IoU.append(iou)
        self.mIoU.append(np.mean(iou))

    def iou_print(self):
        """Return the percentage of samples reaching IoU 0.1/0.3/0.5/0.7 and reset.

        The result maps each threshold (as a string) to a percentage of
        ``cfg.DATASETS.TRAIN_SAMPLES`` or ``TEST_SAMPLES`` depending on the
        split. All accumulators, including the loss history, are cleared so the
        next epoch starts from scratch; read ``self.loss`` / ``self.mIoU``
        before calling this method.
        """
        new_ious = [p for batch in self.IoU for p in batch]

        th = {0.1: 0, 0.3: 0, 0.5: 0, 0.7: 0}
        for value in new_ious:
            rounded = round(value, 2)
            for k in th:
                if rounded >= k:
                    th[k] += 1

        if self.state == "training":
            total = self.cfg.DATASETS.TRAIN_SAMPLES
        else:
            total = self.cfg.DATASETS.TEST_SAMPLES
        a = {str(k): round(v * 100 / total, 2) for k, v in th.items()}

        self.IoU = []
        self.mIoU = []
        # Without this reset the "epoch loss" was a running mean over all epochs.
        self.loss = []
        return a

## Training code

In [21]:
def trainer(cfg):
    """Train ``NLVL`` for ``cfg.EPOCHS`` epochs, validating after every epoch.

    Per-iteration and per-epoch scalars are written to a tensorboardX
    ``SummaryWriter`` and the full model is saved after each training epoch to
    ``./checkpoints/<EXPERIMENT_NAME>/model_epoch_<epoch>``.
    """
    print('trainer')
    # Dataloader load
    dataloader_train, dataset_size_train = data.make_dataloader(cfg, is_train=True)
    dataloader_test, dataset_size_test   = data.make_dataloader(cfg, is_train=False)

    # Model & Optimizer (default: Adam with weight decay)
    model = NLVL(cfg)
    model.cuda()
    optimizer = make_optimizer(cfg, model)

    # Metric tIoU
    tiou_train = MetricIoU(cfg, dataset_size_train)
    tiou_test  = MetricIoU(cfg, dataset_size_test, is_train=False)

    # Tensorboard logging
    writer_path = os.path.join(cfg.VISUALIZATION_DIRECTORY, cfg.EXPERIMENT_NAME)
    writer = SummaryWriter(writer_path)

    # torch.save does not create missing directories.
    checkpoint_dir = "./checkpoints/{}".format(cfg.EXPERIMENT_NAME)
    os.makedirs(checkpoint_dir, exist_ok=True)

    def run_batch(batch, metric):
        """Move one batch to the GPU, run the model and update ``metric``.

        Batch layout: [0] sample index, [1:8] model inputs (video features and
        lengths, tokens and lengths, start, end, localiz), [9:13] time_starts,
        time_ends, factors, fps (kept on the CPU for the metric).
        """
        videoFeat, videoFeat_lengths, tokens, tokens_lengths, start, end, localiz = [
            tensor.cuda() for tensor in batch[1:8]
        ]
        loss, pred_start, pred_end, _, atten_loss = model(
            videoFeat, videoFeat_lengths, tokens, tokens_lengths, start, end, localiz)
        metric.run(batch[0], pred_start, pred_end, start, end, loss.detach(),
                   batch[9], batch[10], batch[11], batch[12])
        return loss, atten_loss

    # Integer global step for the per-iteration scalars.
    total_iterations = 0

    try:
        for epoch in range(cfg.EPOCHS):
            print("Epoch {}".format(epoch))
            # Training
            model.train()
            num_batches = len(dataloader_train)
            for iteration, batch in enumerate(dataloader_train):
                loss, atten_loss = run_batch(batch, tiou_train)

                if iteration % 10 == 0 or iteration == num_batches - 1:
                    # Print the scalar value rather than the tensor repr.
                    print("Epoch: {} / Iteration: {}/{} / Loss :{}".format(str(epoch), str(iteration), str(num_batches), loss.item()))

                optimizer.zero_grad()
                loss.backward()
                torch.nn.utils.clip_grad_norm_(model.parameters(), 5)
                optimizer.step()

                writer.add_scalar('mlnlp/Progress_Loss', loss.item(), total_iterations)
                writer.add_scalar('mlnlp/Progress_Attention_Loss', atten_loss.item(), total_iterations)

                total_iterations += 1

            print("Epoch: {} / Total Loss: {}".format(str(epoch), np.mean(tiou_train.loss)))
            writer.add_scalar('mlnlp/Train_Loss', np.mean(tiou_train.loss), epoch)
            writer.add_scalar('mlnlp/Train_Mean_IoU', np.mean(tiou_train.mIoU), epoch)

            # Called for its side effect: clears the accumulated training IoUs.
            tiou_train.iou_print()
            torch.save(model, "{}/model_epoch_{}".format(checkpoint_dir, epoch))

            # Validation: no gradients are needed, so skip building the graph.
            model.eval()
            with torch.no_grad():
                for batch in dataloader_test:
                    run_batch(batch, tiou_test)

            writer.add_scalar('mlnlp/Valid_Loss', np.mean(tiou_test.loss), epoch)
            writer.add_scalar('mlnlp/Valid_Mean_IoU', np.mean(tiou_test.mIoU), epoch)
            writer.add_scalars('mlnlp/Valid_tIoU_th', tiou_test.iou_print(), epoch)
    finally:
        # Flush pending events even when training is interrupted.
        writer.close()

In [22]:
def tester(cfg):
    """Evaluate the pickled model at ``cfg.TEST.MODEL`` on the test split.

    Returns the dictionary produced by ``MetricIoU.iou_print`` (percentage of
    samples reaching each IoU threshold); the same dictionary is printed.
    """
    print('testing')
    # Dataloader
    dataloader_test, dataset_size_test   = data.make_dataloader(cfg, is_train=False)

    # Resolve the checkpoint against the working directory. The previous
    # str.replace(".", cwd) rewrote every dot in the path (including the one
    # in a file extension), not just a leading "./".
    load_path = os.path.abspath(cfg.TEST.MODEL)

    # NOTE(review): torch.load unpickles a full model object and can execute
    # arbitrary code; only load checkpoints from a trusted source.
    model = torch.load(load_path)
    model.cuda()

    # Metric tIoU
    tiou_test  = MetricIoU(cfg, dataset_size_test, is_train=False)

    # Tensorboard logging
    # NOTE(review): nothing is logged during testing; the writer is kept so the
    # run directory is created as before, and it is closed on exit.
    writer_path = os.path.join(cfg.VISUALIZATION_DIRECTORY, cfg.EXPERIMENT_NAME)
    writer = SummaryWriter(writer_path)

    try:
        model.eval()
        # Inference only: skip autograd bookkeeping.
        with torch.no_grad():
            for batch in dataloader_test:
                index = batch[0]
                videoFeat, videoFeat_lengths, tokens, tokens_lengths, start, end, localiz = [
                    tensor.cuda() for tensor in batch[1:8]
                ]
                time_starts, time_ends, factors, fps = batch[9], batch[10], batch[11], batch[12]

                loss, pred_start, pred_end, attention, atten_loss = model(
                    videoFeat, videoFeat_lengths, tokens, tokens_lengths, start, end, localiz)
                tiou_test.run(index, pred_start, pred_end, start, end, loss.detach(),
                              time_starts, time_ends, factors, fps)
    finally:
        writer.close()

    # The scores were previously computed and then discarded.
    results = tiou_test.iou_print()
    print(results)
    return results

In [23]:
def main(config_path):
    """Configure the global ``cfg`` from ``config_path`` and run the selected stage.

    The experiment name is the config file name without its extension; the
    ``logs/`` and ``visualization/`` directories are siblings of the config
    file. ``cfg.ENGINE_STAGE`` selects ``trainer`` ("TRAINER") or ``tester``
    ("TESTER").

    Raises:
        ValueError: if ``cfg.ENGINE_STAGE`` is neither "TRAINER" nor "TESTER".
    """
    parser = argparse.ArgumentParser(description="TMLGA")
    parser.add_argument(
        "--config-file",
        default=config_path,
        type=str,)
    # An empty argv keeps argparse away from the notebook kernel's own arguments.
    args = parser.parse_args(args=[])

    # Derive paths with os.path instead of str.replace, which would also
    # rewrite any directory component that happens to contain the file name.
    config_dir, config_name = os.path.split(args.config_file)
    log_directory   = os.path.join(config_dir, "logs/")
    vis_directory   = os.path.join(config_dir, "visualization/")
    experiment_name = os.path.splitext(config_name)[0]

    cfg.merge_from_list(['EXPERIMENT_NAME', experiment_name, 'LOG_DIRECTORY', log_directory, "VISUALIZATION_DIRECTORY", vis_directory])
    cfg.merge_from_file(args.config_file)

    # Reproducibility
    np.random.seed(0)
    torch.manual_seed(0)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

    if cfg.ENGINE_STAGE == "TRAINER":
        trainer(cfg)
    elif cfg.ENGINE_STAGE == "TESTER":
        tester(cfg)
    else:
        # Previously an unknown stage silently did nothing.
        raise ValueError("Unknown ENGINE_STAGE: {!r} (expected 'TRAINER' or 'TESTER')".format(cfg.ENGINE_STAGE))

In [24]:
# Entry point of the notebook: run the stage selected by ENGINE_STAGE in the
# experiment configuration (the captured output below shows the TRAINER stage).
# NOTE(review): config_path repeats the value assigned in the configuration cell.
config_path = "/root/TMLGA_refactoring/experiments/charades_sta_train.yaml"
main(config_path)

trainer
True
loading annotations into memory... Done (t=0.10s)
charades_vocab_1_30.pickle True
Creating index.. {'id': 8251, 'video': 'LEOL6', 'time_start': 8.0, 'time_end': 6.25, 'frame_start': 193.28, 'frame_end': 151.0, 'feature_start': 23.04, 'feature_end': 17, 'description': 'person is holding medicine.', 'number_features': 18, 'number_frames': 151, 'fps': 24.16, 'label': 107, 'tokens': ['person', 'is', 'holding', 'medicine', '.'], 'preprocessing': 'edison_v1'}
{'id': 8439, 'video': 'IOL8Q', 'time_start': 20.9, 'time_end': 12.0, 'frame_start': 502.20539828834757, 'frame_end': 288.3475971033575, 'feature_start': 59.8518762343647, 'feature_end': 34.364713627386436, 'description': 'a person is throwing a bag out of the room.', 'number_features': 87, 'number_frames': 730, 'fps': 24.02896642527979, 'label': 24, 'tokens': ['a', 'person', 'is', 'throwing', 'a', 'bag', 'out', 'of', 'the', 'room', '.'], 'preprocessing': 'edison_v1'}
{'id': 9622, 'video': 'AKKWU', 'time_start': 38.0, 'time_

Traceback (most recent call last):
  File "/opt/conda/lib/python3.6/multiprocessing/queues.py", line 240, in _feed
    send_bytes(obj)
  File "/opt/conda/lib/python3.6/multiprocessing/connection.py", line 200, in send_bytes
    self._send_bytes(m[offset:offset + size])
  File "/opt/conda/lib/python3.6/multiprocessing/connection.py", line 404, in _send_bytes
    self._send(header + buf)
  File "/opt/conda/lib/python3.6/multiprocessing/connection.py", line 368, in _send
    n = write(self._handle, buf)
BrokenPipeError: [Errno 32] Broken pipe


KeyboardInterrupt: 

In [14]:
%tensorboard --logdir "experiments/visualization/"

Reusing TensorBoard on port 6006 (pid 408621), started 0:00:02 ago. (Use '!kill 408621' to kill it.)