In [1]:
import os
import sys
import csv
import numpy as np
import torch
import torch.nn as nn
from torch.nn import functional as F
from torch.utils.data import DataLoader
from torchvision.transforms import Compose
import math
import copy
import random

sys.path.append('./backbones/asrf')
from libs import models
from libs.optimizer import get_optimizer
from libs.loss_fn import ActionSegmentationLoss, BoundaryRegressionLoss
from libs.class_weight import get_class_weight, get_pos_weight
from libs.dataset import ActionSegmentationDataset, collate_fn
from libs.transformer import TempDownSamp, ToTensor
from libs.helper import train, validate, evaluate
from libs.checkpoint import resume, save_checkpoint

from src.utils import eval_txts, load_meta
from src.predict import predict_backbone
import configs.asrf_config as cfg

In [2]:
# Seed Python's `random`, NumPy, and PyTorch (CPU and every CUDA device) with
# one shared value so repeated runs start from the same RNG state.
SEED = 0
for seed_rng in (random.seed, np.random.seed, torch.manual_seed, torch.cuda.manual_seed_all):
    seed_rng(SEED)

# Request deterministic cuDNN kernels.
torch.backends.cudnn.deterministic = True

In [3]:
# Device used for the model, loss weights and prediction below.
# Prefer the GPU, but fall back to the CPU so the notebook still runs
# (slowly) on a machine without CUDA instead of failing at the first `.to(device)`.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

In [4]:
# Experiment selection for this run.
#   model_name : always "asrf" in this notebook
#   dataset    : choose from gtea, 50salads, breakfast
#   split      : gtea : 1~4, 50salads : 1~5, breakfast : 1~4
model_name = 'asrf'
dataset = 'breakfast'
split = 4

In [5]:
# Resolve all per-experiment metadata and paths in one call. `load_meta`
# returns a 10-tuple that is unpacked into the names used by the rest of the
# notebook (it also printed "Created :..." for the model/result/record
# directories on this run).
(
    actions_dict,
    num_actions,
    gt_path,
    features_path,
    vid_list_file,
    vid_list_file_tst,
    sample_rate,
    model_dir,
    result_dir,
    record_dir,
) = load_meta(
    cfg.dataset_root,
    cfg.model_root,
    cfg.result_root,
    cfg.record_root,
    dataset,
    split,
    model_name,
)

Created :./model/asrf/breakfast/split_4
Created :./result/asrf/breakfast/split_4
Created :./record/asrf/breakfast


In [6]:
# With cfg.param_search enabled only the "training" portion is used;
# otherwise the dataset is built in "trainval" mode.
train_mode = "training" if cfg.param_search else "trainval"

# Per-sample preprocessing: convert to tensors, then temporally downsample
# by `sample_rate`.
train_transform = Compose([ToTensor(), TempDownSamp(sample_rate)])

train_data = ActionSegmentationDataset(
    dataset,
    transform=train_transform,
    mode=train_mode,
    split=split,
    dataset_dir=cfg.dataset_root,
    csv_dir=cfg.csv_dir,
)

# Shuffled mini-batches; the last incomplete batch is dropped only when more
# than one sample is batched together.
train_loader = DataLoader(
    train_data,
    batch_size=cfg.batch_size,
    shuffle=True,
    drop_last=cfg.batch_size > 1,
    collate_fn=collate_fn,
)

In [7]:
# Architecture hyper-parameters come from the config, except the number of
# output classes, which is taken from the dataset metadata.
asrf_kwargs = dict(
    in_channel=cfg.in_channel,
    n_features=cfg.n_features,
    n_classes=num_actions,
    n_stages=cfg.n_stages,
    n_layers=cfg.n_layers,
    n_stages_asb=cfg.n_stages_asb,
    n_stages_brb=cfg.n_stages_brb,
)
model = models.ActionSegmentRefinementFramework(**asrf_kwargs)

# Move the parameters to the target device; left as the cell's last
# expression so the notebook displays the module summary.
model.to(device)

ActionSegmentRefinementFramework(
  (conv_in): Conv1d(2048, 64, kernel_size=(1,), stride=(1,))
  (shared_layers): ModuleList(
    (0): DilatedResidualLayer(
      (conv_dilated): Conv1d(64, 64, kernel_size=(3,), stride=(1,), padding=(1,))
      (conv_in): Conv1d(64, 64, kernel_size=(1,), stride=(1,))
      (dropout): Dropout(p=0.5, inplace=False)
    )
    (1): DilatedResidualLayer(
      (conv_dilated): Conv1d(64, 64, kernel_size=(3,), stride=(1,), padding=(2,), dilation=(2,))
      (conv_in): Conv1d(64, 64, kernel_size=(1,), stride=(1,))
      (dropout): Dropout(p=0.5, inplace=False)
    )
    (2): DilatedResidualLayer(
      (conv_dilated): Conv1d(64, 64, kernel_size=(3,), stride=(1,), padding=(4,), dilation=(4,))
      (conv_in): Conv1d(64, 64, kernel_size=(1,), stride=(1,))
      (dropout): Dropout(p=0.5, inplace=False)
    )
    (3): DilatedResidualLayer(
      (conv_dilated): Conv1d(64, 64, kernel_size=(3,), stride=(1,), padding=(8,), dilation=(8,))
      (conv_in): Conv1d(64, 6

In [8]:
# Optimizer settings forwarded from the config. All of them are passed to
# `get_optimizer` regardless of the optimizer chosen.
optim_options = {
    "momentum": cfg.momentum,
    "dampening": cfg.dampening,
    "weight_decay": cfg.weight_decay,
    "nesterov": cfg.nesterov,
}

# Build an Adam optimizer over the model at the configured learning rate.
optimizer = get_optimizer('Adam', model, cfg.learning_rate, **optim_options)

Adam will be used as an optimizer.


In [9]:
# Optional per-class loss weights. They stay None unless the config enables
# them, in which case they are computed for the same data mode as the
# training set and moved to the training device.
class_weight = None
if cfg.class_weight:
    weight_mode = "training" if cfg.param_search else "trainval"
    class_weight = get_class_weight(
        dataset=dataset,
        split=split,
        dataset_dir=cfg.dataset_root,
        csv_dir=cfg.csv_dir,
        mode=weight_mode,
    ).to(device)

print(class_weight)

tensor([ 0.1552,  0.8315,  0.3031,  5.9319,  2.8529,  0.9975,  2.2579,  9.8273,
        11.1337, 52.5587,  0.8571,  0.2720,  0.6552,  0.1236,  0.6082,  0.5465,
        11.7209,  0.5953,  5.3678,  1.9034,  0.3270,  1.1679,  3.5533, 20.8757,
         0.7867,  0.8999,  0.9478,  0.2666,  0.6144,  0.0954,  1.2525,  5.1186,
         0.1471,  0.9486,  0.2799,  6.8839,  1.0002,  0.3717,  3.9691,  0.5981,
         8.7898, 29.0969,  1.1855,  1.6171,  0.1975,  1.0854,  1.0000, 57.0182],
       device='cuda:0')


In [10]:
# Loss for the frame-wise action classification output. Each term
# (ce / focal / tmse / gstmse) has an on/off switch and a matching
# `<term>_weight` coefficient, all read from the config.
# NOTE(review): labels equal to `ignore_index` (255) are presumably excluded
# from the loss -- confirm against ActionSegmentationLoss.
criterion_cls = ActionSegmentationLoss(
    ce=cfg.ce,
    focal=cfg.focal,
    tmse=cfg.tmse,
    gstmse=cfg.gstmse,
    weight=class_weight,
    ignore_index=255,
    ce_weight=cfg.ce_weight,
    focal_weight=cfg.focal_weight,
    tmse_weight=cfg.tmse_weight,
    # Fix: this used to pass the on/off switch `cfg.gstmse` as the weight, so a
    # configured `gstmse_weight` was silently ignored. Fall back to the old
    # value only if the config does not define `gstmse_weight`.
    gstmse_weight=getattr(cfg, "gstmse_weight", cfg.gstmse),
)

In [11]:
# Positive-sample weight for the boundary loss, computed for the same data
# mode as the training set and then moved to the training device.
pos_weight_mode = "training" if cfg.param_search else "trainval"
pos_weight = get_pos_weight(
    dataset=dataset,
    split=split,
    csv_dir=cfg.csv_dir,
    mode=pos_weight_mode,
)
pos_weight = pos_weight.to(device)

In [12]:
# Loss for the boundary-regression output, weighted by `pos_weight`.
criterion_bound = BoundaryRegressionLoss(
    pos_weight=pos_weight,
)

In [None]:
# Train for cfg.max_epoch epochs. After every epoch the model weights are
# saved as "epoch-<N>.model" (N is 1-based) in `model_dir`, and the current
# learning rate and training loss are logged.
for epoch in range(cfg.max_epoch):
    epoch_number = epoch + 1  # checkpoints and logs are numbered from 1

    # One pass over the training set; `train` receives the 0-based epoch index.
    train_loss = train(
        train_loader, model,
        criterion_cls, criterion_bound, cfg.lambda_b,
        optimizer, epoch, device,
    )

    checkpoint_path = os.path.join(model_dir, "epoch-" + str(epoch_number) + ".model")
    torch.save(model.state_dict(), checkpoint_path)

    current_lr = optimizer.param_groups[0]["lr"]
    print("epoch: {}\tlr: {:.4f}\ttrain loss: {:.4f}".format(epoch_number, current_lr, train_loss))


epoch: 1	lr: 0.0005	train loss: 3.2587
epoch: 2	lr: 0.0005	train loss: 2.4676
epoch: 3	lr: 0.0005	train loss: 2.0347
epoch: 4	lr: 0.0005	train loss: 1.7857
epoch: 5	lr: 0.0005	train loss: 1.5826
epoch: 6	lr: 0.0005	train loss: 1.4069
epoch: 7	lr: 0.0005	train loss: 1.3307
epoch: 8	lr: 0.0005	train loss: 1.2021
epoch: 9	lr: 0.0005	train loss: 1.1158
epoch: 10	lr: 0.0005	train loss: 1.0834
epoch: 11	lr: 0.0005	train loss: 1.0402
epoch: 12	lr: 0.0005	train loss: 0.9058


In [None]:
# Evaluate the checkpoint of every epoch on the test video list, log one
# tab-separated row per epoch to "split_<split>_all.csv", and remember the
# epoch with the highest score.
max_epoch = -1
max_val = 0.0
max_results = dict()

# Only the first three IoU thresholds are reported. Header cells use the raw
# threshold value; the result keys use two decimals ('F1@%0.2f').
f1_thresholds = cfg.iou_thresholds[:3]

# The `with` block closes the file even if prediction/evaluation raises, and
# newline='' is what the csv module requires so that the writer controls the
# line endings itself.
with open(os.path.join(record_dir, 'split_{}_all.csv'.format(split)), 'w', newline='') as f:
    writer = csv.writer(f, delimiter='\t')
    writer.writerow(['epoch', 'accu', 'edit'] + ['F1@{}'.format(t) for t in f1_thresholds])

    for epoch in range(1, cfg.max_epoch+1):
        print('======================EPOCH {}====================='.format(epoch))
        # NOTE(review): presumably loads the epoch's checkpoint from model_dir and
        # writes predictions into result_dir -- confirm in src.predict.
        predict_backbone(model_name, model, model_dir, result_dir, features_path, vid_list_file_tst,
                         epoch, actions_dict, device, sample_rate)
        results = eval_txts(cfg.dataset_root, result_dir, dataset, split, model_name)

        writer.writerow(
            [epoch, '%.4f' % results['accu'], '%.4f' % results['edit']]
            + ['%.4f' % results['F1@%0.2f' % t] for t in f1_thresholds]
        )

        # Selection score: the sum of every metric in `results`.
        # `>=` keeps the original tie-breaking: on equal scores the later epoch wins.
        curr_val = sum(results.values())
        if curr_val >= max_val:
            max_val = curr_val
            max_epoch = epoch
            max_results = results

print('EARNED MAXIMUM PERFORMANCE IN EPOCH {}'.format(max_epoch))
print(max_results)

In [None]:
# Write the metrics of the best epoch (`max_epoch` / `max_results`, set by the
# evaluation cell) to "split_<split>_best.csv" as a tab-separated header + row.

# Only the first three IoU thresholds are reported. Header cells use the raw
# threshold value; the result keys use two decimals ('F1@%0.2f').
best_thresholds = cfg.iou_thresholds[:3]
best_header = ['epoch', 'accu', 'edit'] + ['F1@{}'.format(t) for t in best_thresholds]

# Build the row before touching the file so that a missing metric (e.g. an
# empty `max_results`) fails without truncating an existing record.
best_row = (
    [max_epoch, '%.4f' % max_results['accu'], '%.4f' % max_results['edit']]
    + ['%.4f' % max_results['F1@%0.2f' % t] for t in best_thresholds]
)

# The `with` block guarantees the file is closed; newline='' is what the csv
# module requires so that the writer controls the line endings itself.
with open(os.path.join(record_dir, 'split_{}_best.csv'.format(split)), 'w', newline='') as f:
    writer = csv.writer(f, delimiter='\t')
    writer.writerow(best_header)
    writer.writerow(best_row)