In [1]:
import os
import sys
import inspect
from pathlib import Path
import time

current_dir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
parent_dir = os.path.dirname(current_dir)
sys.path.insert(0, parent_dir) 

PARENT_DIR = Path(parent_dir)

# App
# from dataloader.dali_loader import DaliLoader

# Third Party
import numpy as numpy
import torchvision.transforms as transforms
import torch
from tqdm import tqdm, tqdm_notebook

In [2]:
import os
import argparse

# Third Party
import numpy as numpy
import torchvision.transforms as transforms
import torch

# NVIDIA
from nvidia.dali.pipeline import Pipeline
import nvidia.dali.ops as ops
import nvidia.dali.types as types
from nvidia.dali.plugin import pytorch


class VideoReaderPipeline(Pipeline):
    def __init__(self, batch_size, sequence_length, num_threads, device_id, file_root, crop_size, transforms=None, filenames=[]):
        super(VideoReaderPipeline, self).__init__(batch_size, num_threads, device_id, seed=12)
        self.reader = ops.VideoReader(
            device='gpu',
            file_root=file_root,
            sequence_length=sequence_length,
            normalized=True,
            random_shuffle=True,
            image_type=types.RGB,
            dtype=types.UINT8,
            initial_fill=16,
#             filenames=filenames
        )

        self.crop = ops.Crop(device="gpu", crop=crop_size, output_dtype=types.FLOAT)
        self.transpose = ops.Transpose(device="gpu", perm=[3, 0, 1, 2])
        self.uniform = ops.Uniform(range=(0.0, 1.0))
        self.flip = ops.Flip(device="gpu", horizontal=1, vertical=0)
        # self.normalize = ops.NormalizePermute(
        #     device="gpu",
        #     mean=[0.485, 0.456, 0.406],
        #     std=[0.229, 0.224, 0.225],
        #     width=224,
        #     height=224
        # )
        self.cmn = ops.CropMirrorNormalize(
             device="gpu",
             output_dtype=types.FLOAT,
        #     # output_layout=types.NCHW,
             crop=(224, 224),
             image_type=types.RGB,
             mean=[0.485, 0.456, 0.406],
             std=[0.229, 0.224, 0.225]
        )

    def define_graph(self):
        inputs, labels = self.reader(name='Reader')
        # output = self.flip(inputs)
        output = self.crop(inputs, crop_pos_x=self.uniform(), crop_pos_y=self.uniform())
#         output = self.transpose(output)
        # flipped = self.flip(inputs)
        # output = self.cmn(inputs)
        return output, labels


class DaliLoader():
    def __init__(self, batch_size, file_root, sequence_length, crop_size, transforms=None, filenames=[]):
        # container_files = [file_root + '/' + f for f in os.listdir(file_root)]

        self.pipeline = VideoReaderPipeline(
            batch_size=batch_size,
            sequence_length=sequence_length,
            num_threads=2,
            device_id=0,
            file_root=file_root,
            filenames=filenames,
            crop_size=crop_size,
            transforms=transforms
        )
        self.pipeline.build()
        self.epoch_size = self.pipeline.epoch_size('Reader')
        self.dali_iterator = pytorch.DALIGenericIterator(
            self.pipeline,
            ["data", "label"],
            self.epoch_size,
            auto_reset=True
        )

    def __len__(self):
        return int(self.epoch_size)

    def __iter__(self):
        return self.dali_iterator.__iter__()

In [3]:
batch_size = 10
file_root = Path(parent_dir) / 'datasets'/ 'UCF101-MP4-Sample'
sequence_length = 1
crop_size = 224
print([str(file_root / 'Archery'/ f ) for f in os.listdir(str(file_root / 'Archery'))])
loader = DaliLoader(
    batch_size,
    str(file_root),
    sequence_length,
    crop_size,
    filenames=[str(file_root / 'Archery'/ f ) for f in os.listdir(str(file_root / 'Archery'))]
)

['/two-stream-action-recognition/datasets/UCF101-MP4-Sample/Archery/v_Archery_g01_c06.mp4', '/two-stream-action-recognition/datasets/UCF101-MP4-Sample/Archery/v_Archery_g01_c07.mp4', '/two-stream-action-recognition/datasets/UCF101-MP4-Sample/Archery/v_Archery_g01_c02.mp4', '/two-stream-action-recognition/datasets/UCF101-MP4-Sample/Archery/v_Archery_g01_c05.mp4', '/two-stream-action-recognition/datasets/UCF101-MP4-Sample/Archery/v_Archery_g01_c01.mp4', '/two-stream-action-recognition/datasets/UCF101-MP4-Sample/Archery/v_Archery_g01_c03.mp4', '/two-stream-action-recognition/datasets/UCF101-MP4-Sample/Archery/v_Archery_g01_c04.mp4']


In [4]:
for i, inputs in enumerate(loader):
    data = inputs[0]["data"]
    label = inputs[0]["label"]
    break

In [5]:
x = torch.squeeze(data).permute(0, 3, 1, 2)
# tr = transforms.Compose([
#     transforms.Normalize(
#         mean=[0.485, 0.456, 0.406],
#         std=[0.229, 0.224, 0.225]
#     )
# ])
# x = tr(x)
x.shape

torch.Size([10, 3, 224, 224])

In [6]:
# App
from network import resnet101

# Pytorch
import torch.nn as nn
import torch
import torch.backends.cudnn as cudnn
from torch.autograd import Variable
from torch.optim.lr_scheduler import ReduceLROnPlateau

import torchvision.transforms as transforms
import torchvision.models as models

cudnn.benchmark = True

In [7]:
LR = 5e-4
RESUME_PATH = PARENT_DIR / 'models' / 'spatial_resnet101.tar'
NB_EPOCHS = 2

In [8]:
def build_model():
    model = resnet101(pretrained=True, channel=3).cuda()
    criterion = nn.CrossEntropyLoss().cuda()
    optimizer = torch.optim.SGD(
        model.parameters(),
        LR,
        momentum=0.9
    )
    scheduler = ReduceLROnPlateau(
        optimizer,
        'min',
        patience=1,
        verbose=True
    )
    return model, criterion, optimizer, scheduler

In [9]:
model, criterion, optimizer, scheduler = build_model()

In [10]:
def get_resume():
    if not RESUME_PATH.exists():
        print("==> no checkpoint found at '{}'".format(RESUME_PATH))
        return
    
    checkpoint = torch.load(RESUME_PATH)
    start_epoch = checkpoint['epoch']
    best_prec1 = checkpoint['best_prec1']
    
    model.load_state_dict(checkpoint['state_dict'])
    optimizer.load_state_dict(checkpoint['optimizer'])
    return checkpoint, start_epoch, best_prec1

In [11]:
checkpoint, start_epoch, best_prec1 = get_resume()

In [12]:
checkpoint.keys()

dict_keys(['optimizer', 'epoch', 'best_prec1', 'state_dict'])

In [13]:
# print(start_epoch, best_prec1, checkpoint['optimizer'].keys(), checkpoint['optimizer']['param_groups'])
# print(model.parameters())
def accuracy(output, target, topk=(1,)):
    """Computes the accuracy over the k top predictions for the specified values of k"""
    with torch.no_grad():
        maxk = max(topk)
        batch_size = target.size(0)

        _, pred = output.topk(maxk, 1, True, True)
        pred = pred.t()
        correct = pred.eq(target.view(1, -1).expand_as(pred))

        res = []
        for k in topk:
            correct_k = correct[:k].view(-1).float().sum(0, keepdim=True)
            res.append(correct_k.mul_(100.0 / batch_size))
        return res

In [14]:
def train_epoch():
    start = time.time()
    current_time = start
    model.train()
    
    progress = tqdm_notebook(loader)
    
    for i, inputs in enumerate(progress):
        data = inputs[0]["data"]
        label = inputs[0]["label"].flatten()
        
        data_tr = torch.squeeze(data).permute(0, 3, 1, 2)
#         data_tr = tr(data_tr)
        
        label = label.cuda(async=True)
        
        label = Variable(label).cuda().type(torch.long)
        data_var = Variable(data_tr).cuda()
        
        output = model(data_var)
        loss = criterion(output, label)
        
        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
#         print(input_var)
        
        current_time = time.time()
#         if i > 500:
#             break
        
    return data, label

In [15]:
data, label = train_epoch()

HBox(children=(IntProgress(value=0, max=3367), HTML(value='')))




In [16]:
label

tensor([2, 0, 2, 2, 1, 0, 0, 1, 0, 2], device='cuda:0')

In [17]:
import dataloader

In [18]:
data_loader = dataloader.spatial_dataloader(
    BATCH_SIZE=20,
    num_workers=1,
    path='/UCF101/jpegs/jpegs_256/',
    ucf_list='/two-stream-action-recognition/UCF_list/',
    ucf_split='01'
)

==> (Training video, Validation video):( 287 117 )


In [19]:
train, val, test = data_loader.run()

==> sampling testing frames
==> Training data : 287 frames
torch.Size([3, 224, 224])
==> Validation data : 2223 frames
torch.Size([3, 224, 224])


  "please use transforms.Resize instead.")


In [20]:
for item in train:
    break

In [21]:
item[1]

tensor([2, 2, 0, 0, 2, 1, 1, 1, 2, 2, 2, 2, 1, 1, 2, 0, 1, 1, 1, 1])