In [7]:
import argparse
import os
import time
import shutil
import torch
import torchvision
import torch.nn.parallel
import torch.backends.cudnn as cudnn
import torch.optim
from torch.nn.utils import clip_grad_norm
from torchvision.models import resnet101

from dataset import TSNDataSet
from models import TSN
from transforms import *
from opts import parser

best_prec1 = 0

class Params():
    """Hard-coded replacement for the namespace that ``parser.parse_args()`` would return.

    ``main()`` instantiates this class and stores it in the module-level ``args``.
    """
    def __init__(self):
        # --- dataset / model selection (read by main() when building the TSN model) ---
        self.dataset = 'ucf101'
        self.arch    = 'resnet50'
        self.num_segments = 3
        self.modality   = 'RGB'
        self.consensus_type = 'avg'
        self.dropout  = 0.8
        self.no_partialbn   = False

        # --- optimiser settings (lr / momentum / weight_decay are passed to SGD in main()) ---
        self.gd       = 20
        self.lr       = 0.001
        self.momentum = 0.9
        self.weight_decay = 0.001
        self.lr_steps = [30, 60]
        self.epochs   = 80
        self.start_epoch    = 0
        self.clip_gradient  = None
        self.loss_type      = 'nll'

        # --- data loading: `b` is the DataLoader batch size, `j` its num_workers ---
        self.b        = 8
        self.j        = 8
        self.workers   = 2
        self.root_path  = '/home/irfan/Desktop/Data/ucf101_standard_split_1/ucf101_standard_split_1'
        self.train_list = f'{self.root_path}/train.csv'
        self.val_list   = f'{self.root_path}/test.csv'
        # main() prepends this to the frame-name template when the modality is not RGB/RGBDiff;
        # without it that branch raised AttributeError.
        self.flow_prefix    = ''

        # --- devices ---
        self.gpus           = [torch.device('cuda')]
        self.device         = torch.device('cuda')

        # --- checkpointing / run control ---
        self.snapshot_pref = 'ucf101_bninception_'
        self.resume         = False
        self.evaluate       = True
        self.print_freq     = 300
        self.eval_freq      = 1
        
def main():
    """Build the TSN model and its data pipeline, load the trained weights and validate once.

    Configuration comes from ``Params`` and is stored in the module-level ``args``.

    Returns:
        tuple: ``(model, val_loader)``.

    Raises:
        ValueError: if the dataset, modality or loss type is not recognised.
    """
    global args, best_prec1
    args = Params()#parser.parse_args()

    if args.dataset == 'ucf101':
        num_class = 101
    elif args.dataset == 'hmdb51':
        num_class = 51
    elif args.dataset == 'kinetics':
        num_class = 400
    else:
        raise ValueError('Unknown dataset '+args.dataset)

    model = TSN(num_class, args.num_segments, args.modality,
                base_model=args.arch,
                consensus_type=args.consensus_type, dropout=args.dropout, partial_bn=not args.no_partialbn)

    #https://download.pytorch.org/models/resnet101-63fe2227.pth
    #model = resnet101(weights=torchvision.models.resnet.ResNet101_Weights)#.classifier
    #model.fc = torch.nn.Linear(in_features=2048, out_features=101, bias=True)

    crop_size = model.crop_size
    scale_size = model.scale_size
    input_mean = model.input_mean
    input_std = model.input_std
    policies = model.get_optim_policies()
    train_augmentation = model.get_augmentation()

    #model = torch.nn.DataParallel(model, device_ids=args.gpus).cuda()

    if args.resume:
        if os.path.isfile(args.resume):
            print(("=> loading checkpoint '{}'".format(args.resume)))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            # Report the checkpoint path; the original formatted args.evaluate here by mistake.
            print(("=> loaded checkpoint '{}' (epoch {})"
                  .format(args.resume, checkpoint['epoch'])))
        else:
            print(("=> no checkpoint found at '{}'".format(args.resume)))

    cudnn.benchmark = True

    # Data loading code
    if args.modality != 'RGBDiff':
        normalize = GroupNormalize(input_mean, input_std)
    else:
        normalize = IdentityTransform()

    if args.modality == 'RGB':
        data_length = 1
    elif args.modality in ['Flow', 'RGBDiff']:
        data_length = 5
    else:
        # Fail here with a clear message instead of a later NameError on data_length.
        raise ValueError('Unknown modality ' + args.modality)

    # Frame file-name template; the configuration object may not define flow_prefix.
    if args.modality in ["RGB", "RGBDiff"]:
        image_tmpl = "img_{:05d}.jpg"
    else:
        image_tmpl = getattr(args, 'flow_prefix', '') + "{}_{:05d}.jpg"

    # The same two stacking/tensor flags are derived from the architecture name for both pipelines.
    is_bninception = args.arch == 'BNInception'

    train_transform = torchvision.transforms.Compose([
                       train_augmentation,
                       Stack(roll=is_bninception),
                       ToTorchFormatTensor(div=not is_bninception),
                       normalize,
                   ])
    val_transform = torchvision.transforms.Compose([
                       GroupScale(int(scale_size)),
                       GroupCenterCrop(crop_size),
                       Stack(roll=is_bninception),
                       ToTorchFormatTensor(div=not is_bninception),
                       normalize,
                   ])

    train_dataset = TSNDataSet(args.root_path, args.train_list, num_segments=args.num_segments,
                   new_length=data_length,
                   modality=args.modality,
                   image_tmpl=image_tmpl,
                   transform=train_transform)
    val_dataset  =  TSNDataSet(args.root_path, args.val_list, num_segments=args.num_segments,
                   new_length=data_length,
                   modality=args.modality,
                   image_tmpl=image_tmpl,
                   random_shift=False,
                   transform=val_transform)

    # NOTE: train_loader and the optimizer below are built but not used by the
    # validation-only flow at the end of this function.
    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=args.b, shuffle=True,
        num_workers=args.j, pin_memory=True)

    val_loader = torch.utils.data.DataLoader(
        val_dataset,
        batch_size=args.b//2, shuffle=False,
        num_workers=args.j, pin_memory=True)

    # define loss function (criterion) and optimizer
    if args.loss_type == 'nll':
        # Place the criterion on the same device validate() moves the model and batches to.
        criterion = torch.nn.CrossEntropyLoss().to(args.device)
    else:
        raise ValueError("Unknown loss type")

    for group in policies:
        print(('group: {} has {} params, lr_mult: {}, decay_mult: {}'.format(
            group['name'], len(group['params']), group['lr_mult'], group['decay_mult'])))

    optimizer = torch.optim.SGD(policies,
                                args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    # Load the saved best weights and run a single validation pass.
    state_dict = torch.load('_ucf101_bninception__rgb_model_best.pth.tar',map_location=args.device)['state_dict']
    model.load_state_dict(state_dict)
    validate(val_loader, model, criterion, 0)
    return model,val_loader




def validate(val_loader, model, criterion, iter, logger=None):
    """Run one pass over ``val_loader`` and report loss and top-1 / top-5 precision.

    The model is switched to eval mode and moved to ``args.device``; every batch
    is moved to the same device. The forward passes run under ``torch.no_grad()``
    so no autograd graph is built during evaluation.

    Args:
        val_loader: iterable yielding ``(input, target)`` batches.
        model: module to evaluate.
        criterion: callable computing the loss from ``(output, target)``.
        iter: not used by this function (kept for interface compatibility).
        logger: not used by this function (kept for interface compatibility).

    Returns:
        The running average of top-1 precision (``top1.avg``).
    """
    batch_time = AverageMeter()
    losses     = AverageMeter()
    top1       = AverageMeter()
    top5       = AverageMeter()

    # switch to evaluate mode
    model.eval()
    model = model.to(args.device)
    end = time.time()
    # Gradients are never needed here; disabling them avoids building a graph per batch.
    with torch.no_grad():
        for i, (input_var, target_var) in enumerate(val_loader):
            input_var  = input_var.to(args.device)
            target_var = target_var.to(args.device)

            # compute output
            output = model(input_var)
            loss = criterion(output, target_var)

            # measure accuracy and record loss (tensors are already graph-free under no_grad)
            prec1, prec5 = accuracy(output, target_var, topk=(1,5))

            losses.update(loss, input_var.size(0))
            top1.update(prec1, input_var.size(0))
            top5.update(prec5, input_var.size(0))

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            if i % args.print_freq == 0:
                print(('Test: [{0}/{1}]\t'
                      'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                      'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                      'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                      'Prec@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
                       i, len(val_loader), batch_time=batch_time, loss=losses,
                       top1=top1, top5=top5)))

    print(('Testing Results: Prec@1 {top1.avg:.3f} Prec@5 {top5.avg:.3f} Loss {loss.avg:.5f}'
          .format(top1=top1, top5=top5, loss=losses)))

    return top1.avg


def save_checkpoint(state, is_best, filename='checkpoint.pth.tar'):
    """Serialise ``state`` to disk and, when ``is_best`` is true, copy it to the best-model file.

    Both file names are built by joining ``args.snapshot_pref``, the lower-cased
    ``args.modality`` and a suffix with underscores.
    """
    name_prefix = (args.snapshot_pref, args.modality.lower())
    checkpoint_path = '_'.join(name_prefix + (filename,))
    torch.save(state, checkpoint_path)
    if not is_best:
        return
    best_path = '_'.join(name_prefix + ('model_best.pth.tar',))
    shutil.copyfile(checkpoint_path, best_path)


class AverageMeter(object):
    """Keeps the most recent value plus a running, count-weighted average."""

    def __init__(self):
        self.reset()

    def reset(self):
        """Set ``val``, ``avg``, ``sum`` and ``count`` back to zero."""
        self.val = self.avg = self.sum = self.count = 0

    def update(self, val, n=1):
        """Record ``val`` as the latest value, weighted by ``n`` in the running average."""
        weighted = val * n
        self.val = val
        self.sum += weighted
        self.count += n
        self.avg = self.sum / self.count


def accuracy(output, target, topk=(1,)):
    """Computes the precision@k for the specified values of k.

    ``output`` is ranked along dimension 1 and compared against ``target``.
    Returns a list with one percentage (a tensor) per entry of ``topk``.
    """
    largest_k = max(topk)
    num_samples = target.size(0)

    # Indices of the largest_k highest scores per row, transposed so rank is dimension 0.
    _, ranked = output.topk(largest_k, 1, True, True)
    ranked = ranked.t()
    hits = ranked.eq(target.view(1, -1).expand_as(ranked))

    return [
        hits[:k].reshape(-1).float().sum(0).mul_(100.0 / num_samples)
        for k in topk
    ]


if __name__ == '__main__':
    # main() returns (model, val_loader); the loader is bound to the name `dataser`,
    # which the next cell iterates over.
    model,dataser = main()


Initializing TSN with base model: resnet50.
TSN Configurations:
    input_modality:     RGB
    num_segments:       3
    new_length:         1
    consensus_module:   avg
    dropout_ratio:      0.8
        
group: first_conv_weight has 1 params, lr_mult: 1, decay_mult: 1
group: first_conv_bias has 0 params, lr_mult: 2, decay_mult: 0
group: normal_weight has 53 params, lr_mult: 1, decay_mult: 1
group: normal_bias has 1 params, lr_mult: 2, decay_mult: 0
group: BN scale/shift has 2 params, lr_mult: 1, decay_mult: 0
Freezing BatchNorm2D except the first one.
Test: [0/946]	Time 0.936 (0.936)	Loss 1.3796 (1.3796)	Prec@1 50.000 (50.000)	Prec@5 100.000 (100.000)
Test: [300/946]	Time 0.030 (0.072)	Loss 1.3883 (1.1783)	Prec@1 25.000 (69.601)	Prec@5 100.000 (90.698)
Test: [600/946]	Time 0.028 (0.072)	Loss 0.0000 (1.4223)	Prec@1 100.000 (64.767)	Prec@5 100.000 (86.980)
Test: [900/946]	Time 0.041 (0.072)	Loss 0.6683 (1.4092)	Prec@1 50.000 (64.151)	Prec@5 100.000 (86.765)
Testing Results: Prec@1 

In [9]:
# Grab the first (img, lbl) batch from the loader returned by main(), then stop iterating.
for img,lbl in dataser:
    break

In [12]:
out = model(img.to('cuda'))

In [14]:
img.shape,out.shape

(torch.Size([4, 9, 224, 224]), torch.Size([4, 101]))

In [18]:
# Export the model to ONNX on the CPU with a dynamic batch dimension.
batch_size = 1
x = torch.randn(batch_size, 9, 224, 224, requires_grad=True)
# Select inference mode explicitly instead of relying on validate() having called model.eval().
model = model.to('cpu').eval()
# Reference PyTorch output for `x`; a later cell compares it with ONNX Runtime.
torch_out = model(x.to('cpu'))
torch.onnx.export(model,               # model being run
                  x,                         # model input (or a tuple for multiple inputs)
                  "_ucf101_bninception__rgb_model_best.onnx",   # where to save the model (can be a file or file-like object)
                  export_params=True,        # store the trained parameter weights inside the model file
                  opset_version=10,          # the ONNX version to export the model to
                  do_constant_folding=True,  # whether to execute constant folding for optimization
                  input_names = ['input'],   # the model's input names
                  output_names = ['output'], # the model's output names
                  dynamic_axes={'input' : {0 : 'batch_size'},    # variable length axes
                                'output' : {0 : 'batch_size'}})

Freezing BatchNorm2D except the first one.




Freezing BatchNorm2D except the first one.


In [31]:
# NOTE: scripting this model fails with the IndentationError recorded below;
# a later cell exports it with torch.jit.trace instead.
sm = torch.jit.script(model)

IndentationError: unexpected indent (<unknown>, line 1)

In [29]:
import numpy as np
import onnxruntime

ort_session = onnxruntime.InferenceSession("_ucf101_bninception__rgb_model_best.onnx", providers=["CPUExecutionProvider"])

def to_numpy(tensor):
    """Return ``tensor`` as a NumPy array on the CPU, detaching it first if it tracks gradients."""
    return tensor.detach().cpu().numpy() if tensor.requires_grad else tensor.cpu().numpy()

# compute ONNX Runtime output prediction
# Feed the SAME tensor `x` that produced `torch_out` in the export cell. The original
# drew a fresh random tensor here, so the two outputs were computed from different
# inputs and the comparison below could not pass.
ort_inputs = {ort_session.get_inputs()[0].name: to_numpy(x)}
ort_outs = ort_session.run(None, ort_inputs)

# compare ONNX Runtime and PyTorch results
np.testing.assert_allclose(to_numpy(torch_out), ort_outs[0], rtol=1e-03, atol=1e-05)

print("Exported model has been tested with ONNXRuntime, and the result looks good!")

AssertionError: 
Not equal to tolerance rtol=0.001, atol=1e-05

Mismatched elements: 98 / 101 (97%)
Max absolute difference: 0.07035995
Max relative difference: 2.403498
 x: array([[-0.882186,  2.370037,  4.181107,  3.86592 ,  0.129871,  1.492176,
         0.007689, -1.299026,  0.798733, -2.507371, -2.653229, -0.514497,
        -1.101331,  0.681547, -1.615307,  0.053302, -0.320427, -3.424017,...
 y: array([[-0.916885,  2.353652,  4.179123,  3.909896,  0.148668,  1.458838,
         0.020534, -1.303972,  0.817456, -2.476382, -2.688036, -0.501032,
        -1.118268,  0.700103, -1.597446,  0.058127, -0.32084 , -3.455338,...

In [32]:
import torch
import torchvision

# `model` is the instance built by the cells above (the ONNX export cell moved it to the CPU).
#model = torchvision.models.resnet18()

# Example input with the same (1, 9, 224, 224) shape used for the ONNX export.
example = torch.rand(1, 9, 224, 224)

# Use torch.jit.trace to generate a torch.jit.ScriptModule via tracing.
traced_script_module = torch.jit.trace(model, example)

In [33]:
traced_script_module.save("_ucf101_bninception__rgb_model_best.pt")

In [25]:
ort_inputs['input'].shape

(1, 9, 224, 224)

In [30]:
torch_out,ort_outs[0]

(tensor([[-0.8822,  2.3700,  4.1811,  3.8659,  0.1299,  1.4922,  0.0077, -1.2990,
           0.7987, -2.5074, -2.6532, -0.5145, -1.1013,  0.6815, -1.6153,  0.0533,
          -0.3204, -3.4240,  1.6528, -0.3679, -1.8884,  5.0871, -1.2560, -2.6041,
          -1.2286,  0.0591,  2.6265, -3.4647, -2.2038,  1.2696, -1.2700,  2.8332,
          -0.0432, -1.0135,  0.2152,  3.7315, -1.7265,  1.5266,  0.5255, -0.2066,
          -0.8698, -2.0443,  2.6716, -2.1056, -0.2042,  0.8584,  3.4560,  1.9974,
          -0.3578, -0.6230, -0.7251,  2.2614,  7.3260,  1.3420,  0.2488, -1.1402,
          -0.7861, -3.2351, -0.9871,  0.0257, -0.3929,  2.3378, -1.1859, -1.5633,
           0.1496, -0.8328, -0.9717,  1.9937, -1.2638, -0.4265, -1.9989, -0.8412,
          -0.2346, -1.4394,  1.2998, -1.1403, -0.2121,  0.7893, -1.0935, -3.1302,
          -0.9206,  0.8007, -1.5965,  0.9180,  0.6712,  0.9593,  0.3821, -2.7236,
           1.5280, -1.4498, -0.1849,  0.1984,  1.8024, -0.9313, -0.0371,  1.8654,
          -1.846

In [1]:
import onnx, onnxruntime

In [None]:
ort_session = onnxruntime.InferenceSession("super_resolution.onnx", providers=["CPUExecutionProvider"])

In [None]:
for i, (input, target) in enumerate(tl):
    print(i)

In [None]:
import cv2
cap = cv2.VideoCapture('/home/irfan/Desktop/Data/ucf101_standard_split_1/ucf101_standard_split_1/media/v_Kayaking_g17_c01.avi')

In [None]:
success,frame = cap.read()
success

In [None]:
for i, (input, target) in enumerate(val_loader):
    print()

In [None]:
#python main.py ucf101 RGB <ucf101_rgb_train_list> <ucf101_rgb_val_list> \
#   --arch BNInception --num_segments 3 \
#   --gd 20 --lr 0.001 --lr_steps 30 60 --epochs 80 \
#   -b 128 -j 8 --dropout 0.8 \
#   --snapshot_pref ucf101_bninception_ 
import torch
import torchvision
from dataset import TSNDataSet
from models import TSN
from transforms import Stack, ToTorchFormatTensor, IdentityTransform

class Params():
    """Reduced configuration used by the exploratory cells that follow.

    NOTE(review): this redefines the ``Params`` class declared earlier in the
    notebook with a different ``arch`` and fewer fields; whichever cell ran last wins.
    """
    def __init__(self):
        self.dataset = 'ucf101'
        self.arch    = 'BNInception'
        self.num_segments = 3
        self.gd       = 20
        self.lr       = 0.001
        self.lr_steps = [30, 60]
        self.epochs   = 80
        self.b        = 8
        self.j        = 8
        self.dropout  = 0.8
        self.snapshot_pref = 'ucf101_bninception_'
        self.workers   = 2
        # NOTE(review): named train_list but points at test.csv — confirm this is intentional.
        self.train_list = '/home/irfan/Desktop/Data/ucf101_standard_split_1/ucf101_standard_split_1/test.csv'
        self.modality   = 'RGB'
        self.consensus_type = 'avg'
        self.no_partialbn   = False
        self.gpus           = [torch.device('cuda')]

In [None]:
# Rebuild the model and a data pipeline at notebook scope, using the Params defined in the previous cell.
args = Params()

num_class = 101
model = TSN(num_class, args.num_segments, args.modality,
                base_model     = args.arch,
                consensus_type = args.consensus_type, dropout = args.dropout, partial_bn = not args.no_partialbn)

# Pre-processing settings exposed by the model.
crop_size  = model.crop_size
scale_size = model.scale_size
input_mean = model.input_mean
input_std  = model.input_std
policies   = model.get_optim_policies()
train_augmentation = model.get_augmentation()
model              = torch.nn.DataParallel(model, device_ids=args.gpus).cuda()


data_length = 1
# NOTE(review): main() uses GroupNormalize(input_mean, input_std) for RGB, whereas this
# cell uses IdentityTransform — confirm the difference is intended.
normalize = IdentityTransform()
transform = torchvision.transforms.Compose([
               train_augmentation,
               Stack(roll=args.arch == 'BNInception'),
               ToTorchFormatTensor(div=args.arch != 'BNInception'),
               normalize,
           ])

# args.train_list points at test.csv in the Params defined for this cell.
dataset = TSNDataSet("/home/irfan/Desktop/Data/ucf101_standard_split_1/ucf101_standard_split_1", args.train_list, num_segments=args.num_segments,
           new_length = data_length,
           modality   = args.modality,
           image_tmpl = "img_{:05d}.jpg" if args.modality in ["RGB", "RGBDiff"] else args.flow_prefix+"{}_{:05d}.jpg",
           transform  = transform)

# A single worker is used here instead of args.j.
train_loader = torch.utils.data.DataLoader(dataset,
                                           batch_size=args.b, 
                                           shuffle=True,
                                           num_workers=1,#args.j, 
                                           pin_memory=True)

In [None]:
for inp,tgt in train_loader:
    break

In [None]:
device = torch.device('cuda')
inp   = inp.to(device)
model = model.to(device)

In [None]:
out = model(inp)

In [None]:
_inp = inp.view((-1, 3) + inp.size()[-2:])

In [None]:
inp.shape,_inp.shape

In [None]:
import cv2
cap = cv2.VideoCapture('/home/irfan/Desktop/Data/ucf101_standard_split_1/ucf101_standard_split_1/media/v_Swing_g01_c04.avi')
success,frame = cap.read()
success

In [None]:
cap = cv2.VideoCapture('/home/irfan/Desktop/Data/ucf101_standard_split_1/ucf101_standard_split_1/media/v_Swing_g01_c04.avi')
success,frame = cap.read()
success

In [None]:
for i, (input, target) in enumerate(train_loader):
        # measure data loading time
        data_time.update(time.time() - end)
        #target = target.cuda(async=True)

In [None]:
# `async` is a reserved word since Python 3.7, so `cuda(async=True)` no longer parses;
# PyTorch's keyword for an asynchronous host-to-device copy is `non_blocking`.
target.cuda(non_blocking=True)

In [None]:
x,y = dataset.__getitem__(1)

In [None]:
y

In [None]:
def denormalize(img):
    """Min-max rescale an image tensor to [0, 1] and move its first axis last.

    The minimum is subtracted (the original added it, which does not shift the
    data to zero) and the result is divided by the new maximum. A constant
    image has no range to stretch, so it is returned as all zeros rather than
    dividing by zero.

    Args:
        img: 3-D tensor; its axes are permuted from (0, 1, 2) to (1, 2, 0).

    Returns:
        A new tensor with values in [0, 1]; ``img`` itself is not modified.
    """
    img = img - img.min()
    peak = img.max()
    if peak > 0:
        img = img / peak
    img = torch.permute(img,(1,2,0))
    return img

In [None]:
from matplotlib import pyplot as plt
# Display three 3-channel slices of the sample, one figure each.
# NOTE(review): an earlier cell shows loader batches of shape (4, 9, 224, 224), so a single
# sample `x` is presumably (9, H, W); `x[0]` would then be 2-D and denormalize() would fail
# on it. Confirm whether `x[:3]`, `x[3:6]` and `x[6:9]` were intended.
plt.imshow(denormalize(x[0][:3]))
plt.show()
plt.imshow(denormalize(x[0][3:6]))
plt.show()
plt.imshow(denormalize(x[0][6:9]))

In [None]:
from torchvision import transforms as T

In [None]:
T.ToPILImage()

In [None]:
from dataset import VideoRecord
record = VideoRecord(dataset.video_list[500],dataset.root_path)
r = dataset._get_test_indices(record)

In [None]:
record.num_frames//3,r

In [None]:
import numpy as np
tick = (record.num_frames - dataset.new_length + 1) / float(dataset.num_segments)
offsets = np.array([int(tick / 2.0 + tick * x) for x in range(dataset.num_segments)])
tick,offsets

In [None]:
dataset.num_segments

In [None]:
import numpy as np
# One frame offset per segment: the clip is divided into `num_segments` windows of width
# `tick` and the midpoint of each window is taken. When the record does not have more than
# num_segments + new_length - 1 frames, all offsets fall back to zero.
if record.num_frames > dataset.num_segments + dataset.new_length - 1:
    tick = (record.num_frames - dataset.new_length + 1) / float(dataset.num_segments)
    offsets = np.array([int(tick / 2.0 + tick * x) for x in range(dataset.num_segments)])
else:
    offsets = np.zeros((dataset.num_segments,))

In [None]:
record.path

In [None]:
offsets

In [None]:
import cv2
lst = []
for i in range(5):
    lst+= []

In [None]:
import cv2
# Check that the first frame of every .avi listed in test.csv can be decoded.
root = '/home/irfan/Desktop/Data/ucf101_standard_split_1/ucf101_standard_split_1/media/'
pc,fc = 0,0
# The context manager closes the CSV handle, and each capture is released right after
# the probe read. The original kept every VideoCapture open in a list for the whole run.
with open('/home/irfan/Desktop/Data/ucf101_standard_split_1/ucf101_standard_split_1/test.csv') as listing:
    for row in listing:
        fields = row.strip().split(',')
        if len(fields) < 2:
            # Blank or malformed row: there is no path column to probe.
            continue
        path = fields[1]
        #print(f'{root}{path}')
        if '.avi' not in path:
            continue
        probe = cv2.VideoCapture(f'{root}{path}')
        try:
            success,_ = probe.read()
        finally:
            probe.release()
        if not success:
            fc+=1
            print('Failed !!!',fc,path)
        else:
            pc+=1
            print('Passed' , pc,path)
        #if pc >1000 or fc > 1000: break

In [None]:
# Count the lines in train.csv; the context manager closes the file handle afterwards.
with open('/home/irfan/Desktop/Data/ucf101_standard_split_1/ucf101_standard_split_1/train.csv') as train_csv:
    num_train_rows = len(train_csv.readlines())
num_train_rows

In [None]:
import cv2
from matplotlib import pyplot as plt

In [None]:
# Seek to a frame index, decode that frame and display it.
cap = cv2.VideoCapture('/home/irfan/Desktop/Data/ucf101_standard_split_1/ucf101_standard_split_1/media/v_FieldHockeyPenalty_g09_c02.avi')
start_frame_number = 0
cap.set(cv2.CAP_PROP_POS_FRAMES, start_frame_number)
ok,fm = cap.read()
if not ok:
    raise RuntimeError(f'could not read frame {start_frame_number}')
# OpenCV decodes frames in BGR channel order while matplotlib expects RGB.
plt.imshow(cv2.cvtColor(fm, cv2.COLOR_BGR2RGB))

In [None]:
# Seek to a frame index, decode that frame and display it.
cap = cv2.VideoCapture('/home/irfan/Desktop/Data/ucf101_standard_split_1/ucf101_standard_split_1/media/v_FieldHockeyPenalty_g09_c02.avi')
start_frame_number = 100
cap.set(cv2.CAP_PROP_POS_FRAMES, start_frame_number)
ok,fm = cap.read()
if not ok:
    raise RuntimeError(f'could not read frame {start_frame_number}')
# OpenCV decodes frames in BGR channel order while matplotlib expects RGB.
plt.imshow(cv2.cvtColor(fm, cv2.COLOR_BGR2RGB))

In [None]:
# Re-use the capture opened in the previous cell: seek to frame 50, decode and display it.
cap.set(cv2.CAP_PROP_POS_FRAMES, 50)
ok,fm = cap.read()
if not ok:
    raise RuntimeError('could not read frame 50')
# OpenCV decodes frames in BGR channel order while matplotlib expects RGB.
plt.imshow(cv2.cvtColor(fm, cv2.COLOR_BGR2RGB))

In [None]:
# Reference training command. It is a shell command, not Python, so it is kept as a
# comment to let the cell parse.
#python main.py ucf101 RGB <ucf101_rgb_train_list> <ucf101_rgb_val_list> \
#   --arch BNInception --num_segments 3 \
#   --gd 20 --lr 0.001 --lr_steps 30 60 --epochs 80 \
#   -b 128 -j 8 --dropout 0.8 \
#   --snapshot_pref ucf101_bninception_

weight_url = 'bn_inception-9f5701afb96c8044.pth'
# NOTE(review): load_url is given a bare file name rather than a full URL — confirm where
# this checkpoint is expected to come from.
wts = torch.utils.model_zoo.load_url(weight_url)

In [None]:
# Dataset directory (a bare path is not valid Python, so it is kept as a comment):
# /home/irfan/Desktop/Data/ucf101_standard_split_1/ucf101_standard_split_1/

In [None]:
wts['inception_4b_3x3_bn.bias'].squeeze().shape

In [None]:
import yaml
# yaml.load() parses the text or stream it is given. Passing the path string made it
# parse the path itself and return that string, so open the file and parse its contents.
with open('tf_model_zoo/bninception/bn_inception.yaml') as manifest_file:
    # yaml.Loader can construct arbitrary Python objects; only use it on trusted files.
    manifest = yaml.load(manifest_file, Loader=yaml.Loader)
manifest

In [None]:
import tf_model_zoo

In [None]:
tf_model_zoo.__dict__