In [1]:
import os, sys, argparse
# Put the parent of the current working directory on the import path
# (presumably so the project-local `models` and `opts` modules resolve).
sys.path.append(os.path.abspath(os.pardir))


In [2]:
from torchvision.models import resnet50
from thop import profile
import torch
import torch.nn as nn
from models import TSN
from opts import parser

In [3]:
# List files for the train and validation splits (relative paths); they are
# handed to the argument parser as positional arguments.
train_path, val_path = "data/something_train.txt", "data/something_val.txt"

In [4]:
# Dataset key and the two backbone names that are compared.
dataset_name, netType1, netType2 = "something", "TSM", "MS"

# Frame counts (segments per clip) used to size the random input clips.
num_segments_8, num_segments_16, num_segments_32, num_segments_128 = 8, 16, 32, 128

# Remaining settings; they are stringified and forwarded on the emulated
# command line.
batch_size = 1
iter_size = 1
num_workers = 5
learning_rate = 0.01
dropout = 0.3
mode = 1

In [5]:
# One random clip per frame count, laid out as (frames, 3, 224, 224).
# Each tensor is sampled on the CPU and then moved to the GPU, in the same
# order as the frame counts listed below.
input1, input2, input3, input4 = [
    torch.rand(frame_count, 3, 224, 224).cuda()
    for frame_count in (num_segments_8, num_segments_16, num_segments_32, num_segments_128)
]

In [6]:
# Emulate the command line of the training script so the project's argparse
# parser can be reused unchanged inside the notebook.
sys.argv = [
    'main.py',
    # positional arguments
    dataset_name, 'RGB', train_path, val_path,
    # model
    '--arch', str(netType1),
    '--num_segments', str(num_segments_8),
    '--mode', str(mode),
    # optimisation
    '--gd', '200',
    '--lr', str(learning_rate),
    '--lr_steps', '20', '30',
    '--epochs', '35',
    '-b', str(batch_size),
    '-i', str(iter_size),
    '-j', str(num_workers),
    '--dropout', str(dropout),
    # remaining options
    '--consensus_type', 'avg',
    '--eval-freq', '1',
    '--rgb_prefix', 'img_',
    '--pretrained_parts', 'finetune',
    '--no_partialbn',
    '-p', '20',
    '--nesterov', 'True',
]

In [7]:
# Parse the emulated command line; argv[0] (the script name) is skipped,
# which is also what parse_args() does when called without arguments.
args = parser.parse_args(sys.argv[1:])

In [8]:
# Dump every parsed option, framed by two separator lines.
args_dict = vars(args)
separator = "------------------------------------"
print(separator)
print(args.arch+" Configurations:")
for option_name, option_value in args_dict.items():
    print(f"- {option_name}: {option_value}")
print(separator)

------------------------------------
TSM Configurations:
- dataset: something
- modality: RGB
- train_list: data/something_train.txt
- val_list: data/something_val.txt
- arch: TSM
- num_segments: 8
- mode: 1
- consensus_type: avg
- pretrained_parts: finetune
- k: 3
- dropout: 0.3
- loss_type: nll
- rep_flow: False
- epochs: 35
- batch_size: 1
- iter_size: 1
- lr: 0.01
- lr_steps: [20.0, 30.0]
- momentum: 0.9
- weight_decay: 0.0005
- clip_gradient: 200.0
- no_partialbn: True
- nesterov: True
- print_freq: 20
- eval_freq: 1
- workers: 5
- resume: 
- evaluate: False
- snapshot_pref: 
- val_output_folder: 
- start_epoch: 0
- gpus: None
- flow_prefix: img_
- rgb_prefix: img_
------------------------------------


In [9]:
# Number of classes for each supported dataset key.
num_classes_by_dataset = {
    'ucf101': 101,
    'hmdb51': 51,
    'kinetics': 400,
    'something': 174,
    'tinykinetics': 150,
}
if args.dataset not in num_classes_by_dataset:
    raise ValueError('Unknown dataset '+args.dataset)
num_class = num_classes_by_dataset[args.dataset]
# All supported datasets share one pattern: a 5-digit zero-padded integer
# followed by ".jpg" (presumably the per-frame image file name).
rgb_read_format = "{:05d}.jpg"

In [10]:
# Build the 8-frame variant of both backbones and move them to the GPU.
# The segment count is passed as num_segments_8 instead of args.num_segments:
# args.num_segments is overwritten further down in this notebook, so reading
# it here would silently build 16-segment "8frame" models if this cell were
# re-run after that mutation.
TSM_8frame = TSN(
    num_class, num_segments_8, args.pretrained_parts, args.modality,
    base_model=netType1,
    consensus_type=args.consensus_type,
    dropout=args.dropout,
    partial_bn=not args.no_partialbn,
).cuda()
MS_8frame = TSN(
    num_class, num_segments_8, args.pretrained_parts, args.modality,
    base_model=netType2,
    consensus_type=args.consensus_type,
    dropout=args.dropout,
    partial_bn=not args.no_partialbn,
).cuda()


Initializing TSN with base model: TSM.
TSN Configurations:
    input_modality:     RGB
    num_segments:       8
    new_length:         1
    consensus_module:   avg
    dropout_ratio:      0.3
        

Initializing TSN with base model: MS.
TSN Configurations:
    input_modality:     RGB
    num_segments:       8
    new_length:         1
    consensus_module:   avg
    dropout_ratio:      0.3
        


In [11]:
# Switch the parsed-args namespace to the 16-frame setting.
# NOTE: this mutates `args` in place, so any cell that reads
# args.num_segments after this point (including a re-run of an earlier
# cell) sees 16 instead of the 8 that was parsed from the command line.
args.num_segments=num_segments_16

In [12]:
# Build the 16-frame variant of both backbones and move them to the GPU.
# The segment count is passed as num_segments_16 rather than read from
# args.num_segments, so the result does not depend on whether (or when) the
# shared args namespace was mutated by another cell.
TSM_16frame = TSN(
    num_class, num_segments_16, args.pretrained_parts, args.modality,
    base_model=netType1,
    consensus_type=args.consensus_type,
    dropout=args.dropout,
    partial_bn=not args.no_partialbn,
).cuda()
MS_16frame = TSN(
    num_class, num_segments_16, args.pretrained_parts, args.modality,
    base_model=netType2,
    consensus_type=args.consensus_type,
    dropout=args.dropout,
    partial_bn=not args.no_partialbn,
).cuda()



Initializing TSN with base model: TSM.
TSN Configurations:
    input_modality:     RGB
    num_segments:       16
    new_length:         1
    consensus_module:   avg
    dropout_ratio:      0.3
        

Initializing TSN with base model: MS.
TSN Configurations:
    input_modality:     RGB
    num_segments:       16
    new_length:         1
    consensus_module:   avg
    dropout_ratio:      0.3
        


In [13]:
# temperature: scalar handed to each model as its second forward input
# (alongside the clip tensor) when the models are profiled.
# NOTE(review): what the model does with this value is not visible in this
# notebook — confirm against the TSN forward() implementation.
temperature = 100

In [14]:
# Profile each (model, clip) pair with thop, in a fixed order:
# TSM/8, MS/8, TSM/16, MS/16. Every call returns a (flops, params) pair.
(flops1, params1), (flops2, params2), (flops3, params3), (flops4, params4) = [
    profile(network, inputs=(clip, temperature), verbose=False)
    for network, clip in (
        (TSM_8frame, input1),
        (MS_8frame, input1),
        (TSM_16frame, input2),
        (MS_16frame, input2),
    )
]

No BN layer Freezing.
No BN layer Freezing.
No BN layer Freezing.
No BN layer Freezing.
No BN layer Freezing.
No BN layer Freezing.
No BN layer Freezing.
No BN layer Freezing.


In [15]:
def human_format(num):
    """Format a number with three decimals and a metric suffix.

    The value is divided by 1000 until its magnitude drops below 1000 or the
    largest available suffix is reached, e.g. 14589000000 -> '14.589G'.

    Args:
        num: int or float to format (may be negative).

    Returns:
        str: '<value with 3 decimals><suffix>' where the suffix is one of
        '', 'K', 'M', 'G', 'T', 'P'. Values of 1e18 and above keep the 'P'
        suffix and simply show a mantissa of 1000 or more.
    """
    # add more suffixes here if you need them
    suffixes = ['', 'K', 'M', 'G', 'T', 'P']
    magnitude = 0
    # Stop at the last suffix: without this bound, values >= 1e18 index past
    # the end of the list (IndexError) and infinity never leaves the loop.
    while abs(num) >= 1000 and magnitude < len(suffixes) - 1:
        magnitude += 1
        num /= 1000.0
    return '%.3f%s' % (num, suffixes[magnitude])

In [16]:
# Tab-separated summary: one row per (backbone, frame count) combination.
print('Models\tFrames\tFLOPs\tParams')
print('='*30)
for model_name, frame_count, flops, params in (
    (netType1, num_segments_8, flops1, params1),
    (netType2, num_segments_8, flops2, params2),
    (netType1, num_segments_16, flops3, params3),
    (netType2, num_segments_16, flops4, params4),
):
    print('%s\t%d\t%s\t%s' % (model_name, frame_count, human_format(flops), human_format(params)))

Models	Frames	FLOPs	Params
TSM	8	14.589G	11.266M
MS	8	14.883G	11.287M
TSM	16	29.178G	11.266M
MS	16	29.788G	11.287M
