In [1]:
from time import time
from tqdm import tqdm
import torch
import torch.nn as nn

from models.modules import SeparableConv2d

In [2]:
def test_block(block, ch=128):
    model = nn.Sequential(*[block(ch, ch, kernel_size=3, stride=1, padding=1) for _ in range(50)])
    model = model.cuda()
    model.train()
    
    # warmup
    noise = torch.rand(4, ch, 100, 100).cuda()
    noise = model(noise)
    noise.sum().backward()

    torch.cuda.reset_max_memory_allocated()
    tic = time()
    for _ in tqdm(range(100)):
        noise = torch.rand(4, ch, 80, 80).cuda()
        noise = model(noise)
        noise.sum().backward()
    elapsed = time() - tic
    memory = torch.cuda.max_memory_allocated() / 1024 / 1024 / 1024

    print('number of parameters:', sum([p.numel() for p in model.parameters()]))
    print(f'time = {elapsed:2f}s')
    print(f'memory = {memory:2f}GB')

In [3]:
# Compare a plain convolution against the separable variant at 32 channels.
for conv_block in (nn.Conv2d, SeparableConv2d):
    test_block(conv_block, ch=32)

100%|██████████| 100/100 [00:05<00:00, 19.18it/s]
  2%|▏         | 2/100 [00:00<00:05, 16.70it/s]

number of parameters: 462400
time = 5.217695s
memory = 0.179130GB


100%|██████████| 100/100 [00:04<00:00, 20.68it/s]

number of parameters: 67200
time = 4.836754s
memory = 0.314942GB





In [4]:
# Same comparison at 128 channels.
for conv_block in (nn.Conv2d, SeparableConv2d):
    test_block(conv_block, ch=128)

100%|██████████| 100/100 [00:29<00:00,  3.42it/s]


number of parameters: 7379200
time = 29.224858s
memory = 0.759631GB


100%|██████████| 100/100 [00:19<00:00,  5.09it/s]

number of parameters: 883200
time = 19.647464s
memory = 1.263967GB



