In [1]:
import catalyst
import collections
import os
import sys
import torch
from torchvision import transforms as T

from catalyst.dl import utils, AlchemyLogger, SupervisedRunner
from catalyst.dl.callbacks import AccuracyCallback, AUCCallback
from typing import Callable, Iterable, List, Tuple

# Record the library versions this notebook was run with, for reproducibility.
print(f"torch: {torch.__version__}, catalyst: {catalyst.__version__}")

torch: 1.3.1, catalyst: 20.03



AlchemyRunner and SupervisedAlchemyRunner are deprecated; use AlchemyLogger instead (`from catalyst.dl import AlchemyLogger`)



In [2]:
# Expose only the CUDA device with index 1 to this process.
os.environ.update(CUDA_VISIBLE_DEVICES='1')

In [3]:
# Project and data locations.
# The defaults are the original machine-specific paths; set DFDC_BASE_DIR and/or
# DFDC_HDF5_DIR in the environment to run the notebook on another machine
# without editing this cell.
BASE_DIR = os.environ.get('DFDC_BASE_DIR', '/home/dmitry/projects/dfdc')
SRC_DIR = os.path.join(BASE_DIR, 'src')
HDF5_DIR = os.environ.get('DFDC_HDF5_DIR', '/media/dmitry/other/dfdc-crops/hdf5')

In [4]:
# Make the vendored RetinaFace code and the project sources importable.
# Each path is removed before being re-inserted so that re-running this cell
# does not stack duplicate entries on sys.path; the resulting priority is the
# same as before (SRC_DIR first, then the vendored package).
for extra_path in (os.path.join(BASE_DIR, 'vendors/Pytorch_Retinaface'), SRC_DIR):
    if extra_path in sys.path:
        sys.path.remove(extra_path)
    sys.path.insert(0, extra_path)

In [5]:
from dataset.hdf5 import HDF5Dataset
from dataset.sample import FrameSampler, BalancedSampler
from model.detector import basic_detector_256
from model.loss import combined_loss


can't resolve package from __spec__ or __package__, falling back on __name__ and __path__



In [6]:
# Run toggles: the first selects whether fp16 parameters are passed to the
# runner, the second whether Alchemy monitoring is configured.
is_fp16_used, is_alchemy_used = False, False

In [7]:
# Alchemy monitoring settings (None when monitoring is disabled).
# The API token is read from the ALCHEMY_TOKEN environment variable so the
# secret never has to be written into the notebook. Previously the token was
# hard-coded to None, so enabling monitoring always tripped the assertion.
if is_alchemy_used:
    monitoring_params = dict(
        token   = os.environ.get('ALCHEMY_TOKEN'),
        project = 'hello_dawg',
        group   = 'baby_steps',
        experiment = 'like_a_catalyst')
    # Fail early, with an actionable message, when the token is missing or empty.
    assert monitoring_params["token"], (
        "is_alchemy_used is True but the ALCHEMY_TOKEN environment variable "
        "is not set")
else:
    monitoring_params = None

In [8]:
def create_dataloader(bs: int, num_frames: int, real_fake_ratio: float,
                      p_sparse_frames: float, chunks: Iterable[int]
                     ) -> torch.utils.data.DataLoader:
    """Build a DataLoader over the HDF5 data stored in the selected chunk directories.

    Args:
        bs: Batch size given to the BatchSampler; an incomplete final batch
            is dropped.
        num_frames: Forwarded to FrameSampler and used as the first element
            of the dataset ``size`` tuple (the second element is fixed at 256).
        real_fake_ratio: Forwarded to FrameSampler as ``real_fake_ratio``.
        p_sparse_frames: Forwarded to FrameSampler as ``p_sparse``.
        chunks: Indices ``i`` selecting the sub-directories named
            ``dfdc_train_part_{i}`` under HDF5_DIR.

    Returns:
        A DataLoader whose batches are drawn by a BatchSampler wrapping
        ``BalancedSampler(dataset)``.

    Side effects:
        Prints the number of samples in the dataset.
    """
    part_dirs = [f'dfdc_train_part_{i}' for i in chunks]
    frame_sampler = FrameSampler(
        num_frames,
        real_fake_ratio=real_fake_ratio,
        p_sparse=p_sparse_frames,
    )
    to_tensor = T.Compose([T.ToTensor()])

    dataset = HDF5Dataset(
        HDF5_DIR,
        size=(num_frames, 256),
        sampler=frame_sampler,
        x_tfms=to_tensor,
        sub_dirs=part_dirs,
    )
    print('Num samples: {}'.format(len(dataset)))

    # Batches are composed by the balanced sampler, so the DataLoader gets a
    # batch_sampler instead of batch_size/shuffle arguments.
    balanced_batches = torch.utils.data.BatchSampler(
        BalancedSampler(dataset), batch_size=bs, drop_last=True
    )
    return torch.utils.data.DataLoader(dataset, batch_sampler=balanced_batches)

In [9]:
# Training loader: built from chunks 5..29.
train_dl = create_dataloader(
    chunks=range(5, 30),
    bs=12,
    num_frames=10,
    p_sparse_frames=0.75,
    real_fake_ratio=100/30,
)

Num samples: 61779


In [10]:
# Validation loader: built from chunks 0..4, which the training loader does not use.
valid_dl = create_dataloader(
    chunks=range(0, 5),
    bs=12,
    num_frames=10,
    p_sparse_frames=1.,
    real_fake_ratio=100/30,
)

Num samples: 7937


In [11]:
# Ordered mapping of loader name -> DataLoader ("train" first, then "valid").
loaders = collections.OrderedDict([
    ("train", train_dl),
    ("valid", valid_dl),
])

In [12]:
# Instantiate the network via the factory imported from model.detector.
model = basic_detector_256()

Using Conv3D pooling: 2 layers


In [13]:
# Loss function, optimizer and LR schedule for the run.
criterion = combined_loss
optim = torch.optim.Adam(params=model.parameters(), lr=1e-3)
# Single-milestone step schedule: gamma=0.3 is applied at milestone 9.
scheduler = torch.optim.lr_scheduler.MultiStepLR(
    optimizer=optim,
    milestones=[9],
    gamma=0.3,
)

In [14]:
# Length of the run and where its logs are written.
num_epochs = 10
logdir = "./logs/classification"

# fp16 settings are only supplied when the fp16 toggle is on; otherwise None.
fp16_params = dict(opt_level="O1") if is_fp16_used else None

In [15]:
# Callbacks for the run: two-class accuracy always, Alchemy logging on demand.
callbacks = [AccuracyCallback(num_classes=2)]

if is_alchemy_used:
    callbacks += [AlchemyLogger(**monitoring_params)]

In [16]:
# Prefer the GPU, but fall back to the CPU when CUDA is not available so the
# notebook can still be executed (e.g. for a smoke test) on a machine without
# a usable GPU instead of failing later inside the runner.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [17]:
# torch.cuda.set_device(device)

In [18]:
class XyevyRunner(SupervisedRunner):
    """SupervisedRunner whose forward pass gives the model both input and target.

    The model is called with the batch entry stored under ``input_key`` and
    the one stored under ``target_key``.
    NOTE(review): presumably the model needs the labels during its forward
    pass -- confirm against the model definition.
    """

    def forward(self, batch, **kwargs):
        """Run the model on one batch and return the processed output.

        Args:
            batch: Mapping that contains entries for ``self.input_key`` and
                ``self.target_key``.
            **kwargs: Extra keyword arguments passed through to the model.

        Returns:
            The model output after ``self._process_output``.
        """
        features = batch[self.input_key]
        targets = batch[self.target_key]
        raw_output = self.model(features, targets, **kwargs)
        return self._process_output(raw_output)

In [19]:
# Create the custom runner, bound to the device selected earlier.
runner = XyevyRunner(device=device)

In [20]:
# Launch training.
# The `callbacks` list prepared earlier (accuracy, optional Alchemy logger) is
# now actually handed to the runner; before, it was built but never passed, so
# those callbacks were silently ignored.
# NOTE(review): the recorded run of this cell stopped at the first training
# batch with KeyError: 'loss'. Presumably nothing wrote a 'loss' batch metric;
# check that `combined_loss` is picked up by Catalyst's default criterion
# callback, or add an explicit criterion callback -- TODO confirm.
runner.train(
    model=model,
    criterion=criterion,
    optimizer=optim,
    scheduler=scheduler,
    loaders=loaders,
    callbacks=callbacks,
    logdir=logdir,
    num_epochs=num_epochs,
    fp16=fp16_params,
    verbose=True,
)

1/10 * Epoch (train):   0% 0/5148 [00:00<?, ?it/s]

KeyError: 'loss'

In [None]:
# Scratch cell: IPython `??` introspection showing the source of SupervisedRunner (development aid only).
SupervisedRunner??