In [1]:
from typing import Callable, Dict, Optional
import functools
import glob
from pathlib import Path

from catalyst import utils
from catalyst.contrib.data.cv import ImageReader
from catalyst.data import PathsDataset, ScalarReader, ReaderCompose


# def get_label_fn(fn: str) -> str:
#     return Path(fn).parent.name
#
#
# def get_class_fn(fn: str, label2class: Dict[str, int]) -> int:
#     return label2class[get_label_fn(fn)]
#
#
class ImageFolderDataset(PathsDataset):
    """Image dataset built from a folder in which each image's parent
    directory name is used as its label.

    Distinct parent-directory names are sorted and numbered from 0; that
    number is what ``label_fn`` returns for a file.
    """

    def __init__(
        self, rootpath: str, transform: Optional[Callable[[Dict], Dict]] = None
    ) -> None:
        """Collect image files under ``rootpath`` and configure the readers.

        Args:
            rootpath: directory whose sub-directories hold the images.
            transform: optional callable taking and returning a sample dict;
                forwarded to the base class as ``dict_transform``.
        """
        # With `recursive=False` the `**` segment behaves like a plain `*`,
        # so only entries exactly one directory below `rootpath` are matched.
        candidates = glob.iglob(f"{rootpath}/**/*", recursive=False)  # TODO: set to `true`?
        image_paths = sorted(
            path for path in candidates if utils.has_image_extension(path)
        )

        # Sorted label names -> consecutive class indices starting at 0.
        label_names = sorted({Path(path).parent.name for path in image_paths})
        class_of_label = dict(zip(label_names, range(len(label_names))))

        def class_index(filename):
            # TODO: replace this local function with a module-level one
            # (e.g. bound with `functools.partial`).
            return class_of_label[Path(filename).parent.name]

        sample_reader = ReaderCompose([
            ImageReader(input_key="image", rootpath=rootpath),
            ScalarReader(
                # presumably "targets" is the key the base class stores the
                # `label_fn` result under — verify against PathsDataset
                input_key="targets",  # TODO: add `target_key`
                output_key="targets",
                dtype=int,
                default_value=-1,
            ),
        ])

        super().__init__(
            filenames=image_paths,
            open_fn=sample_reader,
            label_fn=class_index,
            features_key="image",
            dict_transform=transform,
        )

  from pandas import Panel


---

In [2]:
from typing import Dict
import collections
import albumentations as A
import cv2
import torch


# Seed torch's global RNG so the shuffled train loader and the randomly
# initialised classifier layer are repeatable after Restart & Run All.
seed = 42
torch.manual_seed(seed)

# DataLoader settings shared by the train and validation loaders.
bs = 3
num_workers = 0

# Target side length in pixels; also selects the `imagenette2-{image_size}` folder.
image_size = 160

# Per-sample preprocessing pipeline.
# NOTE(review): `A.pytorch` is only reachable if `albumentations.pytorch` has
# already been imported somewhere (e.g. transitively) — confirm on a fresh kernel.
transform = A.Compose([
    # Resize so the longest side equals `image_size` ...
    A.LongestMaxSize(max_size=image_size),
    # ... then pad the shorter side up to `image_size` with a constant border.
    A.PadIfNeeded(
        min_height=image_size,
        min_width=image_size,
        border_mode=cv2.BORDER_CONSTANT,
        value=255,  # pad with white as in original dataset
    ),
    A.Normalize(),  # library-default mean/std
    A.pytorch.ToTensor(),
])

def data_transfrom(dict_: Dict) -> Dict:
    """Run the module-level ``transform`` pipeline on one sample dict.

    The items of ``dict_`` are passed to ``transform`` as keyword arguments
    and whatever ``transform`` returns is handed back unchanged.
    """
    augmented = transform(**dict_)
    return augmented


# Training split: shuffled on every pass.
trainset = ImageFolderDataset(
    rootpath=f'data/imagenette2-{image_size}/train/',
    transform=data_transfrom,
)
trainloader = torch.utils.data.DataLoader(
    trainset,
    batch_size=bs,
    shuffle=True,
    num_workers=num_workers,
)

# Validation split: fixed order.
testset = ImageFolderDataset(
    rootpath=f'data/imagenette2-{image_size}/val/',
    transform=data_transfrom,
)
testloader = torch.utils.data.DataLoader(
    testset,
    batch_size=bs,
    shuffle=False,
    num_workers=num_workers,
)

# Ordered mapping of loader name -> loader, "train" first and "valid" second.
loaders = collections.OrderedDict(
    [
        ("train", trainloader),
        ("valid", testloader),
    ]
)

In [3]:
import pretrainedmodels
from torch import nn


def get_model(model_name: str, num_classes: int, pretrained: str = "imagenet"):
    """Build a ``pretrainedmodels`` network and give it a new output layer.

    Args:
        model_name: name of a model constructor exposed by the
            ``pretrainedmodels`` module; an unknown name raises ``KeyError``.
        num_classes: number of outputs of the replacement ``last_linear``.
        pretrained: forwarded unchanged to the model constructor.

    Returns:
        The model with ``fc`` set to an empty ``nn.Sequential`` and
        ``last_linear`` replaced by a fresh ``nn.Linear``.
    """
    factory = vars(pretrainedmodels)[model_name]
    # Always built with 1000 outputs — presumably so the pretrained
    # checkpoint matches the architecture; the head is swapped below.
    net = factory(num_classes=1000, pretrained=pretrained)

    net.fc = nn.Sequential()
    # New head keeps the input width of the layer it replaces.
    net.last_linear = nn.Linear(net.last_linear.in_features, num_classes)

    return net


# Instantiate resnet18 with a 10-output head (presumably one output per
# label folder of the dataset used above — verify).
model = get_model(model_name="resnet18", num_classes=10)

In [4]:
from catalyst.dl.runner import SupervisedRunner

# experiment setup: epoch count and the directory handed to the runner as `logdir`
num_epochs = 3
# NOTE(review): the directory name says "cifar", but the loaders in this
# notebook read from an imagenette folder — confirm the intended logdir.
logdir = "./logs/cifar_simple_notebook_1"

# model, criterion, optimizer
criterion = nn.CrossEntropyLoss()
# Adam over all model parameters with the optimizer's default hyperparameters.
optimizer = torch.optim.Adam(model.parameters())

# model runner
# `input_key='image'` is the same key ImageFolderDataset uses as `features_key`.
runner = SupervisedRunner(input_key='image')

# model training: runs `num_epochs` epochs over the "train" and "valid" loaders
runner.train(
    model=model,
    criterion=criterion,
    optimizer=optimizer,
    loaders=loaders,
    logdir=logdir,
    num_epochs=num_epochs,
    verbose=True
)

1/3 * Epoch (train): 100% 3157/3157 [22:48<00:00,  2.31it/s, loss=1.898]
1/3 * Epoch (valid): 100% 1309/1309 [01:18<00:00, 16.64it/s, loss=10.976]   
[2020-07-28 16:23:15,104] 
1/3 * Epoch 1 (_base): lr=0.0010 | momentum=0.9000
1/3 * Epoch 1 (train): loss=1.8749
1/3 * Epoch 1 (valid): loss=2.4104
Early exiting                                                          
2/3 * Epoch (train):  25% 792/3157 [05:43<17:58,  2.19it/s, loss=3.044]