## CIFAR-10

In [1]:
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# (Re)load the autoreload extension; mode 2 reloads imported modules before each cell executes.
%reload_ext autoreload
%autoreload 2

In [2]:
from fastai.conv_learner import *
from fastai.models.cifar10.preact_resnet import *
# Turn on cuDNN benchmark mode (lets cuDNN auto-tune its convolution algorithms).
torch.backends.cudnn.benchmark = True
# Root directory of the CIFAR-10 image folders; create it if it does not exist yet.
PATH = Path("data/cifar10/")
os.makedirs(PATH,exist_ok=True)

In [3]:
# The ten CIFAR-10 class names.
classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')
# (mean, std) pair with one value per channel; passed to tfms_from_stats for normalisation.
stats = (np.array([ 0.4914 ,  0.48216,  0.44653]), np.array([ 0.24703,  0.24349,  0.26159]))

bs=512  # batch size used by every loader in this notebook
sz=32  # image size handed to the transforms
workers=7  # worker count handed to the data loaders

In [4]:
import torchvision.transforms as transforms
import torchvision.datasets as datasets

def pad(img, p=4, padding_mode='reflect'):
    """Pad a 3-D image array by `p` on both sides of its first two axes.

    The third axis is left unpadded (assumed to be the channel axis — TODO confirm).

    Args:
        img: anything `np.asarray` can turn into a 3-D array (e.g. a PIL image).
        p: number of elements added before and after each of the first two axes.
        padding_mode: `mode` argument forwarded to `np.pad`.

    Returns:
        The padded data wrapped with `Image.fromarray`.
    """
    pixels = np.asarray(img)
    pad_widths = ((p, p), (p, p), (0, 0))
    padded = np.pad(pixels, pad_widths, padding_mode)
    return Image.fromarray(padded)
def to_pil(img):
    """Wrap an array in a PIL image via `Image.fromarray`."""
    return Image.fromarray(img)

def torch_tfms(size, conv_pil=False, to_numpy=False):
    """Build the (train, validation) torchvision transform pipelines.

    Args:
        size: stored on both pipelines as the `.sz` attribute.
        conv_pil: when true, `to_pil` is inserted as the first step of both pipelines.
        to_numpy: when true, the `ToTensor` + torchvision `Normalize` steps are replaced
            by `np.array`, `Normalize(mean, std)` and a transpose of the first element
            of that result ("torch transforms with fastai dl").

    Returns:
        Tuple `(train_tfms, val_tfms)` of `transforms.Compose` objects.
    """
    mean, std = [0.4914 , 0.48216, 0.44653], [0.24703, 0.24349, 0.26159]

    if to_numpy:
        # Torch transforms with fastai dl
        tfms = [np.array, Normalize(mean,std), lambda x: x[0].T]
    else:
        tfms = [transforms.ToTensor(), transforms.Normalize(mean=mean, std=std)]

    # Augmentation is currently disabled. The pipeline that used to live here was:
    #     pad, transforms.RandomCrop(size), transforms.RandomHorizontalFlip()
    #     (optionally transforms.ColorJitter(.25,.25,.25), transforms.RandomRotation(2))
    # TODO: when re-enabling, pad by int((40 - size) / 2) rather than assuming 4.
    aug_tfms = []

    # `aug_tfms + tfms` creates a new list, so the two pipelines never share a
    # `transforms` list and the inserts below stay independent.
    train_tfms = transforms.Compose(aug_tfms + tfms)
    val_tfms = transforms.Compose(tfms)
    for pipeline in (train_tfms, val_tfms):
        pipeline.sz = size
        if conv_pil:
            pipeline.transforms.insert(0, to_pil)
    return train_tfms, val_tfms
    
def torch_ds(data_path, tfms):
    """Create the training and validation `ImageFolder` datasets.

    Args:
        data_path: directory containing the `train` and `test` sub-folders.
        tfms: `(train_tfms, val_tfms)` pair; the first is applied to `train`,
            the second to `test`.

    Returns:
        Tuple `(train_dataset, val_dataset)`.
    """
    train_tfms, val_tfms = tfms
    splits = (('train', train_tfms), ('test', val_tfms))
    train_dataset, val_dataset = (
        datasets.ImageFolder(os.path.join(data_path, folder), split_tfms)
        for folder, split_tfms in splits
    )
    return train_dataset, val_dataset

def torch_loader(data_path, datasets):
    """Wrap a (train, validation) dataset pair in torch DataLoaders and a `ModelData`.

    The training loader shuffles and uses the notebook-level batch size `bs`; the
    validation loader keeps order and uses `bs*2`. Both use `workers` workers and
    pinned memory.

    Args:
        data_path: path forwarded to `ModelData`.
        datasets: `(train_dataset, val_dataset)` pair.

    Returns:
        The `ModelData` built from the two loaders.
    """
    train_dataset, val_dataset = datasets
    shared_kwargs = dict(num_workers=workers, pin_memory=True)

    train_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=bs, shuffle=True, **shared_kwargs)
    val_loader = torch.utils.data.DataLoader(
        val_dataset, batch_size=bs*2, shuffle=False, **shared_kwargs)

    return ModelData(data_path, train_loader, val_loader)


In [5]:
def get_learner(data):
    """Create a `ConvLearner` around a fresh `PreActResNet18` for `data`.

    The learner gets cross-entropy as its criterion and `accuracy` as its only
    metric, and `half()` is called on it before it is returned.
    """
    learner = ConvLearner.from_model_data(PreActResNet18(), data)
    learner.crit, learner.metrics = nn.CrossEntropyLoss(), [accuracy]
    learner.half()
    return learner

### ZombieDataLoader

In [85]:
import torch, queue
from torch.utils.data.sampler import SequentialSampler, RandomSampler, BatchSampler
# from .imports import *
# from .core import *
import collections,sys,traceback,threading
from fastai.executors import LazyThreadPoolExecutor

# Text types that get_tensor and np_collate pass through unchanged.
string_classes = (str, bytes)


def get_tensor(batch, pin, half=False):
    """Recursively convert the numpy data inside `batch`.

    Handling by type:
      * numpy array / numpy scalar: wrapped with `T(..., cuda=False)`, made
        contiguous, pinned when `pin` is true, then passed through `to_gpu`.
      * str / bytes: returned unchanged.
      * mapping: a dict with every value converted recursively.
      * any other sequence: a list with every element converted recursively.

    Args:
        batch: the (possibly nested) batch to convert.
        pin: whether to call `pin_memory()` on converted arrays.
        half: forwarded to `T(...)`.

    Raises:
        TypeError: if `batch` (or a nested element) is none of the types above.
    """
    # The ABCs live in collections.abc; the `collections.Mapping` / `collections.Sequence`
    # aliases were removed in Python 3.10. Imported locally so this cell stands alone.
    from collections.abc import Mapping, Sequence

    if isinstance(batch, (np.ndarray, np.generic)):
        batch = T(batch, half=half, cuda=False).contiguous()
        if pin: batch = batch.pin_memory()
        return to_gpu(batch)
    elif isinstance(batch, string_classes):
        # Must be checked before Sequence: str and bytes are sequences too.
        return batch
    elif isinstance(batch, Mapping):
        return {k: get_tensor(sample, pin, half) for k, sample in batch.items()}
    elif isinstance(batch, Sequence):
        return [get_tensor(sample, pin, half) for sample in batch]
    raise TypeError(f"batch must contain numbers, dicts or lists; found {type(batch)}")


class ZombieDataLoader(object):
    """Batch loader that builds batches from a dataset, optionally in a worker pool.

    Each entry produced by `batch_sampler` is a list of dataset indices; `get_batch`
    indexes the dataset with them and merges the samples with `collate_fn`
    (default: `np_collate`, which produces numpy data that `__iter__` then converts
    with `get_tensor`).

    Unlike the check that is commented out in the original code, `batch_sampler`
    may be passed together with `batch_size` / `shuffle` / `drop_last`; when a
    `batch_sampler` is given those arguments do not influence batching.

    Args:
        dataset: indexable collection of samples.
        batch_size, shuffle, sampler, drop_last: used to build a `BatchSampler`
            when `batch_sampler` is None.
        batch_sampler: iterable of index lists; overrides the arguments above.
        pad_idx: fill value used by `jag_stack` for ragged rows.
        num_workers: pool size; 0 means build batches in the calling thread,
            None means `num_cpus()`.
        pin_memory, half: forwarded to `get_tensor`.
        pre_pad: pad ragged rows on the left (True) or on the right (False).
        transpose, transpose_y: transpose element 0 / element 1 of each batch.
        collate_fn: custom function merging a list of samples into a batch.
        multiprocess: use `ProcessPoolExecutor` instead of `LazyThreadPoolExecutor`.

    Raises:
        ValueError: if both `sampler` and `shuffle` are given.
    """
    def __init__(self, dataset, batch_size=1, shuffle=False, sampler=None, batch_sampler=None, pad_idx=0,
                 num_workers=None, pin_memory=False, drop_last=False, pre_pad=True, half=False,
                 transpose=False, transpose_y=False, collate_fn=None, multiprocess=False):
        self.dataset,self.batch_size,self.num_workers = dataset,batch_size,num_workers
        self.pin_memory,self.drop_last,self.pre_pad = pin_memory,drop_last,pre_pad
        self.transpose,self.transpose_y,self.pad_idx,self.half = transpose,transpose_y,pad_idx,half

        # NOTE: the usual "batch_sampler is mutually exclusive with batch_size, shuffle,
        # sampler, and drop_last" validation is intentionally not enforced here.

        if sampler is not None and shuffle:
            raise ValueError('sampler is mutually exclusive with shuffle')

        if batch_sampler is None:
            if sampler is None:
                sampler = RandomSampler(dataset) if shuffle else SequentialSampler(dataset)
            batch_sampler = BatchSampler(sampler, batch_size, drop_last)

        if num_workers is None:
            self.num_workers = num_cpus()

        self.sampler = sampler
        self.batch_sampler = batch_sampler
        self.collate_fn = self.np_collate if collate_fn is None else collate_fn
        self.multiprocess = multiprocess

    def __len__(self):
        """Number of batches, i.e. the length of the batch sampler."""
        return len(self.batch_sampler)

    def jag_stack(self, b):
        """Stack a list of arrays, padding ragged 1-D/2-D rows with `pad_idx`.

        Arrays whose rank is not 1 or 2, or whose lengths all match, go straight to
        `np.stack`. Otherwise a `(len(b), max_len)` array filled with `pad_idx` is
        created and every row is copied in, right-aligned when `pre_pad` is true
        and left-aligned otherwise.
        """
        if len(b[0].shape) not in (1,2): return np.stack(b)
        ml = max(len(o) for o in b)
        if min(len(o) for o in b)==ml: return np.stack(b)
        res = np.zeros((len(b), ml), dtype=b[0].dtype) + self.pad_idx
        for i,o in enumerate(b):
            # `ml-len(o)` rather than `-len(o)`: for an empty row `-0:` would select
            # the whole row and the assignment would fail to broadcast.
            if self.pre_pad: res[i, ml-len(o):] = o
            else:            res[i,  :len(o)] = o
        return res

    def np_collate(self, batch):
        """Merge a list of samples into numpy-based batch data.

        Dispatches on the type of the first sample: numpy data is stacked with
        `jag_stack`, ints/floats become an array, strings are returned as-is,
        mappings are collated per key and sequences per position.

        Raises:
            TypeError: if the first sample is none of the supported types.
        """
        # `collections.Mapping` / `collections.Sequence` were removed in Python 3.10;
        # the ABCs must come from collections.abc.
        from collections.abc import Mapping, Sequence

        b = batch[0]
        if isinstance(b, (np.ndarray, np.generic)): return self.jag_stack(batch)
        elif isinstance(b, (int, float)): return np.array(batch)
        elif isinstance(b, string_classes): return batch
        elif isinstance(b, Mapping):
            return {key: self.np_collate([d[key] for d in batch]) for key in b}
        elif isinstance(b, Sequence):
            return [self.np_collate(samples) for samples in zip(*batch)]
        raise TypeError(("batch must contain numbers, dicts or lists; found {}".format(type(b))))

    def get_batch(self, indices):
        """Fetch `dataset[i]` for every index and collate the samples into one batch.

        When `transpose` / `transpose_y` is set, element 0 / element 1 of the
        collated batch is replaced by its `.T` (the batch must then be mutable).
        """
        res = self.collate_fn([self.dataset[i] for i in indices])
        if self.transpose:   res[0] = res[0].T
        if self.transpose_y: res[1] = res[1].T
        return res

    def __iter__(self):
        """Yield batches in batch-sampler order.

        With `num_workers == 0` batches are built in the calling thread and always
        passed through `get_tensor`. Otherwise they are built through an executor's
        `map`, and `get_tensor` is applied only when the default `np_collate` is in
        use; batches from a custom `collate_fn` are yielded untouched.
        """
        if self.num_workers==0:
            # NOTE(review): this path converts with get_tensor even for a custom
            # collate_fn, unlike the pooled path below — confirm whether intended.
            for batch in map(self.get_batch, iter(self.batch_sampler)):
                yield get_tensor(batch, self.pin_memory, self.half)
            return

        # Only the selected executor name is looked up.
        # The thread path uses LazyThreadPoolExecutor — presumably to avoid the py3.6
        # issue where the executor queue is unbounded and can exhaust memory (TODO
        # confirm); an earlier variant chunked the sampler with
        # `chunk_iter(..., num_workers*10)` instead.
        executor_cls = ProcessPoolExecutor if self.multiprocess else LazyThreadPoolExecutor
        convert = self.collate_fn == self.np_collate
        with executor_cls(max_workers=self.num_workers) as e:
            for batch in e.map(self.get_batch, iter(self.batch_sampler)):
                if convert:
                    yield get_tensor(batch, self.pin_memory, self.half)
                else:
                    yield batch



In [86]:
def fake_collate(indices):
    """Return a constant CUDA batch, ignoring the samples it is given.

    Used to take data loading out of the picture: the result is always a
    `(512, 3, 32, 32)` tensor of ones and a `(512,)` long tensor of ones.
    """
    images = torch.ones(512, 3, 32, 32).cuda()
    labels = torch.ones(512).long().cuda()
    return images, labels

In [87]:
# Normalisation-only transforms (no augmentation) for this run.
tfms = tfms_from_stats(stats, sz, aug_tfms=[])
# Augmented alternative, kept for reference:
# tfms = tfms_from_stats(stats, sz, aug_tfms=[RandomCrop(sz), RandomFlip()], pad=sz//8)
# Despite the `dl` suffix this is the ImageClassifierData object; its loaders are swapped out below.
zombiedl = ImageClassifierData.from_paths(PATH, val_name='test', tfms=tfms, bs=bs, num_workers=workers)

In [88]:
# Stand-in batch sampler: 98 "batches" that each hold the index pair [1, 1], so every
# batch only touches dataset[1]. 98 presumably matches the real batch count for
# bs=512 — TODO confirm.
# `np.int` was only an alias of the builtin `int` and was removed in NumPy 1.24.
fake_sampler = np.ones((98,2), dtype=int).tolist()

In [89]:
# torch's default collate function; not used by the loaders below, which pass fake_collate.
collate_fn = torch.utils.data.DataLoader(None).collate_fn
# Because batch_sampler is supplied, batch_size and shuffle do not affect batching here:
# every batch comes from fake_sampler and is replaced by fake_collate's constant tensors.
tdl = ZombieDataLoader(zombiedl.trn_ds, batch_size=bs, shuffle=True,
                 num_workers=workers, pin_memory=True, collate_fn=fake_collate, multiprocess=False, batch_sampler=fake_sampler)
vdl = ZombieDataLoader(zombiedl.val_ds, batch_size=bs, shuffle=False,
                 num_workers=workers, pin_memory=True, collate_fn=fake_collate, multiprocess=False, batch_sampler=fake_sampler)

In [90]:
# default_collate = torch.utils.data.DataLoader(None).collate_fn
# tdl = ZombieDataLoader(zombiedl.trn_ds, batch_size=bs, shuffle=True,
#                  num_workers=workers, pin_memory=True, collate_fn=default_collate, multiprocess=False)
# vdl = ZombieDataLoader(zombiedl.val_ds, batch_size=bs, shuffle=False,
#                  num_workers=workers, pin_memory=True, collate_fn=default_collate, multiprocess=False)

In [91]:
# Count the batches by materialising the batch sampler once.
n_batches = len(list(tdl.batch_sampler))
n_batches

98

In [92]:
# Pull a single batch from the training loader and show its shapes.
x, y = next(iter(tdl))
x.shape, y.shape

(torch.Size([512, 3, 32, 32]), torch.Size([512]))

In [93]:
# Replace the data object's train/validation loaders with the ZombieDataLoader instances.
zombiedl.trn_dl = tdl
zombiedl.val_dl = vdl

In [94]:
# Build the learner on top of the data object whose loaders were just replaced.
learn = get_learner(zombiedl)

In [95]:
# Timing run only: every batch is fake_collate's constant tensors (all labels are 1),
# so the reported loss and accuracy say nothing about real CIFAR-10 performance.
%time learn.fit(lrs=1, n_cycle=1, wds=1e-4, cycle_len=2)

HBox(children=(IntProgress(value=0, description='Epoch', max=2), HTML(value='')))

epoch      trn_loss   val_loss   accuracy                    
    0      0.006953   0.0        1.0       
    1      0.000844   0.0        1.0                          

CPU times: user 48.7 s, sys: 1min 55s, total: 2min 44s
Wall time: 24.2 s


[array([0.]), 1.0]

In [85]:
# Record the PyTorch version this notebook was run with.
torch.__version__

'0.3.1.post2'