In [1]:
#default_exp data.pipeline

In [2]:
#export
from local.imports import *
from local.test import *
from local.core import *
from local.notebook.showdoc import show_doc

In [3]:
#hide
torch.cuda.set_device(int(os.environ.get('DEFAULT_GPU') or 0))

# Transforms and Pipeline

> Low-level transform pipelines

The classes here provide functionality for creating *partially reversible functions*, which we call `Transform`s. By "partially reversible" we mean that a transform can be `decode`d, creating a form suitable for display. This is not necessarily identical to the original form (e.g. a transform that changes a byte tensor to a float tensor does not recreate a byte tensor when decoded, since that may lose precision, and a float tensor can be displayed already.)

Classes are also provided for composing transforms and mapping them over collections. The following functionality is provided:

- A `Transform` is created with an `encodes` and potentially `decodes` function. 
- `Pipeline` is a transform which composes transforms
- `TfmdList` takes a collection and a transform, and provides an indexer (`__getitem__`) which dynamically applies the transform to the collection items.
- `Tuplify` is a special `Transform` that takes a list of lists of transforms or a list of `Pipeline`s, then applies them to the element it receives to return a tuple.

## Convenience functions

In [4]:
#export
def get_func(t, name, *args, **kwargs):
    "Look up attribute `name` on `t` (falling back to `noop`), binding `args` and `kwargs` with `partial` when any are given"
    attr = getattr(t, name, noop)
    if args or kwargs: return partial(attr, *args, **kwargs)
    # Nothing to bind: hand back the attribute itself rather than a `partial` wrapper
    return attr

This works for any kind of `t` supporting `getattr`, so a class or a module.

In [5]:
# Extra positional arguments are partial-ized: calling the result with no argument applies `neg` to 2
test_eq(get_func(operator, 'neg', 2)(), -2)
test_eq(get_func(operator.neg, '__call__')(2), -2)
# `list` has no `foobar` attribute, so the fallback is used and the input comes back unchanged
test_eq(get_func(list, 'foobar')([2]), [2])
# Keyword arguments are partial-ized as well
t = get_func(torch, 'zeros', dtype=torch.int64)(5)
test_eq(t.dtype, torch.int64)
# Works with methods looked up on a class: `list.sort` sorts `a` in place
a = [2,1]
get_func(list, 'sort')(a)
test_eq(a, [1,2])

In [6]:
#export
def show_title(o, ax=None, ctx=None):
    "Use `o` as the title of `ax` (or of `ctx` when `ax` is `None`); print `o` when both are `None`"
    target = ifnone(ax, ctx)
    if target is not None: target.set_title(o)
    else: print(o)

In [7]:
test_stdout(lambda: show_title("title"), "title")

## Func -

Transforms are built with multiple dispatch: a given function can have several methods depending on the type of the object received. This is done directly with the `multimethod` module and type annotations in `Transform`, but you can also use the following class.

In [8]:
#export
class Func():
    "Deferred lookup of attribute `name` on a type or module, optionally bound to `args` and `kwargs`"
    def __init__(self, name, *args, **kwargs):
        self.name = name
        self.args,self.kwargs = args,kwargs

    def __repr__(self): return f'sig: {self.name}({self.args}, {self.kwargs})'

    def _get(self, t):
        # Resolve `self.name` on one type/module through `get_func`
        return get_func(t, self.name, *self.args, **self.kwargs)

    def __call__(self,t):
        # A single type/module gives one function; a list-like of them gives an `L` of functions
        if not is_listy(t): return self._get(t)
        return L(t).mapped(self._get)

You can call the `Func` object on any module name or type, even a list of types. It will return the corresponding function (with a default to `noop` if nothing is found) or list of functions.

In [9]:
test_eq(Func('sqrt')(math), math.sqrt)
test_eq(Func('sqrt')(torch), torch.sqrt)

# Patch a `powx` function onto both modules so the same name can be looked up on each
@patch
def powx(x:math, a): return math.pow(x,a)
@patch
def powx(x:torch, a): return torch.pow(x,a)
# Called on a list, `Func` returns one partial-ized function per element
tst = Func('powx',a=2)([math, torch])
test_eq([f.func for f in tst], [math.powx, torch.powx])
for t in tst: test_eq(t.keywords, {'a': 2})

In [10]:
#export
class _Sig():
    "Factory behind `Sig`: `Sig.name(*args, **kwargs)` builds `Func('name', *args, **kwargs)`"
    def __getattr__(self,k):
        # Any attribute name not found normally is accepted; it becomes the `name` of the `Func`
        def _build(*args, **kwargs):
            return Func(k, *args, **kwargs)
        return _build

Sig = _Sig()

In [11]:
show_doc(Sig, name="Sig")

<h4 id="<code>Sig</code>" class="doc_header"><code>Sig</code><a href="https://github.com/fastai/fastai_docs/tree/master/dev/__main__.py#L4" class="source_link" style="float:right">[source]</a></h4>

> <code>Sig</code>(**\*`args`**, **\*\*`kwargs`**)



`Sig` is just syntax sugar to create a `Func` object more easily with the syntax `Sig.name(*args, **kwargs)`.

In [12]:
f = Sig.sqrt()
test_eq(f(math), math.sqrt)
test_eq(f(torch), torch.sqrt)

In [13]:
#export
class SelfFunc():
    "Callable that invokes method `nm` with the stored `args` and `kwargs` on whatever object it receives"
    def __init__(self, nm, *args, **kwargs):
        self.nm = nm
        self.args,self.kwargs = args,kwargs

    def __repr__(self): return f'self: {self.nm}({self.args}, {self.kwargs})'

    def _call_one(self, obj):
        # Look the method up on `obj` at call time, then invoke it with the stored arguments
        return getattr(obj, self.nm)(*self.args, **self.kwargs)

    def __call__(self, o):
        # List-like input: call the method on every element and collect the results in a plain list
        if is_listy(o): return [self._call_one(el) for el in o]
        return self._call_one(o)

The difference between `Func` and `SelfFunc` is that `Func` will generate a function when you call it on a type. On the other hand, `SelfFunc` is already a function: each time you call it on an object, it looks up the `name` attribute of that object and calls it with `args` and `kwargs`.

In [14]:
tst = SelfFunc('sqrt')
x = torch.tensor([4.])
test_eq(tst(x), torch.tensor([2.]))
assert isinstance(tst(x), Tensor)

In [15]:
#export
class _SelfFunc():
    "Factory behind `Self`: `Self.name(*args, **kwargs)` builds `SelfFunc('name', *args, **kwargs)`"
    def __getattr__(self,k):
        # Any attribute name not found normally is accepted; it becomes the `nm` of the `SelfFunc`
        def _build(*args, **kwargs):
            return SelfFunc(k, *args, **kwargs)
        return _build

Self = _SelfFunc()

In [16]:
show_doc(Self, name="Self")

<h4 id="<code>Self</code>" class="doc_header"><code>Self</code><a href="https://github.com/fastai/fastai_docs/tree/master/dev/__main__.py#L4" class="source_link" style="float:right">[source]</a></h4>

> <code>Self</code>(**\*`args`**, **\*\*`kwargs`**)



`Self` is just syntax sugar to create a `SelfFunc` object more easily with the syntax `Self.name(*args, **kwargs)`.

In [17]:
f = Self.sqrt()
x = torch.tensor([4.])
test_eq(f(x), torch.tensor([2.]))
assert isinstance(f(x), Tensor)

## Transform -

In [18]:
#export
def positional_annotations(f):
    """Annotations of the parameters of `f` that have no default value, in signature order.

    Each entry is the parameter's annotation, or `None` when it is unannotated. Parameters
    with a default value and the `**kwargs` catch-all are left out.
    """
    # Public spelling of the "no annotation / no default" sentinel
    empty = inspect.Parameter.empty
    # Identity checks: `==` would call the `__eq__` of arbitrary default values, which may
    # raise or return a non-boolean (e.g. an array default)
    return [None if p.annotation is empty else p.annotation
            for p in inspect.signature(f).parameters.values()
            if p.default is empty and p.kind is not inspect.Parameter.VAR_KEYWORD]

In [19]:
# Fixtures: unannotated parameter, parameter with a default, annotated parameter with a default
def f1(x, y:float): return x+y
def f2(a, b=2): return a
def f3(a:int, b:float=2): return a
# Unannotated parameters give `None`; parameters with a default are left out
test_eq(positional_annotations(f1), [None, float])
test_eq(positional_annotations(f2), [None])
test_eq(positional_annotations(f3), [int])

In [20]:
#export
from multimethod import multimeta,DispatchError

In [21]:
#export
class Transform(metaclass=multimeta):
    "A function that `encodes` if `filt` matches, and optionally `decodes`"
    # Class-level defaults: `order` is the key `Pipeline` sorts on, `add_before_setup` is read by
    # `Pipeline.setup`, `filt` restricts the transform to one subset (`None` means always apply),
    # and `t` holds the input type(s) recorded by `accept_types`.
    order,add_before_setup,filt,t = 0,False,None,None
    def __init__(self,encodes=None,decodes=None):
        # Functions passed explicitly win; otherwise use the attribute already on the object, or `noop`
        self.encodes = getattr(self, 'encodes', noop) if encodes is None else encodes 
        self.decodes = getattr(self, 'decodes', noop) if decodes is None else decodes
    
    # Apply `fs` (passed as `self.encodes` or `self.decodes` by the callers below) to `x`
    def _apply(self, fs, x, filt):
        # A transform restricted to one `filt` passes `x` through untouched for any other `filt`
        if self.filt is not None and self.filt!=filt: return x
        if self.t: 
            gs = self._get_func(fs, self.t)
            # Several input types but the function found does not take one argument per type:
            # look up one function per type instead
            if is_listy(self.t) and len(positional_annotations(gs)) != len(self.t):
                gs = [self._get_func(fs,t_) for t_ in self.t]
                if len(gs) == 1: gs = gs[0]
        else: gs=fs
        # A list of functions is applied element-wise and gives a tuple;
        # a single function receives the elements of `x` as separate arguments
        if is_listy(gs): return tuple(f(x_) for f,x_ in zip(gs,x))
        return gs(*L(x))

    # Pick the implementation of `f` registered for input type(s) `t`, bound to `self`
    def _get_func(self,f,t):
        # Without `__func__` there is nothing to dispatch on (e.g. a function given to `__init__`): use `f` as is
        if not hasattr(f,'__func__'): return f
        # Dispatch key: `object` stands for the `self` argument, followed by the input type(s)
        idx = (object,) + tuple(t) if is_listy(t) else (object,t)
        try: f = f.__func__[idx]
        # No implementation registered for these types: fall back to `noop`
        except DispatchError: return noop
        return partial(f,self)
    
    def accept_types(self, t): self.t = t
        # We can't create encodes/decodes here since patching might change things later
        # So we call _get_func in _apply instead

    def __call__(self, x, filt=None): return self._apply(self.encodes, x, filt)
    def decode  (self, x, filt=None): return self._apply(self.decodes, x, filt)
    def __getitem__(self, x): return self(x) # So it can be used as a `Dataset`

# Docstrings for the public methods are attached here rather than written inline
add_docs(Transform,
         __call__="Dispatch and apply the proper encodes to `x` if `filt` matches",
         decode="Dispatch and apply the proper decodes to `x` if `filt` matches",
         accept_types="Indicate the type of input received by the transform is `t`")

In a transformation pipeline some steps need to be reversible - for instance, if you turn a string (such as *dog*) into an int (such as *1*) for modeling, then for display purposes you'll want to turn it back to a string again (e.g. when you have a prediction). In addition, you may wish to only run the transformation for a particular data subset, such as the training set.

`Transform` provides all this functionality. `filt` is some dataset index (e.g. provided by `DataSource`), and you provide `encodes` and optional `decodes` functions for your code. You can pass `encodes` and `decodes` functions directly to the constructor for quickly creating simple transforms. You can also create several `encodes` or `decodes` methods for different types of objects with proper type annotations.

In [22]:
tfm = Transform(operator.neg, decodes=operator.neg)
start = 4
t = tfm(start)
test_eq(t, -4)
test_eq(t, tfm[start]) #You can use a transform as a dataset
test_eq(tfm.decode(t), start)

In [25]:
def dummy_tfm(x:float,y:float): return [x+y,y]
tfm = Transform(dummy_tfm)
tfm.accept_types([float,float])
test_eq(tfm([2,3]), [5,3])
#tfm.accept_types([int,float]) Fails for now and needs a class with encodes
#test_eq(tfm([2,3]), [2,3])

In [None]:
class _AddOne(Transform):
    # Only applied when the `filt` passed at call time equals 1
    filt=1
    def encodes(self, x): return x+1
    def decodes(self, x): return x-1

addt = _AddOne()
test_eq(addt(start,filt=1), 5)
# Different `filt`: the input is returned unchanged
test_eq(addt(start,filt=0), start)

In [None]:
show_doc(Transform.__call__)

<h4 id="<code>Transform.__call__</code>" class="doc_header"><code>Transform.__call__</code><a href="https://nbviewer.jupyter.org/github/fastai/fastai_docs/blob/master/dev/02_data_pipeline.ipynb#Transform--" class="source_link" style="float:right">[source]</a></h4>

> <code>Transform.__call__</code>(**`x`**, **`filt`**=*`None`*)

Dispatch and apply the proper encodes to `x` if `filt` matches

In [None]:
show_doc(Transform.decode)

<h4 id="<code>Transform.decode</code>" class="doc_header"><code>Transform.decode</code><a href="https://nbviewer.jupyter.org/github/fastai/fastai_docs/blob/master/dev/02_data_pipeline.ipynb#Transform--" class="source_link" style="float:right">[source]</a></h4>

> <code>Transform.decode</code>(**`x`**, **`filt`**=*`None`*)

Dispatch and apply the proper decodes to `x` if `filt` matches

In [None]:
show_doc(Transform.accept_types)

<h4 id="<code>Transform.accept_types</code>" class="doc_header"><code>Transform.accept_types</code><a href="https://nbviewer.jupyter.org/github/fastai/fastai_docs/blob/master/dev/02_data_pipeline.ipynb#Transform--" class="source_link" style="float:right">[source]</a></h4>

> <code>Transform.accept_types</code>(**`t`**)

Indicate the type of input received by the transform is `t`

At some point in the data-collection pipeline, your objects will be tuples (usually input,label). There are then different behaviors you might want your `Transform` to adopt such as:
- being applied to the tuple and returning a new tuple
- being applied to each part of the tuple
- being applied to some parts of the tuple but not all

You can control which behavior will be used with the signature of your `encodes` function. If it accepts several arguments (without defaults), then the transform will be applied on the tuple and expected to return a tuple. If your `encodes` function only accepts one argument, it will be applied on every part of the tuple. You can even control which parts of the tuple are transformed with a type annotation: the transform will only be applied to the items in the tuple that correspond to that type.

All of this is enabled by the method `accept_types`, which is called in the setup of a `Pipeline` (so out of the blue your transform object won't have this behavior). The `Pipeline` will analyze the type of objects (as given by the return annotation of any transform) and pass them along, which tells the transform it will receive a given type (or a tuple of given types).

In [None]:
#Apply on the tuple as a whole
class _Add(Transform):
    # Two arguments: the functions receive the elements of the tuple as `x` and `y`
    def encodes(self, x, y): return (x+y,y)
    def decodes(self, x, y): return (x-y,y)

addt = _Add()
# Declare a two-element input so the two-argument functions are used on the tuple as a whole
addt.accept_types([float,float])
t = addt([1,2])
test_eq(t, (3,2))
test_eq(addt.decode(t), (1,2))

In [None]:
#Apply on all part of the tuple
class _AddOne(Transform):
    # One argument: with a two-element input the functions are applied to each element separately
    def encodes(self, x): return x+1
    def decodes(self, x): return x-1

addt = _AddOne()
addt.accept_types([float,float])
t = addt([1,2])
# The element-wise results come back as a tuple
test_eq(t, (2,3))
test_eq(addt.decode(t), (1,2))

In [None]:
#Apply on all integers of the tuple
#Also note that your tuples can have more than two elements
class _AddOne(Transform):
    # Two `encodes` with different annotations: the declared input type selects which one runs
    def encodes(self, x:numbers.Integral): return x+1
    def encodes(self, x:float): return x*2
    # Only integers have a `decodes` at this point
    def decodes(self, x:numbers.Integral): return x-1

addt = _AddOne()
# Single declared type: the `float` version of `encodes` is used, whatever the runtime type of `x`
addt.accept_types(float)
start = 1
t = addt(start)
test_eq(t, 2)
# No `decodes` for `float`, so decoding leaves the value unchanged
test_eq(addt.decode(t), 2)

# One declared type per element of the tuple
addt.accept_types([float, int, float])
start = [1,2,3]
t = addt(start)
test_eq(t, (2,3,6))
# Only the element declared as `int` is decoded
test_eq(addt.decode(t), (2,2,6))

In [None]:
def transform(cls):
    "Decorator for registering a new `encodes` or `decodes` function in a transform `cls`"
    def _inner(f):
        # The function's own name picks which of `cls.encodes` / `cls.decodes` it is registered on
        if   f.__name__=='encodes': cls.encodes.register(f)
        elif f.__name__=='decodes': cls.decodes.register(f)
        else: raise Exception('Function must be "encodes" or "decodes"')
        # Hand the function back so the decorated name is not rebound to `None`
        return f
    return _inner

In [None]:
@transform(_AddOne)
def decodes(self, x:float): return x/2

In [None]:
t = addt(start)
test_eq(t, (2,3,6))
test_eq(addt.decode(t), start)

## Pipeline -

In [None]:
#export
def compose_tfms(x, tfms, func_nm='__call__', reverse=False, **kwargs):
    "Thread `x` through the `func_nm` attribute of each of `tfms` (last to first if `reverse`), using `noop` where it is missing"
    ordered = reversed(tfms) if reverse else tfms
    for tfm in ordered:
        step = getattr(tfm, func_nm, noop)
        x = step(x, **kwargs)
    return x

In [None]:
class _AddOne(Transform):
    def encodes(self, x): return x+1
    def decodes(self, x): return x-1

# Second transform has an `encodes` only (`torch.sqrt`), no `decodes` is given
tfms = [_AddOne(), Transform(torch.sqrt)]
t = compose_tfms(tensor([3.]), tfms)
test_eq(t, tensor([2.]))
# Here the `decodes` attribute of each transform is applied, in the original (non-reversed) order
test_eq(compose_tfms(t, tfms, 'decodes'), tensor([1.]))
# `reverse=True` applies the square root first, then adds one
test_eq(compose_tfms(tensor([4.]), tfms, reverse=True), tensor([3.]))

In [None]:
#export
def _get_ret(func):
    "Get the return annotation of `func`"
    ann = getattr(func,'__annotations__', None)
    if not ann: return None
    typ = ann.get('return')
    return list(typ.__args__) if getattr(typ, '_name', '')=='Tuple' else typ

In [None]:
#hide
def f1(x) -> float: return x
test_eq(_get_ret(f1), float)
def f2(x) -> Tuple[float,float]: return x
test_eq(_get_ret(f2), [float,float])

In [None]:
#export
class Pipeline():
    "A pipeline of composed (for encode/decode) transforms, setup with types"
    def __init__(self, funcs=None, t=None): 
        # Building from another `Pipeline` starts again from its raw functions
        if isinstance(funcs, Pipeline): funcs = funcs.raws
        self.raws,self.fs,self.t_show = L(funcs),[],None
        if len(self.raws) == 0: self.final_t = t
        else:
            for i,f in enumerate(self.raws.sorted(key='order')):
                if not isinstance(f,Transform): f = Transform(f)
                # Tell the transform which type(s) it receives so it can dispatch on them
                f.accept_types(t)
                self.fs.append(f)
                # Record the first type that has a `show`, and the index of the transform receiving it
                if self.t_show is None and hasattr(t, 'show'): self.t_idx,self.t_show = i,t
                # A return annotation on `encodes` becomes the type handed to the next transform
                t = _get_ret(f.encodes) or t
            # The type coming out of the last transform may be the one that shows
            if self.t_show is None and hasattr(t, 'show'): self.t_idx,self.t_show = i+1,t
            self.final_t = t
    
    def new(self, t=None): return Pipeline(self, t)
    def __repr__(self): return f"Pipeline over {self.fs}"
    
    def setup(self, items=None):
        # `self.raws` is a plain list once `setup` has run, so wrap it in `L` again: this keeps
        # `sorted(key=...)` available and lets `setup` be called more than once.
        tfms,raws,self.fs,self.raws = self.fs,L(self.raws),[],[]
        # Re-add the transforms one at a time; `add_before_setup` decides whether a transform is
        # already part of `self.fs` while its own `setup` runs.
        for t,r in zip(tfms,raws.sorted(key='order')):
            if t.add_before_setup:     self.fs.append(t) ; self.raws.append(r)
            if hasattr(t, 'setup'):    t.setup(items)
            if not t.add_before_setup: self.fs.append(t) ; self.raws.append(r)
                
    def __call__(self, o, filt=None): return compose_tfms(o, self.fs, filt=filt)
    def decode  (self, i, filt=None): return compose_tfms(i, self.fs, func_nm='decode', reverse=True, filt=filt)
    #def __getitem__(self, x): return self(x)
    #def decode_at(self, idx): return self.decode(self[idx])
    #def show_at(self, idx):   return self.show(self[idx])
    
    def show(self, o, ctx=None, filt=None, **kwargs):
        # No showable type was found: the best we can do is a full decode
        if self.t_show is None: return self.decode(o, filt=filt)
        # Decode only the transforms from `t_idx` on, then let the recorded type show the result
        o = compose_tfms(o, self.fs[self.t_idx:], func_nm='decode', reverse=True, filt=filt)
        return self.t_show.show(o, ctx=ctx, **kwargs)

add_docs(Pipeline,
         __call__="Compose `__call__` of all `tfms` on `o`",
         decode="Compose `decode` of all `tfms` on `i`",
         new="Create a new `Pipeline` with the same `tfms` and a new initial `t`",
         show="Show item `o`",
         setup="Go through the transforms in order and call their potential setup on `items`")

A list of transforms is often applied in a particular order, and decoded by applying them in the reverse order. `Pipeline` provides this functionality, and also ensures during its initialization that each transform gets the proper functions according to the type of the previous transform. If any transform provides a type with a return annotation, this type is passed along to the next transforms (until being overwritten by a new return annotation). Such a type can be useful when transforms filter depending on a given type (usually for data augmentation) or to provide a show method.

Here's some simple examples:

In [None]:
# Empty pipeline is noop
pipe = Pipeline()
test_eq(pipe(1), 1)

In [None]:
# Check a standard pipeline
class String():
    # Type used as a return annotation below; its `show` prints (or sets as a title) `str(o)`
    @staticmethod
    def show(o, ctx=None, **kwargs): return show_title(str(o), ctx=ctx)
    
class floatTfm(Transform):
    def encodes(self, x): return float(x)
    def decodes(self, x): return int(x)

float_tfm=floatTfm()
# The `-> String` annotation is what the pipeline picks up as the type to show with
def neg(x) -> String: return -x
# `neg` is its own inverse, so it is used for both encoding and decoding
neg_tfm = Transform(neg, neg)
    
pipe = Pipeline([neg_tfm, float_tfm])

start = 2
t = pipe(2)
test_eq(t, -2.0)
test_eq(type(t), float)
#test_eq(t, pipe[2])
test_eq(pipe.decode(t), start)
#show decodes up to the point of the first transform that introduced the type that shows, not included
test_stdout(lambda:pipe.show(t), '-2')

In [None]:
# Check opposite order
pipe = Pipeline([float_tfm,neg_tfm])
t = pipe(2)
test_eq(t, -2.0)
# `show` comes from String on the last transform so nothing is decoded
test_stdout(lambda:pipe.show(t), '-2.0')

### Methods

In [None]:
show_doc(Pipeline.__call__)

<h4 id="<code>Pipeline.__call__</code>" class="doc_header"><code>Pipeline.__call__</code><a href="https://nbviewer.jupyter.org/github/fastai/fastai_docs/blob/master/dev/02_data_pipeline.ipynb#Pipeline--" class="source_link" style="float:right">[source]</a></h4>

> <code>Pipeline.__call__</code>(**`o`**, **`filt`**=*`None`*)

Compose `__call__` of all `tfms` on `o`

In [None]:
show_doc(Pipeline.decode)

<h4 id="<code>Pipeline.decode</code>" class="doc_header"><code>Pipeline.decode</code><a href="https://nbviewer.jupyter.org/github/fastai/fastai_docs/blob/master/dev/02_data_pipeline.ipynb#Pipeline--" class="source_link" style="float:right">[source]</a></h4>

> <code>Pipeline.decode</code>(**`i`**, **`filt`**=*`None`*)

Compose `decode` of all `tfms` on `i`

In [None]:
show_doc(Pipeline.new)

<h4 id="<code>Pipeline.new</code>" class="doc_header"><code>Pipeline.new</code><a href="https://nbviewer.jupyter.org/github/fastai/fastai_docs/blob/master/dev/02_data_pipeline.ipynb#Pipeline--" class="source_link" style="float:right">[source]</a></h4>

> <code>Pipeline.new</code>(**`t`**=*`None`*)

Create a new [`Pipeline`](/data.pipeline.html#Pipeline)with the same `tfms` and a new initial `t`

In [None]:
show_doc(Pipeline.setup)

<h4 id="<code>Pipeline.setup</code>" class="doc_header"><code>Pipeline.setup</code><a href="https://nbviewer.jupyter.org/github/fastai/fastai_docs/blob/master/dev/02_data_pipeline.ipynb#Pipeline--" class="source_link" style="float:right">[source]</a></h4>

> <code>Pipeline.setup</code>(**`items`**=*`None`*)

Go through the transforms in order and call their potential setup on `items`

During the setup, the `Pipeline` starts with no transform and adds them one at a time, so that during its setup, each transform gets the items processed up to its point and not after. Depending on the attribute `add_before_setup`, the transform is added after the setup (default behavior) so it's not called on the items used for the setup, or before (in which case it's called on the values used for setup).

In [None]:
#hide
#Test is below with TfmdList

## TfmedList -

In [None]:
#export
@docs
class TfmdList(GetAttr):
    "A collection of `items` whose elements go through a `Pipeline` of `tfms` when indexed"
    _xtra = ['decode', '__call__', 'show']

    def __init__(self, items, tfms, do_setup=True):
        self.items = L(items)
        pipe = Pipeline(tfms)
        # The same pipeline is stored both as `default` and as `tfms`
        self.default = pipe
        self.tfms = pipe
        if do_setup: self.setup()

    def __getitem__(self, i, filt=None):
        "Transformed item(s) at `i`"
        raw = self.items[i]
        # Several indices: transform every selected item; a single index: transform that one item
        if is_iter(i): return raw.mapped(self.tfms, filt=filt)
        return self.tfms(raw, filt=filt)

    def setup(self):
        # The pipeline is set up on this object itself rather than on the raw items
        self.tfms.setup(self)

    def subset(self, idxs):
        # Same transforms over the selected items; no second setup
        return self.__class__(self.items[idxs], self.tfms, do_setup=False)

    def decode_at(self, idx, **kwargs):
        item = self[idx]
        return self.decode(item, **kwargs)

    def show_at(self, idx, **kwargs):
        item = self[idx]
        return self.show(item, **kwargs)

    def __eq__(self, b): return all_equal(self, b)
    def __len__(self): return len(self.items)
    def __iter__(self): return (self[idx] for idx in range_of(self))
    def __repr__(self): return f"{self.__class__.__name__}: {self.items}\ntfms - {self.tfms}"

    _docs = {'setup': "Transform setup with self",
             'decode_at': "Decoded item at `idx`",
             'show_at': "Show item at `idx`",
             'subset': "New `TfmdList` that only includes items at `idxs`"}

`tfms` can either be a `Pipeline` or a list of transforms.

In [None]:
tl = TfmdList([1,2,3], [neg_tfm, float_tfm])
t = tl[1]
test_eq(t, -2.0)
test_eq(type(t), float)
test_eq(tl.decode_at(1), 2)
test_eq(tl.decode(t), 2)
test_stdout(lambda: tl.show_at(2), '-3')
tl

TfmdList: (#3) [1,2,3]
tfms - Pipeline over [<__main__.Transform object at 0x7f415c9e1b00>, <__main__.floatTfm object at 0x7f415c9e1208>]

In [None]:
p2 = tl.subset([0,2])
test_eq(p2, [-1.,-3.])

Here's how we can use `TfmdList.setup` to implement a simple category list, getting labels from a mock file list:

In [None]:
class _Cat(Transform):
    # `order = 1` places it after `_lbl` in the pipeline (see the repr printed below)
    order = 1
    def __init__(self, subset_idx=None): self.subset_idx = subset_idx
    def encodes(self, o): return self.o2i[o]
    def decodes(self, o): return self.vocab[o]
    def setup(self, items): 
        # Optionally build the vocab from a subset of the items only
        if self.subset_idx is not None: items = items.subset(self.subset_idx)
        # presumably `vocab` is the sorted unique labels and `o2i` the label -> index map; see `uniqueify`
        self.vocab,self.o2i = uniqueify(items, sort=True, bidir=True)

# The label is the part of the file name before the first underscore
def _lbl(o) -> String: return o.split('_')[0]

test_fns = ['dog_0.jpg','cat_0.jpg','cat_2.jpg','cat_1.jpg','dog_1.jpg']
tcat = _Cat()
tl = TfmdList(test_fns, [tcat,_lbl])

test_eq(tcat.vocab, ['cat','dog'])
test_eq([1,0,0,0,1], tl)
test_eq(1, tl[-1])
test_eq([1,0], tl[0,1])
t = list(tl)
test_eq([1,0,0,0,1], t)
test_eq(['dog','cat','cat','cat','dog'], map(tl.decode,t))
test_stdout(lambda:tl.show_at(0), "dog")
tl

TfmdList: (#5) [dog_0.jpg,cat_0.jpg,cat_2.jpg,cat_1.jpg,dog_1.jpg]
tfms - Pipeline over [<__main__.Transform object at 0x7f415c9ec6a0>, <__main__._Cat object at 0x7f415c9ec550>]

In [None]:
tcat = _Cat([0,1,2])
tl = TfmdList(test_fns, [tcat,_lbl])

In [None]:
#hide
#Test of add_before_setup
class _AddSome(Transform):
    # Adds `self.a`, which starts at 2 and is replaced in `setup` by the mean of the items seen there
    def __init__(self):   self.a = 2
    def encodes(self, x): return x+self.a
    def decodes(self, x): return x-self.a
    def setup(self, items): self.a = tensor(items).float().mean().item()
        
tl1 = TfmdList([1,2,3,4], _AddSome())
test_eq(tl1.tfms.fs[0].a, 2.5) #Setup on the raw items, mean is 2.5

# NOTE: this changes the class attribute, so it affects every `_AddSome` created afterwards
_AddSome.add_before_setup = True
tl1 = TfmdList([1,2,3,4], _AddSome())
test_eq(tl1.tfms.fs[0].a, 4.5) #Setup on the tfmed items, mean is 4.5

### Methods

In [None]:
show_doc(TfmdList.__getitem__)

<h4 id="<code>TfmdList.__getitem__</code>" class="doc_header"><code>TfmdList.__getitem__</code><a href="https://nbviewer.jupyter.org/github/fastai/fastai_docs/blob/master/dev/02_data_pipeline.ipynb#TfmedList--" class="source_link" style="float:right">[source]</a></h4>

> <code>TfmdList.__getitem__</code>(**`i`**, **`filt`**=*`None`*)

Transformed item(s) at `i`

In [None]:
show_doc(TfmdList.decode_at)

<h4 id="<code>TfmdList.decode_at</code>" class="doc_header"><code>TfmdList.decode_at</code><a href="https://nbviewer.jupyter.org/github/fastai/fastai_docs/blob/master/dev/02_data_pipeline.ipynb#TfmedList--" class="source_link" style="float:right">[source]</a></h4>

> <code>TfmdList.decode_at</code>(**`idx`**, **\*\*`kwargs`**)

Decoded item at `idx`

In [None]:
test_eq(tl.decode_at(1),tl.decode(tl[1]))

In [None]:
show_doc(TfmdList.show_at)

<h4 id="<code>TfmdList.show_at</code>" class="doc_header"><code>TfmdList.show_at</code><a href="https://nbviewer.jupyter.org/github/fastai/fastai_docs/blob/master/dev/02_data_pipeline.ipynb#TfmedList--" class="source_link" style="float:right">[source]</a></h4>

> <code>TfmdList.show_at</code>(**`idx`**, **\*\*`kwargs`**)

Show item at `idx`

In [None]:
test_stdout(lambda: tl.show_at(1), 'cat')

In [None]:
show_doc(TfmdList.subset)

<h4 id="<code>TfmdList.subset</code>" class="doc_header"><code>TfmdList.subset</code><a href="https://nbviewer.jupyter.org/github/fastai/fastai_docs/blob/master/dev/02_data_pipeline.ipynb#TfmedList--" class="source_link" style="float:right">[source]</a></h4>

> <code>TfmdList.subset</code>(**`idxs`**)

New [`TfmdList`](/data.pipeline.html#TfmdList) that only includes items at `idxs`

## TfmdDS -

In [None]:
#export
class TfmdDS(TfmdList):
    # One `TfmdList` is built per entry of `tfms`; indexing gathers one value from each of them
    # and passes the resulting tuple through the `tuple_tfms` pipeline.
    def __init__(self, items, tfms=None, tuple_tfms=None, do_setup=True):
        if tfms is None: tfms = [None]
        self.items,self.tfms = items,tfms
        self.tfmd_its = [TfmdList(items, t, do_setup=do_setup) for t in tfms]
        # The tuple pipeline starts from the final type of each per-element pipeline
        self.tuple_tfms = Pipeline(tuple_tfms, t=[it.tfms.final_t for it in self.tfmd_its])
        if do_setup: self.setup()
        
    def __getitem__(self, i, filt=None):
        its = [it.__getitem__(i, filt=filt) for it in self.tfmd_its]
        # Several indices: regroup the per-element results into one tuple per item first
        if is_iter(i): return L(zip(*L(its))).mapped(self.tuple_tfms, filt=filt)
        return self.tuple_tfms(its, filt=filt)
    
    def decode(self, o, filt=None):
        # Undo the tuple-level transforms first, then each element with its own pipeline
        o = self.tuple_tfms.decode(o, filt=filt)
        return [it.decode(o_, filt=filt) for o_,it in zip(o,self.tfmd_its)]
    
    def show(self, o, ctx=None, filt=None, **kwargs):
        # `filt` is forwarded on every path so that showing decodes with the same `filt` as `decode`
        if self.tuple_tfms.t_show is not None: return self.tuple_tfms.show(o, ctx=ctx, filt=filt, **kwargs)
        o = self.tuple_tfms.decode(o, filt=filt)
        # Each element is shown by its own pipeline; the returned `ctx` is passed on to the next one
        for o_,it in zip(o,self.tfmd_its): ctx = it.show(o_, ctx=ctx, filt=filt, **kwargs)
        return ctx
    
    def decode_batch(self, b, filt=None):
        # Regroup `b` (one sequence per tuple position) into one tuple per item before decoding
        transp = L(zip(*L(b)))
        return transp.mapped(self.decode, filt=filt).zipped()

    def setup(self): self.tuple_tfms.setup(self)
        
    def subset(self, idxs): 
        # `items` is stored as given: a plain list or tuple cannot be indexed with a list of
        # indices, so wrap those in `L` first (other containers are indexed as before)
        items = L(self.items) if isinstance(self.items, (list,tuple)) else self.items
        return self.__class__(items[idxs], self.tfms, self.tuple_tfms, do_setup=False)
    
    def __repr__(self): 
        return f"{self.__class__.__name__}: {self.items}\ntfms - {self.tfms}\ntuple tfms - {self.tuple_tfms}"

In [None]:
#export 
# Docstrings for `TfmdDS` are attached here; parameter names match the method signatures
add_docs(TfmdDS,
         "A `Dataset` created from raw `items` by calling each element of `tfms` on them",
         __getitem__="Call all `tfms` on `items[i]` then all `tuple_tfms` on the result",
         decode="Compose `decode` of all `tuple_tfms` then all `tfms` on `o`",
         show="Show item `o` in `ctx`",
         decode_batch="Call `self.decode` on all elements of `b`",
         setup="Go through the transforms in order and call their potential setup on `items`",
         subset="New `TfmdDS` that only includes items at `idxs`")

`tfms` is a list of objects that can be:
- one transform
- a list of transforms
- a `Pipeline`

In [None]:
class _TNorm(Transform):
    # Normalizes with the mean `m` and standard deviation `s` computed in `setup`;
    # both attributes only exist once `setup` has run
    #def __init__(self): self.m,self.s = 0,1
    def encodes(self, o): return (o-self.m)/self.s
    def decodes(self, o): return (o*self.s)+self.m
    def setup(self, items):
        its = tensor(items).float()
        self.m,self.s = its.mean(),its.std()

In [None]:
items = [1,2,3,4]
tds = TfmdDS(items, [neg_tfm, [neg_tfm,_TNorm()]])
x,y = zip(*tds)

In [None]:
tds[[0,1,2]]

(#3) [(#2) [-1,tensor(1.1619)],(#2) [-2,tensor(0.3873)],(#2) [-3,tensor(-0.3873)]]

In [None]:
y

(tensor(1.1619), tensor(0.3873), tensor(-0.3873), tensor(-1.1619))

In [None]:
items = [1,2,3,4]
tds = TfmdDS(items, [neg_tfm, [neg_tfm,_TNorm()]])
x,y = zip(*tds)
test_close(tensor(y).mean(), 0)
test_close(tensor(y).std(), 1)
test_eq(x, [-1,-2,-3,-4])
test_stdout(lambda:tds.show_at(1), '-2\ntensor(-2.)')

In [None]:
# Create a "batch"
b = list(zip(*tds))
bd = tds.decode_batch(b)

test_eq(len(bd),2)
test_eq(bd[0],items)
test_eq(bd[1],items)
test_eq(type(bd[1][0]),Tensor)
print('b ',b)
print('bd',bd)

b  [(-1, -2, -3, -4), (tensor(1.1619), tensor(0.3873), tensor(-0.3873), tensor(-1.1619))]
bd (#2) [(#4) [1,2,3,4],(#4) [tensor(1.),tensor(2.),tensor(3.),tensor(4.)]]


In [None]:
#hide
#Test if show at the tuple level interrupts decoding
class DoubleString():
    # Type whose `show` prints the two elements of a pair on one line
    @staticmethod
    def show(o, ctx=None, **kwargs): print(o[0],o[1])

class _DummyTfm(Transform):
    # Tuple-level transform: its return annotation makes `DoubleString` the type that shows
    def encodes(self, x,y)->DoubleString: return [x,y]

items = [1,2,3,4]
tds = TfmdDS(items, [neg_tfm, neg_tfm], _DummyTfm())
# The values are shown still negated, i.e. without being decoded by the per-element pipelines
test_stdout(lambda: tds.show_at(0), "-1 -1")

In [None]:
show_doc(TfmdDS.__getitem__)

<h4 id="<code>TfmdDS.__getitem__</code>" class="doc_header"><code>TfmdDS.__getitem__</code><a href="https://nbviewer.jupyter.org/github/fastai/fastai_docs/blob/master/dev/02_data_pipeline.ipynb#TfmdDS--" class="source_link" style="float:right">[source]</a></h4>

> <code>TfmdDS.__getitem__</code>(**`i`**, **`filt`**=*`None`*)

Call all `tfms` on `items[i]` then all `tuple_tfms` on the result

In [None]:
show_doc(TfmdDS.decode)

<h4 id="<code>TfmdDS.decode</code>" class="doc_header"><code>TfmdDS.decode</code><a href="https://nbviewer.jupyter.org/github/fastai/fastai_docs/blob/master/dev/02_data_pipeline.ipynb#TfmdDS--" class="source_link" style="float:right">[source]</a></h4>

> <code>TfmdDS.decode</code>(**`o`**, **`filt`**=*`None`*)

Compose `decode` of all `tuple_tfms` then all `tfms` on `i`

In [None]:
show_doc(TfmdDS.decode_at)

<h4 id="<code>TfmdList.decode_at</code>" class="doc_header"><code>TfmdList.decode_at</code><a href="https://nbviewer.jupyter.org/github/fastai/fastai_docs/blob/master/dev/02_data_pipeline.ipynb#TfmedList--" class="source_link" style="float:right">[source]</a></h4>

> <code>TfmdList.decode_at</code>(**`idx`**, **\*\*`kwargs`**)

Decoded item at `idx`

In [None]:
show_doc(TfmdDS.decode_batch)

<h4 id="<code>TfmdDS.decode_batch</code>" class="doc_header"><code>TfmdDS.decode_batch</code><a href="https://nbviewer.jupyter.org/github/fastai/fastai_docs/blob/master/dev/02_data_pipeline.ipynb#TfmdDS--" class="source_link" style="float:right">[source]</a></h4>

> <code>TfmdDS.decode_batch</code>(**`b`**, **`filt`**=*`None`*)

Call `self.decode` on all elements of `b`

In [None]:
show_doc(TfmdDS.show)

<h4 id="<code>TfmdDS.show</code>" class="doc_header"><code>TfmdDS.show</code><a href="https://nbviewer.jupyter.org/github/fastai/fastai_docs/blob/master/dev/02_data_pipeline.ipynb#TfmdDS--" class="source_link" style="float:right">[source]</a></h4>

> <code>TfmdDS.show</code>(**`o`**, **`ctx`**=*`None`*, **`filt`**=*`None`*, **\*\*`kwargs`**)

Show item `o` in `ctx`

In [None]:
show_doc(TfmdDS.show_at)

<h4 id="<code>TfmdList.show_at</code>" class="doc_header"><code>TfmdList.show_at</code><a href="https://nbviewer.jupyter.org/github/fastai/fastai_docs/blob/master/dev/02_data_pipeline.ipynb#TfmedList--" class="source_link" style="float:right">[source]</a></h4>

> <code>TfmdList.show_at</code>(**`idx`**, **\*\*`kwargs`**)

Show item at `idx`

In [None]:
show_doc(TfmdDS.setup)

<h4 id="<code>TfmdDS.setup</code>" class="doc_header"><code>TfmdDS.setup</code><a href="https://nbviewer.jupyter.org/github/fastai/fastai_docs/blob/master/dev/02_data_pipeline.ipynb#TfmdDS--" class="source_link" style="float:right">[source]</a></h4>

> <code>TfmdDS.setup</code>()

Go through the transforms in order and call their potential setup on `items`

In [None]:
show_doc(TfmdDS.subset)

<h4 id="<code>TfmdDS.subset</code>" class="doc_header"><code>TfmdDS.subset</code><a href="https://nbviewer.jupyter.org/github/fastai/fastai_docs/blob/master/dev/02_data_pipeline.ipynb#TfmdDS--" class="source_link" style="float:right">[source]</a></h4>

> <code>TfmdDS.subset</code>(**`idxs`**)

New [`TfmdDS`](/data.pipeline.html#TfmdDS) that only includes items at `idxs`

## Export -

In [None]:
#hide
from local.notebook.export import notebook2script
notebook2script(all_fs=True)

Converted 00_test.ipynb.
Converted 01_core.ipynb.
Converted 02_data_pipeline.ipynb.
Converted 03_data_external.ipynb.
Converted 04_data_core.ipynb.
Converted 05_data_source.ipynb.
Converted 06_vision_core.ipynb.
Converted 07_pets_tutorial-meta.ipynb.
Converted 07_pets_tutorial-oo.ipynb.
Converted 07_pets_tutorial-oo1.ipynb.
Converted 07_pets_tutorial-oo2-meta.ipynb.
Converted 07_pets_tutorial.ipynb.
Converted 08_augmentation.ipynb.
Converted 10_layers.ipynb.
Converted 11_optimizer.ipynb.
Converted 12_learner.ipynb.
Converted 13_callback_schedule.ipynb.
Converted 14_callback_hook.ipynb.
Converted 15_callback_progress.ipynb.
Converted 16_callback_tracker.ipynb.
Converted 17_callback_fp16.ipynb.
Converted 90_notebook_core.ipynb.
Converted 91_notebook_export.ipynb.
Converted 92_notebook_showdoc.ipynb.
Converted 93_notebook_export2html.ipynb.
Converted 94_index.ipynb.
Converted 95_synth_learner.ipynb.
Converted tmp_tensor_inherit.ipynb.
