In [None]:
#default_exp data.pipeline

In [None]:
#export
from fastai_local.imports import *
from fastai_local.test import *
from fastai_local.core import *

# Transforms and Pipeline

> Low-level transform pipelines

## Convenience functions

In [None]:
# export core
def opt_call(f, fname='__call__', *args, **kwargs):
    "Look up attribute `fname` on `f` and call it with `*args, **kwargs`; call `noop` instead if `f` has no such attribute"
    # `noop` is the stand-in callable used when the attribute is missing
    target = getattr(f, fname, noop)
    return target(*args, **kwargs)

In [None]:
# Attribute exists: it is fetched from the object and called with the remaining arguments
test_eq(opt_call(operator.neg, '__call__', 2), -2)
# Attribute missing: `noop` is called instead; this check shows the first argument coming back unchanged
test_eq(opt_call(list, 'foobar', [2]), [2])

# Here the call is `list.sort(a)`, so `a` is sorted in place
a=[2,1]
opt_call(list, 'sort', a)
test_eq(a, [1,2])

## Classes

### Transform -

In [None]:
# export
@docs
class Transform():
    "A function that `encodes` if `filt` matches, and optionally `decodes`, with an optional `setup`"
    # `order` is the sort key `Pipeline.add` reads; `filt=None` means the transform applies whatever `filt` is passed
    order,filt = 0,None

    def __init__(self, encodes=None, **kwargs):
        # Only set `encodes` when a function is supplied, so a subclass-defined `encodes` method is not shadowed
        if encodes is not None: self.encodes=encodes
        # Every other keyword (e.g. `decodes`, `order`, `filt`) becomes an instance attribute
        for k,v in kwargs.items(): setattr(self, k, v)

    @classmethod
    def create(cls, f, filt=None):
        "classmethod: Turn `f` into a `Transform` unless it already is one; `filt`, if given, is set on the new transform"
        # Anything exposing `decode` is treated as transform-like and returned untouched (its `filt` is not modified)
        if hasattr(f,'decode') or isinstance(f,Transform): return f
        # Forward `filt` only when given, so the default path still builds exactly `cls(f)`
        return cls(f) if filt is None else cls(f, filt=filt)

    # True when this transform should run for the subset identified by `filt`
    def _filt_match(self, filt): return self.filt is None or self.filt==filt
    # On a `filt` mismatch both directions return `o` unchanged
    def __call__(self, o, filt=None, **kwargs): return self.encodes(o, **kwargs) if self._filt_match(filt) else o
    def decode  (self, o, filt=None, **kwargs): return self.decodes(o, **kwargs) if self._filt_match(filt) else o
    # A plain `Transform` is identified by the function it wraps; subclasses are identified by their class
    def __repr__(self): return str(self.encodes) if self.__class__==Transform else str(self.__class__)
    # Default decoding is the identity, so transforms without a `decodes` are skipped transparently when decoding
    def decodes(self, o, *args, **kwargs): return o

    _docs=dict(__call__="Call `self.encodes` unless `filt` is passed and it doesn't match `self.filt`",
              decode="Call `self.decodes` unless `filt` is passed and it doesn't match `self.filt`",
              decodes="Override to implement custom decoding")

In a transformation pipeline some steps need to be reversible - for instance, if you turn a string (such as *dog*) into an int (such as *1*) for modeling, then for display purposes you'll want to turn it back to a string again (e.g. when you have a prediction). In addition, you may wish to only run the transformation for a particular data subset, such as the training set.

`Transform` provides all this functionality. `filt` is some dataset index (e.g. provided by `DataSource`), and you provide `encodes` and optional `decodes` functions for your code. You can pass `encodes` and `decodes` functions directly to the constructor for quickly creating simple transforms.

In [None]:
def add(x, a=1):
    "Encoding step: shift `x` up by `a`"
    return x + a

def add_undo(x, a=1):
    "Decoding step: shift `x` back down by `a`"
    return x - a

# Function-style construction: `add` becomes `encodes`, and `decodes` is supplied by keyword
addt = Transform(add, decodes=add_undo)

start = 4
t = addt(start)
test_eq(t, 5)
test_eq(addt.decode(5), start)

More commonly, you'll subclass `Transform` and define `encodes` and `decodes`.

In [None]:
class _AddTfm(Transform):
    # Subclass-style transform: `encodes` adds `a`, `decodes` subtracts it again
    def encodes(self, x, a=1):
        return x + a

    def decodes(self, x, a=1):
        return x - a

addt = _AddTfm()
start = 4
t = addt(start)
test_eq(t, 5)
test_eq(addt.decode(5), start)

In [None]:
show_doc(Transform.__call__)

<h4 id="Transform.__call__" class="doc_header"><code>__call__</code><a class="source_link" data-toggle="collapse" data-target="#Transform-__call__-pytest" style="float:right; padding-right:10px">[test]</a></h4>

> <code>__call__</code>(**`o`**, **`filt`**=***`None`***, **\*\*`kwargs`**)

<div class="collapse" id="Transform-__call__-pytest"><div class="card card-body pytest_card"><a type="button" data-toggle="collapse" data-target="#Transform-__call__-pytest" class="close" aria-label="Close"><span aria-hidden="true">&times;</span></a><p>No tests found for <code>__call__</code>. To contribute a test please refer to <a href="/dev/test.html">this guide</a> and <a href="https://forums.fast.ai/t/improving-expanding-functional-tests/32929">this discussion</a>.</p></div></div>

Call `self.encodes` unless `filt` is passed and it doesn't match `self.filt`  

In [None]:
show_doc(Transform.decode)

<h4 id="Transform.decode" class="doc_header"><code>decode</code><a class="source_link" data-toggle="collapse" data-target="#Transform-decode-pytest" style="float:right; padding-right:10px">[test]</a></h4>

> <code>decode</code>(**`o`**, **`filt`**=***`None`***, **\*\*`kwargs`**)

<div class="collapse" id="Transform-decode-pytest"><div class="card card-body pytest_card"><a type="button" data-toggle="collapse" data-target="#Transform-decode-pytest" class="close" aria-label="Close"><span aria-hidden="true">&times;</span></a><p>No tests found for <code>decode</code>. To contribute a test please refer to <a href="/dev/test.html">this guide</a> and <a href="https://forums.fast.ai/t/improving-expanding-functional-tests/32929">this discussion</a>.</p></div></div>

Call `self.decodes` unless `filt` is passed and it doesn't match `self.filt`  

In [None]:
show_doc(Transform.create)

<h4 id="Transform.create" class="doc_header"><code>create</code><a class="source_link" data-toggle="collapse" data-target="#Transform-create-pytest" style="float:right; padding-right:10px">[test]</a></h4>

> <code>create</code>(**`f`**, **`filt`**=***`None`***)

<div class="collapse" id="Transform-create-pytest"><div class="card card-body pytest_card"><a type="button" data-toggle="collapse" data-target="#Transform-create-pytest" class="close" aria-label="Close"><span aria-hidden="true">&times;</span></a><p>No tests found for <code>create</code>. To contribute a test please refer to <a href="/dev/test.html">this guide</a> and <a href="https://forums.fast.ai/t/improving-expanding-functional-tests/32929">this discussion</a>.</p></div></div>

classmethod: Turn `f` into a `Transform` unless it already is one  

### Pipeline -

In [None]:
#export
class Pipeline():
    "A pipeline of transforms applied to a collection, composed and applied for encode/decode, and setup one at a time"
    def __init__(self, tfms, items=None):
        self.items,self.tfms = ListContainer(items),[]
        # Wrap anything that is not already transform-like, then register everything through `add`
        self.add([Transform.create(t) for t in listify(tfms)])

    def add(self, tfms):
        "Call `setup` on all `tfms` and append them to this pipeline"
        # Objects without an `order` attribute sort as 0; `sorted` is stable, so ties keep the given order
        for t in sorted(listify(tfms), key=lambda o: getattr(o, 'order', 0)):
            # Append first, then set up: `setup` receives the pipeline as built so far
            # (including `t` itself, excluding the transforms that are added after it)
            self.tfms.append(t)
            if hasattr(t, 'setup'): t.setup(self)

    def __call__(self, x, **kwargs): return self.composed(x, **kwargs)
    def decode(self, x, **kwargs): return self.composed(x, rev=True, fname='decode', **kwargs)
    def __eq__(self, b): return all_equal(self, b)
    def __len__(self): return len(self.items)
    def __getitem__(self, i):
        # Index the raw items first, then encode: a listy selection gives a list of results, anything else one result
        its = self.items[i]
        return [self(o) for o in its] if is_listy(its) else self(its)

    def composed(self, x, rev=False, fname='__call__', **kwargs):
        "Compose `{fname}` of all `self.tfms` (reversed if `rev`) on `x`"
        tfms = reversed(self.tfms) if rev else self.tfms
        # `opt_call` is used so that a transform lacking `fname` does not break the chain
        for f in tfms: x = opt_call(f, fname, x, **kwargs)
        return x

    def __repr__(self): return str(self.tfms)
    def delete(self, idx): del(self.tfms[idx])
    def remove(self, tfm): self.tfms.remove(tfm)

    def __getattr__(self, k):
        "Find last tfm in `self.tfms` that has attr `k`"
        # `__getattr__` only runs when normal lookup fails, so a request for `tfms` itself means the
        # instance is not initialised yet (e.g. probed during copy/unpickle). Reading `self.tfms`
        # below would then re-enter this method without end, so fail with a normal AttributeError.
        if k == 'tfms': raise AttributeError(k)
        try: return next(getattr(t,k) for t in reversed(self.tfms) if hasattr(t,k))
        except StopIteration: raise AttributeError(k) from None

In [None]:
#export
# Supplies the documentation strings for the `Pipeline` methods defined above as one-liners without a docstring
add_docs(
    Pipeline,
    __call__="Compose `__call__` of all `tfms` on `x`",
    decode="Compose `decode` of all `tfms` on `x`",
    delete="Delete transform `idx` from pipeline",
    remove="Remove `tfm` from pipeline",
)

A list of transforms is often applied in a particular order, and decoded by applying the transforms in reverse order. `Pipeline` provides this functionality, and also ensures that any `setup` methods are called, without including later transforms in those calls. NB: `setup` must be run before encoding/decoding.

Here are some simple examples:

In [None]:
def add(x, a=1): return x+a
def multiply(x, a=2): return x*a
def square(x): return x**2
def add_undo(x, a=1): return x-a
def multiply_undo(x, a=2): return x/a
# Deliberately passed out of order: `Pipeline` sorts by `order`, so encoding runs square, multiply, add
tadd  = Transform(add, decodes=add_undo, order=2)
tmult = Transform(multiply, decodes=multiply_undo, order=1)
tsqr  = Transform(square, order=0)
pipe = Pipeline([tadd,tmult,tsqr])

start = 2
t = pipe(start)
test_eq(t, ((start**2) * 2) + 1)
# `tsqr` has no custom `decodes`, so decoding only undoes the add and then the multiply
test_eq(pipe.decode(t), (t-1)/2)

ListContainer (0 items) []


Here's how we can use a transform's `setup` method (which `Pipeline` calls when the transform is added) to implement a simple category list, getting labels from a mock file list:

In [None]:
# Defined in this cell so it runs on a fresh kernel (Restart & Run All) instead of relying on a later cell
test_fns = ['dog_0.jpg','cat_0.jpg','cat_2.jpg','cat_1.jpg','dog_1.jpg']
t = ListContainer(test_fns)

In [None]:
t[0]

ListContainer (1 items) [dog_0.jpg]

In [None]:
# Example categorising transform: builds a vocab in `setup`, then maps label -> index and back
class _Cat(Transform):
    # Sorts after `lbl_`, which gets wrapped in a plain `Transform` with the default order 0
    order=1
    # `Pipeline.add` appends a transform before calling its `setup`, so `encodes` can be reached
    # before `o2i` exists; in that case the value is passed through unchanged
    def encodes(self, o): return self.o2i[o] if hasattr(self,'o2i') else o
    def decodes(self, o): return self.vocab[o]
    # NOTE: `items` is the pipeline itself (see `Pipeline.add`); presumably `uniqueify` iterates it,
    # which would yield the items as encoded by the transforms added so far -- TODO confirm
    def setup(self, items): self.vocab,self.o2i = uniqueify(items, sort=True, bidir=True)
    def show(self, o): print(self.decodes(o))

# Label of a mock file name: everything before the first underscore
def lbl_(o): return o.split('_')[0]

test_fns = ['dog_0.jpg','cat_0.jpg','cat_2.jpg','cat_1.jpg','dog_1.jpg']
tcat = _Cat()
# Given as [tcat, lbl_], but `Pipeline` sorts by `order`, so `lbl_` (order 0) runs before `tcat` (order 1)
pipe = Pipeline([tcat,lbl_], test_fns)

test_eq(tcat.vocab, ['cat','dog'])
test_eq([1,0,0,0,1], pipe)
t = list(pipe)
test_eq([1,0,0,0,1], t)
# `lbl_` has no custom `decodes`, so decoding stops at the label and the file name is not restored
test_eq(['dog','cat','cat','cat','dog'], map(pipe.decode,t))

ListContainer (5 items) [dog_0.jpg,cat_0.jpg,cat_2.jpg,cat_1.jpg,dog_1.jpg]
> [0;32m<ipython-input-18-37a8919d7ca8>[0m(22)[0;36m__getitem__[0;34m()[0m
[0;32m     20 [0;31m        [0mits[0m [0;34m=[0m [0mself[0m[0;34m.[0m[0mitems[0m[0;34m[[0m[0mi[0m[0;34m][0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m     21 [0;31m        [0mset_trace[0m[0;34m([0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m---> 22 [0;31m        [0;32mreturn[0m [0;34m[[0m[0mself[0m[0;34m([0m[0mo[0m[0;34m)[0m [0;32mfor[0m [0mo[0m [0;32min[0m [0mits[0m[0;34m][0m [0;32mif[0m [0mis_listy[0m[0;34m([0m[0mits[0m[0;34m)[0m [0;32melse[0m [0mself[0m[0;34m([0m[0mits[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m     23 [0;31m[0;34m[0m[0m
[0m[0;32m     24 [0;31m    [0;32mdef[0m [0mcomposed[0m[0;34m([0m[0mself[0m[0;34m,[0m [0mx[0m[0;34m,[0m [0mrev[0m[0;34m=[0m[0;32mFalse[0m[0;34m,[0m [0mfname[0m[0;34m=[0m[0;34m'__call__'[0m

BdbQuit: 

In [None]:
show_doc(Pipeline.__call__)

<h4 id="Pipeline.__call__" class="doc_header"><code>__call__</code><a class="source_link" data-toggle="collapse" data-target="#Pipeline-__call__-pytest" style="float:right; padding-right:10px">[test]</a></h4>

> <code>__call__</code>(**`x`**, **\*\*`kwargs`**)

<div class="collapse" id="Pipeline-__call__-pytest"><div class="card card-body pytest_card"><a type="button" data-toggle="collapse" data-target="#Pipeline-__call__-pytest" class="close" aria-label="Close"><span aria-hidden="true">&times;</span></a><p>No tests found for <code>__call__</code>. To contribute a test please refer to <a href="/dev/test.html">this guide</a> and <a href="https://forums.fast.ai/t/improving-expanding-functional-tests/32929">this discussion</a>.</p></div></div>

Compose `__call__` of all `tfms` on `x`  

In [None]:
show_doc(Pipeline.decode)

<h4 id="Pipeline.decode" class="doc_header"><code>decode</code><a class="source_link" data-toggle="collapse" data-target="#Pipeline-decode-pytest" style="float:right; padding-right:10px">[test]</a></h4>

> <code>decode</code>(**`x`**, **\*\*`kwargs`**)

<div class="collapse" id="Pipeline-decode-pytest"><div class="card card-body pytest_card"><a type="button" data-toggle="collapse" data-target="#Pipeline-decode-pytest" class="close" aria-label="Close"><span aria-hidden="true">&times;</span></a><p>No tests found for <code>decode</code>. To contribute a test please refer to <a href="/dev/test.html">this guide</a> and <a href="https://forums.fast.ai/t/improving-expanding-functional-tests/32929">this discussion</a>.</p></div></div>

Compose `decode` of all `tfms` on `x`  

In [None]:
show_doc(Pipeline.delete)

<h4 id="Pipeline.delete" class="doc_header"><code>delete</code><a class="source_link" data-toggle="collapse" data-target="#Pipeline-delete-pytest" style="float:right; padding-right:10px">[test]</a></h4>

> <code>delete</code>(**`idx`**)

<div class="collapse" id="Pipeline-delete-pytest"><div class="card card-body pytest_card"><a type="button" data-toggle="collapse" data-target="#Pipeline-delete-pytest" class="close" aria-label="Close"><span aria-hidden="true">&times;</span></a><p>No tests found for <code>delete</code>. To contribute a test please refer to <a href="/dev/test.html">this guide</a> and <a href="https://forums.fast.ai/t/improving-expanding-functional-tests/32929">this discussion</a>.</p></div></div>

Delete transform `idx` from pipeline  

In [None]:
show_doc(Pipeline.remove)

<h4 id="Pipeline.remove" class="doc_header"><code>remove</code><a class="source_link" data-toggle="collapse" data-target="#Pipeline-remove-pytest" style="float:right; padding-right:10px">[test]</a></h4>

> <code>remove</code>(**`tfm`**)

<div class="collapse" id="Pipeline-remove-pytest"><div class="card card-body pytest_card"><a type="button" data-toggle="collapse" data-target="#Pipeline-remove-pytest" class="close" aria-label="Close"><span aria-hidden="true">&times;</span></a><p>No tests found for <code>remove</code>. To contribute a test please refer to <a href="/dev/test.html">this guide</a> and <a href="https://forums.fast.ai/t/improving-expanding-functional-tests/32929">this discussion</a>.</p></div></div>

Remove `tfm` from pipeline  

In [None]:
show_doc(Pipeline.add)

<h4 id="Pipeline.add" class="doc_header"><code>add</code><a class="source_link" data-toggle="collapse" data-target="#Pipeline-add-pytest" style="float:right; padding-right:10px">[test]</a></h4>

> <code>add</code>(**`tfms`**)

<div class="collapse" id="Pipeline-add-pytest"><div class="card card-body pytest_card"><a type="button" data-toggle="collapse" data-target="#Pipeline-add-pytest" class="close" aria-label="Close"><span aria-hidden="true">&times;</span></a><p>No tests found for <code>add</code>. To contribute a test please refer to <a href="/dev/test.html">this guide</a> and <a href="https://forums.fast.ai/t/improving-expanding-functional-tests/32929">this discussion</a>.</p></div></div>

Call `setup` on all `tfms` and append them to this pipeline  

In [None]:
show_doc(Pipeline.__getattr__)

<h4 id="Pipeline.__getattr__" class="doc_header"><code>__getattr__</code><a class="source_link" data-toggle="collapse" data-target="#Pipeline-__getattr__-pytest" style="float:right; padding-right:10px">[test]</a></h4>

> <code>__getattr__</code>(**`k`**)

<div class="collapse" id="Pipeline-__getattr__-pytest"><div class="card card-body pytest_card"><a type="button" data-toggle="collapse" data-target="#Pipeline-__getattr__-pytest" class="close" aria-label="Close"><span aria-hidden="true">&times;</span></a><p>No tests found for <code>__getattr__</code>. To contribute a test please refer to <a href="/dev/test.html">this guide</a> and <a href="https://forums.fast.ai/t/improving-expanding-functional-tests/32929">this discussion</a>.</p></div></div>

Find last tfm in `self.tfms` that has attr `k`  

In [None]:
test_eq(pipe.show, tcat.show)

In [None]:
pipe.show(t[0])

dog


## Export -

In [None]:
#hide
from fastai_local.notebook.export import notebook2script
notebook2script(all_fs=True)

Converted 00_test.ipynb.
Converted 01_core.ipynb.
Converted 02_data_pipeline.ipynb.
Converted 03_data_source.ipynb.
Converted 04_data_external.ipynb.
Converted 05_data_core.ipynb.
Converted 06_pets_tutorial.ipynb.
Converted 08_vision_core.ipynb.
Converted 99_export.ipynb.
Converted 99a_export2html.ipynb.
Converted _07_data_blocks.ipynb.
Converted _09_data_blocks_tutorial_vision.ipynb.
