In [None]:
#default_exp data.source

In [None]:
#export
from fastai_local.imports import *
from fastai_local.test import *
from fastai_local.core import *
from fastai_local.data.pipeline import *

# Data source
> Base container for all the items

## Convenience functions

In [None]:
# export
def coll_repr(c, max=10):
    """String repr of up to `max` items of (possibly lazy) collection `c`

    `c` must support `len`. Only the first `max` items are iterated and converted
    with `str`; '...' is appended when `c` holds more than `max` items. Pass
    `max=None` to render every item.

    The default is 10 because that is the number of items this function has always
    displayed (the former `max=1000` default was never read).
    """
    n = len(c)
    # `islice` stops after `max` items, so a lazy `c` is never fully consumed
    shown = ','.join(itertools.islice(map(str, c), max))
    suffix = '...' if max is not None and n > max else ''
    return f'({n} items) [{shown}{suffix}]'

In [None]:
# Only the first ten items are rendered; the trailing '...' marks the truncation
test_eq(
    coll_repr(range(1000)),
    '(1000 items) [0,1,2,3,4,5,6,7,8,9...]',
)

## ListContainer -

In [None]:
#export
class ListContainer():
    "List-like wrapper around `items` that also accepts a list of indices or a boolean mask as index"

    def __init__(self, items):
        # `listify` presumably coerces the input to a list -- confirm in core
        self.items = listify(items)

    def __len__(self):
        return len(self.items)

    def __iter__(self):
        return iter(self.items)

    def __getitem__(self, idx):
        # Anything `is_iter` rejects is handed straight to the underlying `items`
        if not is_iter(idx):
            return self.items[idx]
        # Iterable index: `mask2idxs` yields the positions to pick
        # (a boolean mask is turned into the positions of its `True` entries)
        return [self.items[pos] for pos in mask2idxs(idx)]

    def __setitem__(self, i, o):
        self.items[i] = o

    def __delitem__(self, i):
        del self.items[i]

    def __eq__(self, b):
        # Delegates to `all_equal`, so plain lists with the same contents compare equal
        return all_equal(b, self)

    def __repr__(self):
        cls_name = self.__class__.__name__
        return f'{cls_name} {coll_repr(self)}'

In [None]:
t = ListContainer(range(10))

# Equality compares contents, so a plain list with the same values matches
test_eq(t, list(range(10)))
test_ne(t, list(range(11)))

# Index with a list of positions
test_eq(t[[1, 2]], [1, 2])

# Index with a boolean mask: only position 8 is selected
test_eq(t[[False] * 8 + [True, False]], [8])

# A scalar tensor is accepted as a single index
test_eq(t[tensor(3)], 3)

## DataSource -

In [None]:
# export
class DataSource():
    "Applies a `Pipeline` of `tfms` to filtered subsets of `items`"
    def __init__(self, items, tfms=noop, filts=None):
        # Without explicit filters there is a single subset spanning every item
        if filts is None: filts = [range_of(items)]
        # Each filter (list of indices or boolean mask) is stored as a container of indices
        self.filts = listify(ListContainer(mask2idxs(filt)) for filt in filts)
        self.items,self.tfm = ListContainer(items),Pipeline(tfms)
        # Setup runs last, once `items` and `filts` are available on `self`
        self.tfm.setup(self)

    # The one-line methods below get their docstrings from the `add_docs` call after the class
    def __len__(self): return len(self.filts)
    def len(self, filt=0): return len(self.filts[filt])
    def __getitem__(self, i): return _DsrcSubset(self, i)
    def decode(self, o, filt=0, **kwargs): return self.tfm.decode(o, filt=filt, **kwargs)
    def decoded(self, idx, filt=0): return self.decode(self.get(idx,filt), filt)
    def __iter__(self): return (self[i] for i in range_of(self))
    # Anything that is not a `DataSource` is wrapped in one (single subset, no transforms) before comparing
    def __eq__(self,b): return all_equal(b if isinstance(b,DataSource) else DataSource(b),self)
    def show(self, o, filt=0, **kwargs): return self.tfm.show(self.decode(o, filt), **kwargs)

    def get(self, idx, filt=0):
        "Value(s) at `idx` from filtered subset `filt`"
        it = self.items[self.filts[filt][idx]]
        # Decide between "one item" and "many items" from the *index*, using the same
        # `is_iter` test as `ListContainer.__getitem__`. Looking at the fetched item
        # instead would wrongly split a single item that is itself a list/tuple and
        # run the pipeline on each of its elements.
        if is_iter(idx) or isinstance(idx, slice):
            return [self.tfm(o, filt=filt) for o in it]
        return self.tfm(it, filt=filt)

    def __repr__(self):
        # Header line with the class name, then one `index: contents` line per subset
        res = f'{self.__class__.__name__}\n'
        return res + '\n'.join(f'{i}: {coll_repr(o)}' for i,o in enumerate(self))

    def decode_batch(self, b, filt=0):
        "Decode a batch of `x,y` (i.e. from a `DataLoader`), decoding as subset `filt`"
        # Transpose the batch into per-sample tuples, decode each one, then transpose back.
        # `filt` is forwarded so batches from subsets other than 0 can be decoded too.
        d = [self.decode(o, filt=filt) for o in zip(*b)]
        return list(zip(*d))

    @property
    def train(self):
        "Filtered subset 0"
        return self[0]

    @property
    def valid(self):
        "Filtered subset 1"
        return self[1]

In [None]:
# export
# Attach docstrings to the `DataSource` methods that were defined as one-liners
add_docs(DataSource, **{
    '__len__':     "Number of filtered subsets",
    'len':         "`len` of subset `filt`",
    '__getitem__': "Filtered subset `i`",
    'decode':      "Decode `o` passing `filt`",
    'decoded':     "Decoded version of `get`",
    '__iter__':    "Iterator for each filtered subset",
    'show':        "Call `tfm.show` on decoded `o`",
})

In [None]:
#export
class _DsrcSubset:
    "One filtered subset `filt` of `dsrc`; item access, decoding and showing are forwarded to `dsrc`"

    def __init__(self, dsrc, filt):
        self.dsrc = dsrc
        self.filt = filt

    def __len__(self):
        return self.dsrc.len(self.filt)

    def __getitem__(self, i):
        return self.dsrc.get(i, self.filt)

    def __iter__(self):
        return (self[pos] for pos in range_of(self))

    def __eq__(self, b):
        return all_equal(b, self)

    def __repr__(self):
        return coll_repr(self)

    def decode(self, o):
        return self.dsrc.decode(o, self.filt)

    def show(self, o, **kwargs):
        return self.dsrc.show(o, self.filt, **kwargs)

    def show_at(self, i, **kwargs):
        # Fetch item `i` of this subset, then show it
        item = self[i]
        return self.show(item, **kwargs)

A `DataSource` provides filtering and transformation capabilities to a list of items. If you don't pass any filters or transforms, it simply provides a single subset with the same behavior as a `ListContainer`.

In [None]:
inp = [0, 1, 2, 3, 4]
dsrc = DataSource(inp)

# No filters, so equal to input items
test_eq(dsrc, inp)
# Only one subset
test_eq(list(dsrc[0]), inp)
test_ne(dsrc, [0, 1, 2, 3, 5])

# Retrieve one item (subset 0 is the default)
test_eq(dsrc.get(2), 2)
# Retrieve two items by index
test_eq(dsrc.get([1, 2]), [1, 2])
# Retrieve two items by mask
mask = [True, False, False, True, False]
test_eq(dsrc.get(mask), [0, 3])

dsrc

DataSource
0: (5 items) [0,1,2,3,4]

In [None]:
dsrc.train

(5 items) [0,1,2,3,4]

Passing `filts` allows you to create multiple subsets.

In [None]:
# filts can be indices
index_filts = [tensor([0, 2]), [1, 3, 4]]
dsrc = DataSource(range(5), filts=index_filts)
test_eq(list(dsrc[0]), [0, 2])
test_eq(list(dsrc[1]), [1, 3, 4])
# item 2 of subset 1, through the data source and through the subset
test_eq(dsrc.get(2, filt=1), 4)
test_eq(dsrc[1][2], 4)

# filts can be boolean masks (masks don't have to be disjoint)
filts = [
    [False, True, True, False, True],
    [True, False, False, True, True],
]
dsrc = DataSource(range(5), filts=filts)
test_eq(list(dsrc[0]), [1, 2, 4])
test_eq(list(dsrc[1]), [0, 3, 4])
dsrc

DataSource
0: (3 items) [1,2,4]
1: (3 items) [0,3,4]

Pass `tfms` to have transformations applied before returning items.

In [None]:
# apply transforms to all items
# Two transforms applied one after the other: double, then add one
tfms = [lambda x: x*2, lambda x: x+1]
filts = [[1, 2], [0, 3, 4]]
dsrc = DataSource(range(5), tfms=tfms, filts=filts)

test_eq(list(dsrc[0]), [3, 5])
test_eq(list(dsrc[1]), [1, 7, 9])
# A mask indexes within the subset: it selects the second item of subset 0
test_eq(dsrc.get([False, True], filt=0), [5])

The subset index (`filt`) is also passed to your transforms, so a `Transform` created with a `filt` argument is only applied to the subset whose index matches.

In [None]:
# only transform subset 1
class Tfm_(Transform):
    "Doubles on encode, halves on decode, and renders an item as a bullet line"
    def encodes(self, x):
        return x*2

    def decodes(self, x):
        return x//2

    def show(self, x):
        return f" * {x}"

# Restrict the transform to subset 1; subset 0 is returned untouched
tfm = Tfm_(filt=1)
dsrc = DataSource(range(5), tfms=tfm, filts=[[1, 2], [0, 3, 4]])

test_eq(list(dsrc[0]), [1, 2])
test_eq(list(dsrc[1]), [0, 6, 8])
test_eq(dsrc.get([False, True], filt=0), [2])

In [None]:
show_doc(DataSource.get)

<h4 id="DataSource.get" class="doc_header"><code>get</code><a class="source_link" data-toggle="collapse" data-target="#DataSource-get-pytest" style="float:right; padding-right:10px">[test]</a></h4>

> <code>get</code>(**`idx`**, **`filt`**=***`0`***)

<div class="collapse" id="DataSource-get-pytest"><div class="card card-body pytest_card"><a type="button" data-toggle="collapse" data-target="#DataSource-get-pytest" class="close" aria-label="Close"><span aria-hidden="true">&times;</span></a><p>No tests found for <code>get</code>. To contribute a test please refer to <a href="/dev/test.html">this guide</a> and <a href="https://forums.fast.ai/t/improving-expanding-functional-tests/32929">this discussion</a>.</p></div></div>

Value(s) at `idx` from filtered subset `filt`  

`idx` can be an int, or list of ints, or a boolean mask.

In [None]:
dsrc.get([False,True]), dsrc.get([1]), dsrc.get(1)

([2], [2], 2)

In [None]:
show_doc(DataSource.decoded)

<h4 id="DataSource.decoded" class="doc_header"><code>decoded</code><a class="source_link" data-toggle="collapse" data-target="#DataSource-decoded-pytest" style="float:right; padding-right:10px">[test]</a></h4>

> <code>decoded</code>(**`idx`**, **`filt`**=***`0`***)

<div class="collapse" id="DataSource-decoded-pytest"><div class="card card-body pytest_card"><a type="button" data-toggle="collapse" data-target="#DataSource-decoded-pytest" class="close" aria-label="Close"><span aria-hidden="true">&times;</span></a><p>No tests found for <code>decoded</code>. To contribute a test please refer to <a href="/dev/test.html">this guide</a> and <a href="https://forums.fast.ai/t/improving-expanding-functional-tests/32929">this discussion</a>.</p></div></div>

Decoded version of `get`  

In [None]:
# Item 1 of subset 1: `get` returns the transformed value, `decoded` the decoded one
test_eq(dsrc.get(1, filt=1), 6)
test_eq(dsrc.decoded(1, filt=1), 3)

In [None]:
show_doc(DataSource.__getitem__)

<h4 id="DataSource.__getitem__" class="doc_header"><code>__getitem__</code><a class="source_link" data-toggle="collapse" data-target="#DataSource-__getitem__-pytest" style="float:right; padding-right:10px">[test]</a></h4>

> <code>__getitem__</code>(**`i`**)

<div class="collapse" id="DataSource-__getitem__-pytest"><div class="card card-body pytest_card"><a type="button" data-toggle="collapse" data-target="#DataSource-__getitem__-pytest" class="close" aria-label="Close"><span aria-hidden="true">&times;</span></a><p>No tests found for <code>__getitem__</code>. To contribute a test please refer to <a href="/dev/test.html">this guide</a> and <a href="https://forums.fast.ai/t/improving-expanding-functional-tests/32929">this discussion</a>.</p></div></div>

Filtered subset `i`  

In [None]:
dsrc[1]

(3 items) [0,6,8]

In [None]:
show_doc(DataSource.__len__)

<h4 id="DataSource.__len__" class="doc_header"><code>__len__</code><a class="source_link" data-toggle="collapse" data-target="#DataSource-__len__-pytest" style="float:right; padding-right:10px">[test]</a></h4>

> <code>__len__</code>()

<div class="collapse" id="DataSource-__len__-pytest"><div class="card card-body pytest_card"><a type="button" data-toggle="collapse" data-target="#DataSource-__len__-pytest" class="close" aria-label="Close"><span aria-hidden="true">&times;</span></a><p>No tests found for <code>__len__</code>. To contribute a test please refer to <a href="/dev/test.html">this guide</a> and <a href="https://forums.fast.ai/t/improving-expanding-functional-tests/32929">this discussion</a>.</p></div></div>

Number of filtered subsets  

In [None]:
len(dsrc)

2

In [None]:
show_doc(DataSource.__iter__)

<h4 id="DataSource.__iter__" class="doc_header"><code>__iter__</code><a class="source_link" data-toggle="collapse" data-target="#DataSource-__iter__-pytest" style="float:right; padding-right:10px">[test]</a></h4>

> <code>__iter__</code>()

<div class="collapse" id="DataSource-__iter__-pytest"><div class="card card-body pytest_card"><a type="button" data-toggle="collapse" data-target="#DataSource-__iter__-pytest" class="close" aria-label="Close"><span aria-hidden="true">&times;</span></a><p>No tests found for <code>__iter__</code>. To contribute a test please refer to <a href="/dev/test.html">this guide</a> and <a href="https://forums.fast.ai/t/improving-expanding-functional-tests/32929">this discussion</a>.</p></div></div>

Iterator for each filtered subset  

In [None]:
for o in dsrc: print(f" * {o}")

 * (2 items) [1,2]
 * (3 items) [0,6,8]


In [None]:
show_doc(DataSource.len)

<h4 id="DataSource.len" class="doc_header"><code>len</code><a class="source_link" data-toggle="collapse" data-target="#DataSource-len-pytest" style="float:right; padding-right:10px">[test]</a></h4>

> <code>len</code>(**`filt`**=***`0`***)

<div class="collapse" id="DataSource-len-pytest"><div class="card card-body pytest_card"><a type="button" data-toggle="collapse" data-target="#DataSource-len-pytest" class="close" aria-label="Close"><span aria-hidden="true">&times;</span></a><p>No tests found for <code>len</code>. To contribute a test please refer to <a href="/dev/test.html">this guide</a> and <a href="https://forums.fast.ai/t/improving-expanding-functional-tests/32929">this discussion</a>.</p></div></div>

`len` of subset `filt`  

In [None]:
[dsrc.len(i) for i in range_of(dsrc)]

[2, 3]

In [None]:
show_doc(DataSource.decode)

<h4 id="DataSource.decode" class="doc_header"><code>decode</code><a class="source_link" data-toggle="collapse" data-target="#DataSource-decode-pytest" style="float:right; padding-right:10px">[test]</a></h4>

> <code>decode</code>(**`o`**, **`filt`**=***`0`***, **\*\*`kwargs`**)

<div class="collapse" id="DataSource-decode-pytest"><div class="card card-body pytest_card"><a type="button" data-toggle="collapse" data-target="#DataSource-decode-pytest" class="close" aria-label="Close"><span aria-hidden="true">&times;</span></a><p>No tests found for <code>decode</code>. To contribute a test please refer to <a href="/dev/test.html">this guide</a> and <a href="https://forums.fast.ai/t/improving-expanding-functional-tests/32929">this discussion</a>.</p></div></div>

Decode `o` passing `filt`  

In [None]:
# Fetch item 1 of subset 1, then decode it with the same `filt`
t = dsrc.get(1, filt=1)
test_eq(dsrc.decode(t, filt=1), 3)

In [None]:
show_doc(DataSource.show)

<h4 id="DataSource.show" class="doc_header"><code>show</code><a class="source_link" data-toggle="collapse" data-target="#DataSource-show-pytest" style="float:right; padding-right:10px">[test]</a></h4>

> <code>show</code>(**`o`**, **`filt`**=***`0`***, **\*\*`kwargs`**)

<div class="collapse" id="DataSource-show-pytest"><div class="card card-body pytest_card"><a type="button" data-toggle="collapse" data-target="#DataSource-show-pytest" class="close" aria-label="Close"><span aria-hidden="true">&times;</span></a><p>No tests found for <code>show</code>. To contribute a test please refer to <a href="/dev/test.html">this guide</a> and <a href="https://forums.fast.ai/t/improving-expanding-functional-tests/32929">this discussion</a>.</p></div></div>

Call `tfm.show` on decoded `o`  

In [None]:
# `show` decodes `t` with `filt=1` before rendering it
test_eq(dsrc.show(t, filt=1), ' * 3')

It is often more convenient to use these methods on the filtered subset:

In [None]:
# The subset passes its own `filt`, so none is given here
test_eq(dsrc[1].show(o=t), ' * 3')
test_eq(dsrc[1].show_at(i=1), ' * 3')

## Export -

In [None]:
#hide
from fastai_local.notebook.export import notebook2script
notebook2script(all_fs=True)

Converted 00_test.ipynb.
Converted 01_core.ipynb.
Converted 02_data_pipeline.ipynb.
Converted 03_data_source.ipynb.
Converted 04_data_core.ipynb.
Converted 99_export.ipynb.
Converted 99a_export2html.ipynb.
Converted _05_pets_tutorial.ipynb.
Converted _06_data_blocks.ipynb.
