In [None]:
#default_exp utils

In [None]:
#export
from fastcore.imports import *
from fastcore.foundation import *
from functools import wraps

import mimetypes,bz2,pickle,random,json,urllib,subprocess,shlex,bz2,gzip,distutils.util
from contextlib import contextmanager
from pdb import set_trace
from urllib.request import Request,urlopen
from urllib.error import HTTPError
from urllib.parse import urlencode
from threading import Thread

In [None]:
from fastcore.test import *
from nbdev.showdoc import *
from fastcore.nb_imports import *

# Utility functions

> Utility functions used in the fastai library

## Basics

In [None]:
# export
def ifnone(a, b):
    "Return `a` unless it is `None`, in which case return `b`"
    if a is None: return b
    return a

Since `b if a is None else a` is such a common pattern, we wrap it in a function. However, be careful, because python will evaluate *both* `a` and `b` when calling `ifnone` (which it doesn't do if using the `if` version directly).

In [None]:
test_eq(ifnone(None,1), 1)
test_eq(ifnone(2   ,1), 2)

In [None]:
#export
def maybe_attr(o, attr):
    "Attribute `attr` of `o` if it exists, otherwise `o` itself"
    try: return getattr(o, attr)
    except AttributeError: return o

Return the attribute `attr` for object `o`.  If the attribute doesn't exist, then return the object `o` instead. 

In [None]:
class myobj: myattr='foo'

test_eq(maybe_attr(myobj, 'myattr'), 'foo')
test_eq(maybe_attr(myobj, 'another_attr'), myobj)

In [None]:
#export
def basic_repr(flds=None):
    "Build a `__repr__` method that shows each attr named in `flds` as `name=value`"
    # `flds` may be a comma-separated string or a collection of names
    names = re.split(', *', flds) if isinstance(flds, str) else flds
    names = L(names)
    def _f(self):
        parts = []
        for o in names:
            val = getattr(self,o)
            # A value's `__name__` is shown when it has one, otherwise the value itself is formatted
            parts.append(f'{o}={maybe_attr(val, "__name__")}')
        sig = ', '.join(parts)
        return f'{self.__class__.__name__}({sig})'
    return _f

Lookup a user-supplied list of attributes (`flds`) of an object and generate a string with the name of each attribute and its corresponding value. The format of this string is `key=value`, where `key` is the name of the attribute, and `value` is the value of the attribute.  For each value, attempt to use the `__name__` attribute, otherwise fall back to using the value's `__repr__` when constructing the string.  

In [None]:
class SomeClass:
    a=1
    b='foo'
    __repr__=basic_repr('a,b')
    __name__='some-class'
    
class AnotherClass:
    c=SomeClass()
    d='bar'
    __repr__=basic_repr(['c', 'd'])
    
sc = SomeClass()    
ac = AnotherClass()

test_eq(repr(sc), 'SomeClass(a=1, b=foo)')
test_eq(repr(ac), 'AnotherClass(c=some-class, d=bar)')

In [None]:
#export
def get_class(nm, *fld_names, sup=None, doc=None, funcs=None, **flds):
    "Dynamically create a class, optionally inheriting from `sup`, containing `fld_names`"
    # Class-level attrs: positional field names default to `None`, then `funcs` keyed by
    # their `__name__`, then keyword fields with the defaults given.
    attrs = {}
    for f in fld_names: attrs[f] = None
    for f in L(funcs): attrs[f.__name__] = f
    for k,v in flds.items(): attrs[k] = v
    sup = ifnone(sup, ())
    if not isinstance(sup, tuple): sup=(sup,)

    all_flds = [*fld_names,*flds.keys()]
    # Positional `__init__` args map onto data fields only, in declaration order (de-duplicated).
    # Indexing into `attrs` instead would also hit `funcs` names and the dunders added below,
    # letting positional values silently overwrite methods.
    pos_flds = list(dict.fromkeys(all_flds))

    def _init(self, *args, **kwargs):
        if len(args) > len(pos_flds):
            raise TypeError(f'{nm}() takes at most {len(pos_flds)} positional arguments but {len(args)} were given')
        for k,v in zip(pos_flds, args): setattr(self, k, v)
        for k,v in kwargs.items(): setattr(self,k,v)

    def _eq(self,b):
        # Equal when every declared field compares equal
        return all([getattr(self,k)==getattr(b,k) for k in all_flds])

    # A generated `__repr__` is only supplied when there are no base classes
    if not sup: attrs['__repr__'] = basic_repr(all_flds)
    attrs['__init__'] = _init
    attrs['__eq__'] = _eq
    res = type(nm, sup, attrs)
    if doc is not None: res.__doc__ = doc
    return res

In [None]:
show_doc(get_class, title_level=4)

<h4 id="get_class" class="doc_header"><code>get_class</code><a href="__main__.py#L2" class="source_link" style="float:right">[source]</a></h4>

> <code>get_class</code>(**`nm`**, **\*`fld_names`**, **`sup`**=*`None`*, **`doc`**=*`None`*, **`funcs`**=*`None`*, **\*\*`flds`**)

Dynamically create a class, optionally inheriting from `sup`, containing `fld_names`

In [None]:
_t = get_class('_t', 'a', b=2)
t = _t()
test_eq(t.a, None)
test_eq(t.b, 2)
t = _t(1, b=3)
test_eq(t.a, 1)
test_eq(t.b, 3)
t = _t(1, 3)
test_eq(t.a, 1)
test_eq(t.b, 3)
test_eq(repr(t), '_t(a=1, b=3)')
test_eq(t, pickle.loads(pickle.dumps(t)))

Most often you'll want to call `mk_class`, since it adds the class to your module. See `mk_class` for more details and examples of use (which also apply to `get_class`).

In [None]:
#export
def mk_class(nm, *fld_names, sup=None, doc=None, funcs=None, mod=None, **flds):
    "Create a class using `get_class` and add to the caller's module"
    # With no explicit `mod`, use the local namespace of the immediate caller's frame
    # (when called at module level, that is the module's namespace).
    if mod is None: mod = sys._getframe(1).f_locals
    res = get_class(nm, *fld_names, sup=sup, doc=doc, funcs=funcs, **flds)
    # `mod` is used as a mapping: the new class is bound under `nm`. Nothing is returned.
    mod[nm] = res

Any `kwargs` will be added as class attributes, and `sup` is an optional (tuple of) base classes.

In [None]:
mk_class('_t', a=1, sup=GetAttr)
t = _t()
test_eq(t.a, 1)
assert(isinstance(t,GetAttr))

An `__init__` is provided that sets attrs for any `kwargs`, and for any `args` (matching by position to fields), along with a `__repr__` which prints all attrs (the `__repr__` is only added when no `sup` is given). The docstring is set to `doc`. You can pass `funcs` which will be added as attrs with the function names.

In [None]:
def foo(self): return 1
mk_class('_t', 'a', sup=GetAttr, doc='test doc', funcs=foo)

t = _t(3, b=2)
test_eq(t.a, 3)
test_eq(t.b, 2)
test_eq(t.foo(), 1)
test_eq(t.__doc__, 'test doc')
t

<__main__._t at 0x7ff6ea4703d0>

In [None]:
#export
def wrap_class(nm, *fld_names, sup=None, doc=None, funcs=None, **flds):
    "Decorator: create class `nm` via `mk_class`, adding the decorated function to it as a method"
    def _inner(f):
        # The decorated function joins any `funcs` supplied; the class is bound in `f`'s own globals
        methods = L(funcs)+f
        mk_class(nm, *fld_names, sup=sup, doc=doc, funcs=methods, mod=f.__globals__, **flds)
        return f
    return _inner

In [None]:
@wrap_class('_t', a=2)
def bar(self,x): return x+1

t = _t()
test_eq(t.a, 2)
test_eq(t.bar(3), 4)

In [None]:
#export
class ignore_exceptions:
    "Context manager to ignore exceptions"
    def __enter__(self):
        return None
    def __exit__(self, *args):
        # A truthy return value tells the interpreter to suppress the exception
        return True

In [None]:
show_doc(ignore_exceptions, title_level=4)

<h4 id="ignore_exceptions" class="doc_header"><code>class</code> <code>ignore_exceptions</code><a href="" class="source_link" style="float:right">[source]</a></h4>

> <code>ignore_exceptions</code>()

Context manager to ignore exceptions

In [None]:
with ignore_exceptions(): 
    # Exception will be ignored
    raise Exception

In [None]:
#export
def exec_local(code, var_name):
    "Call `exec` on `code` and return the var `var_name`"
    # `code` runs as-is with this module's globals; names it binds land in `namespace`
    namespace = dict()
    exec(code, globals(), namespace)
    return namespace[var_name]

In [None]:
test_eq(exec_local("a=1", "a"), 1)

In [None]:
#export
def risinstance(types, obj=None):
    "Curried `isinstance` but with args reversed"
    # Only a missing (`None`) `obj` triggers currying: falsy values such as `0`, `''` or `[]`
    # are real objects to test and must produce a bool, not a partial.
    if obj is None: return partial(risinstance,types)
    return isinstance(obj, types)

In [None]:
assert risinstance(int, 1)
assert risinstance(int)(1)

## NoOp

These are used when you need a pass-through function.

In [None]:
show_doc(noop, title_level=4)

<h4 id="noop" class="doc_header"><code>noop</code><a href="https://github.com/fastai/fastcore/tree/master/fastcore/imports.py#L35" class="source_link" style="float:right">[source]</a></h4>

> <code>noop</code>(**`x`**=*`None`*, **\*`args`**, **\*\*`kwargs`**)

Do nothing

In [None]:
noop()
test_eq(noop(1),1)

In [None]:
show_doc(noops, title_level=4)

<h4 id="noops" class="doc_header"><code>noops</code><a href="https://github.com/fastai/fastcore/tree/master/fastcore/imports.py#L39" class="source_link" style="float:right">[source]</a></h4>

> <code>noops</code>(**`x`**=*`None`*, **\*`args`**, **\*\*`kwargs`**)

Do nothing (method)

In [None]:
mk_class('_t', foo=noops)
test_eq(_t().foo(1),1)

## Infinite Lists

These lists are useful for things like padding an array or adding index column(s) to arrays.

In [None]:
#export
#hide
class _InfMeta(type):
    @property
    def count(self): return itertools.count()
    @property
    def zeros(self): return itertools.cycle([0])
    @property
    def ones(self):  return itertools.cycle([1])
    @property
    def nones(self): return itertools.cycle([None])

In [None]:
#export
class Inf(metaclass=_InfMeta):
    "Infinite lists"
    # No body needed: `count`, `zeros`, `ones` and `nones` are properties of the metaclass

In [None]:
show_doc(Inf, title_level=4);

<h4 id="Inf" class="doc_header"><code>class</code> <code>Inf</code><a href="" class="source_link" style="float:right">[source]</a></h4>

> <code>Inf</code>()

Infinite lists

`Inf` defines the following properties:
    
- `count: itertools.count()`
- `zeros: itertools.cycle([0])`
- `ones : itertools.cycle([1])`
- `nones: itertools.cycle([None])`

In [None]:
test_eq([o for i,o in zip(range(5), Inf.count)],
        [0, 1, 2, 3, 4])

test_eq([o for i,o in zip(range(5), Inf.zeros)],
        [0]*5)

test_eq([o for i,o in zip(range(5), Inf.ones)],
        [1]*5)

test_eq([o for i,o in zip(range(5), Inf.nones)],
        [None]*5)

## Operator Functions

In [None]:
#export
_dumobj = object()
def _oper(op,a,b=_dumobj): return (lambda o:op(o,a)) if b is _dumobj else op(a,b)

def _mk_op(nm, mod):
    "Create an operator using `oper` and add to the caller's module"
    op = getattr(operator,nm)
    def _inner(a, b=_dumobj): return _oper(op, a,b)
    _inner.__name__ = _inner.__qualname__ = nm
    _inner.__doc__ = f'Same as `operator.{nm}`, or returns partial if 1 arg'
    mod[nm] = _inner

In [None]:
#export
def in_(x, a):
    "`True` if `x in a`"
    # `operator.contains(a, x)` is the function form of `x in a`
    return operator.contains(a, x)

# Publish on the `operator` module so it can be fetched by name like the stdlib operators
operator.in_ = in_

In [None]:
#export
# Names of the operator functions created dynamically by the `_mk_op` loop
# NOTE(review): presumably read by nbdev to add these names to the exported `__all__` — confirm
_all_ = ['lt','gt','le','ge','eq','ne','add','sub','mul','truediv','is_','is_not','in_']

In [None]:
#export
# Bind a curried wrapper for each `operator` function into this module's globals
# (this rebinds `in_` too); the loop variable `op` is left behind as a module global.
for op in ['lt','gt','le','ge','eq','ne','add','sub','mul','truediv','is_','is_not','in_']: _mk_op(op, globals())

In [None]:
# test if element is in another
assert in_('c', ('b', 'c', 'a'))
assert in_(4, [2,3,4,5])
assert in_('t', 'fastai')
# `in_` returns `False` rather than raising, so assert on the result directly
# (`test_fail` expects a callable that raises, not a bool)
assert not in_('h', 'fastai')

# use in_ as a partial
assert in_('fastai')('t')
assert in_([2,3,4,5])(4)
assert not in_('fastai')('h')

In addition to `in_`, the following functions are provided matching the behavior of the equivalent versions in `operator`: *lt gt le ge eq ne add sub mul truediv is_ is_not*.

In [None]:
lt(3,5),gt(3,5),is_(None,None),in_(0,[1,2])

(True, False, True, False)

Similarly to `in_`, they also have additional functionality: if you only pass one param, they return a partial function that passes that param as the second positional parameter.

In [None]:
lt(5)(3),gt(5)(3),is_(None)(None),in_([1,2])(0)

(True, False, True, False)

In [None]:
#export
def true(*args, **kwargs):
    "Predicate that returns `True` whatever arguments it is given"
    result = True
    return result

In [None]:
assert true(1,2,3)
assert true(False)
assert true(None)
assert true([])

In [None]:
#export
def gen(func, seq, cond=true):
    "Lazily yield `func(o)` for `o` in `seq`, stopping at the first result for which `cond` is false"
    mapped = map(func, seq)
    return itertools.takewhile(cond, mapped)

In [None]:
test_eq(gen(noop, Inf.count, lt(5)),
        range(5))
test_eq(gen(operator.neg, Inf.count, gt(-5)),
        [0,-1,-2,-3,-4])
test_eq(gen(lambda o:o if o<5 else stop(), Inf.count),
        range(5))

In [None]:
#export
def chunked(it, chunk_sz=None, drop_last=False, n_chunks=None):
    "Return batches from iterator `it` of size `chunk_sz` (or return `n_chunks` total)"
    # Exactly one of `chunk_sz` / `n_chunks` must be given (and be non-zero)
    assert bool(chunk_sz) ^ bool(n_chunks)
    # `n_chunks` relies on `len(it)`, so `it` must be sized in that case
    if n_chunks: chunk_sz = math.ceil(len(it)/n_chunks)
    # An empty sized input with `n_chunks` gives `chunk_sz==0`; the loop below could then never
    # see a short batch and would spin forever, so finish immediately with no batches.
    if not chunk_sz: return
    if not isinstance(it, Iterator): it = iter(it)
    while True:
        res = list(itertools.islice(it, chunk_sz))
        # A trailing short batch is emitted unless `drop_last` is set
        if res and (len(res)==chunk_sz or not drop_last): yield res
        # A short (or empty) batch means the iterator is exhausted
        if len(res)<chunk_sz: return

Note that you must pass either `chunk_sz`, or `n_chunks`, but not both.

In [None]:
t = L.range(10)
test_eq(chunked(t,3),      [[0,1,2], [3,4,5], [6,7,8], [9]])
test_eq(chunked(t,3,True), [[0,1,2], [3,4,5], [6,7,8],    ])

t = map(lambda o:stop() if o==6 else o, Inf.count)
test_eq(chunked(t,3), [[0, 1, 2], [3, 4, 5]])
t = map(lambda o:stop() if o==7 else o, Inf.count)
test_eq(chunked(t,3), [[0, 1, 2], [3, 4, 5], [6]])

t = np.arange(10)
test_eq(chunked(t,3),      L([0,1,2], [3,4,5], [6,7,8], [9]))
test_eq(chunked(t,3,True), L([0,1,2], [3,4,5], [6,7,8],    ))

In [None]:
#export
def otherwise(x, tst, y):
    "Return `y` when `tst(x)` is truthy, else `x` unchanged"
    if tst(x): return y
    return x

In [None]:
test_eq(otherwise(2+1, gt(3), 4), 3)
test_eq(otherwise(2+1, gt(2), 4), 4)

## Attribute Helpers

These functions reduce boilerplate when setting or manipulating attributes or properties of objects.

In [None]:
#export
class AttrDict(dict):
    "`dict` subclass that also provides access to keys as attrs"
    def __getattr__(self,k):
        # Reached only when normal attribute lookup fails: fall back to the dict's keys
        if k in self: return self[k]
        return stop(AttributeError(k))

    def __setattr__(self, k, v):
        # Names starting with `_` become real instance attributes; anything else is stored as a key
        if k[0]=='_': super().__setattr__(k,v)
        else: self.__setitem__(k,v)

    def __dir__(self):
        return custom_dir(self, list(self.keys()))

In [None]:
show_doc(AttrDict, title_level=4)

<h4 id="AttrDict" class="doc_header"><code>class</code> <code>AttrDict</code><a href="" class="source_link" style="float:right">[source]</a></h4>

> <code>AttrDict</code>() :: `dict`

`dict` subclass that also provides access to keys as attrs

In [None]:
d = AttrDict(a=1,b="two")
test_eq(d.a, 1)
test_eq(d['b'], 'two')
test_eq(d.get('c','nope'), 'nope')
d.b = 2
test_eq(d.b, 2)
test_eq(d['b'], 2)
d['b'] = 3
test_eq(d['b'], 3)
test_eq(d.b, 3)

In [None]:
#export
def dict2obj(d):
    "Convert (possibly nested) dicts (or lists of dicts) to `AttrDict`"
    # Lists (and `L`s) are converted element by element
    if isinstance(d, (L,list)): return L(d).map(dict2obj)
    # Anything that is not a dict is returned untouched
    if not isinstance(d, dict): return d
    # Pass the mapping positionally: `**`-unpacking would raise `TypeError` for non-string keys
    return AttrDict({k:dict2obj(v) for k,v in d.items()})

This is a convenience to give you "dotted" access to (possibly nested) dictionaries, e.g:

In [None]:
d1 = dict(a=1, b=dict(c=2,d=3))
d2 = dict2obj(d1)
test_eq(d2.b.c, 2)
test_eq(d2.b['c'], 2)
d2

{'a': 1, 'b': {'c': 2, 'd': 3}}

It can also be used on lists of dicts.

In [None]:
ds = L(d1, d1)
test_eq(dict2obj(ds)[0].b.c, 2)

In [None]:
#export
def with_cast(f):
    "Decorator which uses any parameter annotations as preprocessing functions"
    anno = f.__annotations__
    params = f.__code__.co_varnames
    # `wraps` keeps `f`'s name, docstring and other metadata on the returned wrapper
    @wraps(f)
    def _inner(*args, **kwargs):
        args = list(args)
        for i,v in enumerate(params):
            if v in anno:
                c = anno[v]
                # Cast whichever way the argument was supplied; omitted arguments are left to their defaults
                if v in kwargs: kwargs[v] = c(kwargs[v])
                elif i<len(args): args[i] = c(args[i])
        return f(*args, **kwargs)
    return _inner

In [None]:
@with_cast
def _f(a, b:Path, c:str='', d=0)->bool: return (a,b,c,d)

test_eq(_f(1, '.', 3), (1,Path('.'),'3',0))

In [None]:
#export
def _store_attr(self, anno, **attrs):
    for n,v in attrs.items():
        if n in anno: v = anno[n](v)
        setattr(self, n, v)
        self.__stored_args__[n] = v

In [None]:
#export
def store_attr(names=None, self=None, but=None, cast=False, **attrs):
    "Store params named in comma-separated `names` from calling context into attrs in `self`"
    # Frame of the immediate caller: its argument names and local values decide what gets stored
    fr = sys._getframe(1)
    args = fr.f_code.co_varnames[:fr.f_code.co_argcount]
    # Test identity, not truthiness: an explicitly passed `self` that happens to be falsy (e.g. an
    # empty container) must still be the target rather than the caller's first argument.
    if self is not None: args = ('self', *args)
    else: self = fr.f_locals[args[0]]
    if not hasattr(self, '__stored_args__'): self.__stored_args__ = {}
    # With `cast`, the annotations of the class's `__init__` are used as converters
    anno = self.__class__.__init__.__annotations__ if cast else {}
    # Explicit keyword attrs take precedence over anything read from the caller's frame
    if attrs: return _store_attr(self, anno, **attrs)
    # Default to every caller argument after the first
    ns = re.split(', *', names) if names else args[1:]
    skip = L(but)
    _store_attr(self, anno, **{n:fr.f_locals[n] for n in ns if n not in skip})

In its most basic form, you can use `store_attr` to shorten code like this:

In [None]:
class T:
    def __init__(self, a,b,c): self.a,self.b,self.c = a,b,c

...to this:

In [None]:
class T:
    def __init__(self, a,b,c): store_attr('a,b,c', self)

This class behaves as if we'd used the first form:

In [None]:
t = T(1,c=2,b=3)
assert t.a==1 and t.b==3 and t.c==2

In addition, it stores the attrs as a `dict` in `__stored_args__`, which you can use for display, logging, and so forth.

In [None]:
test_eq(t.__stored_args__, {'a':1, 'b':3, 'c':2})

Since you normally want to use the first argument (often called `self`) for storing attributes, it's optional:

In [None]:
class T:
    def __init__(self, a,b,c:str): store_attr('a,b,c')

t = T(1,c=2,b=3)
assert t.a==1 and t.b==3 and t.c==2

In [None]:
#hide
class _T:
    def __init__(self, a,b):
        c = 2
        store_attr('a,b,c')

t = _T(1,b=3)
assert t.a==1 and t.b==3 and t.c==2

With `cast=True` any parameter annotations will be used as preprocessing functions for the corresponding arguments:

In [None]:
class T:
    def __init__(self, a:L, b, c:str): store_attr('a,b,c', cast=True)

t = T(1,c=2,b=3)
assert t.a==[1] and t.b==3 and t.c=='2'

You can inherit from a class using `store_attr`, and just call it again to add in any new attributes added in the derived class:

In [None]:
class T2(T):
    def __init__(self, d, **kwargs):
        super().__init__(**kwargs)
        store_attr('d')

t = T2(d=1,a=2,b=3,c=4)
assert t.a==2 and t.b==3 and t.c==4 and t.d==1

You can skip passing a list of attrs to store. In this case, all arguments passed to the method are stored:

In [None]:
class T:
    def __init__(self, a,b,c): store_attr()

t = T(1,c=2,b=3)
assert t.a==1 and t.b==3 and t.c==2

In [None]:
class T4(T):
    def __init__(self, d, **kwargs):
        super().__init__(**kwargs)
        store_attr()

t = T4(4, a=1,c=2,b=3)
assert t.a==1 and t.b==3 and t.c==2 and t.d==4

In [None]:
#hide
# ensure that subclasses work with or without `store_attr`
class T4(T):
    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        store_attr()

t = T4(a=1,c=2,b=3)
assert t.a==1 and t.b==3 and t.c==2

class T4(T): pass

t = T4(a=1,c=2,b=3)
assert t.a==1 and t.b==3 and t.c==2

You can skip some attrs by passing `but`:

In [None]:
class T:
    def __init__(self, a,b,c): store_attr(but=['a'])

t = T(1,c=2,b=3)
assert t.b==3 and t.c==2
assert not hasattr(t,'a')

You can also pass keywords to `store_attr`, which is identical to setting the attrs directly, but also stores them in `__stored_args__`.

In [None]:
class T:
    def __init__(self): store_attr(a=1)

t = T()
assert t.a==1

You can also use store_attr inside functions.

In [None]:
def create_T(a, b):
    t = SimpleNamespace()
    store_attr(self=t)
    return t

t = create_T(a=1, b=2)
assert t.a==1 and t.b==2

In [None]:
#export
def attrdict(o, *ks):
    "Dict mapping each name `k` in `ks` to `getattr(o,k)`"
    res = {}
    for k in ks: res[k] = getattr(o,k)
    return res

In [None]:
class T:
    def __init__(self, a,b,c): store_attr()

t = T(1,c=2,b=3)
test_eq(attrdict(t,'b','c'), {'b':3, 'c':2})

In [None]:
#export
def properties(cls, *ps):
    "Replace each attr of `cls` named in `ps` with a property that uses it as the getter"
    for nm in ps:
        getter = getattr(cls, nm)
        setattr(cls, nm, property(getter))

In [None]:
class T:
    def a(self): return 1
    def b(self): return 2
properties(T,'a')

test_eq(T().a,1)
test_eq(T().b(),2)

In [None]:
#export
# Any character followed by a capitalised word (one uppercase letter then lowercase letters)
_camel_re1 = re.compile('(.)([A-Z][a-z]+)')
# A lowercase letter or digit immediately followed by an uppercase letter
_camel_re2 = re.compile('([a-z0-9])([A-Z])')

In [None]:
#export
def camel2snake(name):
    "Convert CamelCase to snake_case"
    # Insert `_` at each boundary found by the two patterns in turn, then lowercase the result
    res = name
    for pat in (_camel_re1, _camel_re2): res = pat.sub(r'\1_\2', res)
    return res.lower()

In [None]:
test_eq(camel2snake('ClassAreCamel'), 'class_are_camel')
test_eq(camel2snake('Already_Snake'), 'already__snake')

In [None]:
#export
def snake2camel(s):
    "Convert snake_case to CamelCase"
    # Title-case every word, then drop the underscores
    titled = s.title()
    return titled.replace('_', '')

In [None]:
test_eq(snake2camel('a_b_cc'), 'ABCc')

In [None]:
#export
def class2attr(self, cls_name):
    "Return the snake-cased name of the class; strip ending `cls_name` if it exists."
    stem = re.sub(rf'{cls_name}$', '', self.__class__.__name__)
    # If stripping leaves nothing (the class is named exactly `cls_name`), use the lowercased `cls_name`
    if not stem: stem = cls_name.lower()
    return camel2snake(stem)

In [None]:
class Parent:
    @property
    def name(self): return class2attr(self, 'Parent')

class ChildOfParent(Parent): pass
class ParentChildOf(Parent): pass

p = Parent()
cp = ChildOfParent()
cp2 = ParentChildOf()

test_eq(p.name, 'parent')
test_eq(cp.name, 'child_of')
test_eq(cp2.name, 'parent_child_of')

In [None]:
#export
def hasattrs(o,attrs):
    "Test whether `o` has every attribute named in `attrs`"
    for attr in attrs:
        if not hasattr(o,attr): return False
    return True

In [None]:
assert hasattrs(1,('imag','real'))
assert not hasattrs(1,('imag','foo'))

In [None]:
#export
def setattrs(dest, flds, src):
    "Set each attr named in comma-separated `flds` on `dest`, read from `src` (dict key or attribute)"
    from_dict = isinstance(src, dict)
    for fld in re.split(r",\s*", flds):
        # A key missing from a dict yields `None`; a missing attribute raises `AttributeError`
        val = dict.get(src, fld) if from_dict else getattr(src, fld)
        setattr(dest, fld, val)

In [None]:
d = dict(a=1,bb="2",ignore=3)
o = SimpleNamespace()
setattrs(o, "a,bb", d)
test_eq(o.a, 1)
test_eq(o.bb, "2")

In [None]:
d = SimpleNamespace(a=1,bb="2",ignore=3)
o = SimpleNamespace()
setattrs(o, "a,bb", d)
test_eq(o.a, 1)
test_eq(o.bb, "2")

## Extensible Types

`ShowPrint` is a base class that defines a `show` method, which is used primarily for callbacks in fastai that expect this method to be defined.

In [None]:
#export
#hide
class ShowPrint:
    "Base class that prints for `show`"
    def show(self, *args, **kwargs):
        # Any arguments are accepted and ignored; the object's `str` form is printed
        text = str(self)
        print(text)

`Int`, `Float`, and `Str` extend `int`, `float` and `str` respectively by adding an additional `show` method by inheriting from `ShowPrint`.

The code for `Int` is shown below:

In [None]:
#export
#hide
# `int` with the `show` method inherited from `ShowPrint`
class Int(int,ShowPrint): pass

In [None]:
#export 
#hide
# `str` and `float` counterparts of `Int`: the builtin type plus `ShowPrint.show`
class Str(str,ShowPrint): pass
class Float(float,ShowPrint): pass
# NOTE(review): `add_docs` presumably sets each class's docstring to the given text — confirm
add_docs(Str, "An extensible `str`");
add_docs(Int, "An extensible `int`");
add_docs(Float, "An extensible `float`")

Examples:

In [None]:
Int(0).show()
Float(2.0).show()
Str('Hello').show()

0
2.0
Hello


## Collection functions

Functions that manipulate popular python collections.

In [None]:
#export
def tuplify(o, use_list=False, match=None):
    "Convert `o` to a tuple by first building an `L` from it"
    as_l = L(o, use_list=use_list, match=match)
    return tuple(as_l)

In [None]:
test_eq(tuplify(None),())
test_eq(tuplify([1,2,3]),(1,2,3))
test_eq(tuplify(1,match=[1,2,3]),(1,1,1))

In [None]:
#export
def detuplify(x):
    "`None` if `x` is empty, its only item if it holds exactly one, else `x` unchanged"
    if len(x)==0: return None
    # Objects exposing `ndim` (e.g. arrays) are only unwrapped when `ndim` is 1
    if len(x)==1 and getattr(x, 'ndim', 1)==1: return x[0]
    return x

In [None]:
test_eq(detuplify(()),None)
test_eq(detuplify([1]),1)
test_eq(detuplify([1,2]), [1,2])
test_eq(detuplify(np.array([[1,2]])), np.array([[1,2]]))

In [None]:
#export
def replicate(item,match):
    "Tuple holding `item` repeated `len(match)` times (the same object each time)"
    n = len(match)
    return tuple(item for _ in range(n))

In [None]:
t = [1,1]
test_eq(replicate([1,2], t),([1,2],[1,2]))
test_eq(replicate(1, t),(1,1))

In [None]:
#export
def uniqueify(x, sort=False, bidir=False, start=None):
    "Unique elements of `x`: optionally prefixed by `start`, optionally `sort`-ed, and with `bidir` also the value-to-index mapping"
    uniq = L(x).unique()
    # `start` is prepended before any sorting, so it takes part in the sort
    if start is not None: uniq = start+uniq
    if sort: uniq.sort()
    return (uniq, uniq.val2idx()) if bidir else uniq

In [None]:
# test
test_eq(set(uniqueify([1,1,0,5,0,3])),{0,1,3,5})
test_eq(uniqueify([1,1,0,5,0,3], sort=True),[0,1,3,5])
test_eq(uniqueify([1,1,0,5,0,3], start=[7,8,6]), [7,8,6,1,0,5,3])
v,o = uniqueify([1,1,0,5,0,3], bidir=True)
test_eq(v,[1,0,5,3])
test_eq(o,{1:0, 0: 1, 5: 2, 3: 3})
v,o = uniqueify([1,1,0,5,0,3], sort=True, bidir=True)
test_eq(v,[0,1,3,5])
test_eq(o,{0:0, 1: 1, 3: 2, 5: 3})

In [None]:
# export
def setify(o):
    "Turn any list-like object into a set."
    # An existing set is returned as-is (no copy)
    if isinstance(o,set): return o
    return set(L(o))

In [None]:
# test
test_eq(setify(None),set())
test_eq(setify('abc'),{'abc'})
test_eq(setify([1,2,2]),{1,2})
test_eq(setify(range(0,3)),{0,1,2})
test_eq(setify({1,2}),{1,2})

In [None]:
#export
def merge(*ds):
    "Merge all dictionaries in `ds`; later ones win on duplicate keys and `None` entries are skipped"
    res = {}
    for d in ds:
        if d is None: continue
        for k,v in d.items(): res[k] = v
    return res

In [None]:
test_eq(merge(), {})
test_eq(merge(dict(a=1,b=2)), dict(a=1,b=2))
test_eq(merge(dict(a=1,b=2), dict(b=3,c=4), None), dict(a=1, b=3, c=4))

In [None]:
#export
def is_listy(x):
    "`isinstance(x, (tuple,list,L,slice,Generator))`"
    listy_types = (tuple,list,L,slice,Generator)
    return isinstance(x, listy_types)

In [None]:
assert is_listy((1,))
assert is_listy([1])
assert is_listy(L([1]))
assert is_listy(slice(2))
assert not is_listy(array([1]))

In [None]:
#export
def range_of(x):
    "List of every index into collection `x`, i.e. `0` to `len(x)-1`"
    n = len(x)
    return [*range(n)]

In [None]:
test_eq(range_of([1,1,1,1]), [0,1,2,3])

In [None]:
#export
def groupby(x, key):
    "Eagerly group the items of `x` into a dict of lists keyed by `key(item)`; input need not be sorted"
    groups = {}
    for item in x:
        k = key(item)
        if k not in groups: groups[k] = []
        groups[k].append(item)
    return groups

In [None]:
test_eq(groupby('aa ab bb'.split(), itemgetter(0)), {'a':['aa','ab'], 'b':['bb']})

In [None]:
#export
def last_index(x, o):
    "Index of the last occurrence of `x` in `o` (returns -1 if there is no occurrence)"
    # Scan from the end so the first match found is the last occurrence
    for i in reversed(range(len(o))):
        if o[i] == x: return i
    return -1

In [None]:
test_eq(last_index(9, [1, 2, 9, 3, 4, 9, 10]), 5)
test_eq(last_index(6, [1, 2, 9, 3, 4, 9, 10]), -1)

In [None]:
#export
def shufflish(x, pct=0.04):
    "Randomly relocate items of `x` up to `pct` of `len(x)` from their starting location"
    n = len(x)
    # Sort key: the index plus a random offset of between `n` and `n*(1+pct)`
    def _jitter(o): return o+n*(1+random.random()*pct)
    order = sorted(range_of(x), key=_jitter)
    return L(x[i] for i in order)

In [None]:
l = list(range(100))
l2 = array(shufflish(l))
test_close(l2[:50 ].mean(), 25, eps=5)
test_close(l2[-50:].mean(), 75, eps=5)
test_ne(l,l2)

## Reindexing Collections

In [None]:
#export
#hide
class IterLen:
    "Base class to add iteration to anything supporting `__len__` and `__getitem__`"
    # Yields `self[i]` for each index from `range_of(self)`; the index list is built when
    # `__iter__` is called, while the items are fetched lazily
    def __iter__(self): return (self[i] for i in range_of(self))

In [None]:
#export
@docs
class ReindexCollection(GetAttr, IterLen):
    "Reindexes collection `coll` with indices `idxs` and optional LRU cache of size `cache`"
    _default='coll'
    def __init__(self, coll, idxs=None, cache=None, tfm=noop):
        if idxs is None: idxs = L.range(coll)
        # Stores `coll`, `idxs`, `cache` and `tfm` as attributes of the same names
        store_attr()
        # The cache wraps the bound `_get`, so it is per instance and keyed on the underlying index
        if cache is not None: self._get = functools.lru_cache(maxsize=cache)(self._get)

    def _get(self, i): return self.tfm(self.coll[i])
    def __getitem__(self, i): return self._get(self.idxs[i])
    # NOTE(review): length is that of `coll`, not `idxs`; with a shorter `idxs`, iterating via
    # `IterLen` would index past its end — confirm this is intended
    def __len__(self): return len(self.coll)
    def reindex(self, idxs): self.idxs = idxs
    def shuffle(self): random.shuffle(self.idxs)
    def cache_clear(self): self._get.cache_clear()
    # The state dict leaves out the instance-level `_get` (the LRU wrapper)
    def __getstate__(self): return {'coll': self.coll, 'idxs': self.idxs, 'cache': self.cache, 'tfm': self.tfm}
    def __setstate__(self, s):
        self.coll,self.idxs,self.cache,self.tfm = s['coll'],s['idxs'],s['cache'],s['tfm']
        # Rebuild the LRU wrapper, which is not part of the saved state; otherwise a restored
        # object silently loses caching and `cache_clear` fails.
        if self.cache is not None: self._get = functools.lru_cache(maxsize=self.cache)(self._get)

    _docs = dict(reindex="Replace `self.idxs` with idxs",
                shuffle="Randomly shuffle indices",
                cache_clear="Clear LRU cache")

In [None]:
show_doc(ReindexCollection, title_level=4)

<h4 id="ReindexCollection" class="doc_header"><code>class</code> <code>ReindexCollection</code><a href="" class="source_link" style="float:right">[source]</a></h4>

> <code>ReindexCollection</code>(**`coll`**, **`idxs`**=*`None`*, **`cache`**=*`None`*, **`tfm`**=*`noop`*) :: [`GetAttr`](/foundation.html#GetAttr)

Reindexes collection `coll` with indices `idxs` and optional LRU cache of size `cache`

This is useful when constructing batches or organizing data in a particular manner (e.g. for deep learning).  This class is primarily used in organizing data for language models in fastai.

#### Reindexing

You can supply a custom index upon instantiation with the `idxs` argument, or you can call the `reindex` method to supply a new index for your collection.

Here is how you can reindex a list such that the elements are reversed:

In [None]:
rc=ReindexCollection(['a', 'b', 'c', 'd', 'e'], idxs=[4,3,2,1,0])
list(rc)

['e', 'd', 'c', 'b', 'a']

Alternatively, you can use the `reindex` method:

In [None]:
show_doc(ReindexCollection.reindex, title_level=6)

<h6 id="ReindexCollection.reindex" class="doc_header"><code>ReindexCollection.reindex</code><a href="__main__.py#L14" class="source_link" style="float:right">[source]</a></h6>

> <code>ReindexCollection.reindex</code>(**`idxs`**)

Replace `self.idxs` with idxs

In [None]:
rc=ReindexCollection(['a', 'b', 'c', 'd', 'e'])
rc.reindex([4,3,2,1,0])
list(rc)

['e', 'd', 'c', 'b', 'a']

#### LRU Cache

You can optionally specify a LRU cache, which uses [functools.lru_cache](https://docs.python.org/3/library/functools.html#functools.lru_cache) upon instantiation:

In [None]:
sz = 50
t = ReindexCollection(L.range(sz), cache=2)

#trigger a cache hit by indexing into the same element multiple times
t[0], t[0]
t._get.cache_info()

CacheInfo(hits=1, misses=1, maxsize=2, currsize=1)

You can optionally clear the LRU cache by calling the `cache_clear` method:

In [None]:
show_doc(ReindexCollection.cache_clear, title_level=5)

<h5 id="ReindexCollection.cache_clear" class="doc_header"><code>ReindexCollection.cache_clear</code><a href="__main__.py#L16" class="source_link" style="float:right">[source]</a></h5>

> <code>ReindexCollection.cache_clear</code>()

Clear LRU cache

In [None]:
sz = 50
t = ReindexCollection(L.range(sz), cache=2)

#trigger a cache hit by indexing into the same element multiple times
t[0], t[0]
t.cache_clear()
t._get.cache_info()

CacheInfo(hits=0, misses=0, maxsize=2, currsize=0)

In [None]:
show_doc(ReindexCollection.shuffle, title_level=5)

<h5 id="ReindexCollection.shuffle" class="doc_header"><code>ReindexCollection.shuffle</code><a href="__main__.py#L15" class="source_link" style="float:right">[source]</a></h5>

> <code>ReindexCollection.shuffle</code>()

Randomly shuffle indices

Note that an ordered index is automatically constructed for the data structure even if one is not supplied.

In [None]:
rc=ReindexCollection(['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h'])
rc.shuffle()
list(rc)

['c', 'a', 'f', 'g', 'h', 'd', 'e', 'b']

#### Tests

In [None]:
sz = 50
t = ReindexCollection(L.range(sz), cache=2)
test_eq(list(t), range(sz))
test_eq(t[sz-1], sz-1)
test_eq(t._get.cache_info().hits, 1)
t.shuffle()
test_eq(t._get.cache_info().hits, 1)
test_ne(list(t), range(sz))
test_eq(set(t), set(range(sz)))
t.cache_clear()
test_eq(t._get.cache_info().hits, 0)
test_eq(t.count(0), 1)

In [None]:
#hide
#Test ReindexCollection pickles
t1 = pickle.loads(pickle.dumps(t))
test_eq(list(t), list(t1))

In [None]:
#export
# Names of the numeric special methods, as lists of strings.
# Binary and unary operators:
num_methods = """
    __add__ __sub__ __mul__ __matmul__ __truediv__ __floordiv__ __mod__ __divmod__ __pow__
    __lshift__ __rshift__ __and__ __xor__ __or__ __neg__ __pos__ __abs__
""".split()
# Reflected (right-hand operand) variants:
rnum_methods = """
    __radd__ __rsub__ __rmul__ __rmatmul__ __rtruediv__ __rfloordiv__ __rmod__ __rdivmod__
    __rpow__ __rlshift__ __rrshift__ __rand__ __rxor__ __ror__
""".split()
# In-place (augmented assignment) variants:
inum_methods = """
    __iadd__ __isub__ __imul__ __imatmul__ __itruediv__
    __ifloordiv__ __imod__ __ipow__ __ilshift__ __irshift__ __iand__ __ixor__ __ior__
""".split()

## fastuple

A tuple with extended functionality.

In [None]:
#export
class fastuple(tuple):
    "A `tuple` with elementwise ops and more friendly __init__ behavior"
    def __new__(cls, x=None, *rest):
        if x is None: items = ()
        elif isinstance(x,tuple): items = x
        # With further positional args, a non-tuple first arg is always kept as a single element
        elif len(rest): items = (x,)
        else:
            # A lone non-tuple argument is unpacked if iterable, otherwise wrapped as a 1-tuple
            try: items = tuple(iter(x))
            except TypeError: items = (x,)
        if rest: items = items+rest
        return super().__new__(cls, items)

    def _op(self,op,*args):
        # Also callable unbound on a plain tuple (e.g. `fastuple.add((1,1),(2,2))`), hence the coercion
        if not isinstance(self,fastuple): self = fastuple(self)
        others = map(cycle, args)
        return type(self)(map(op,self,*others))

    def mul(self,*args):
        "`*` is already defined in `tuple` for replicating, so use `mul` instead"
        return fastuple._op(self, operator.mul,*args)

    def add(self,*args):
        "`+` is already defined in `tuple` for concat, so use `add` instead"
        return fastuple._op(self, operator.add,*args)

def _get_op(op):
    if isinstance(op,str): op = getattr(operator,op)
    def _f(self,*args): return self._op(op,*args)
    return _f

# Add the numeric dunders that `tuple` does not already define (so `+` and `*` keep their tuple meaning)
for n in num_methods:
    if not hasattr(fastuple, n) and hasattr(operator,n): setattr(fastuple,n,_get_op(n))

# Named elementwise comparisons, e.g. `fastuple(3,1).gt(1)`
for n in 'eq ne lt le gt ge'.split(): setattr(fastuple,n,_get_op(n))
# `~t` applies `operator.__not__` to every element
setattr(fastuple,'__invert__',_get_op('__not__'))
# Elementwise `max`/`min` against the given operand(s)
setattr(fastuple,'max',_get_op(max))
setattr(fastuple,'min',_get_op(min))

In [None]:
show_doc(fastuple, title_level=4)

<h4 id="fastuple" class="doc_header"><code>class</code> <code>fastuple</code><a href="" class="source_link" style="float:right">[source]</a></h4>

> <code>fastuple</code>(**`x`**=*`None`*, **\*`rest`**) :: `tuple`

A `tuple` with elementwise ops and more friendly __init__ behavior

#### Friendly init behavior

Common failure modes when trying to initialize a tuple in python:

```py
tuple(3)
> TypeError: 'int' object is not iterable
```

or 

```py
tuple(3, 4)
> TypeError: tuple expected at most 1 arguments, got 2
```

However, `fastuple` allows you to define tuples like this and in the usual way:

In [None]:
test_eq(fastuple(3), (3,))
test_eq(fastuple(3,4), (3, 4))
test_eq(fastuple((3,4)), (3, 4))

#### Elementwise operations

In [None]:
show_doc(fastuple.add, title_level=5)

<h5 id="fastuple.add" class="doc_header"><code>fastuple.add</code><a href="__main__.py#L21" class="source_link" style="float:right">[source]</a></h5>

> <code>fastuple.add</code>(**\*`args`**)

`+` is already defined in `tuple` for concat, so use `add` instead

In [None]:
test_eq(fastuple.add((1,1),(2,2)), (3,3))
test_eq_type(fastuple(1,1).add(2), fastuple(3,3))
test_eq(fastuple('1','2').add('2'), fastuple('12','22'))

In [None]:
show_doc(fastuple.mul, title_level=5)

<h5 id="fastuple.mul" class="doc_header"><code>fastuple.mul</code><a href="__main__.py#L17" class="source_link" style="float:right">[source]</a></h5>

> <code>fastuple.mul</code>(**\*`args`**)

`*` is already defined in `tuple` for replicating, so use `mul` instead

In [None]:
test_eq_type(fastuple(1,1).mul(2), fastuple(2,2))

#### Other Elementwise Operations

Additionally, the following elementwise operations are available:
- `le`: less than or equal to
- `eq`: equal
- `gt`: greater than
- `min`: minimum of

In [None]:
test_eq(fastuple(3,1).le(1), (False, True))
test_eq(fastuple(3,1).eq(1), (False, True))
test_eq(fastuple(3,1).gt(1), (True, False))
test_eq(fastuple(3,1).min(2), (2,1))

You can also do other elementwise operations like negate a `fastuple`, or subtract two `fastuple`s:

In [None]:
test_eq(-fastuple(1,2), (-1,-2))
test_eq(~fastuple(1,0,1), (False,True,False))

test_eq(fastuple(1,1)-fastuple(2,2), (-1,-1))

#### Other Tests

In [None]:
test_eq(type(fastuple(1)), fastuple)
test_eq_type(fastuple(1,2), fastuple(1,2))
test_ne(fastuple(1,2), fastuple(1,3))
test_eq(fastuple(), ())

## Functions on Functions

Utilities for functional programming or for defining, modifying, or debugging functions. 

In [None]:
#export
def trace(f):
    "Add `set_trace` to an existing function `f`"
    # Already wrapped: return it unchanged so repeated calls don't stack breakpoints
    if getattr(f, '_traced', False): return f
    # `wraps` keeps `f`'s name and docstring on the wrapper, so tracebacks and `help` stay readable
    @wraps(f)
    def _inner(*args,**kwargs):
        set_trace()
        return f(*args,**kwargs)
    _inner._traced = True
    return _inner

You can add a breakpoint to an existing function, e.g:

```python
Path.cwd = trace(Path.cwd)
Path.cwd()
```

Now, when the function is called it will drop you into the debugger.  Note, you must issue  the `s` command when you begin to step into the function that is being traced.

In [None]:
# export
def compose(*funcs, order=None):
    "Create a function that composes all functions in `funcs`, passing along remaining `*args` and `**kwargs` to all"
    funcs = L(funcs)
    n_funcs = len(funcs)
    # Trivial cases: nothing to compose, or a single function handed back untouched
    if n_funcs==0: return noop
    if n_funcs==1: return funcs[0]
    # `order` is forwarded to `L.sorted` as the sort key
    if order is not None: funcs = funcs.sorted(order)
    def _inner(x, *args, **kwargs):
        # Thread the value through each function in turn; the extra args go to every call
        res = x
        for fn in L(funcs): res = fn(res, *args, **kwargs)
        return res
    return _inner

In [None]:
f1 = lambda o,p=0: (o*2)+p
f2 = lambda o,p=1: (o+1)/p
test_eq(f2(f1(3)), compose(f1,f2)(3))
test_eq(f2(f1(3,p=3),p=3), compose(f1,f2)(3,p=3))
test_eq(f2(f1(3,  3),  3), compose(f1,f2)(3,  3))

f1.order = 1
test_eq(f1(f2(3)), compose(f1,f2, order="order")(3))

In [None]:
#export
def maps(*args, retain=noop):
    "Like `map`, except funcs are composed first"
    # All but the last positional argument are functions; the last is the iterable to map over
    composed = compose(*args[:-1])
    items = args[-1]
    # `retain` receives both the mapped value and the original item
    return map(lambda o: retain(composed(o), o), items)

In [None]:
test_eq(maps([1]), [1])
test_eq(maps(operator.neg, [1,2]), [-1,-2])
test_eq(maps(operator.neg, operator.neg, [1,2]), [1,2])

In [None]:
#export
def partialler(f, *args, order=None, **kwargs):
    "Like `functools.partial` but also copies over docstring"
    bound = partial(f,*args,**kwargs)
    bound.__doc__ = f.__doc__
    # An explicit `order` wins; otherwise inherit `f.order` when `f` has one, else leave it unset
    _unset = object()
    chosen = order if order is not None else getattr(f,'order',_unset)
    if chosen is not _unset: bound.order = chosen
    return bound

In [None]:
def _f(x,a=1):
    "test func"
    return x-a
_f.order=1

f = partialler(_f, 2)
test_eq(f.order, 1)
test_eq(f(3), -1)
f = partialler(_f, a=2, order=3)
test_eq(f.__doc__, "test func")
test_eq(f.order, 3)
test_eq(f(3), _f(3,2))

In [None]:
class partial0:
    "Like `partialler`, but args passed to callable are inserted at start, instead of at end"
    def __init__(self, f, *args, order=None, **kwargs):
        self.f,self.args,self.kwargs = f,args,kwargs
        # An explicit `order` wins; otherwise inherit `f.order` (or `None` if `f` has none)
        self.order = order if order is not None else getattr(f,'order',None)
        self.__doc__ = f.__doc__

    def __call__(self, *args, **kwargs):
        # Call-time positionals come first, stored ones after. Keywords are merged so a
        # call-time keyword overrides a stored one (as in `functools.partial`) rather than
        # raising `TypeError` for a duplicated keyword argument.
        return self.f(*args, *self.args, **{**self.kwargs, **kwargs})

In [None]:
f = partial0(_f, 2)
test_eq(f.order, 1)
test_eq(f(3), 1) # NB: different to `partialler` example

In [None]:
#export
def mapped(f, it):
    "map `f` over `it`, unless it's not listy, in which case return `f(it)`"
    # Single (non-listy) value: apply directly
    if not is_listy(it): return f(it)
    return L(it).map(f)

In [None]:
test_eq(mapped(_f,1),0)
test_eq(mapped(_f,[1,2]),[0,1])
test_eq(mapped(_f,(1,)),(0,))

In [None]:
#export
def instantiate(t):
    "Instantiate `t` if it's a type, otherwise do nothing"
    # Classes are called with no arguments; anything else is passed through unchanged
    if isinstance(t, type): return t()
    return t

In [None]:
test_eq_type(instantiate(int), 0)
test_eq_type(instantiate(1), 1)

In [None]:
#export
def _using_attr(f, attr, x): return f(getattr(x,attr))

In [None]:
#export
def using_attr(f, attr):
    "Change function `f` to operate on `attr`"
    # The result takes one object `x` and returns `f(getattr(x, attr))`
    bound = partial(_using_attr, f, attr)
    return bound

In [None]:
t = Path('/a/b.txt')
f = using_attr(str.upper, 'name')
test_eq(f(t), 'B.TXT')

### Self (with an _uppercase_ S)

A Concise Way To Create Lambdas

In [None]:
#export
class _Self:
    "An alternative to `lambda` for calling methods on passed object."
    # State: `nms` holds the recorded attribute names; `args`/`kwargs` hold the call arguments
    # recorded for each name (`None` marks a plain attribute access with no call).
    # `ready` is False while the most recent name still awaits its call arguments.
    def __init__(self): self.nms,self.args,self.kwargs,self.ready = [],[],[],True
    def __repr__(self): return f'self: {self.nms}({self.args}, {self.kwargs})'

    def __call__(self, *args, **kwargs):
        if self.ready:
            # Replay phase: `args[0]` is the target object; walk the recorded chain on it
            x = args[0]
            for n,a,k in zip(self.nms,self.args,self.kwargs):
                x = getattr(x,n)
                # Only call when a call was recorded for this name and the attribute is callable
                if callable(x) and a is not None: x = x(*a, **k)
            return x
        else:
            # Recording phase: these are the call arguments for the name just accessed.
            # Note that a chain ending in a bare attribute is still in this phase, so calling
            # it records the arguments instead of applying the chain.
            self.args.append(args)
            self.kwargs.append(kwargs)
            self.ready = True
            return self

    def __getattr__(self,k):
        # Only reached for names not found normally, i.e. the names being recorded
        if not self.ready:
            # The previous name was never called: record it as a plain attribute access
            self.args.append(None)
            self.kwargs.append(None)
        self.nms.append(k)
        self.ready = False
        return self

In [None]:
#export
class _SelfCls:
    "Entry point for `Self`: each attribute access or indexing starts a fresh `_Self` chain"
    # A new `_Self` per access, so separate `Self.x()` expressions never share recorded state
    def __getattr__(self,k): return getattr(_Self(),k)
    # `Self[i]` is recorded as a call to `__getitem__(i)` on the eventual target
    def __getitem__(self,i): return self.__getattr__('__getitem__')(i)

# The single shared instance exported from this module
Self = _SelfCls()

In [None]:
#export
_all_ = ['Self']

This is a concise way to create lambdas that are calling methods on an object (note the capitalization!)

`Self.sum()`, for instance, is a shortcut for `lambda o: o.sum()`.

In [None]:
f = Self.sum()
x = array([3.,1])
test_eq(f(x), 4.)

# This is equivalent to above
f = lambda o: o.sum()
x = array([3.,1])
test_eq(f(x), 4.)

f = Self.argmin()
arr = np.array([1,2,3,4,5])
test_eq(f(arr), arr.argmin())

f = Self.sum().is_integer()
x = array([3.,1])
test_eq(f(x), True)

f = Self.sum().real.is_integer()
x = array([3.,1])
test_eq(f(x), True)

f = Self.imag()
test_eq(f(3), 0)

f = Self[1]
test_eq(f(x), 1)

## Extensions to Pathlib.Path

An extension of the standard python library [Pathlib.Path](https://docs.python.org/3/library/pathlib.html#basic-use).  These extensions are accomplished by monkey patching additional methods onto `Pathlib.Path`.

In [None]:
#export
@patch
def readlines(self:Path, hint=-1, encoding='utf8'):
    "Read the lines of `self` as a list, decoding with `encoding`; `hint` is passed to the file's `readlines`"
    with self.open(encoding=encoding) as fh:
        lines = fh.readlines(hint)
    return lines

In [None]:
#export
@patch
def mk_write(self:Path, data, encoding=None, errors=None, mode=511):
    "Make all parent dirs of `self` (created with `mode`), then write text `data` to it"
    # 511 == 0o777; existing directories are left as they are
    parent = self.parent
    parent.mkdir(mode=mode, parents=True, exist_ok=True)
    self.write_text(data, errors=errors, encoding=encoding)

In [None]:
#export
@patch
def ls(self:Path, n_max=None, file_type=None, file_exts=None):
    "Contents of path as a list, optionally filtered by MIME `file_type` and/or `file_exts`, limited to `n_max` items"
    extns=L(file_exts)
    # Add every extension whose registered MIME type starts with `file_type/` (e.g. 'text/')
    if file_type: extns += L(k for k,v in mimetypes.types_map.items() if v.startswith(file_type+'/'))
    # With no extensions requested, every entry is kept
    keep_all = len(extns)==0
    def _keep(o): return keep_all or o.suffix in extns
    res = filter(_keep, self.iterdir())
    # The limit applies to the entries that passed the filter
    if n_max is not None: res = itertools.islice(res, n_max)
    return L(res)

We add an `ls()` method to `pathlib.Path` which is simply defined as `list(Path.iterdir())`, mainly for convenience in REPL environments such as notebooks.

In [None]:
path = Path()
t = path.ls()
assert len(t)>0
t1 = path.ls(10)
test_eq(len(t1), 10)
t2 = path.ls(file_exts='.ipynb')
assert len(t)>len(t2)
t[0]

Path('.gitattributes')

You can also pass an optional `file_type` MIME prefix and/or a list of file extensions.

In [None]:
lib_path = (path/'../fastcore')
txt_files=lib_path.ls(file_type='text')
assert len(txt_files) > 0 and txt_files[0].suffix=='.py'
ipy_files=path.ls(file_exts=['.ipynb'])
assert len(ipy_files) > 0 and ipy_files[0].suffix=='.ipynb'
txt_files[0],ipy_files[0]

(Path('../fastcore/all.py'), Path('00_test.ipynb'))

In [None]:
#hide
path = Path()
pkl = pickle.dumps(path)
p2 = pickle.loads(pkl)
test_eq(path.ls()[0], p2.ls()[0])

In [None]:
#export
def open_file(fn, mode='r'):
    "Open a file, with optional compression if gz or bz2 suffix"
    # Already-open file objects are handed back untouched
    if isinstance(fn, io.IOBase): return fn
    fn = Path(fn)
    # Choose the opener from the suffix; anything else uses the builtin `open`
    openers = {'.bz2': bz2.BZ2File, '.gz': gzip.GzipFile}
    opener = openers.get(fn.suffix, open)
    return opener(fn, mode)

In [None]:
#export
def save_pickle(fn, o):
    "Save a pickle file, to a file name or opened file"
    # The `with` block closes the file on exit, including a file object passed in by the caller
    with open_file(fn, 'wb') as fh:
        pickle.dump(o, fh)

In [None]:
#export
def load_pickle(fn):
    "Load a pickle file from a file name or opened file"
    # NOTE: unpickling can execute arbitrary code, so only load files from trusted sources
    with open_file(fn, 'rb') as fh:
        obj = pickle.load(fh)
    return obj

In [None]:
for suf in '.pkl','.bz2','.gz':
    with tempfile.NamedTemporaryFile(suffix=suf) as f:
        fn = Path(f.name)
        save_pickle(fn, 't')
        t = load_pickle(fn)
    test_eq(t,'t')

In [None]:
#export
@patch
def __repr__(self:Path):
    "Show the path relative to `Path.BASE_PATH` when that is set and contains `self`"
    b = getattr(Path, 'BASE_PATH', None)
    if b:
        # Best effort: paths outside `BASE_PATH` are shown in full.
        # `Exception` (not a bare `except`) so KeyboardInterrupt/SystemExit still propagate.
        try: self = self.relative_to(b)
        except Exception: pass
    return f"Path({self.as_posix()!r})"

fastai also updates the `repr` of `Path` such that, if `Path.BASE_PATH` is defined, all paths are printed relative to that path (as long as they are contained in `Path.BASE_PATH`):

In [None]:
t = ipy_files[0].absolute()
try:
    Path.BASE_PATH = t.parent.parent
    test_eq(repr(t), f"Path('nbs/{t.name}')")
finally: Path.BASE_PATH = None

## File Functions

Utilities (other than extensions to Pathlib.Path) for dealing with IO.

In [None]:
#export
def bunzip(fn):
    "bunzip `fn`, raising exception if output already exists"
    fn = Path(fn)
    assert fn.exists(), f"{fn} doesn't exist"
    # The output name is the input with its last suffix removed (`x.txt.bz2` -> `x.txt`)
    out_fn = fn.with_suffix('')
    assert not out_fn.exists(), f"{out_fn} already exists"
    chunk_sz = 1024*1024
    with bz2.BZ2File(fn, 'rb') as src, out_fn.open('wb') as dst:
        # Decompress in 1MiB chunks so large archives are never held fully in memory
        while True:
            chunk = src.read(chunk_sz)
            if chunk == b'': break
            dst.write(chunk)

In [None]:
f = Path('files/test.txt')
if f.exists(): f.unlink()
bunzip('files/test.txt.bz2')
t = f.open().readlines()
test_eq(len(t),1)
test_eq(t[0], 'test\n')
f.unlink()

In [None]:
#export
def join_path_file(file, path, ext=''):
    "Return `path/file` if file is a string or a `Path`, file otherwise"
    # Anything that isn't a name (e.g. an already-open file object) is passed straight through
    if not isinstance(file, (str, Path)): return file
    # Accept `path` as a plain string too; a `str` has no `.mkdir` and used to fail here
    if not isinstance(path, Path): path = Path(path)
    # Side effect: the target directory is created if it doesn't exist yet
    path.mkdir(parents=True, exist_ok=True)
    return path/f'{file}{ext}'

In [None]:
path = Path.cwd()/'_tmp'/'tst'
f = join_path_file('tst.txt', path)
assert path.exists()
test_eq(f, path/'tst.txt')
with open(f, 'w') as f_: assert join_path_file(f_, path) == f_
shutil.rmtree(Path.cwd()/'_tmp')

In [None]:
#export
def urlread(url, data=None, **kwargs):
    "Retrieve `url`, using `data` dict or `kwargs` to `POST` if present"
    # Keyword arguments act as the form data when no (non-empty) `data` was given
    if kwargs and not data: data=kwargs
    if data is not None:
        # Mappings are form-encoded; the result must be bytes for `urlopen`
        body = data if isinstance(data, (str,bytes)) else urlencode(data)
        data = body if isinstance(body, bytes) else body.encode('ascii')
    req_cls = urllib.request.Request
    req = url if isinstance(url,req_cls) else req_cls(url)
    # Always sent with a browser-like User-Agent (this also sets it on a `Request` passed by the caller)
    req.headers['User-Agent'] = 'Mozilla/5.0'
    with urlopen(req, data=data) as res:
        return res.read()

In [None]:
#export
def urljson(url, data=None):
    "Retrieve `url` and decode json"
    raw = urlread(url, data=data)
    return json.loads(raw)

In [None]:
#export
def run(cmd, *rest, ignore_ex=False, as_bytes=False):
    "Pass `cmd` (splitting with `shlex` if string) to `subprocess.run`; return `stdout`; raise `IOError` if fails"
    # Three call styles: separate args, one shell-like string, or a ready-made sequence
    if rest: argv = (cmd,)+rest
    elif isinstance(cmd,str): argv = shlex.split(cmd)
    else: argv = cmd
    proc = subprocess.run(argv, capture_output=True)
    out = proc.stdout if as_bytes else proc.stdout.decode()
    # With `ignore_ex` a failing command is reported as `(returncode, stdout)` instead of raising
    if ignore_ex: return (proc.returncode, out)
    if proc.returncode != 0: raise IOError("{} ;; {}".format(proc.stdout, proc.stderr))
    return out

You can pass a string (which will be split based on standard shell rules), a list, or pass args directly:

In [None]:
assert 'ipynb' in run('ls -l')
assert 'ipynb' in run(['ls', '-l'])
assert 'ipynb' in run('ls', '-l')

Some commands fail in non-error situations, like `grep`. Use `ignore_ex` in those cases, which will return a tuple of returncode and stdout:

In [None]:
test_eq(run('grep asdfds 00_test.ipynb', ignore_ex=True)[0], 1)

`run` automatically decodes returned bytes to a `str`. Use `as_bytes` to skip that:

In [None]:
test_eq(run('echo hi', as_bytes=True), b'hi\n')

In [None]:
#export
def do_request(url, post=False, headers=None, **data):
    "Call GET or json-encoded POST on `url`, depending on `post`"
    if data:
        # POST sends the keyword arguments as a JSON body; GET puts them in the query string
        if post: data = json.dumps(data).encode('ascii')
        else:
            url += "?" + urlencode(data)
            data = None
    # `Request` iterates over `headers.items()`, so the `None` default must become an empty
    # dict; `data or None` likewise turns an empty `data` dict into "no body"
    return urljson(Request(url, headers=headers or {}, data=data or None))

## Sorting Objects From Before/After

Transforms and callbacks will have run_after/run_before attributes, this function will sort them to respect those requirements (if it's possible). Also, sometimes we want a transform/callback to be run at the end, but still be able to use run_after/run_before behaviors. For those, the function checks for a toward_end attribute (that needs to be True).

In [None]:
#export
def _is_instance(f, gs):
    tst = [g if type(g) in [type, 'function'] else g.__class__ for g in gs]
    for g in tst:
        if isinstance(f, g) or f==g: return True
    return False

def _is_first(f, gs):
    "True if `f` may come before everything in `gs`, given `f.run_after` and each item's `run_before`"
    # `f` must wait if anything it has to run after is still in `gs`
    if any(_is_instance(o, gs) for o in L(getattr(f, 'run_after', None))): return False
    # ... or if some item in `gs` declares that it must run before `f`
    return not any(_is_instance(f, L(getattr(g, 'run_before', None))) for g in gs)

def sort_by_run(fs):
    "Sort `fs` so that `run_before`/`run_after` requirements hold, starting with `toward_end` items placed last"
    end = L(fs).attrgot('toward_end')
    # Candidates are scanned in this order, so `toward_end` items are only picked once nothing else can go first
    inp,res = L(fs)[~end] + L(fs)[end], L()
    while len(inp):
        # Index of the first remaining item that nothing else still has to precede
        nxt = next((i for i,o in enumerate(inp) if _is_first(o, inp)), None)
        # No candidate at all means the requirements are circular
        if nxt is None: raise Exception("Impossible to sort")
        res.append(inp.pop(nxt))
    return res

In [None]:
class Tst(): pass    
class Tst1():
    run_before=[Tst]
class Tst2():
    run_before=Tst
    run_after=Tst1
    
tsts = [Tst(), Tst1(), Tst2()]
test_eq(sort_by_run(tsts), [tsts[1], tsts[2], tsts[0]])

Tst2.run_before,Tst2.run_after = Tst1,Tst
test_fail(lambda: sort_by_run([Tst(), Tst1(), Tst2()]))

def tst1(x): return x
tst1.run_before = Tst
test_eq(sort_by_run([tsts[0], tst1]), [tst1, tsts[0]])
    
class Tst1():
    toward_end=True
class Tst2():
    toward_end=True
    run_before=Tst1
tsts = [Tst(), Tst1(), Tst2()]
test_eq(sort_by_run(tsts), [tsts[0], tsts[2], tsts[1]])

## Other Helpers

In [None]:
#export
class PrettyString(str):
    "Little hack to get strings to show properly in Jupyter."
    # The repr is the raw text itself (no quotes, no escaping), so newlines and tabs display as-is
    def __repr__(self): return self

In [None]:
show_doc(PrettyString, title_level=4)

<h4 id="PrettyString" class="doc_header"><code>class</code> <code>PrettyString</code><a href="" class="source_link" style="float:right">[source]</a></h4>

> <code>PrettyString</code>() :: `str`

Little hack to get strings to show properly in Jupyter.

Allow strings with special characters to render properly in Jupyter.  Without calling `print()` strings with special characters are displayed like so:

In [None]:
with_special_chars='a string\nwith\nnew\nlines and\ttabs'
with_special_chars

'a string\nwith\nnew\nlines and\ttabs'

We can correct this with `PrettyString`:

In [None]:
PrettyString(with_special_chars)

a string
with
new
lines and	tabs

In [None]:
#export
def round_multiple(x, mult, round_down=False):
    "Round `x` to nearest multiple of `mult` (`round_down` uses `int` instead of `round`)"
    rounder = int if round_down else round
    res = L(x).map(lambda x_: rounder(x_/mult)*mult)
    # Listy inputs get a collection back, a single value gets a single value
    if is_listy(x): return res
    return res[0]

In [None]:
test_eq(round_multiple(63,32), 64)
test_eq(round_multiple(50,32), 64)
test_eq(round_multiple(40,32), 32)
test_eq(round_multiple( 0,32),  0)
test_eq(round_multiple(63,32, round_down=True), 32)
test_eq(round_multiple((63,40),32), (64,32))

In [None]:
#export
def even_mults(start, stop, n):
    "Build log-stepped array from `start` to `stop` in `n` steps."
    # A single step is just the end point (returned as a scalar, not a list)
    if n==1: return stop
    ratio = stop/start
    # Constant multiplier between consecutive values, so that `n-1` steps span the full ratio
    step = ratio**(1/(n-1))
    vals = []
    for i in range(n): vals.append(start*(step**i))
    return vals

In [None]:
test_eq(even_mults(2,8,3), [2,4,8])
test_eq(even_mults(2,32,5), [2,4,8,16,32])
test_eq(even_mults(2,8,1), 8)

In [None]:
#export
def num_cpus():
    "Get number of cpus"
    # Use the CPUs this process may run on where the OS exposes that, else the machine's total count
    affinity = getattr(os, 'sched_getaffinity', None)
    if affinity is None: return os.cpu_count()
    return len(affinity(0))

# Computed once at import time; used as the default worker count by the parallel helpers in this module
defaults.cpus = num_cpus()

In [None]:
num_cpus()

8

In [None]:
#export
def add_props(f, g=None, n=2):
    "Create properties passing each of `range(n)` to f"
    def _prop(i):
        # The index is bound as the first argument of the getter `f` (and of the setter `g`, if given)
        getter = partial(f,i)
        return property(getter) if g is None else property(getter, partial(g,i))
    return (_prop(i) for i in range(n))

In [None]:
class _T(): a,b = add_props(lambda i,x:i*2)

t = _T()
test_eq(t.a,0)
test_eq(t.b,2)

In [None]:
class _T(): 
    def __init__(self, v): self.v=v
    def _set(i, self, v): self.v[i] = v
    a,b = add_props(lambda i,x: x.v[i], _set)

t = _T([0,2])
test_eq(t.a,0)
test_eq(t.b,2)
t.a = t.a+1
t.b = 3
test_eq(t.a,1)
test_eq(t.b,3)

In [None]:
# export
from contextlib import ExitStack

In [None]:
#export
class ContextManagers(GetAttr):
    "Wrapper for `contextlib.ExitStack` which enters a collection of context managers"
    def __init__(self, mgrs): self.default,self.stack = L(mgrs),ExitStack()
    def __enter__(self):
        # Enter every manager in order, registering each on the stack for later exit
        self.default.map(self.stack.enter_context)
        # Return the wrapper so `with ContextManagers(...) as cms:` binds something useful
        return self
    def __exit__(self, *args, **kwargs):
        # Propagate the stack's result: a true value means an inner manager suppressed the
        # exception, which was previously lost because nothing was returned
        return self.stack.__exit__(*args, **kwargs)

In [None]:
show_doc(ContextManagers, title_level=4)

<h4 id="ContextManagers" class="doc_header"><code>class</code> <code>ContextManagers</code><a href="" class="source_link" style="float:right">[source]</a></h4>

> <code>ContextManagers</code>(**`mgrs`**) :: [`GetAttr`](/foundation.html#GetAttr)

Wrapper for `contextlib.ExitStack` which enters a collection of context managers

In [None]:
#export
def _typeerr(arg, val, typ): return TypeError(f"{arg}=={val} not {typ}")

In [None]:
#export
def typed(f):
    "Decorator to check param and return types at runtime"
    # Only real parameters can be bound positionally; `co_varnames` also lists locals after them
    names = f.__code__.co_varnames[:f.__code__.co_argcount]
    # Work on a copy so that popping 'return' doesn't strip the annotation from `f` itself
    anno = dict(f.__annotations__)
    ret = anno.pop('return',None)
    def _f(*args,**kwargs):
        kw = {**kwargs}
        if len(anno) > 0:
            # Map positional values to their parameter names (extras are left for `f` to reject)
            kw.update(zip(names, args))
            for k,v in kw.items():
                # Unannotated parameters are skipped rather than raising `KeyError`
                if k in anno and not isinstance(v,anno[k]): raise _typeerr(k, v, anno[k])
        res = f(*args,**kwargs)
        # Defaults that are not passed explicitly are not checked; the return value is
        if ret is not None and not isinstance(res,ret): raise _typeerr("return", res, ret)
        return res
    return functools.update_wrapper(_f, f)

`typed` validates argument types at **runtime**.  This is in contrast to [MyPy](http://mypy-lang.org/) which only offers static type checking.

For example, a `TypeError` will be raised if we try to pass an integer into the first argument of the below function: 

In [None]:
@typed
def discount(price:int, pct:float): 
    return (1-pct) * price

with ExceptionExpected(TypeError): discount(100.0, .1)

We can also optionally allow multiple types by enumerating the types in a tuple as illustrated below:

In [None]:
# Decorated with `typed` so the tuple-of-types annotation is actually checked by the call below
@typed
def discount(price:(int,float), pct:float): 
    return (1-pct) * price

assert 90.0 == discount(100.0, .1)

In [None]:
@typed
def foo(a:int, b:str='a'): return a
test_eq(foo(1, '2'), 1)

with ExceptionExpected(TypeError): foo(1,2)

@typed
def foo()->str: return 1
with ExceptionExpected(TypeError): foo()

@typed
def foo()->str: return '1'
assert foo()

In [None]:
#export
def str2bool(s):
    "Case-insensitive convert string `s` to a bool (`y`,`yes`,`t`,`true`,`on`,`1`->`True`)"
    # Non-strings use ordinary truthiness; the empty string is False
    if not isinstance(s,str): return bool(s)
    if not s: return False
    # Implemented inline with the same accepted words as `distutils.util.strtobool`,
    # because `distutils` is no longer part of the standard library from Python 3.12
    val = s.lower()
    if val in ('y','yes','t','true','on','1'): return True
    if val in ('n','no','f','false','off','0'): return False
    raise ValueError(f"invalid truth value {val!r}")

In [None]:
for o in "y YES t True on 1".split(): assert str2bool(o)
for o in "n no FALSE off 0".split(): assert not str2bool(o)
for o in 0,None,'',False: assert not str2bool(o)
for o in 1,True: assert str2bool(o)

## Multiprocessing

In [None]:
#export
from multiprocessing import Process, Queue
import concurrent.futures
import time
from multiprocessing import Manager

In [None]:
#export
def set_num_threads(nt):
    "Get numpy (and others) to use `nt` threads"
    # Both libraries are optional: skip them if they are missing or fail, but use
    # `Exception` (not a bare `except`) so KeyboardInterrupt/SystemExit still propagate
    try: import mkl; mkl.set_num_threads(nt)
    except Exception: pass
    try: import torch; torch.set_num_threads(nt)
    except Exception: pass
    os.environ['IPC_ENABLE']='1'
    # Environment variables must be strings
    for o in ['OPENBLAS_NUM_THREADS','NUMEXPR_NUM_THREADS','OMP_NUM_THREADS','MKL_NUM_THREADS']:
        os.environ[o] = str(nt)

This sets the number of threads consistently for many tools, by:

1. Set the following environment variables equal to `nt`: `OPENBLAS_NUM_THREADS`,`NUMEXPR_NUM_THREADS`,`OMP_NUM_THREADS`,`MKL_NUM_THREADS`
2. Sets `nt` threads for numpy and pytorch.

In [None]:
#export
def _call(lock, pause, n, g, item):
    l = False
    if pause:
        try:
            l = lock.acquire(timeout=pause*(n+2))
            time.sleep(pause)
        finally:
            if l: lock.release()
    return g(item)

In [None]:
#export
class ProcessPoolExecutor(concurrent.futures.ProcessPoolExecutor):
    "Same as Python's ProcessPoolExecutor, except can pass `max_workers==0` for serial execution"
    def __init__(self, max_workers=defaults.cpus, on_exc=print, pause=0, **kwargs):
        if max_workers is None: max_workers=defaults.cpus
        # NOTE(review): `store_attr()` presumably saves `max_workers`, `on_exc` and `pause` on
        # `self` (they are read back in `map`) — keep it before `max_workers` is overwritten below
        store_attr()
        self.not_parallel = max_workers==0
        # The stdlib pool rejects 0 workers, so it is built with 1 and simply bypassed in `map`
        if self.not_parallel: max_workers=1
        super().__init__(max_workers, **kwargs)

    def map(self, f, items, timeout=None, chunksize=1, *args, **kwargs):
        "Map `f` (with `args`/`kwargs` bound) over `items`: builtin `map` if serial, else via the pool"
        g = partial(f, *args, **kwargs)
        # Serial mode needs no lock, so return before a `Manager` server process is started
        if self.not_parallel: return map(g, items)
        self.lock = Manager().Lock()
        # `_call` holds the shared lock for `pause` seconds before each call, staggering the workers
        _g = partial(_call, self.lock, self.pause, self.max_workers, g)
        try: return super().map(_g, items, timeout=timeout, chunksize=chunksize)
        except Exception as e: self.on_exc(e)

In [None]:
#export
class ThreadPoolExecutor(concurrent.futures.ThreadPoolExecutor):
    "Same as Python's ThreadPoolExecutor, except can pass `max_workers==0` for serial execution"
    def __init__(self, max_workers=defaults.cpus, on_exc=print, pause=0, **kwargs):
        if max_workers is None: max_workers=defaults.cpus
        # NOTE(review): `store_attr()` presumably saves `max_workers`, `on_exc` and `pause` on
        # `self` (they are read back in `map`) — keep it before `max_workers` is overwritten below
        store_attr()
        self.not_parallel = max_workers==0
        # The stdlib pool rejects 0 workers, so it is built with 1 and simply bypassed in `map`
        if self.not_parallel: max_workers=1
        super().__init__(max_workers, **kwargs)

    def map(self, f, items, timeout=None, chunksize=1, *args, **kwargs):
        "Map `f` (with `args`/`kwargs` bound) over `items`: builtin `map` if serial, else via the pool"
        g = partial(f, *args, **kwargs)
        # Serial mode needs no lock, so return before a `Manager` server process is started
        if self.not_parallel: return map(g, items)
        self.lock = Manager().Lock()
        # `_call` holds the shared lock for `pause` seconds before each call, staggering the workers
        _g = partial(_call, self.lock, self.pause, self.max_workers, g)
        try: return super().map(_g, items, timeout=timeout, chunksize=chunksize)
        except Exception as e: self.on_exc(e)

In [None]:
show_doc(ProcessPoolExecutor, title_level=4)

<h4 id="ProcessPoolExecutor" class="doc_header"><code>class</code> <code>ProcessPoolExecutor</code><a href="" class="source_link" style="float:right">[source]</a></h4>

> <code>ProcessPoolExecutor</code>(**`max_workers`**=*`4`*, **`on_exc`**=*`print`*, **`pause`**=*`0`*, **\*\*`kwargs`**) :: [`ProcessPoolExecutor`](/utils.html#ProcessPoolExecutor)

Same as Python's ProcessPoolExecutor, except can pass `max_workers==0` for serial execution

In [None]:
#export
try: from fastprogress import progress_bar
except: progress_bar = None

In [None]:
#export 
def parallel(f, items, *args, n_workers=defaults.cpus, total=None, progress=None, pause=0,
             threadpool=False, timeout=None, chunksize=1, **kwargs):
    "Applies `f` in parallel to `items`, using `n_workers`"
    # Show a progress bar by default only when `fastprogress` could be imported
    if progress is None: progress = progress_bar is not None
    pool = ThreadPoolExecutor if threadpool else ProcessPoolExecutor
    # Bind the extra arguments here: the executors' `map` takes `timeout` and `chunksize`
    # positionally before `*args`, so forwarding positional extras would collide with the
    # keywords passed below and raise `TypeError`
    g = partial(f, *args, **kwargs)
    with pool(n_workers, pause=pause) as ex:
        r = ex.map(g, items, timeout=timeout, chunksize=chunksize)
        if progress:
            # `items` must support `len` unless `total` is given
            if total is None: total = len(items)
            r = progress_bar(r, total=total, leave=False)
        # Results are collected inside the `with`, while the pool is still alive
        return L(r)

In [None]:
def add_one(x, a=1): 
    time.sleep(random.random()/80)
    return x+a

inp,exp = range(50),range(1,51)
test_eq(parallel(add_one, inp, n_workers=2, progress=False), exp)
test_eq(parallel(add_one, inp, threadpool=True, n_workers=2, progress=False), exp)
test_eq(parallel(add_one, inp, n_workers=0), exp)
test_eq(parallel(add_one, inp, n_workers=1, a=2), range(2,52))
test_eq(parallel(add_one, inp, n_workers=0, a=2), range(2,52))

Use the `pause` parameter to ensure a pause of `pause` seconds between processes starting. This is in case there are race conditions in starting some process, or to stagger the time each process starts, for example when making many requests to a webserver. Set `threadpool=True` to use `ThreadPoolExecutor` instead of `ProcessPoolExecutor`.

In [None]:
from datetime import datetime

In [None]:
def print_time(i): 
    time.sleep(random.random()/1000)
    print(i, datetime.now())

parallel(print_time, range(5), n_workers=2, pause=0.25);

1 2020-10-12 21:48:54.012317
0 2020-10-12 21:48:54.265181
2 2020-10-12 21:48:54.519284
3 2020-10-12 21:48:54.773004
4 2020-10-12 21:48:55.026344


Note that `f` should accept a collection of items.

In [None]:
#export
def run_procs(f, f_done, args):
    "Call `f` for each item in `args` in parallel, yielding `f_done`"
    # NOTE(review): `arg0` presumably makes `L.map` pass each item as that process's `args` — confirm
    processes = L(args).map(Process, args=arg0, target=f)
    for proc in processes: proc.start()
    # Hand results to the caller while the workers run; they are joined once `f_done` is exhausted
    yield from f_done()
    for proc in processes: proc.join()

In [None]:
#export
def _f_pg(obj, queue, batch, start_idx):
    for i,b in enumerate(obj(batch)): queue.put((start_idx+i,b))

def _done_pg(queue, items): return (queue.get() for _ in items)

In [None]:
#export 
def parallel_gen(cls, items, n_workers=defaults.cpus, **kwargs):
    "Instantiate `cls` in `n_workers` procs & call each on a subset of `items` in parallel."
    # Serial path: run everything in this process and yield `(index, result)` pairs in order
    if n_workers==0:
        yield from enumerate(list(cls(**kwargs)(items)))
        return
    # One chunk of `items` per worker
    batches = L(chunked(items, n_chunks=n_workers))
    # NOTE(review): `0 + ...` presumably prepends 0, so the running sum gives each batch's
    # starting index in `items` — confirm against `L.__radd__`
    idx = L(itertools.accumulate(0 + batches.map(len)))
    queue = Queue()
    # Wrapping `items` makes the progress bar advance as `_done_pg` takes one result per item
    if progress_bar: items = progress_bar(items, leave=False)
    # `cls` is instantiated once here; each process is then given `(batch, start_idx)`
    f=partial(_f_pg, cls(**kwargs), queue)
    done=partial(_done_pg, queue, items)
    # Results are yielded as `(index, result)` in the order they come off the queue
    yield from run_procs(f, done, L(batches,idx).zip())

In [None]:
class _C:
    def __call__(self, o): return ((i+1) for i in o)

items = range(5)

res = L(parallel_gen(_C, items, n_workers=3))
idxs,dat1 = zip(*res.sorted(itemgetter(0)))
test_eq(dat1, range(1,6))

res = L(parallel_gen(_C, items, n_workers=0))
idxs,dat2 = zip(*res.sorted(itemgetter(0)))
test_eq(dat2, dat1)

`cls` is any class with `__call__`. It will be passed `kwargs` when initialized. Note that `n_workers` instances of `cls` are created, one in each process. `items` are then split in `n_workers` batches and one is sent to each `cls`. The function then returns a generator of tuples of item indices and results.

In [None]:
class TestSleepyBatchFunc:
    "For testing parallel processes that run at different speeds"
    def __init__(self): self.a=1
    def __call__(self, batch):
        for k in batch:
            time.sleep(random.random()/4)
            yield k+self.a

x = np.linspace(0,0.99,20)
res = L(parallel_gen(TestSleepyBatchFunc, x, n_workers=2))
test_eq(res.sorted().itemgot(1), x+1)

In [None]:
#export
def threaded(f):
    "Run `f` in a thread, and returns the thread"
    @wraps(f)
    def _f(*args, **kwargs):
        # Started immediately; the caller gets the `Thread` back (e.g. to `join`), not `f`'s result
        thread = Thread(target=f, kwargs=kwargs, args=args)
        thread.start()
        return thread
    return _f

## Notebook functions

In [None]:
show_doc(ipython_shell)

<h4 id="ipython_shell" class="doc_header"><code>ipython_shell</code><a href="https://github.com/fastai/fastcore/tree/master/fastcore/imports.py#L66" class="source_link" style="float:right">[source]</a></h4>

> <code>ipython_shell</code>()

Same as `get_ipython` but returns `False` if not in IPython

In [None]:
show_doc(in_ipython)

<h4 id="in_ipython" class="doc_header"><code>in_ipython</code><a href="https://github.com/fastai/fastcore/tree/master/fastcore/imports.py#L71" class="source_link" style="float:right">[source]</a></h4>

> <code>in_ipython</code>()

Check if code is running in some kind of IPython environment

In [None]:
show_doc(in_colab)

<h4 id="in_colab" class="doc_header"><code>in_colab</code><a href="https://github.com/fastai/fastcore/tree/master/fastcore/imports.py#L75" class="source_link" style="float:right">[source]</a></h4>

> <code>in_colab</code>()

Check if the code is running in Google Colaboratory

In [None]:
show_doc(in_jupyter)

<h4 id="in_jupyter" class="doc_header"><code>in_jupyter</code><a href="https://github.com/fastai/fastcore/tree/master/fastcore/imports.py#L79" class="source_link" style="float:right">[source]</a></h4>

> <code>in_jupyter</code>()

Check if the code is running in a jupyter notebook

In [None]:
show_doc(in_notebook)

<h4 id="in_notebook" class="doc_header"><code>in_notebook</code><a href="https://github.com/fastai/fastcore/tree/master/fastcore/imports.py#L84" class="source_link" style="float:right">[source]</a></h4>

> <code>in_notebook</code>()

Check if the code is running in a jupyter notebook

These variables are available as booleans in `fastcore.utils` as `IN_IPYTHON`, `IN_JUPYTER`, `IN_COLAB` and `IN_NOTEBOOK`.

In [None]:
IN_IPYTHON, IN_JUPYTER, IN_COLAB, IN_NOTEBOOK

(True, True, False, True)

# Export -

In [None]:
#hide
from nbdev.export import notebook2script
notebook2script()

Converted 00_test.ipynb.
Converted 01_foundation.ipynb.
Converted 02_utils.ipynb.
Converted 03_dispatch.ipynb.
Converted 04_transform.ipynb.
Converted 05_logargs.ipynb.
Converted 06_meta.ipynb.
Converted 07_script.ipynb.
Converted index.ipynb.
