In [3]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [53]:
from bz2 import BZ2File
import json
import os
import pickle

from htools import InvalidArgumentError, debug_call, load

In [109]:
def validating_descriptor(func, allow_del=False):
    """Factory for properties that validate a value before storing it.

    The managed value lives in the instance dictionary under the attribute's
    own name. Reads return that stored value; writes are accepted only when
    `func(val)` is truthy.

    Parameters
    ----------
    func: function
        Function or lambda that accepts a single parameter (the candidate
        value) and returns a truthy result when the value is acceptable.
    allow_del: bool
        If True, allow the attribute to be deleted. Otherwise the returned
        property has no deleter and `del obj.attr` raises AttributeError.

    Returns
    -------
    function: Accepts the attribute name (str) and returns a `property`
        that should be assigned to a class attribute of that same name.

    Examples
    --------
    >>> positive = validating_descriptor(lambda x: x > 0)
    >>> class Account:
    ...     balance = positive('balance')
    >>> acct = Account()
    >>> acct.balance = 10
    >>> acct.balance
    10
    """
    def descriptor(name):
        @property
        def method(instance):
            # A property with a setter is a data descriptor and therefore
            # takes precedence over the instance dict, so the stored value
            # has to be fetched explicitly.
            try:
                return instance.__dict__[name]
            except KeyError:
                # Keep hasattr()/getattr(default) working for unset values.
                raise AttributeError(name) from None

        @method.setter
        def method(instance, val):
            if func(val):
                instance.__dict__[name] = val
            else:
                raise ValueError(f'Invalid value {val} for argument {name}.')

        if allow_del:
            @method.deleter
            def method(instance):
                try:
                    del instance.__dict__[name]
                except KeyError:
                    raise AttributeError(name) from None
        return method
    return descriptor

In [7]:
# str.format_map fills the named placeholders straight from a mapping.
d = {'a': 1, 'b': 2}
'a={a}, b={b}'.format_map(d)

'a=1, b=2'

In [8]:
ls ../data

d.pkl          lines100.txt   model_e1.pth   [34mnew[m[m/
d.zip          lines5000.txt  model_e2.pth


In [107]:
def _read_write_args(path, mode):
    """Helper for `save` and `load` functions.
    
    Parameters
    ----------
    path: str
        Path to read/write object from/to.
    mode: str
        'w' for writing files (as in `save`), 'r' for reading files 
        (as in `load`).
    
    Returns
    -------
    tuple: Function to open file, mode to open file with (str), object to open
        file with.
    """
    ext = path.rpartition('.')[-1]
    if ext not in {'json', 'pkl', 'zip'}:
        raise InvalidArgumentError(
            'Invalid extension. Make sure your filename ends with .json, '
            '.pkl, or .zip.'
        )
        
    # Store in dict to make it easier to add additional formats in future.
    ext2data = {'pkl': (open, 'b', pickle), 
                'zip': (BZ2File, '', pickle), 
                'json': (open, '', json)}
    opener, mode_suffix, saver = ext2data[ext]
    return opener, mode + mode_suffix, saver

In [77]:
@debug_call
def save(obj, path, verbose=True):
    """Wrapper to quickly save an object to disk.

    Parameters
    -----------
    obj: any
        Object to save. Must be picklable for .pkl/.zip output, or JSON
        serializable for .json output.
    path: str
        File name to save the object to. Should end with .pkl, .zip, or
        .json depending on desired output format. If .zip is used, the
        object is pickled into a bz2-compressed file. Missing parent
        directories are created.
    verbose: bool
        If True, print a message confirming that the data was written, along
        with its path.

    Returns
    -------
    None
    """
    # Resolve the format first so an unsupported extension fails before any
    # directory is created as a side effect.
    opener, mode, saver = _read_write_args(path, 'w')

    # dirname is '' for a bare filename, and os.makedirs('') raises
    # FileNotFoundError, so only create directories when there is one.
    out_dir = os.path.dirname(path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    with opener(path, mode) as f:
        saver.dump(obj, f)
    if verbose:
        print(f'Data written to {path}.')

In [105]:
@debug_call
def load_v2(path, verbose=True):
    """Read an object back from a .pkl, .zip, or .json file.

    Unpickling can execute arbitrary code, so only load .pkl/.zip files
    from sources you trust.

    Parameters
    ----------
    path : str
        File to load. File type will be inferred from extension.
    verbose : bool, optional
        If True, will print message stating where object was loaded from.

    Returns
    -------
    object: The Python object that was stored in the specified file.
    """
    open_fn, open_mode, serializer = _read_write_args(path, 'r')
    with open_fn(path, open_mode) as handle:
        loaded = serializer.load(handle)
    if verbose:
        print(f'Object loaded from {path}.')
    return loaded

In [84]:
# Keep the loaded object: without the assignment, `d` below would show
# whatever an earlier cell left in the kernel rather than the file contents.
d = load_v2(os.path.join('..', 'data', 'd.pkl'))
d

load_v2(path='../data/d.pkl', verbose=True)
Object loaded from ../data/d.pkl.


{'a': 3, 'c': 5, 'e': 7}

In [89]:
# Build an extended copy of `d` (leaving `d` itself untouched) and pickle it.
d2 = {**d, 'z': 222}
save(d2, os.path.join('..', 'data', 'd2.pkl'))

save(obj={'a': 3, 'c': 5, 'e': 7, 'z': 222}, path='../data/d2.pkl', verbose=True)
Data written to ../data/d2.pkl.


In [90]:
ls ../data

d.pkl          d2.pkl         lines100.txt   model_e1.pth   [34mnew[m[m/
d.zip          d4.json        lines5000.txt  model_e2.pth


In [92]:
# Read the pickle back in; the displayed dict should match what was saved.
d2 = load_v2(os.path.join('..', 'data', 'd2.pkl'))
d2

load_v2(path='../data/d2.pkl', verbose=True)
Object loaded from ../data/d2.pkl.


{'a': 3, 'c': 5, 'e': 7, 'z': 222}

In [94]:
# Extended copy of `d2`, used to exercise the .zip format.
d3 = {**d2, 'm': 999}
d3

{'a': 3, 'c': 5, 'e': 7, 'z': 222, 'm': 999}

In [95]:
# Write the dict as a bz2-compressed pickle, without the confirmation message.
save(d3, os.path.join('..', 'data', 'new', 'd3.zip'), verbose=False)

save(obj={'a': 3, 'c': 5, 'e': 7, 'z': 222, 'm': 999}, path='../data/new/d3.zip', verbose=False)


In [96]:
!ls ../data/new

d.zip  d3.zip


In [98]:
# Read the compressed pickle back, again without the status message.
d3 = load_v2(os.path.join('..', 'data', 'new', 'd3.zip'), verbose=False)
d3

load_v2(path='../data/new/d3.zip', verbose=False)


{'a': 3, 'c': 5, 'e': 7, 'z': 222, 'm': 999}

In [65]:
# Extended copy of `d3`, used to exercise the .json format.
d4 = {**d3, '4': 4444}
d4

{'a': 3, 'c': 5, 'e': 7, 'z': 222, 'm': 999, '4': 4444}

In [101]:
# The .json extension makes `save` serialize with the json module.
save(d4, os.path.join('..', 'data', 'd4.json'))

save(obj={'a': 3, 'c': 5, 'e': 7, 'z': 222, 'm': 999, '4': 4444}, path='../data/d4.json', verbose=True)
Data written to ../data/d4.json.


In [103]:
# Load the JSON file into a separate name so it can be compared with `d4`.
d4_ = load_v2(os.path.join('..', 'data', 'd4.json'))
d4_

load_v2(path='../data/d4.json', verbose=True)
Object loaded from ../data/d4.json.


{'a': 3, 'c': 5, 'e': 7, 'z': 222, 'm': 999, '4': 4444}

In [104]:
# Round-trip check: the object read back from JSON equals the one saved.
d4 == d4_

True

## Timing generator expressions vs. list comprehensions

In [113]:
from htools import magics

In [114]:
def process_listcomps(n):
    """Run a five-stage pipeline where every stage is a list comprehension.

    Each stage builds a complete list before the next stage starts. This is
    the eager counterpart to `process_gencomps` and is written stage by
    stage on purpose so the two can be timed against each other.

    Parameters
    ----------
    n: int
        Number of items to push through the pipeline (the integers in
        range(n)).

    Returns
    -------
    list[str]: For each integer in range(n), the first character of its
        string form.
    """
    arr = [i for i in range(n)]
    arr = [str(x) for x in arr]
    # x is a str here, so x*2 repeats the string rather than doubling a number.
    arr = [x*2 for x in arr]
    arr = [x.upper() for x in arr]
    arr = [x[:1] for x in arr]
    return arr

In [115]:
def process_gencomps(n):
    """Run the same five-stage pipeline using chained generator expressions.

    The stages are chained lazily and nothing is materialized until the
    final `list` call. This is the lazy counterpart to `process_listcomps`
    and is written stage by stage on purpose so the two can be timed
    against each other.

    Parameters
    ----------
    n: int
        Number of items to push through the pipeline (the integers in
        range(n)).

    Returns
    -------
    list[str]: For each integer in range(n), the first character of its
        string form.
    """
    arr = (i for i in range(n))
    arr = (str(x) for x in arr)
    # x is a str here, so x*2 repeats the string rather than doubling a number.
    arr = (x*2 for x in arr)
    arr = (x.upper() for x in arr)
    arr = (x[:1] for x in arr)
    # Consuming the outermost generator pulls items through every stage.
    return list(arr)

In [119]:
%%race -n 2 -r 2
_ = process_listcomps(5_000_000)
_ = process_gencomps(5_000_000)

3.17 s ± 312 µs per loop (mean ± std. dev. of 2 runs, 2 loops each)
3.27 s ± 277 ms per loop (mean ± std. dev. of 2 runs, 2 loops each)
