Documentation on the Python standard library / built-ins (STL).
`more_itertools` is discussed here.

In [8]:
import enum
import os
import glob
import copy
import numbers
import time
import datetime
import itertools
import functools
import collections
import operator
import json
import logging
import random

import utility as util
import utility.doc

### Collections Abstract Base Classes

The collections module offers ABCs defined [here](https://docs.python.org/3/library/collections.abc.html)

### Operating System

In [2]:
util.doc.results(
    "Get list of files from directory path", os.listdir("dummydirs"),
    "Get list of file paths given wildcard", glob.glob("dummydirs/*.txt"),
)

Get list of files from directory path
['dummyD.txt', 'dummyF.txt', 'dummyB.txt', 'dummyA.txt', 'dummyE.txt', 'dummyC.txt', 'dummyG.txt']

Get list of file paths given wildcard
['dummydirs/dummyD.txt', 'dummydirs/dummyF.txt', 'dummydirs/dummyB.txt', 'dummydirs/dummyA.txt', 'dummydirs/dummyE.txt', 'dummydirs/dummyC.txt', 'dummydirs/dummyG.txt']


### Iteration

Iteration involves iterators and iterable.

What exactly are iterator, iterable, and iteration? [StackOverflow](https://stackoverflow.com/questions/9884132/what-exactly-are-iterator-iterable-and-iteration) user says:

An iterable is an object that has an `__iter__()` method which returns an iterator, or which defines a `__getitem__()` method that can take sequential indexes starting from zero (and raises an IndexError when the indexes are no longer valid).

An iterator is an object with a `__next__()` (Python 3) method.

Is an iterator also an iterable? [StackOverflow](https://stackoverflow.com/questions/46106143/is-an-iterator-also-an-iterable) says yes.

In [8]:
# get next in iterator. Modifies internal state of iterator.
it = iter(range(5))
a1 = next(it)
a2 = next(it)
a3 = next(it)
a1, a2, a3

(0, 1, 2)

In [24]:
# using itertools.count() to generate numbers. This will iterate forever until explicit a break or exception.
for t in itertools.count():
    print(t, end=" ")
    if t >= 5:
        break

0 1 2 3 4 5 

In [10]:
"""Using multiple for loops in 
for i in it1 for j in it2
is equivalent to
for i in it1
    for j in it2
"""
for s, t in [(i, j) for i in range(1, 6) for j in range(2)]:
    print(f"({s}, {t})", end=" ")

(1, 0) (1, 1) (2, 0) (2, 1) (3, 0) (3, 1) (4, 0) (4, 1) (5, 0) (5, 1) 

In [25]:
def is_iterable(something):
    try:
        iter(something)
    except TypeError:
        return False
    else:
        return True

class Itor:
    """Iterator"""
    def __init__(self):
        self.__i = -1
    
    def __next__(self):
        self.__i += 1
        return self.__i

class Itble:
    """Iterable. __iter__() uses yield which makes the function return a generator"""
    def __init__(self, l):
        self.__l = l
    
    def __iter__(self):
        for i in self.__l:
            yield i

class ItorItble:
    """Iterator and iterable, since __iter__() returns itself which can be used via __next__()"""
    def __init__(self):
        self.__i = -1
        
    def __iter__(self):
        self.__i = -1
        return self
    
    def __next__(self):
        self.__i += 1
        if self.__i > 5:
            raise StopIteration
        else:
            return self.__i

it = Itor()
util.doc.results(
    "Using an iterable", list(Itble([1, 2, 3, 4, 5])),
    "Using an iterator", [next(it) for _ in range(6)],
    "Using an iterable-iterator", [i for i in ItorItble()],
compact=True)

Using an iterable [1, 2, 3, 4, 5]
Using an iterator [0, 1, 2, 3, 4, 5]
Using an iterable-iterator [0, 1, 2, 3, 4, 5]


In [2]:
# An iterable can be a sequence in which it implements __getitem__() and __len__(),
# or just be a vanilla iterable and implement __iter__()

class MyIterable(object):
    def __init__(self, l):
        self.__l = l
    
    def __len__(self):
        return self.__l
    
    def __getitem__(self, idx):
        if idx >= len(self):
            raise IndexError
        return idx

class MySequenceIterable(object):
    def __init__(self, l):
        self.__l = l
    
    def __iter__(self):
        for i in range(self.__l):
            yield i

util.doc.results(
    "Iterable implementing __getitem__() and __len__()", [i for i in MyIterable(10)],
    "Iterable implementing __iter__()", [i for i in MySequenceIterable(10)],
compact=True)

Iterable implementing __getitem__() and __len__() [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
Iterable implementing __iter__() [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]


In [27]:
# Check whether something is iterable
# The only(?) reliable way to determine whether an object is iterable is to call iter(obj)

def is_iterable(x):
    try:
        iter(x)
        return True
    except:
        return False

def _is_iterable(x):
    """
    This does not check for classes that are iterable through __getitem__"""
    return isinstance(x, collections.abc.Iterable)

print("Is a list iterable?",  is_iterable([]),      _is_iterable([]))
print("Is a set iterable?",   is_iterable(set()),   _is_iterable(set()))
print("Is a tuple iterable?", is_iterable(tuple()), _is_iterable(tuple()))
print("Is a dict iterable?",  is_iterable(dict()),  _is_iterable(dict()))
print("Is a map object iterable?", is_iterable(map(lambda x: x, [1,2,3])), _is_iterable(map(lambda x: x, [1,2,3])))
print("Is MyIterable iterable?", is_iterable(MyIterable(10)), _is_iterable(MyIterable(10)))
print("Is MySequenceIterable iterable?", is_iterable(MySequenceIterable(10)), _is_iterable(MySequenceIterable(10)))
print("Is a number iterable?", is_iterable(1), _is_iterable(1))

Is a list iterable? True True
Is a set iterable? True True
Is a tuple iterable? True True
Is a dict iterable? True True
Is a map object iterable? True True
Is MyIterable iterable? True False
Is MySequenceIterable iterable? True True
Is a number iterable? False False


In [28]:
def select(l, indices):
    """Select from iterable using an index of a list of indices,
    returning a list with entries in the same order as their
    corresponding indices. Examples:
    """
    if is_iterable(indices):
        try:
            return [l[i] for i in indices]
        except TypeError:
            mask = indices_to_selection_mask(indices, max(indices))
            return itertools.compress(l, mask)
    else:
        i = indices
        try:
            return l[i]
        except TypeError:
            return next(itertools.islice(l, i, None))

m = map(lambda x: 2*x, range(10))
select(m, 1)

2

In [3]:
def divide(n, iterable):
    """Divide the elements from *iterable* into *n* parts, maintaining order.
    Taken from more-itertools with minor modification."""
    if n < 1:
        raise ValueError('n must be at least 1')
    try:
        iterable[:0]
    except TypeError:
        seq = tuple(iterable)
    else:
        seq = iterable

    q, r = divmod(len(seq), n)

    ret = []
    stop = 0
    for i in range(1, n + 1):
        start = stop
        stop += q + 1 if i <= r else q
        ret.append(list(seq[start:stop]))

    return ret

util.doc.results(
    "Spliting range(5) to 1 parts", [l for l in divide(1, range(5))],
    "Spliting range(5) to 2 parts", [list(l) for l in divide(2, range(5))],
    "Spliting range(5) to 4 parts", [list(l) for l in divide(4, range(5))],
    "Spliting range(5) to 5 parts", [list(l) for l in divide(5, range(5))],
    "Spliting range(5) to 7 parts", [list(l) for l in divide(7, range(5))],
)

Spliting range(5) to 1 parts
[[0, 1, 2, 3, 4]]

Spliting range(5) to 2 parts
[[0, 1, 2], [3, 4]]

Spliting range(5) to 4 parts
[[0, 1], [2], [3], [4]]

Spliting range(5) to 5 parts
[[0], [1], [2], [3], [4]]

Spliting range(5) to 7 parts
[[0], [1], [2], [3], [4], [], []]


In [6]:
list(itertools.islice(range(20), 5, 10))

[5, 6, 7, 8, 9]

### Lists

In [19]:
# Flatten list of list

arbitrary_nested_list = [
    [],
    ["a"],
    [
        [1, "b", 2],
        [],
        [3],
    ],
    "c",
    [
        [
            [5]
        ]
    ],
    [[[[]]]],
    [
        6,
        [7, 8, "d"]
    ],
    9
]

def flatten_list(arb, include=(), exclude=()):
    out = []
    if include:
        def f(v):
            if isinstance(v, include):
                out.append(v)
    elif exclude:
        def f(v):
            if not isinstance(v, exclude):
                out.append(v)
    else:
        def f(v):
            out.append(v)
    def _flatten(arb):
        for v in arb:
            if isinstance(v, list):
                _flatten(v)
            else:
                f(v)
    _flatten(arb)
    return out

l = flatten_list(arbitrary_nested_list)
util.doc.results(
    "Flatten arbitrary list", flatten_list(arbitrary_nested_list),
    "Flatten list, including numbers", flatten_list(arbitrary_nested_list, include=(int)),
    "Flatten list, excluding numbers", flatten_list(arbitrary_nested_list, exclude=(int)),
)

Flatten arbitrary list
['a', 1, 'b', 2, 3, 'c', 5, 6, 7, 8, 'd', 9]

Flatten list, including numbers
[1, 2, 3, 5, 6, 7, 8, 9]

Flatten list, excluding numbers
['a', 'b', 'c', 'd']


In [10]:
arbitrary_nested_list = [ # 0
    [ # 1
        [ # 2
            [1, 2]
        ]
    ],
    [ # 1
        [ # 2
            [3, 4, 5, 6],
            [7, 8, 9, 10, 11]
        ]
    ],
    [],
    [ # 1
        []
    ],
]

def select_nested_list_levelindex(arb, level, index):
    if level == 0:
        return arb
    out = []
    def get_to_level(arb, _level):
        try:
            if 0 < _level:
                for v in arb:
                    if isinstance(v, list):
                        get_to_level(v, _level - 1)
            else:
                out.append(arb[index])
        except (TypeError, IndexError):
            pass
    get_to_level(arb, level)
    return out

l = select_nested_list_levelindex(arbitrary_nested_list, 3, 2)
l

[5, 9]

### Functions

In [2]:
f = lambda : print('x')
f()

x


In [2]:
# Currying functions using functools.partial()
def myfunc(a, b, c, d=0):
    print(a, b, c, d)
_myfunc = functools.partial(myfunc, 1, 2, d=4)
_myfunc(3)
_myfunc = functools.partial(myfunc, 1)
_myfunc(2, 3)
_myfunc = functools.partial(myfunc, d=4)
_myfunc(1, 2, 3)

1 2 3 4
1 2 3 0
1 2 3 4


### Numbers

In [13]:
# max(1,2,3), min(1,2,3)

class Clip(object):
    def __init__(self, low=-1, high=1):
        self.__low = low
        self.__high = high
        
    def __call__(self, x):
        return min(max(self.__low, x), self.__high)

Clip(1, 3)(2)

2

In [5]:
# Check whether a value is a number using numbers.Number
util.doc.results(
    "Is 5 a number?", isinstance(5, numbers.Number),
    "Is 5.1 a number?", isinstance(5.1, numbers.Number),
    "Is '5.1' a number?", isinstance('5.1', numbers.Number),
compact=True)

Is 5 a number? True
Is 5.1 a number? True
Is '5.1' a number? False


In [32]:
# producing bits from integer
i = random.randrange(32)
bs = [float(x) for x in list(f"{i:0b}")]
i, f"{i:0b}", bs

(29, '11101', [1.0, 1.0, 1.0, 0.0, 1.0])

In [44]:
bitstr = list(reversed(f"{i:0b}"))

def bitstr_to_floatint(i):
    try:
        return float(bitstr[i])
    except IndexError as e:
        return 0.

list(map(bitstr_to_floatint, range(6)))

[1.0, 0.0, 1.0, 1.0, 1.0, 0.0]

### Dictionary

In [36]:
# deleting keys from a dictionary
d = {"a": 1, "b": 2, "c": 3}
dd = d.copy()

del d["a"]
try:
    del d["d"]
except KeyError as e:
    print("Deleting a key that doesn't exist using del gives", repr(e))

print("Instead use dict.pop()")
d.pop("d", None)
d.pop("b", None)

util.doc.results(
    "Dict before deleting", dd,
    "Dict after deleting", d,
compact=True, start="")

Deleting a key that doesn't exist using del gives KeyError('d')
Instead use dict.pop()

Dict before deleting {'a': 1, 'b': 2, 'c': 3}
Dict after deleting {'c': 3}


In [40]:
# Passing multiple dict as function arguments
d1 = {"a": 1, "b": 2}
d2 = {"c": 3, "d": 4}

def f(a, c, **kwargs):
    print(a, c, kwargs)

f(**d1, **d2)

1 3 {'b': 2, 'd': 4}


In [37]:
# Creating a dict with attribute-like access.
class AttrDict(dict):
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.__dict__ = self

params = AttrDict(a='a', b='b')
params.c = 2

util.doc.results(
    "variables of AttrDict are", vars(params),
    "attribute 'a' is", params.a,
    "attribute 'c' is", params.c,
    "attribute 'a' via __getitem__() is", params['a'],
    "the keys of AttrDict", params.keys(),
    "the items of AttrDict", params.items(),
compact=True)

variables of AttrDict are {'a': 'a', 'b': 'b', 'c': 2}
attribute 'a' is a
attribute 'c' is 2
attribute 'a' via __getitem__() is a
the keys of AttrDict dict_keys(['a', 'b', 'c'])
the items of AttrDict dict_items([('a', 'a'), ('b', 'b'), ('c', 2)])


In [38]:
# inheriting AttrDict to gain fixed attributes with attribute-like acces.
class MySettings(AttrDict):
    def __init__(self, a, b, c=1):
        super().__init__(a=a, b=b, c=c)

def mult2(b, **kwargs):
    return 2*b
        
settings = MySettings(1, 2)
util.doc.results(
    "variables of MySettings are", vars(settings),
    "attribute 'a' is", settings.a,
    "attribute 'c' is", settings.c,
    "attribute 'a' via __getitem__() is", settings['a'],
    "the keys of AttrDict", settings.keys(),
    "pass settings to mult2()", mult2(**settings),
compact=True)

variables of MySettings are {'a': 1, 'b': 2, 'c': 1}
attribute 'a' is 1
attribute 'c' is 1
attribute 'a' via __getitem__() is 1
the keys of AttrDict dict_keys(['a', 'b', 'c'])
pass settings to mult2() 4


### Testing and Python

In [3]:
# Test for dictionary equality is done recursively. It is not a reference check.
# See also:
# https://stackoverflow.com/questions/17217225/what-does-the-operator-actually-do-on-a-python-dictionary
c1 = {'a': [1, 2], 'b': 1 } == {'a': [1, 2], 'b': 1 }
c2 = {'a': [1, 2], 'b': 1 } == {'a': [1], 'b': 1 }
c3 = {'a': [1, 2], 'b': 1 } == {'a': [1, 2] }
c4 = {'a': [1, 2], 'b': 1 } == {'a': [1, 2], 'c': 1 }

c1, c2, c3, c4

(True, False, False, False)

In [27]:
# Type checking
a = 1
b = "b"
d = {"b": 1}
f = 1.
util.doc.results(
    "An int is (int or str)", isinstance(a, (int, str)),
    "An int is a number", isinstance(a, numbers.Number),
    "A float is a number", isinstance(f, numbers.Number),
    "A dict is not (int or str)", isinstance(d, (int, str)),
compact=True)

An int is (int or str) True
An int is a number True
A float is a number True
A dict is not (int or str) False


### Classes and Objects

Class members
- public members have no underscore `myobj.a`.
- there are no "protected members", but simulate using one underscore like `myobj._a` or `myobj.a_`.
- private members have two underscores `myobj.__a`.

In [8]:
# assignment of primitives is by value
a = b = c = 1
c = 2
a, b, c

(1, 1, 2)

In [10]:
# assignment of objects is by reference
class A:
    def __init__(self, a):
        self.a = a
    def __repr__(self):
        return f"A(a={self.a})"
a = b = A(1)
print(a, b)
b.a = 2
print(a, b)

A(a=1) A(a=1)
A(a=2) A(a=2)


In [4]:
# Using vars() to access the attributes of an object.
# If a class attribute names are identical to initialization argument names.
# Then it's easy to do a shallow copy an object instance
# This works regardless of if the are positional or keywords, and regardless of argument order.
class A:
    def __init__(self, b, a=None, c=None):
        self.a = a
        self.b = b
        self.c = c
    def __repr__(self):
        return f"A(a={self.a}, b={self.b}, c={self.c})"
        
a = A(2, a=1, c=3)
util.doc.results(
    "instance of A", a,
    "Using vars() to access the attributes of an object", vars(a),
    "Using vars() to instantiate another instance of A", A(**vars(a)),
    "Same as above, but without knowledge of class", type(a)(**vars(a)),
)

instance of A
A(a=1, b=2, c=3)

Using vars() to access the attributes of an object
{'a': 1, 'b': 2, 'c': 3}

Using vars() to instantiate another instance of A
A(a=1, b=2, c=3)

Same as above, but without knowledge of class
A(a=1, b=2, c=3)


In [6]:
# Getting names of classes. Works with primitives
class A:
    pass
a = A()
i = 1
s = 'x'
def classname_1(x):
    return x.__class__.__name__
def classname_2(x):
    return type(x).__name__
classname_1(a), classname_1(i), classname_1(s), classname_2(a), classname_2(i), classname_2(s)

('A', 'int', 'str', 'A', 'int', 'str')

In [7]:
# hasattr() still evaluates to true for a attribute that has been set to None,
# and will evaluate to false when using del
class A:
    def __init__(self):
        pass
a = A()
print( hasattr(a, 'a') )
a.a = 1
print( hasattr(a, 'a') )
a.a = None
print( hasattr(a, 'a') )
del a.a
print( hasattr(a, 'a') )

False
True
True
False


In [17]:
# isinstance() to check that an object's class is equal to or a subclass of a class
class A:
    def _greet(self):
        print("Hi!")
class B(A):
    def greet(self):
        self._greet()
class C: pass
a, b = A(), B()
util.doc.results(
    "a is an instance of A", isinstance(a, A),
    "a is an instance of B", isinstance(a, B),
    "b is an instance of A", isinstance(b, A),
    "b is an instance of B", isinstance(b, B),
    "a is an instance of one of (A,B,C)", isinstance(a, (A,B,C)),
    "b is an instance of one of (A,B,C)", isinstance(b, (A,B,C)),
compact=True)
b.greet()

a is an instance of A True
a is an instance of B False
b is an instance of A True
b is an instance of B True
a is an instance of one of (A,B,C) True
b is an instance of one of (A,B,C) True
Hi!


In [38]:
# overriding functions and using super()
class A:
    def hello(self):
        return "Hola!"
    def goodbye(self):
        return "adiós"
    
        
class B(A):
    def hello(self):
        return super().hello() + " Hello!"
    def goodbye(self):
        return "bye"
a, b = A(), B()
util.doc.results(
    "A function", a.hello(),
    "A function", a.goodbye(),
    "B override with super", b.hello(),
    "B override", b.goodbye(),
)

A function
Hola!

A function
adiós

B override with super
Hola! Hello!

B override
bye


### Named tuples

Using `collections.nametuples`

In [9]:
# accessing named tuples
A = collections.namedtuple('A', ('a', 'b', 'c',))
a = A(1,2,3)

try: a.b = 4 # can't set attributes
except AttributeError as e: print(e)

try: a[1] =4 # can't do item assignment
except TypeError as e: print(e)

util.doc.results(
    "Instance a of amedtuple A is", a,
    "The item in the first index", a[0],
    "The item with field name 'a' using getattr()", getattr(a, 'a'),
    "The item with field name 'a' using access", a.a,
compact=True, start="")

can't set attribute
'A' object does not support item assignment

Instance a of amedtuple A is A(a=1, b=2, c=3)
The item in the first index 1
The item with field name 'a' using getattr() 1
The item with field name 'a' using access 1


In [10]:
# attributes of named tuples
A = collections.namedtuple('A', ('a', 'b', 'c',))
a = A._make([1,2,3])

util.doc.results(
    "Instance a of amedtuple A is", a,
    "Has fields", a._fields,
    "As a dict", a._asdict(),
compact=True)

Instance a of amedtuple A is A(a=1, b=2, c=3)
Has fields ('a', 'b', 'c')
As a dict {'a': 1, 'b': 2, 'c': 3}


In [11]:
# aggregating named tuples
A = collections.namedtuple('A', ('c', 'd',))
a1 = A(1, 2)
a2 = A(3, 4)
a3 = A(5, 6)
# aggregating a tuple
batch = A(*zip(*[a1, a2, a3]))
a1, batch

(A(c=1, d=2), A(c=(1, 3, 5), d=(2, 4, 6)))

### Queues

Remember to use `collections.deque` and not `queue.Queue`.

In [17]:
# append(), extend() and pop()
q = collections.deque([1, 2, 3])
q.append(4)
q.extend([5,6])
l = []
while q:
    i = q.pop()
    l.append(i)

util.doc.results(
    "Results from q.pop()", l,
    "Queue q after popping all items", q,
compact=True)

Results from q.pop() [6, 5, 4, 3, 2, 1]
Queue q after popping all items deque([])


In [18]:
# A queue is an iterable
q = collections.deque()
q.append(1)
q.append(2)
q.append(3)

util.doc.results(
    "Queue q after appending the numbers", q,
    "The sum of the numbers in the queue", sum(q),
compact=True)

Queue q after appending the numbers deque([1, 2, 3])
The sum of the numbers in the queue 6


### Enum

In [6]:
class BasicEnum(enum.Enum):
    TALL = 0
    SHORT = 1
    
    @classmethod
    def generate(cls, l):
        for i in l:
            yield cls(i)

for e in BasicEnum.generate([0,1]):
    print(e)

BasicEnum.TALL
BasicEnum.SHORT


### Time and Date

In [27]:
util.doc.results(
    "Current time string format:", time.strftime('%d %b %Y %H:%M:%S', time.localtime()),
    "                           ", time.strftime("%Y%m%d-%H_%M_%S", time.localtime()),
    "Datetime object:", datetime.datetime.now(),
    "Converte a datetime object to string format:", datetime.datetime.now().strftime("%Y%m%d-%H_%M_%S"),
compact=True)

Current time string format: 29 Jul 2022 11:48:04
                            20220729-11_48_04
Datetime object 2022-07-29 11:48:04.263758
Converte a datetime object to string format: 20220729-11_48_04


### Unsorted

In [4]:
# TODO: isn't there a way to include exception codes?
try:
    raise Exception("Something went wrong.")
except Exception as e:
    print(e)
    print(repr(e))

Something went wrong.
Exception('Something went wrong.')


In [9]:
# Joining and splitting strings
l = ['a', 'b', 'c']
s = '/'.join(l)
s, s.split('/')

('a/b/c', ['a', 'b', 'c'])

In [10]:
# Sorting iterables
l = [1,3,2]
s = {1,3,2}
r = range(3, 0, -1)
sorted(l), sorted(s), sorted(r)

([1, 2, 3], [1, 2, 3], [1, 2, 3])

In [11]:
# Filter items in a dict by inverse mapping
d = {'a': 1, 'b': 1, 'c': 2, 'd': 3, 'e': 3}
{v: k for k, v in d.items()}

{1: 'b', 2: 'c', 3: 'e'}

In [1]:
optimizer/doc.control.2.ipynb# Unzip dict key-value pairs
unzip = lambda ll: list(zip(*ll))
d = {'a': 1, 'b': 1, 'c': 2, 'd': 3, 'e': 3}
unzip(d.items())

[('a', 'b', 'c', 'd', 'e'), (1, 1, 2, 3, 3)]

In [12]:
# Using zip(), and easy way to create dict() object
zip_to_list = lambda *args: list(zip(*args))

zip_to_list('abc', [1, 2, 3])

[('a', 1), ('b', 2), ('c', 3)]

In [13]:
# Using dict
def subdict(d, ks):
    return {k: d[k] for k in ks if k in d}

def map_to_list(f, l):
    return list(map(f, l))

def from_dict_with_remainder(d, ks):
    leftover = set(ks) - set(d.keys())
    ks = set(ks) & set(d.keys())
    return map_to_list(d.get, ks), list(leftover)

d = dict(zip('abcd', range(4)))

d, subdict(d, 'abe'), subdict({}, 'abe'), map_to_list(d.get, 'abe'), from_dict_with_remainder(d, 'cdef')

({'a': 0, 'b': 1, 'c': 2, 'd': 3},
 {'a': 0, 'b': 1},
 {},
 [0, 1, None],
 ([2, 3], ['e', 'f']))

Chaining lists.

What is the difference between chain and chain.from_iterable in itertools? [StackOverflow](https://stackoverflow.com/questions/15004772/what-is-the-difference-between-chain-and-chain-from-iterable-in-itertools)

In [14]:
l = [[1,2,3], [4,5], [6]]

list( itertools.chain(*l) ), list( itertools.chain.from_iterable(l) )

([1, 2, 3, 4, 5, 6], [1, 2, 3, 4, 5, 6])

In [15]:
# Using accumulate()
accumulate_to_list = lambda *args, **kwargs: list(itertools.accumulate(*args, **kwargs))
class A:
    def __init__(self, a):
        self.a = a
    def __repr__(self):
        return f"a={self.a}"
v = [A(2), A(3), A(4), A(5)]
add = lambda c,d: A(c.a + d.a)

accumulate_to_list(v, add), accumulate_to_list(v, add, initial=A(10))

([a=2, a=5, a=9, a=14], [a=10, a=12, a=15, a=19, a=24])

In [16]:
# Using tee() to make a list of consecutive lists
# itertools.tee() returns n independent iterators from a single iterable.

def pairwise_to_list(l):
    a, b = itertools.tee(l)
    next(b, None)
    return list(zip(a, b))

pairwise_to_list(range(5)), pairwise_to_list(range(2)), pairwise_to_list(range(1))

([(0, 1), (1, 2), (2, 3), (3, 4)], [(0, 1)], [])

In [17]:
def pairwise_do(f, l):
    a, b = itertools.tee(l)
    next(b, None)
    return [f(i, j) for i, j in zip(a, b)]
class A:
    def __init__(self, a):
        self.a = a
    def __repr__(self):
        return f"a={self.a}"
v = [A(1), A(2), A(3), A(4), A(5)]
add = lambda c,d: A(c.a + d.a)

pairwise_do(add, v)

[a=3, a=5, a=7, a=9]

In [18]:
# Using reduce()
v = ['The', 'quick', 'brown', 'fox', 'jumps', 'over', 'the', 'lazy', 'dog']
functools.reduce(lambda acc, x: acc + ' ' + x, v), functools.reduce(lambda acc, x: acc + ' ' + x, v, 'LOG:')

('The quick brown fox jumps over the lazy dog',
 'LOG: The quick brown fox jumps over the lazy dog')

In [22]:
# Deep copy
# WARNING: copy.deepcopy() is known to be very slow
class A:
    def __init__(self, a):
        self.a = a

# example of deep structure of form
# (list, A, dict, A, int)
v = [A({'a': A(1)})]
u = copy.deepcopy(v)
u[0].a['a'].a = 2
v[0].a['a'].a, u[0].a['a'].a

(1, 2)

In [26]:
# Working with slices
l = [0, 1, 2, 3, 4, 5]
s = slice(1, 4)
l[s], s.start, s.stop, s.step, l[slice(None)]

([1, 2, 3], 1, 4, None, [0, 1, 2, 3, 4, 5])

In [27]:
# getting the indices of the longest sequence in a list

def longest_subsequence(l, cond=None):
    """Get the longest subsequence of the list composed of entries
    where cond is true. For example:
    (lambda l, i: l[i], [1, 0, 1, 1, 1, 0, 1]) -> ((slice(2, 5), 3)
    
    Parameters
    ==========
    l : list or np.array
        The list to get subsequence from. 
    cond : function (l, i) -> bool
        To check whether list entries belong to the subsequence.
        If not passed the check using truthiness. 
    
    Returns
    =======
    slice or None
        The slice object to get the subsequence.
    int
        The length of the subsequence.
    """
    if not cond:
        cond = lambda x: x
    n = len(l)
    longest_size  = 0
    longest_begin = None
    longest_end   = None
    curr_end = 0
    while curr_end < n:
        if cond(l[curr_end]):
            curr_begin = curr_end
            while curr_end < n - 1 and cond(l[curr_end+1]):
                curr_end  += 1
            if (curr_end - curr_begin + 1) > longest_size:
                longest_size = curr_end - curr_begin + 1
                longest_begin = curr_begin
                longest_end = curr_end
            curr_end += 1
        curr_end += 1
    if longest_size == 0:
        return None, longest_size
    else:
        return slice(longest_begin, longest_end+1), longest_size

f = lambda x: x
t1 = [1, 0, 1, 1, 1, 0, 1]
t2 = [0, 1, 0, 1, 1, 1, 1, 1]
t3 = [1, 1, 0, 1, 0]
t4 = [0, 1, 0, 1, 0]
t5 = [0, 0]
t6 = [1]
t7 = []

l = t4
r, length = longest_subsequence(l, f)
def d():
    try:
        return l[r]
    except:
        return None
r, length, d()

(slice(1, 2, None), 1, [1])

In [28]:
def longest_sequence_using_split(l, split):
    """Get the longest subsequence of the list after it has been
    split into segments. For example if split is the function
    lambda l, i: l[i + 1] != l[i] + 1 if i < len(l) - 1 else False
    then
    ([1, 2, 3, 2, 3, 4, 5, 1, 2], split) -> ((slice(3, 7), 4)
    as split gives
    [1, 2, 3 | 2, 3, 4, 5 | 1, 2]
    
    Parameters
    ==========
    l : list or np.array
        The list to get subsequence.
    split : function (l, i) -> bool
        To check whether to split the list
        If not passed the check using truthiness.
    
    Returns
    =======
    slice or None
        The slice object to get the subsequence.
    int
        The length of the subsequence.
    """
    n = len(l)
    longest_size  = 0
    longest_begin = None
    longest_end   = None
    curr_begin = 0
    curr_end   = 0
    while curr_end < n:
        if split(l, curr_end) or curr_end == n - 1:
            if (curr_end - curr_begin + 1)  > longest_size:
                longest_size  = curr_end - curr_begin + 1
                longest_begin = curr_begin
                longest_end   = curr_end
            curr_begin = curr_end + 1
            curr_end   = curr_end + 1
        else:
            curr_end  += 1
    if longest_size == 0:
        return None, longest_size
    else:
        return slice(longest_begin, longest_end+1), longest_size

def longest_consecutive_increasing_subsequence(l):
    """Get the longest consecutively increasing subsequence.
    
    Parameters
    ==========
    list of int
        The list to get subsequence.
    
    Returns
    =======
    slice or None
        The slice object to get the subsequence.
    int
        The length of the subsequence.
    """
    def split(l, i):
        try:
            return l[i + 1] != l[i] + 1
        except:
            return False
    return longest_sequence_using_split(l, split)

t1 = [1, 2, 3, 2, 3, 4, 5, 6, 1, 2]
t2 = [3, 2, 3, 4, 5, 6, 7, 1, 2, 1]
t3 = [1, 3, 2, 3, 2, 3, 4, 5]
t4 = [6, 7, 8, 1, 2]
t5 = [1, 3, 5, 7]
t6 = [1]
t7 = [2, 1]
t8 = []

l = t1
r, length = longest_consecutive_increasing_subsequence(l)
def d():
    try:
        return l[r]
    except:
        return None
r, length, d()

(slice(3, 8, None), 5, [2, 3, 4, 5, 6])

In [29]:
def longest_consecutive_decreasing_subsequence(l):
    """Get the longest consecutively decreasing subsequence.
    
    Parameters
    ==========
    list of int
        The list to get subsequence.
    
    Returns
    =======
    slice or None
        The slice object to get the subsequence.
    int
        The length of the subsequence.
    """
    def split(l, i):
        try:
            return l[i + 1] != l[i] - 1
        except:
            return False
    return longest_sequence_using_split(l, split)

t1 = [3, 2, 1, 6, 5, 4, 3, 2, 2, 1]
t2 = [3, 7, 6, 5, 4, 3, 2, 2, 1, 1]
t3 = [1, 3, 3, 2, 5, 4, 3, 2]
t4 = [8, 7, 6, 2, 1]
t5 = [1, 3, 5, 7]
t6 = [1]
t7 = [2, 1]
t8 = []

l = t8
r, length = longest_consecutive_decreasing_subsequence(l)
def d():
    try:
        return l[r]
    except:
        return None
r, length, d()

(None, 0, None)

In [30]:
# locals() returns function's arguments as dict.
def f(a,b, *args,d=None, **kwargs):
    print( locals() )
f(1,2,3,d=4,e=5)

{'a': 1, 'b': 2, 'd': 4, 'args': (3,), 'kwargs': {'e': 5}}


In [31]:
unzip = lambda ll: list(zip(*ll))

def inner_keys_from_nested_dict(d, layers=2):
    ll = []
    vl = [d]
    for layer in range(layers):
        l = []
        f = lambda x: isinstance(x, dict)
        q = collections.deque(util.filter_to_list(f, vl))
        vl = []
        if not q:
            break
        while q:
            d = q.pop()
            keys, values = unzip(d.items())
            vl.append(values)
            l.append(keys)
        l = util.merge_list_of_list(l)
        vl = util.merge_list_of_list(vl)
        ll.append(l)
    return ll

d = {
    'a': {
        'a1': {
            'a11': 11,
            'a12': 12,
        },
        'a2': {
            'a21': 21,
        },
        'a3': 3,
    },
    'b': {
        'a1': 1,
        'b2': {
            'b21': 21,
            'b22': 22,
            'b23': 23,
        },
    },
    'c': {
        'c1': 1,
    },
}

inner_keys_from_nested_dict(d, layers=3)

[['a', 'b', 'c'],
 ['c1', 'a1', 'b2', 'a1', 'a2', 'a3'],
 ['a21', 'a11', 'a12', 'b21', 'b22', 'b23']]

In [36]:
# Using zip, unzip and zip
zip_to_list = lambda *args: list(zip(*args))
unzip = lambda ll: list(zip(*ll))

zip_to_list(* unzip([('a', 1), ('b', 2), ('c', 3), ('d', 4)]) ), \
        unzip( zip_to_list(('a', 'b', 'c', 'd'), (1, 2, 3, 4)) )

([('a', 1), ('b', 2), ('c', 3), ('d', 4)],
 [('a', 'b', 'c', 'd'), (1, 2, 3, 4)])

In [37]:
def do_on_nested_dict_of_list(f, dl, *args, **kwargs):
    if isinstance(dl, list):
        f(dl, *args, **kwargs)
    elif isinstance(dl, dict):
        for v in dl.values():
            sort_nested_dict_of_list(v, *args, **kwargs)
    else:
        pass

def sort_nested_dict_of_list(dl, **kwargs):
    def f(l, **kwargs):
        l.sort(**kwargs)
    do_on_nested_dict_of_list(f, dl, **kwargs)

dl = {'a': {'b': [3,1,2]}, 'c': [3,4,1,2], 'd': dict()}
sort_nested_dict_of_list(dl,reverse=True)
dl

{'a': {'b': [3, 2, 1]}, 'c': [4, 3, 2, 1], 'd': {}}

In [38]:
def gen_splits(n):
    """Generator of group indices for (train, val, test) set.
    Only yields n-1 of the possible index combinations"""
    v = util.range_to_list(n)
    for idx in range(0, len(v) - 1):
        yield tuple(v[:idx] + v[idx + 2:]), (idx,), (idx + 1,)

for train, val, test in gen_splits(3):
    print(train, val, test)

(2,) (0,) (1,)
(0,) (1,) (2,)


In [39]:
l = zip(range(10),'aabbbccddd')
s_ = filter(lambda x: x[1] == 'b', l)
l = zip(range(10),'aabbbccddd')
s = filter(lambda x: x[1] == 'b', l)
s = itertools.islice(s, 2)
[*s_], [*s]

([(2, 'b'), (3, 'b'), (4, 'b')], [(2, 'b'), (3, 'b')])

In [40]:
# Cartesian product of iterables
p = itertools.product('ABC', 'ab', range(2))
[*p]

[('A', 'a', 0),
 ('A', 'a', 1),
 ('A', 'b', 0),
 ('A', 'b', 1),
 ('B', 'a', 0),
 ('B', 'a', 1),
 ('B', 'b', 0),
 ('B', 'b', 1),
 ('C', 'a', 0),
 ('C', 'a', 1),
 ('C', 'b', 0),
 ('C', 'b', 1)]

In [2]:
d = collections.OrderedDict( zip('abcd', range(4)) )
random.choices(list(d.values()), k=5)

[0, 0, 3, 2, 2]

In [9]:
# collections.OrderedDict are dict that remember insertion order.
d = collections.OrderedDict()
d[3] = 'a'
d[1] = 'b'
d[2] = 'c'
for k, v in d.items():
    print(k, v)
    
import numpy as np
print(np.array(d.keys()))

3 a
1 b
2 c
odict_keys([3, 1, 2])
