# Collections

> Tuples and lists, sets and dictionaries

In [None]:
# uncomment this to install nbdev
# !pip install nbdev

from timeit import timeit
from fastcore.test import test_eq
from typing import Any, Callable, Dict, List

In [None]:
def run(test, n, *fs):
    """
    Time a test driver against several candidate implementations.

    For every testee one line is printed: its qualified name (left-aligned,
    padded to 12 characters) and the total time reported by `timeit` for
    `n` calls of `test(testee)`.

    :param test: a test driver, called as test(f) for each testee f
    :param n: number of repetitions handed to timeit
    :param fs: the testees
    :return: None
    """
    for candidate in fs:
        label = candidate.__qualname__

        def timed_call(testee=candidate):
            # bind the current candidate explicitly; timeit calls this without arguments
            return test(testee)

        elapsed = timeit(timed_call, number=n)
        print(f"{label:<12} | {elapsed:.4f}")

## Tuples and Lists
Iterables, such as all collections, produce iterators via `iter()`.
An iterator yields its next element via `next()`. By convention, all iterators
are also iterables; the standard implementation of `__iter__` is to return self.
Some examples:

* `(0, 1, 2, 3)` is a tuple, not an iterator
* `range(4)` is a range (an iterable sequence), not an iterator
* `naturals()` is an iterator
* `(1 / k for k in naturals())` is an iterator

## Histograms

Problem 01: Write a function `histogram(xs: list) -> dict` which returns a dictionary containing the
number of occurrences of each x in xs.

In [None]:
def test_01(f: Callable[[List[Any]], Dict[Any, int]]) -> None:
    """
    Test driver for Problem 01.

    Calls f on a few sample lists and checks, for every element of a sample,
    that the histogram entry equals the number of occurrences in the sample.

    :param f: the histogram implementation under test
    """
    samples = [
        [],
        [1],
        [0, 1, 2, 2, 3, 3, 3],
        list(range(500)) + list(range(500)),
    ]
    for sample in samples:
        counts = f(sample)
        for item in sample:
            test_eq(counts[item], sample.count(item))

Idea: Start with an empty dictionary; iterate over xs and count the occurrence of each x.

In [None]:
#collapse
def histogram1(xs: List[Any]) -> Dict[Any, int]:
    """
    Build the histogram with a single pass over xs.

    :param xs: a list (its elements must be hashable, as they become dictionary keys)
    :return: histogram of xs: a dictionary indicating how often each x occurs in xs
    """
    result = {}
    for x in xs:
        # dict.get supplies 0 for an element not seen so far,
        # so no separate membership test is needed
        result[x] = result.get(x, 0) + 1
    return result

Another idea: define the dictionary directly

In [None]:
#collapse
def histogram2(xs: List[Any]) -> Dict[Any, int]:
    """
    Define the histogram directly: map every x in xs to xs.count(x).

    Each element is counted again for every one of its occurrences, so the
    work grows quadratically with len(xs).

    :param xs: a list (its elements must be hashable, as they become dictionary keys)
    :return: histogram of xs: a dictionary indicating how often each x occurs in xs
    """
    # a dict comprehension avoids building the intermediate list of tuples
    return {x: xs.count(x) for x in xs}

A little tweak: iterate over `set(xs)` rather than `xs`

In [None]:
#collapse
def histogram3(xs: List[Any]) -> Dict[Any, int]:
    """
    Like the direct definition, but each distinct element is counted only once.

    :param xs: a list (its elements must be hashable)
    :return: histogram of xs: a dictionary indicating how often each x occurs in xs
    """
    # iterate over the distinct elements only; a dict comprehension avoids
    # building the intermediate list of tuples
    return {x: xs.count(x) for x in set(xs)}

In [None]:
# Time 100 repetitions of test_01 for each of the three histogram implementations.
run(test_01, 100, histogram1, histogram2, histogram3)

histogram1   | 1.4567
histogram2   | 2.3046
histogram3   | 1.8564


### Indices

Problem 02: Write a function `index(book: list, keywords: list) -> dict` which returns the index of a book.
The `book` is given as a list of pages, each page being a list of words. `keywords` is the list of words to be indexed.
The result is a dictionary which contains for each keyword the list of pages (0-based page numbers) where it occurs.

In [None]:
def test_02(f: Callable[[List[List[str]], List[str]], Dict[str, List[int]]]) -> None:
    """
    Test driver for Problem 02.

    Checks that the keys of the returned index are exactly the keywords (in
    the same order) and that every listed page really contains its keyword.

    :param f: the index implementation under test
    """
    keywords = ['John', 'Richard']
    book = [['John', 'Jim', 'Adam'],
            ['John', 'Rupert', 'Adam'],
            ['John', 'Jim', 'Nick', 'Richard']]

    found = f(book, keywords)
    test_eq(keywords, list(found.keys()))
    for word, pages in found.items():
        for page_no in pages:
            test_eq(word in book[page_no], True)

Idea: Start with an empty dictionary; iterate over all pages

In [None]:
#collapse
def index(book: list, keywords: list) -> dict:
    """
    Build the index of a book. Standard solution.

    :param book: list of pages; book[i] is the list of words on page i
    :param keywords: the words to be indexed
    :return: dictionary mapping each keyword that occurs in the book to the
             ascending list of (0-based) numbers of the pages containing it;
             keywords that occur nowhere get no entry
    """
    result = {}
    keywords = set(keywords)
    for i, page in enumerate(book):
        # set(page) removes duplicates, so a page is recorded at most once per word
        for word in set(page) & keywords:
            # setdefault creates the page list on the first occurrence and
            # returns it, so page i is recorded for the first occurrence too
            result.setdefault(word, []).append(i)
    return result

A variant: replace each page with the intersection of itself and the keywords

In [None]:
#collapse
def index1(book: list, keywords: list) -> dict:
    """
    Build the index of a book. Variant: every page is first reduced to the
    set of keywords occurring on it.

    :param book: list of pages; book[i] is the list of words on page i
    :param keywords: the words to be indexed
    :return: dictionary mapping each keyword that occurs in the book to the
             ascending list of (0-based) numbers of the pages containing it;
             keywords that occur nowhere get no entry
    """
    keywords = set(keywords)
    # rebinding the local name only; the caller's book is left untouched
    book = [set(page) & keywords for page in book]
    result = {}
    for i, page in enumerate(book):
        for word in page:
            # setdefault creates the page list on the first occurrence and
            # returns it, so page i is recorded for the first occurrence too
            result.setdefault(word, []).append(i)
    return result

In [None]:
# Time 1000 repetitions of test_02 for both index implementations.
run(test_02, 1000, index, index1)

index        | 0.0270
index1       | 0.0269


Problem 03: Write a function `merge(xs, ys: list) -> list` which returns the merge of non-descending lists xs and ys.

In [None]:
def test_03(f:Callable[[List[Any], List[Any]], List[Any]]) -> None:
    """
    Test driver for Problem 03.

    Merges one small hand-picked pair of lists and then, for i = 0..99, a
    list of i ones with a list of 2*i ones, expecting 3*i ones.

    :param f: the merge implementation under test
    """
    test_eq(f([9, 11], [2, 4, 5]), [2, 4, 5, 9, 11])

    for size in range(100):
        left = [1] * size
        right = [1] * (2 * size)
        merged = f(left, right)
        test_eq([1] * (3 * size), merged)

We first consider a non-recursive solution. Start with an empty result.
Let x and y be the next elements of xs and ys and assume x <= y.
Then x is appended to the result and replaced with the next element of xs.
The loop stops as soon as one of the lists is exhausted; the remaining elements of the other list are then appended to the result.

In [None]:
#collapse
def merge(xs, ys: List[Any]) -> List[Any]:
    """
    Merge two non-descending lists into one non-descending list.
    Non-recursive standard solution.

    The input lists are only read, never modified. Elements are compared
    with <= only, so falsy elements such as 0 or "" are merged like any
    other element. On ties the element of xs comes first.

    :param xs: a non-descending list
    :param ys: a non-descending list
    :return: merge of xs and ys (a new list)
    """
    result = []
    # invariant: xs[i] and ys[j] are the next elements not yet merged
    i, j = 0, 0
    while i < len(xs) and j < len(ys):
        if xs[i] <= ys[j]:
            result.append(xs[i])
            i += 1
        else:
            result.append(ys[j])
            j += 1

    # at least one of the two tails is empty;
    # the one which is not (if any) is appended to the result
    result.extend(xs[i:])
    result.extend(ys[j:])
    return result

The recursive solution is simpler and equally fast, but the stack depth can reach the combined length of xs and ys.
Idea: Return xs if ys is empty and vice versa. If both lists are not empty, the smaller of xs[0] and ys[0]
is going to be the first element of the merged list to which you append the rest.

In [None]:
#collapse
def merge1(xs, ys: List[Any]) -> List[Any]:
    """
    Recursive merge, written down just like the definition of merging.

    :param xs: a non-descending list
    :param ys: a non-descending list
    :return: merge of xs and ys
    """
    # base cases: nothing left to merge on one side, return a copy of the other
    if not xs:
        return list(ys)
    if not ys:
        return list(xs)

    # the smaller head comes first (xs wins ties), the rest is merged recursively
    if xs[0] <= ys[0]:
        head, tail = xs[:1], merge1(xs[1:], ys)
    else:
        head, tail = ys[:1], merge1(xs, ys[1:])
    return head + tail

In [None]:
# Time a single run of test_03 for both merge implementations.
run(test_03, 1, merge, merge1)

merge        | 0.0861
merge1       | 0.0848


## Towers of Hanoi

We write a function `hanoi(n: int) -> list` which returns the protocol of the towers of Hanoi played with
n disks on the first pole: the initial state followed by the state after each move.
Solving the puzzle takes exactly 2**n - 1 moves, so the protocol has 2**n entries.

Idea:

(1) A natural representation of the Hanoi towers is three stacks, named a, b, c.
The first one, a, is initialized to [n, n-1, ..., 1], b and c are empty.

(2) Let `move(k, x, y, z)` denote the move of k disks from stack x to stack z using y as buffer.

The algorithm is recursive:
`move(1, a, b, c)` is obvious: Just perform the move and leave the buffer alone.
`move(2, a, b, c)` requires three moves: `(1, a, c, b), (1, a, b, c), (1, b, a, c)`
The general case `move(n, a, b, c)` can be reduced to three
moves with n-1 instead of n in exactly the same way.

In [None]:
def test_04(f: Callable[[int], List[List[int]]]):
    """
    Test driver for the towers of Hanoi.

    Compares the complete protocol for 2 disks with the expected states and
    checks that the protocol for 20 disks has 2**20 entries.

    :param f: the hanoi implementation under test
    """
    two_disks = f(2)
    test_eq(
        [
            ([2, 1], [], []),    # initial state
            ([2], [1], []),      # move disk 1 from a to b
            ([], [1], [2]),      # move disk 2 from a to c
            ([], [], [2, 1]),    # move disk 1 from b to c
        ],
        two_disks,
    )

    n = 20
    test_eq(len(f(n)), 2**n)

In [None]:
#collapse
def hanoi(n: int) -> list:
    """
    Towers of Hanoi. Exponential time (n = 20 -> 1s)

    :param n: number of disks on first tower, n >= 0
    :return: protocol of the game: a list of states (a, b, c), each a tuple of
             three lists (bottom disk first). It starts with the initial state
             and contains the state after each of the 2**n - 1 moves, hence
             2**n entries in total
    :raises ValueError: if n is negative
    """
    if n < 0:
        raise ValueError(f"n must be non-negative, got {n}")

    a, b, c = list(range(n, 0, -1)), [], []     # a = [n, n-1, ..., 1]
    # the protocol stores copies, because a, b, c keep changing
    protocol = [(list(a), list(b), list(c))]

    def move(k: int, x: list, y: list, z: list) -> None:
        """
        This function moves k disks from x to z using y
        :param k: number of disks to move, k >= 1
        :param x: stack to move from
        :param y: buffer
        :param z: stack to move to
        """
        if k == 1:
            z.append(x.pop())
            protocol.append((list(a), list(b), list(c)))
        else:
            move(k - 1, x, z, y)    # clear the way: k-1 disks go to the buffer
            move(1, x, y, z)        # the largest disk goes to its destination
            move(k - 1, y, x, z)    # the k-1 disks follow from the buffer

    # without disks there is nothing to move; move() requires k >= 1
    if n > 0:
        move(n, a, b, c)
    return protocol

In [None]:
# Time a single run of test_04 for hanoi.
run(test_04, 1, hanoi)

hanoi        | 0.5125
