# Balíček collections

https://docs.python.org/3/library/collections.html

In [1]:
import collections
import re
import itertools

## Counter

In [2]:
# Read the text inside a ``with`` block so the file handle is closed as soon
# as the content has been loaded, instead of being left open until the
# interpreter happens to reclaim it.
with open('data/tom.txt') as source:
    # Lower-case the whole text, then split it into runs of word characters.
    words = re.findall(r'\w+', source.read().lower())

In [3]:
len(words)

77607

In [4]:
collections.Counter(words).most_common(10)

[('the', 3971),
 ('and', 3193),
 ('a', 1955),
 ('to', 1806),
 ('of', 1585),
 ('it', 1332),
 ('he', 1252),
 ('was', 1168),
 ('that', 1041),
 ('i', 1015)]

In [5]:
c = collections.Counter('gallahad')

In [6]:
c

Counter({'g': 1, 'a': 3, 'l': 2, 'h': 1, 'd': 1})

In [7]:
c.elements()

<itertools.chain at 0x7f95e82e3438>

In [8]:
tuple(c.elements())

('g', 'a', 'a', 'a', 'l', 'l', 'h', 'd')

In [9]:
c = collections.Counter(a=3, b=1)

In [10]:
d = collections.Counter(a=1, b=2)

In [11]:
c-d

Counter({'a': 2})

In [12]:
c+d

Counter({'a': 4, 'b': 3})

## DefaultDict

In [13]:
len(words)

77607

In [14]:
def word_frequency_classic(text):
    """
    Count how often each item of ``text`` occurs, using only a plain dict.

    :param text: iterable of hashable items (the notebook passes a list of words)
    :return: dict mapping each distinct item to its number of occurrences
    """
    counts = {}
    for word in text:
        # With a plain dict the key has to exist before it can be incremented,
        # so the first occurrence is handled separately.
        if word in counts:
            counts[word] += 1
        else:
            counts[word] = 1

    return counts

In [15]:
freq = word_frequency_classic(words)

In [16]:
keys = sorted(freq, key=freq.get, reverse=True)[:10]

In [17]:
for key in keys:
    print(key, freq[key]) 

the 3971
and 3193
a 1955
to 1806
of 1585
it 1332
he 1252
was 1168
that 1041
i 1015


In [18]:
def word_frequency(text):
    """
    Count how often each item of ``text`` occurs, using a ``defaultdict``.

    :param text: iterable of hashable items (the notebook passes a list of words)
    :return: plain dict mapping each distinct item to its number of occurrences
    """
    # int() returns 0, so a key that is missing starts at zero on first access
    # and no special case for the first occurrence is needed.
    counts = collections.defaultdict(int)
    for word in text:
        counts[word] = counts[word] + 1

    # Hand back an ordinary dict so later lookups of unknown keys
    # do not silently create new entries.
    return dict(counts)

In [19]:
freq = word_frequency(words)

In [20]:
keys = sorted(freq, key=freq.get, reverse=True)[:10]

In [21]:
for key in keys:
    print(key, freq[key]) 

the 3971
and 3193
a 1955
to 1806
of 1585
it 1332
he 1252
was 1168
that 1041
i 1015


## Deque

Deque (pronounced “deck”) is short for “double-ended queue”.

In [22]:
d = collections.deque('ghi')

In [23]:
for el in d:
    print(el.upper())

G
H
I


In [24]:
d.append('j')

In [25]:
d.appendleft('k')

In [26]:
d

deque(['k', 'g', 'h', 'i', 'j'])

In [27]:
d.rotate(2)

In [28]:
d

deque(['i', 'j', 'k', 'g', 'h'])

In [29]:
d.rotate(-1)

In [30]:
d

deque(['j', 'k', 'g', 'h', 'i'])

In [31]:
def moving_average(iterable, n=3):
    """
    Lazily yield the average of every run of ``n`` consecutive items.

    Example: moving_average([40, 30, 50, 46, 39, 44]) --> 40.0 42.0 45.0 43.0
    See http://en.wikipedia.org/wiki/Moving_average

    :param iterable: iterable of numbers
    :param n: window size (number of items averaged together)
    :return: generator of averages; nothing is yielded when the input
        has fewer than ``n`` items
    """
    stream = iter(iterable)
    # The window starts with a 0 placeholder followed by the first n-1 items.
    # The placeholder is the value dropped on the first step, so the first
    # yielded total covers exactly the first n real items.
    window = collections.deque([0])
    window.extend(itertools.islice(stream, n - 1))
    running_total = sum(window)
    for value in stream:
        # Slide the window: drop the oldest entry, take in the new one, and
        # adjust the running total instead of re-summing the whole window.
        oldest = window.popleft()
        running_total += value - oldest
        window.append(value)
        yield running_total / n

In [32]:
moving_average([40, 30, 50, 46, 39, 44])

<generator object moving_average at 0x7f95e828bbf8>

In [33]:
list(moving_average([40, 30, 50, 46, 39, 44]))

[40.0, 42.0, 45.0, 43.0]

## Ordered Dictionary

In [40]:
d = {'banana': 3, 'apple': 4, 'pear': 1, 'orange': 2}

## sorted by key

In [41]:
collections.OrderedDict(sorted(d.items(), key=lambda t: t[0]))

OrderedDict([('apple', 4), ('banana', 3), ('orange', 2), ('pear', 1)])

## sorted by value

In [42]:
collections.OrderedDict(sorted(d.items(), key=lambda t: t[1]))

OrderedDict([('pear', 1), ('orange', 2), ('banana', 3), ('apple', 4)])

## sorted by key len

In [43]:
collections.OrderedDict(sorted(d.items(), key=lambda t: len(t[0])))

OrderedDict([('pear', 1), ('apple', 4), ('banana', 3), ('orange', 2)])