# Introduction to Python Collections

## Counter

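A `Counter` is a dict subclass for counting hashable objects: elements are stored as dictionary keys and their counts as the corresponding values.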

In [1]:
from collections import Counter

In [2]:
# Sample sentence whose words are tallied in the next cell.
words = 'hello people how are you people world hello again people hello hello'
# Start from an empty Counter.
cnt = Counter()


In [3]:
# Tally each whitespace-separated word. A key that has not been seen yet
# reads as 0, so no membership test is needed before incrementing.
for word in words.split():
    cnt[word] = cnt[word] + 1


In [7]:
cnt

Counter({'again': 1,
         'are': 1,
         'hello': 4,
         'how': 1,
         'people': 3,
         'world': 1,
         'you': 1})

In [8]:
# List every attribute and method available on the Counter instance.
# The parenthesised call form prints the same text under Python 2 and Python 3.
print(dir(cnt))

['__add__', '__and__', '__class__', '__cmp__', '__contains__', '__delattr__', '__delitem__', '__dict__', '__doc__', '__eq__', '__format__', '__ge__', '__getattribute__', '__getitem__', '__gt__', '__hash__', '__init__', '__iter__', '__le__', '__len__', '__lt__', '__missing__', '__module__', '__ne__', '__new__', '__or__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__setitem__', '__sizeof__', '__str__', '__sub__', '__subclasshook__', '__weakref__', 'clear', 'copy', 'elements', 'fromkeys', 'get', 'has_key', 'items', 'iteritems', 'iterkeys', 'itervalues', 'keys', 'most_common', 'pop', 'popitem', 'setdefault', 'subtract', 'update', 'values', 'viewitems', 'viewkeys', 'viewvalues']


In [9]:
Counter('hellopeople')

Counter({'e': 3, 'h': 1, 'l': 3, 'o': 2, 'p': 2})

In [10]:
cnt

Counter({'again': 1,
         'are': 1,
         'hello': 4,
         'how': 1,
         'people': 3,
         'world': 1,
         'you': 1})

In [12]:
# Assigning a zero count keeps the key in the Counter (see the output below).
cnt['again'] = 0
cnt

Counter({'again': 0,
         'are': 1,
         'hello': 4,
         'how': 1,
         'people': 3,
         'world': 1,
         'you': 1})

In [15]:
# `del` removes the entry from the Counter altogether.
del cnt['again']
cnt

Counter({'are': 1, 'hello': 4, 'how': 1, 'people': 3, 'world': 1, 'you': 1})

In [17]:
# elements() yields each key repeated as many times as its count.
list(cnt.elements())

['people',
 'people',
 'people',
 'how',
 'are',
 'world',
 'you',
 'hello',
 'hello',
 'hello',
 'hello']

In [20]:
# most_common(n) returns the n highest counts as (element, count) pairs;
# with no argument it returns every element, most common first.
# The call form of print works under both Python 2 and Python 3.
print(cnt.most_common(1))
print(cnt.most_common())

[('hello', 4)]
[('hello', 4), ('people', 3), ('how', 1), ('are', 1), ('world', 1), ('you', 1)]


## deque

Deques are a generalization of stacks and queues (the name is pronounced “deck” and is short for “double-ended queue”). Deques support thread-safe, memory efficient appends and pops from either side of the deque with approximately the same O(1) performance in either direction.



In [27]:
from collections import deque

In [30]:
# Build a deque from an iterable: one entry per character of the string.
d = deque('hello')

In [31]:
# A deque can be iterated like any other sequence, from left to right.
# print(...) with a single argument behaves the same on Python 2 and Python 3.
for elem in d:
    print(elem)

h
e
l
l
o


In [32]:
# append() adds the item at the right-hand end.
d.append('!')

In [33]:
d

deque(['h', 'e', 'l', 'l', 'o', '!'])

In [34]:
# pop() removes and returns the rightmost item.
d.pop()

'!'

In [35]:
# popleft() removes and returns the leftmost item.
d.popleft()

'h'

In [36]:
# appendleft() adds the item at the left-hand end.
d.appendleft('H')

In [37]:
d

deque(['H', 'e', 'l', 'l', 'o'])

In [38]:
list(d)

['H', 'e', 'l', 'l', 'o']

In [39]:
d.count('l')

2

In [40]:
# clear() empties the deque in place.
d.clear()
d

deque([])

In [41]:
# extend() appends every item of the iterable on the right, here one character at a time.
d.extend('Hello')

In [42]:
d

deque(['H', 'e', 'l', 'l', 'o'])

In [43]:
d.extend(' Friend!')

In [44]:
d

deque(['H', 'e', 'l', 'l', 'o', ' ', 'F', 'r', 'i', 'e', 'n', 'd', '!'])

In [45]:
# remove() deletes a single occurrence of the value; one 'l' is still present afterwards.
d.remove('l')
# print(...) with a single argument behaves the same on Python 2 and Python 3.
print(d)

deque(['H', 'e', 'l', 'o', ' ', 'F', 'r', 'i', 'e', 'n', 'd', '!'])


In [46]:
# reverse() reverses the order of the items in place.
d.reverse()

In [47]:
d

deque(['!', 'd', 'n', 'e', 'i', 'r', 'F', ' ', 'o', 'l', 'e', 'H'])

In [49]:
# rotate(5) shifts the items five steps to the right: the last five wrap around to the front.
d.rotate(5)

In [50]:
d

deque([' ', 'o', 'l', 'e', 'H', '!', 'd', 'n', 'e', 'i', 'r', 'F'])

In [67]:
# Bounded deque: holds at most five items.
d = deque([1, 2, 3, 4], maxlen=5)


In [68]:
d.maxlen

5

In [69]:
# Only four of the five slots are used, so the append simply adds on the right.
# print(...) with a single argument behaves the same on Python 2 and Python 3.
print(d)
d.append(10)
print(d)

deque([1, 2, 3, 4], maxlen=5)
deque([1, 2, 3, 4, 10], maxlen=5)


In [70]:
# The deque is now full, so appending on the right discards the leftmost item.
# print(...) with a single argument behaves the same on Python 2 and Python 3.
print(d)
d.append(100)
print(d)

deque([1, 2, 3, 4, 10], maxlen=5)
deque([2, 3, 4, 10, 100], maxlen=5)


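## Default Dicts

A `defaultdict` is a dict subclass that calls a factory function to supply a default value whenever a missing key is accessed.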


In [71]:
from collections import defaultdict
# (color, value) pairs; some colors appear more than once.
s = [
    ('yellow', 1),
    ('blue', 2),
    ('yellow', 3),
    ('blue', 4),
    ('red', 1),
]

In [72]:
# With `list` as the factory, a key that is missing starts out as an empty list.
d = defaultdict(list)

In [73]:
d

defaultdict(list, {})

In [74]:
# Group the values by color; the first access to a new key creates its list.
for color, value in s:
    d[color].append(value)

In [75]:
# Sort the (key, list) pairs so the display does not depend on dict iteration
# order and is a plain list on both Python 2 and Python 3.
sorted(d.items())

[('blue', [2, 4]), ('red', [1]), ('yellow', [1, 3])]

In [81]:
# Count letters case-insensitively: defaultdict(int) starts every new key at 0.
s = 'HelloFriendHowAreyou'
d = defaultdict(int)
for letter in s.lower():
    d[letter] += 1
# Most frequent first. Ties are broken alphabetically so the listing is
# reproducible instead of depending on dict iteration order.
letter_counts = sorted(d.items(), key=lambda item: (-item[1], item[0]))
letter_counts

[('e', 3),
 ('o', 3),
 ('h', 2),
 ('l', 2),
 ('r', 2),
 ('a', 1),
 ('d', 1),
 ('f', 1),
 ('i', 1),
 ('n', 1),
 ('u', 1),
 ('w', 1),
 ('y', 1)]

## NamedTuple

Named tuples assign meaning to each position in a tuple and allow for more readable, self-documenting code. They can be used wherever regular tuples are used, and they add the ability to access fields by name instead of position index.

In [82]:
from collections import namedtuple

In [83]:
# verbose=True makes namedtuple print the class definition it generates,
# which is the source listing shown in this cell's output.
Point = namedtuple('Point', ['x', 'y'], verbose=True)

class Point(tuple):
    'Point(x, y)'

    __slots__ = ()

    _fields = ('x', 'y')

    def __new__(_cls, x, y):
        'Create new instance of Point(x, y)'
        return _tuple.__new__(_cls, (x, y))

    @classmethod
    def _make(cls, iterable, new=tuple.__new__, len=len):
        'Make a new Point object from a sequence or iterable'
        result = new(cls, iterable)
        if len(result) != 2:
            raise TypeError('Expected 2 arguments, got %d' % len(result))
        return result

    def __repr__(self):
        'Return a nicely formatted representation string'
        return 'Point(x=%r, y=%r)' % self

    def _asdict(self):
        'Return a new OrderedDict which maps field names to their values'
        return OrderedDict(zip(self._fields, self))

    def _replace(_self, **kwds):
        'Return a new Point object replacing specified fields with new values'
        result = _self._make(map(kwds.pop, ('x', 'y'), _self))
        if kwds:
            raise ValueError('

In [84]:
p = Point(x=1, y=2)

In [85]:
p

Point(x=1, y=2)

In [86]:
p.x

1

In [87]:
p.y

2

In [88]:
# A namedtuple still supports positional indexing like a regular tuple.
p[0] + p[1]

3

In [89]:
# Unpack the fields positionally, exactly as with a plain tuple.
x, y = p

In [90]:
# Format explicitly so the output is "1 2" on both Python 2 and Python 3
# (print(x, y) would display a tuple under Python 2).
print('%s %s' % (x, y))

1 2


In [91]:
p._asdict()

OrderedDict([('x', 1), ('y', 2)])

## Ordered Dict

Ordered dictionaries are just like regular dictionaries but they remember the order that items were inserted. When iterating over an ordered dictionary, the items are returned in the order their keys were first added.

In [92]:
from collections import OrderedDict

In [93]:
# Plain dict used as the input for the sorting examples that follow.
d = {
    'banana': 3,
    'apple': 4,
    'pear': 1,
    'orange': 2,
}

In [94]:
# Sort the (key, value) pairs by key and keep that order in an OrderedDict.
OrderedDict(
    sorted(d.items(), key=lambda pair: pair[0])
)

OrderedDict([('apple', 4), ('banana', 3), ('orange', 2), ('pear', 1)])

In [95]:
# Sort the (key, value) pairs by value, smallest first.
OrderedDict(
    sorted(d.items(), key=lambda pair: pair[1])
)

OrderedDict([('pear', 1), ('orange', 2), ('banana', 3), ('apple', 4)])

In [96]:
# Sort the (key, value) pairs by value, largest first.
OrderedDict(
    sorted(
        d.items(),
        key=lambda pair: pair[1],
        reverse=True,
    )
)

OrderedDict([('apple', 4), ('banana', 3), ('orange', 2), ('pear', 1)])