# Collections Module

In [2]:
# counter
from collections import Counter

In [3]:
# Sample data containing repeated values
l = [1, 1, 2, 1, 2, 3, 4, 5, 3, 2, 3, 4, 2, 1, 3, 4]

# Counter maps each distinct element to how many times it occurs
Counter(l)

Counter({1: 4, 2: 4, 3: 4, 4: 3, 5: 1})

In [4]:
# A string is iterated character by character, so each letter gets its own count
s = "ahshshdjkdkjsjajhsjsjdhsjksjajhshsj"

Counter(s)

Counter({'a': 3, 'd': 3, 'h': 7, 'j': 10, 'k': 3, 's': 9})

In [6]:
# Split on whitespace only: case and punctuation are kept, so
# 'Word', 'word' and 'word.' are counted as three different keys.
w = "How many times does each word show up in a sentence? Word word word word."
words = w.split()

Counter(words)

Counter({'How': 1,
         'Word': 1,
         'a': 1,
         'does': 1,
         'each': 1,
         'in': 1,
         'many': 1,
         'sentence?': 1,
         'show': 1,
         'times': 1,
         'up': 1,
         'word': 3,
         'word.': 1})

In [7]:
# Keep the tally in a variable so it can be queried
c = Counter(words)

# most_common(n) returns the n highest-count (element, count) pairs, highest count first
c.most_common(2)

[('word', 3), ('How', 1)]

## Common patterns when using a `Counter` object

In [None]:
# Reference list of everyday Counter idioms, applied to the word tally `c`.
# Only the last expression of a cell is displayed; run a line on its own to see its value.
n = 2                            # how many entries the "least common" slice should return

sum(c.values())                  # total of all counts
list(c)                          # list unique elements
set(c)                           # convert to a set
dict(c)                          # convert to a regular dictionary
list(c.items())                  # convert to a list of (elem, cnt) pairs
c.most_common()[:-n-1:-1]        # n least common elements (walks backwards from the end)
c += Counter()                   # remove zero and negative counts
c.clear()                        # reset all counts -- kept last so the lines above see real data

# defaultdict

`defaultdict` is a dictionary-like object that provides every method of a regular dictionary, but takes a first argument (`default_factory`): a callable that produces the default value for a missing key. Using `defaultdict` is simpler and faster than doing the same thing with the `dict.setdefault` method.

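**A defaultdict that has a default factory will not raise a KeyError on a `d[key]` lookup. Any key that does not exist gets assigned the value returned by the default factory.** (Other operations, such as `d.pop(key)` or `del d[key]`, still raise `KeyError` for a missing key.)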

In [8]:
from collections import defaultdict

In [9]:
# Start from an ordinary dict holding a single key
d = {'k1': 1}

In [10]:
d['k1']  # the key is present, so its stored value comes back

1

In [11]:
# A plain dict has no fallback for a missing key. Catch the KeyError so the
# failure is still shown without stopping a Restart & Run All of the notebook.
try:
    d['k2']
except KeyError as err:
    print("KeyError:", err)

KeyError: 'k2'

In [12]:
# `object` is the default factory: it is called with no arguments to build the value for a missing key
d = defaultdict(object)

In [13]:
d['one']  # 'one' is absent, so the factory's result is stored under it and returned

<object at 0x10cb557f0>

In [14]:
# Iterating yields the keys, including the one created by the lookup
for item in d.keys():
    print(item)

one


In [15]:
# The factory returns 0, so every missing key starts out at 0
d = defaultdict(lambda: 0)

In [16]:
d['one']  # missing key: inserted with the factory's value, which is returned

0

In [17]:
d['two']  # same again for a second missing key

0

In [18]:
d  # both looked-up keys are now stored in the mapping

defaultdict(<function __main__.<lambda>>, {'one': 0, 'two': 0})

## OrderedDict
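`OrderedDict` ensures that the order in which keys were inserted into the dictionary remains intact. Note that since Python 3.7 the built-in `dict` also preserves insertion order (the loop output for the plain dict below shows this); `OrderedDict` remains useful when order should count in equality comparisons or when entries need to be reordered, e.g. with `move_to_end`.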

In [25]:
# Plain dict whose keys are deliberately inserted out of alphabetical order
d = {'d': 4, 'a': 1, 'e': 5, 'b': 2, 'c': 3}

In [26]:
d  # note: the repr displayed for this cell lists the keys alphabetically

{'a': 1, 'b': 2, 'c': 3, 'd': 4, 'e': 5}

In [27]:
# Walk the (key, value) pairs in the dict's own iteration order
for k, v in d.items():
    print(k, v)

d 4
a 1
e 5
b 2
c 3


In [30]:
from collections import OrderedDict

In [34]:
# Same keys and insertion order as the plain dict, supplied as a list of pairs
d = OrderedDict([
    ('d', 4),
    ('a', 1),
    ('e', 5),
    ('b', 2),
    ('c', 3),
])

In [35]:
d  # the repr lists the pairs in the order they were inserted

OrderedDict([('d', 4), ('a', 1), ('e', 5), ('b', 2), ('c', 3)])

In [32]:
# Iteration follows insertion order
for k, v in d.items():
    print(k, v)

d 4
a 1
e 5
b 2
c 3


## namedtuple

In [36]:
# An ordinary tuple, for comparison with namedtuple
t = (1, 2, 3)

In [37]:
t[0]  # a plain tuple is read by position

1

In [38]:
from collections import namedtuple

In [39]:
# Tuple subclass named 'Dog' whose three positions are also readable as attributes
Dog = namedtuple('Dog', ['age', 'breed', 'name'])

In [40]:
# Keyword arguments make it explicit which field each value fills
sam = Dog(age=2, breed='Lab', name='Sammy')

In [41]:
sam.age  # read a field by name

2

In [42]:
sam.breed  # another field read by name

'Lab'

In [43]:
sam[0]  # positional indexing still works; index 0 gives the same value as sam.age

2

In [44]:
# A second record type, declared with an explicit list of field names
Cat = namedtuple('Cat', ['fur', 'claws', 'name'])

In [45]:
# NOTE: this rebinds `c`, which held the word Counter earlier in the notebook
c = Cat(fur='fuzzy', claws=False, name='Ron')

In [46]:
c.name  # read the `name` field of the Cat record

'Ron'