# The `collections` module
* the __`collections`__ module contains a bunch of useful container types, most of which are derived from (read: inherit from) built-in types we're already familiar with

In [1]:
# Dicts keep their keys in the order they were inserted (CPython 3.6 onward)
# background: https://mail.python.org/pipermail/python-dev/2016-September/146327.html
d = {}
# update() inserts the pairs one after another, left to right
d.update([('one', 3), ('two', 6), ('three', 0)])
print(d)

{'one': 3, 'two': 6, 'three': 0}


# The `collections` module: Default Dictionaries

## Default Dictionaries
* suppose we need a default value for any key which does not exist in the dictionary
 * we can use the __`get()`__ method, or __`setdefault()`__ (or the __`in`__ operator), or we can use a `Default Dictionary`

In [2]:
# what we did before...

def count_letters(word):
    '''Tally how often each letter occurs in the word passed in.

    Returns a dict whose keys are the letters and whose values are
    the number of times each letter appeared.'''
    tally = {}
    for char in word:
        # a letter we have not met yet counts as 0 so far
        # (tally.setdefault(char, 0) would give the same starting value)
        seen_so_far = tally.get(char, 0)
        tally[char] = seen_so_far + 1
    return tally

count_letters('antidisestablishmentarianism')

{'a': 4,
 'n': 3,
 't': 3,
 'i': 5,
 'd': 1,
 's': 4,
 'e': 2,
 'b': 1,
 'l': 1,
 'h': 1,
 'm': 2,
 'r': 1}

In [3]:
from collections import defaultdict

def count_letters(word):
    '''Tally how often each character occurs in the string passed in.

    Returns a defaultdict(int) mapping each character to its count.'''
    # defaultdict is given a factory; looking up a missing key
    # stores and returns whatever the factory produces
    # (int() -> 0, str() -> "", list() -> [])
    tally = defaultdict(int)
    for char in word:
        # the first lookup of a new char yields int() == 0
        tally[char] = tally[char] + 1
    return tally

count_letters('one two three four two three three')

defaultdict(int,
            {'o': 4,
             'n': 1,
             'e': 7,
             ' ': 6,
             't': 5,
             'w': 2,
             'h': 3,
             'r': 4,
             'f': 1,
             'u': 1})

## Lab: Default Dictionaries
* read from a file where each line is a word followed by a count, e.g.,
<pre>
    apple 2
    pear 3
    cherry 5
    apple 3
    pear 6
    apple 1
</pre>
(as shown above, words may be duplicated)
* generate a __`defaultdict`__ where the keys are the words and each value is a _list_ of all the counts for that word, e.g.,
<pre>
defaultdict(&lt;class 'list'>, {'apple': ['2', '3', '1'], 'pear': ['3', '6'], 'cherry': ['5']})
</pre>

## Now, for more fun, let's implement a default dictionary without using the __`collections`__ module
* In other words, make your own class (e.g., MyDefaultDict)
* What class or classes should it inherit from?
* You will need to create the method `__getitem__(self, key)`, which is what Python uses under the hood to retrieve an item from a dictionary
 * if the key in question is not currently in the dict, what should you return?

# The `collections` module: Deque

# Deque
* double ended queue
* pronounced "deck"

In [4]:
from collections import deque
dq = deque(range(10), maxlen=10) # maxlen is optional
print(dq)

deque([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], maxlen=10)


In [5]:
dq.rotate(3) # +n takes items from right, prepends to left, vice versa for -n
print(dq)

deque([7, 8, 9, 0, 1, 2, 3, 4, 5, 6], maxlen=10)


In [6]:
dq.rotate(-4) # negative n rotates the other way: items leave the left end and are appended on the right
print(dq)

deque([1, 2, 3, 4, 5, 6, 7, 8, 9, 0], maxlen=10)


In [7]:
dq.appendleft('a') # appending to full deque discards item(s) from other end
print(dq)

deque(['a', 1, 2, 3, 4, 5, 6, 7, 8, 9], maxlen=10)


In [8]:
dq.extend(['bcd', 6.6, 9.8]) # the deque is full, so adding 3 items on the right discards 3 from the left
print(dq)

deque([3, 4, 5, 6, 7, 8, 9, 'bcd', 6.6, 9.8], maxlen=10)


In [9]:
# items are pushed onto the left end one at a time, so they end up in
# reverse order; the deque is full, so 3 items fall off the right end
dq.extendleft((-1, -2, -3))
print(dq)

deque([-3, -2, -1, 3, 4, 5, 6, 7, 8, 9], maxlen=10)


In [10]:
dq.pop() # same as list

9

In [11]:
# maxlen is fixed when the deque is created and cannot be changed afterwards.
# The error is caught and displayed (rather than left to propagate) so that
# "Restart Kernel -> Run All" does not stop at this cell.
try:
    dq.maxlen = 7
except AttributeError as err:
    print('AttributeError:', err)

AttributeError: attribute 'maxlen' of 'collections.deque' objects is not writable

In [12]:
dq.popleft() # specific to deque, as is rotate()

-3

In [13]:
print(dq)
dq.remove(3) # same as list
print(dq)

deque([-2, -1, 3, 4, 5, 6, 7, 8], maxlen=10)
deque([-2, -1, 4, 5, 6, 7, 8], maxlen=10)


In [14]:
dq.reverse() # reverses the deque in place (nothing is assigned; dq itself changes)
print(dq)

deque([8, 7, 6, 5, 4, -1, -2], maxlen=10)


In [None]:
# if the cells above were run in order, dq holds 7 items and maxlen is 10,
# so there is room and nothing is discarded
dq.append(0)
dq

In [None]:
# if the cells above were run in order, dq now holds 8 items; adding 3 more
# would exceed maxlen=10, so one item is discarded from the left end
dq.extend(['bcd', 6.6, 9.8])
dq

# Lab: Deque
* use a deque to print the last *n* lines of a file, much like __`tail`__ in Linux
* remember that you can iterate through a file a line at a time

In [None]:

    
    
    from collections import deque

    poem = 'poem.txt'

    def tail(filename, numlines=3):
        '''Return the last numlines lines of a text file, like Linux tail.

        filename: path of the file to read
        numlines: how many trailing lines to keep (default 3); must not
                  be negative, otherwise deque raises ValueError

        Returns a list of strings, oldest line first, each with its line
        ending intact. Files shorter than numlines are returned whole.'''
        with open(filename) as fi:
            # iterating a file yields one line at a time; a bounded deque
            # silently drops the oldest line whenever a new one arrives
            lines = deque(fi, maxlen=numlines)
        return list(lines)

    try:
        # the lines keep their own newlines, so join them without a separator
        print(''.join(tail(poem)), end='')
    except FileNotFoundError:
        print(poem, 'not found -- put a text file with that name next to this notebook')


# The `collections` module: Named Tuples


## Named Tuples
* tuples are quite handy, but they are missing a key feature when using them as records–sometimes we want to name the fields
 * more efficient (i.e., less memory) than dictionaries because instances don't need to contain the keys themselves, as dictionaries do, just the values

## Advanced Programming concept: Metaprogramming 
* Metaprogramming is writing code that creates or manipulates other code, for example a function that creates new classes of objects. It is one of those programming concepts that is not used frequently, but can be very powerful. 
* __`namedtuple()`__ returns not an individual object but a new class, customized for the given names

In [None]:
from collections import namedtuple
# namedtuple() is a factory: it returns a new tuple subclass.
#  - first argument is the name of the tuple class itself
#  - second argument is the field names, given as an iterable of strings
#    or as a single space/comma-delimited string
Point = namedtuple('Point', 'x y')
point1 = Point(1, 3) # fields are filled positionally: x=1, y=3
print(point1, type(point1))

In [None]:
point2 = Point(-3, -2)
print(point2)
print(point1[0], point2[1]) # what we would do if just a tuple

In [None]:
print(point1.x, point1.y) # much nicer, because fields are named

In [None]:
from collections import namedtuple
City = namedtuple('City', 'name country population coordinates')
tokyo = City('Tokyo', 'JP', 36.933, (35.689722, 139.691667))
print(tokyo)

In [None]:
print(tokyo.population) # Prefer to use attribute or field names
print(tokyo.coordinates)
print(tokyo[1]) # use indexing if I wish

In [None]:
type(City), type(tokyo) # City is itself a class (so its type is type); tokyo is an instance of City

In [None]:
for field in City._fields: # tuple containing field names
    print(field)

In [None]:
LatLong = namedtuple('LatLong', 'lat long')
delhi_data = ('Delhi NCR', 'IN', 21.935,
              LatLong(28.613889, 77.2098889)) # tuple

In [None]:
delhi = City._make(delhi_data) # _make() builds a City from an iterable of field values
delhi

In [None]:
delhi2 = City(*delhi_data) # alternative to _make(): unpack the tuple into positional arguments
delhi2

In [None]:
delhi == delhi2 # named tuples compare field by field, like ordinary tuples

In [None]:
d = delhi._asdict() # returns a dict built from the named tuple (an OrderedDict before Python 3.8)
print(d)

# Lab: Named Tuples
1. Create a named tuple called __`Card`__ (representing a playing card) which has two fields, __`rank`__ and __`suit`__
2. Create a list of __`Card`__s, which, when initialized, contains all 52 cards in a deck
3. In other words, the list (or deck) should contain  

`[Card(rank=2, suit='clubs'), Card(rank=3, suit='clubs'), Card(rank=4, suit='clubs'), ..., Card(rank='Q', suit='spades'), Card(rank='K', suit='spades'), Card(rank='A', suit='spades')] `

# The __`collections`__ module: Counters

## Counters
* __`dict`__ subclass for counting things
* unordered collection where things being counted are `dict` keys and the counts are `dict` values
* __`Counters`__ can have negative values

In [15]:
from collections import Counter
c = Counter()
c

Counter()

In [16]:
c = Counter('antidisestablishmentarianism')
c

Counter({'a': 4,
         'n': 3,
         't': 3,
         'i': 5,
         'd': 1,
         's': 4,
         'e': 2,
         'b': 1,
         'l': 1,
         'h': 1,
         'm': 2,
         'r': 1})

In [17]:
c.update('establish' * 10) # update() adds to the existing counts instead of replacing them
c

Counter({'a': 14,
         'n': 3,
         't': 13,
         'i': 15,
         'd': 1,
         's': 24,
         'e': 12,
         'b': 11,
         'l': 11,
         'h': 11,
         'm': 2,
         'r': 1})

In [18]:
c = Counter({'red': 5, 'blue': -1}) # built from a mapping of element -> count; counts may be negative
c

Counter({'red': 5, 'blue': -1})

In [19]:
c = Counter(foo=1, bar=2) # counts can also be given as keyword arguments
c

Counter({'foo': 1, 'bar': 2})

In [20]:
c = Counter(red=6, blue=5, green=3, pink=1,
            yellow=-3)
c.elements() # returns an iterator

<itertools.chain at 0x10fd56f10>

In [21]:
for thing in c.elements(): # cf. list(...)
    print(thing, end=' ')

# What about yellow?

red red red red red red blue blue blue blue blue green green green pink 

In [22]:
c.most_common(3) # returns the n most common elements

[('red', 6), ('blue', 5), ('green', 3)]

In [23]:
d = Counter(fuschia=3, pink=0, red=3, blue=5, green=2)
c.subtract(d) # preserves negative values
c

Counter({'red': 3,
         'blue': 0,
         'green': 1,
         'pink': 1,
         'yellow': -3,
         'fuschia': -3})

In [24]:
c.items() # remember that under the hood, this is a dict

dict_items([('red', 3), ('blue', 0), ('green', 1), ('pink', 1), ('yellow', -3), ('fuschia', -3)])

In [25]:
+c # generates new Counter, discarding 0s or negatives

Counter({'red': 3, 'green': 1, 'pink': 1})

In [26]:
c = +c
c

Counter({'red': 3, 'green': 1, 'pink': 1})

In [28]:
c = Counter(red=6, blue=-5, green=3, pink=1, yellow=-3)
c = -c # discard positives and multiply remaining negatives by -1
c

Counter({'blue': 5, 'yellow': 3})

In [None]:
d = Counter(red=6, yellow=7, green=9)
c.update(d) # update() also accepts another Counter and adds its counts to the existing ones
c

In [None]:
c = Counter(a=3, b=1, c=4)
d = Counter(a=1, b=2, c=5)
c + d # adds the counts key by key and returns a new Counter

In [None]:
c - d # subtracts key by key but, unlike subtract(), drops results that are zero or negative

In [None]:
print(c, d, sep='\n') # + and - return new Counters; the operands themselves are unchanged

## Lab: Counters
* Use a __`Counter`__ to count the words in a file
* That is, read in a file, separate it into words, and use a __`Counter`__ to count the number of occurrences of each word in the file.
* Print out the 10 most common words in the file

# The __`dataclasses`__ module: 

A newer alternative to Named Tuples is the dataclass (new in Python 3.7).  It works quite similarly, but it has typing built in and is principally applied through decorators.  We haven't yet covered these topics so we will revisit the dataclass later.  