### DefaultDict

#### Missing Keys

Recall what happens when we look up a missing key in a standard (here, empty) dictionary...

In [1]:
d = {}
d['a']

KeyError: 'a'

We can use the get method to handle default values for non-existent keys

In [2]:
d.get('a', 100)

100

We get 100 if 'a' is not present, but 'a' is still not in the dictionary

In [3]:
d

{}

If we want to set that key though, we would have to do something like this

In [4]:
d['a'] = d.get('a', 0) + 1

In [5]:
d['a']

1

This is the general pattern for counters

In [None]:
d.get(key, value) # value could be returned from calling a callable

So this certainly works, but you have to remember to always use the same default in multiple places for the dict

It is easier to define the default once (per dict)

That is exactly what defaultdict is for!

#### defaultdict

It is in the collections module, and is a subclass of the dict type (a defaultdict instance IS-A dict instance)
- So it has all the functionality of a standard dict

We create a defaultdict like this:

In [None]:
defaultdict(callable, [...]) 

Where the callable is called to calculate a default

The remaining arguments are simply passed to the dict constructor
- the callable MUST have zero arguments!
- if not specified, the factory defaults to None, in which case a missing key raises KeyError just like in a regular dict

This callable is referred to as a factory function

#### Other Factory Functions

Often we want to initialize values to 0, an empty string, an empty list, etc

Calling many built-in types with no arguments returns exactly such a "zero" value!

In [7]:
int()

0

So if we want a defaultdict to do the same, we can do the following:

In [9]:
from collections import defaultdict

In [12]:
a = defaultdict(lambda: 0)
a['One']

0

But you can also pass int directly as the factory!

In [13]:
a = defaultdict(int)

In [14]:
a['two']

0

list() returns an empty list and can be used as well!

In [16]:
a = defaultdict(lambda: [])
a['Three']

[]

In [17]:
a = defaultdict(list)
a['Four']

[]

So a factory must simply be a callable that can take zero arguments and return the desired default value

It can even be a function that calls a database and returns some value

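The factory is invoked every time a default value is needed, i.e. each time a missing key is looked up with d[key] (the value it returns is also stored in the dict under that key)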

The function does not have to be deterministic

#### Code Examples

In [18]:
from collections import defaultdict

In [19]:
d = {}

In [20]:
d['a']

KeyError: 'a'

In [21]:
result = d.get('a')

In [22]:
type(result)

NoneType

In [23]:
result = d.get('a', 100)

In [24]:
result

100

In [25]:
d

{}

In [26]:
# Start with an empty tally and a sample sentence whose characters we count.
counts = {}
sentence = 'able was I ere I saw elba'

In [27]:
# Manual counting: test membership first so that the first occurrence of a
# character initializes its count instead of raising KeyError.
for c in sentence:
    if c in counts:
        counts[c] += 1
    else:
        counts[c] = 1

In [28]:
counts

{'a': 4, 'b': 2, 'l': 2, 'e': 4, ' ': 6, 'w': 2, 's': 2, 'I': 2, 'r': 1}

In [29]:
# Same tally, but dict.get supplies 0 for characters not seen yet, which
# removes the explicit membership test.
counts = {}
for c in sentence:
    counts[c] = counts.get(c, 0) + 1

In [30]:
counts

{'a': 4, 'b': 2, 'l': 2, 'e': 4, ' ': 6, 'w': 2, 's': 2, 'I': 2, 'r': 1}

In [31]:
counts = defaultdict(lambda: 0)

In [32]:
# No membership test or .get needed: a missing key is created with the
# factory's value (0) before += 1 is applied.
for c in sentence:
    counts[c] += 1

In [33]:
counts

defaultdict(<function __main__.<lambda>()>,
            {'a': 4,
             'b': 2,
             'l': 2,
             'e': 4,
             ' ': 6,
             'w': 2,
             's': 2,
             'I': 2,
             'r': 1})

In [34]:
isinstance(counts, defaultdict)

True

In [35]:
isinstance(counts, dict)

True

In [36]:
counts.items()

dict_items([('a', 4), ('b', 2), ('l', 2), ('e', 4), (' ', 6), ('w', 2), ('s', 2), ('I', 2), ('r', 1)])

In [37]:
counts.keys()

dict_keys(['a', 'b', 'l', 'e', ' ', 'w', 's', 'I', 'r'])

In [38]:
counts.values()

dict_values([4, 2, 2, 4, 6, 2, 2, 2, 1])

In [39]:
counts['a']

4

In [40]:
counts['z']  # looking up a missing key calls the factory and stores the result under that key

0

In [41]:
'z' in counts

True

In [42]:
counts['hello'] = 'world'

In [43]:
counts

defaultdict(<function __main__.<lambda>()>,
            {'a': 4,
             'b': 2,
             'l': 2,
             'e': 4,
             ' ': 6,
             'w': 2,
             's': 2,
             'I': 2,
             'r': 1,
             'z': 0,
             'hello': 'world'})

In [44]:
del counts['hello']

In [45]:
counts

defaultdict(<function __main__.<lambda>()>,
            {'a': 4,
             'b': 2,
             'l': 2,
             'e': 4,
             ' ': 6,
             'w': 2,
             's': 2,
             'I': 2,
             'r': 1,
             'z': 0})

In [46]:
c = defaultdict(int)

In [47]:
int()

0

In [48]:
c = defaultdict(lambda: 0)

In [49]:
bool()

False

In [50]:
str()

''

In [51]:
list()

[]

In [52]:
# Sample data: note that 'eric' and 'michael' have no 'eye_color' entry.
persons = {
    'john': {'age': 20, 'eye_color': 'blue'},
    'jack': {'age': 25, 'eye_color': 'brown'},
    'jill': {'age': 22, 'eye_color': 'blue'},
    'eric': {'age': 35},
    'michael': {'age': 27}
    }

In [54]:
# Manual grouping: pick the color (or 'unknown' when it is absent), then make
# sure the list for that color exists before appending the person to it.
eye_colors = {}
for person, details in persons.items():
    color = details['eye_color'] if 'eye_color' in details else 'unknown'
    if color not in eye_colors:
        eye_colors[color] = []
    eye_colors[color].append(person)

In [55]:
eye_colors

{'blue': ['john', 'jill'], 'brown': ['jack'], 'unknown': ['eric', 'michael']}

In [57]:
# dict.get replaces both membership tests; the (possibly new) list still has
# to be assigned back because get does not store its default in the dict.
eye_colors = {}
for person, details in persons.items():
    color = details.get('eye_color', 'unknown')
    person_list = eye_colors.get(color, [])
    person_list.append(person)
    eye_colors[color] = person_list

In [58]:
eye_colors

{'blue': ['john', 'jill'], 'brown': ['jack'], 'unknown': ['eric', 'michael']}

In [59]:
# defaultdict(list) creates the empty list on first access to a color, so the
# person can be appended directly.
eye_colors = defaultdict(list)
for person, details in persons.items():
    color = details.get('eye_color', 'unknown')
    eye_colors[color].append(person)

In [60]:
eye_colors

defaultdict(list,
            {'blue': ['john', 'jill'],
             'brown': ['jack'],
             'unknown': ['eric', 'michael']})

In [61]:
d = dict(a=10, b=20)

In [62]:
d

{'a': 10, 'b': 20}

In [63]:
d = defaultdict(lambda: '', k1=100, k2=200)

In [64]:
d

defaultdict(<function __main__.<lambda>()>, {'k1': 100, 'k2': 200})

In [65]:
persons = {
    'john': defaultdict(lambda: 'unknown', age=20, eye_color='blue'),
    'jack': defaultdict(lambda: 'unknown', age=25, eye_color='brown'),
    'jill': defaultdict(lambda: 'unknown', age=22, eye_color='blue'),
    'eric': defaultdict(lambda: 'unknown', age=35),
    'michael': defaultdict(lambda: 'unknown', age=27)
    }

In [66]:
eye_colors = defaultdict(list)
for person, details in persons.items():
    eye_colors[details['eye_color']].append(person)

In [67]:
eye_colors

defaultdict(list,
            {'blue': ['john', 'jill'],
             'brown': ['jack'],
             'unknown': ['eric', 'michael']})

In [68]:
from functools import partial

In [71]:
eyedict = partial(defaultdict, lambda: 'unknown')

In [72]:
persons = {
    'john': eyedict(age=20, eye_color='blue'),
    'jack': eyedict(age=25, eye_color='brown'),
    'jill': eyedict(age=22, eye_color='blue'),
    'eric': eyedict(age=35),
    'michael': eyedict(age=27)
    }

In [73]:
eye_colors = defaultdict(list)
for person, details in persons.items():
    eye_colors[details['eye_color']].append(person)

In [75]:
eye_colors

defaultdict(list,
            {'blue': ['john', 'jill'],
             'brown': ['jack'],
             'unknown': ['eric', 'michael']})

In [70]:
# Same helper as the partial-based version, written as a lambda that forwards
# any positional/keyword arguments on to the defaultdict constructor.
eyedict = lambda *args, **kwargs: defaultdict(lambda: 'unknown', *args, **kwargs)

In [76]:
persons = {
    'john': eyedict(age=20, eye_color='blue'),
    'jack': eyedict(age=25, eye_color='brown'),
    'jill': eyedict(age=22, eye_color='blue'),
    'eric': eyedict(age=35),
    'michael': eyedict(age=27)
    }

In [77]:
eye_colors = defaultdict(list)
for person, details in persons.items():
    eye_colors[details['eye_color']].append(person)

In [78]:
eye_colors

defaultdict(list,
            {'blue': ['john', 'jill'],
             'brown': ['jack'],
             'unknown': ['eric', 'michael']})

In [87]:
from collections import defaultdict, namedtuple
from datetime import datetime
from functools import wraps

def function_stats():
    """Create a call-statistics tracker.

    Returns a ``Stats`` namedtuple with two fields:

    - ``decorator``: a decorator that counts the calls made to each function
      it wraps.
    - ``data``: a defaultdict keyed by the wrapped function's ``__name__``;
      each value is a dict with a ``'count'`` and a ``'first_called'`` entry.

    An entry is created the first time a decorated function is *called*,
    not when it is decorated.
    """
    Stats = namedtuple('Stats', ['decorator', 'data'])

    # The factory runs when a function name is first looked up, which is why
    # 'first_called' ends up holding the time of the first call.
    # NOTE(review): datetime.utcnow() returns a naive datetime and is
    # deprecated since Python 3.12; it is kept so the stored values match the
    # outputs shown in this notebook.
    call_data = defaultdict(
        lambda: {'count': 0, 'first_called': datetime.utcnow()}
    )

    def decorator(fn):
        @wraps(fn)
        def wrapper(*args, **kwargs):
            entry = call_data[fn.__name__]
            entry['count'] += 1
            return fn(*args, **kwargs)
        return wrapper

    return Stats(decorator=decorator, data=call_data)

In [88]:
stats = function_stats()

In [89]:
stats.data

defaultdict(<function __main__.function_stats.<locals>.<lambda>()>, {})

In [90]:
stats.decorator

<function __main__.function_stats.<locals>.decorator(fn)>

In [91]:
@stats.decorator
def func_1():
    pass

@stats.decorator
def func_2(x, y):
    pass

In [92]:
stats.data

defaultdict(<function __main__.function_stats.<locals>.<lambda>()>, {})

In [93]:
func_1()

In [94]:
stats.data

defaultdict(<function __main__.function_stats.<locals>.<lambda>()>,
            {'func_1': {'count': 1,
              'first_called': datetime.datetime(2020, 11, 18, 23, 12, 50, 927718)}})

In [95]:
func_1()

In [96]:
stats.data

defaultdict(<function __main__.function_stats.<locals>.<lambda>()>,
            {'func_1': {'count': 2,
              'first_called': datetime.datetime(2020, 11, 18, 23, 12, 50, 927718)}})

In [97]:
func_2(10, 20)

In [98]:
stats.data

defaultdict(<function __main__.function_stats.<locals>.<lambda>()>,
            {'func_1': {'count': 2,
              'first_called': datetime.datetime(2020, 11, 18, 23, 12, 50, 927718)},
             'func_2': {'count': 1,
              'first_called': datetime.datetime(2020, 11, 18, 23, 13, 31, 725289)}})

In [99]:
func_2(20, 30)

In [100]:
stats.data

defaultdict(<function __main__.function_stats.<locals>.<lambda>()>,
            {'func_1': {'count': 2,
              'first_called': datetime.datetime(2020, 11, 18, 23, 12, 50, 927718)},
             'func_2': {'count': 2,
              'first_called': datetime.datetime(2020, 11, 18, 23, 13, 31, 725289)}})