# Item 22: Prefer Helper Classes Over Bookkeeping with Dictionaries and Tuples


In [3]:
class SimpleGradebook:
    """Gradebook that keeps one flat list of scores per student."""

    def __init__(self):
        # Maps student name -> list of reported scores.
        self._grades = dict()

    def add_student(self, name):
        """Register *name* with an empty score list (replacing any existing one)."""
        self._grades[name] = list()

    def report_grade(self, name, score):
        """Append *score* to the scores of an already-added student."""
        scores = self._grades[name]
        scores.append(score)

    def average_grade(self, name):
        """Return the mean of the student's scores.

        Raises KeyError for an unknown student and ZeroDivisionError when
        no score has been reported yet.
        """
        scores = self._grades[name]
        total = sum(scores)
        return total / len(scores)

In [4]:
# One student with a single score, so the average equals that score.
book = SimpleGradebook()
book.add_student('Isaac Newton')
book.report_grade('Isaac Newton', 90)

print(book.average_grade('Isaac Newton'))

90.0


In [6]:
class BySubjectGradebook:
    """Gradebook that groups each student's grades by subject."""

    def __init__(self):
        # Maps student name -> {subject: [grades]}.
        self._grades = dict()

    def add_student(self, name):
        """Register *name* with no subjects yet (replacing any existing entry)."""
        self._grades[name] = dict()

    def report_grade(self, name, subject, grade):
        """Append *grade* under *subject*, creating the subject on first use."""
        subjects = self._grades[name]
        if subject not in subjects:
            subjects[subject] = []
        subjects[subject].append(grade)

    def average_grade(self, name):
        """Return the mean of all the student's grades across every subject.

        Raises KeyError for an unknown student and ZeroDivisionError when
        the student has no grades.
        """
        per_subject = self._grades[name].values()
        # Sum subject by subject so the arithmetic order is unchanged.
        total = sum(sum(grades) for grades in per_subject)
        count = sum(len(grades) for grades in per_subject)
        return total / count

In [7]:
# Two subjects with two grades each; grades are stored per subject.
book = BySubjectGradebook()
book.add_student('Albert Einstein')
book.report_grade('Albert Einstein', 'Math', 75)
book.report_grade('Albert Einstein', 'Math', 65)
book.report_grade('Albert Einstein', 'Gym', 90)
book.report_grade('Albert Einstein', 'Gym', 95)

In [9]:
print(book.average_grade('Albert Einstein'))

81.25


In [10]:
book._grades

{'Albert Einstein': {'Math': [75, 65], 'Gym': [90, 95]}}

In [12]:
class WeightedGradebook:
    """Gradebook storing (score, weight) tuples per subject for each student.

    The nested dict-of-dict-of-list-of-tuples bookkeeping is kept on purpose:
    it is the hard-to-read structure this item argues against.
    """

    def __init__(self):
        # Maps student name -> {subject: [(score, weight), ...]}.
        self._grades = {}

    def add_student(self, name):
        """Register *name* with no subjects yet (replacing any existing entry)."""
        self._grades[name] = {}

    def report_grade(self, name, subject, score, weight):
        """Record *score* with its *weight* under *subject* for *name*."""
        by_subject = self._grades[name]
        grade_list = by_subject.setdefault(subject, [])
        grade_list.append((score, weight))

    def average_grade(self, name):
        """Return the mean of the per-subject weighted averages.

        Each subject contributes sum(score * weight) / sum(weight); the
        result is the unweighted mean of those subject averages.

        Raises KeyError for an unknown student and ZeroDivisionError when
        the student has no subjects or a subject's weights sum to zero.
        """
        by_subject = self._grades[name]
        score_sum, score_count = 0, 0
        # Only the score lists are needed, so iterate values(), not items().
        for scores in by_subject.values():
            subject_avg, total_weight = 0, 0
            for score, weight in scores:
                subject_avg += score * weight
                total_weight += weight
            score_sum += subject_avg / total_weight
            score_count += 1
        return score_sum / score_count

In [13]:
# The positional arguments are (name, subject, score, weight).
book = WeightedGradebook()
book.add_student('Albert Einstein')
book.report_grade('Albert Einstein', 'Math', 80, 0.10)

In [14]:
book._grades

{'Albert Einstein': {'Math': [(80, 0.1)]}}

In [None]:
d

# Item 23: Accept Functions for Simple Interfaces Instead of Classes

1.  Some APIs let you customize their behavior by passing in a function (a hook); this works because functions are first-class objects in Python

In [38]:
# Sort in place by name length; the built-in `len` serves as the key hook.
names = ['Socrates', 'Archimedes', 'Plato', 'Aristotle']
names.sort(key=len)
print(names)

['Plato', 'Socrates', 'Aristotle', 'Archimedes']


In [39]:
# Same length-based key hook, longest names first.
names.sort(key=len, reverse=True)
print(names)

['Archimedes', 'Aristotle', 'Socrates', 'Plato']


* For example, let's say you want to customize the behavior of defaultdict.
* defaultdict allows you to supply a function that is called each time a missing key is accessed. 
* function must return the default value the missing key should have in the dictionary

In [17]:
# the following defines a hook that logs each time a key is missing and returns 0 for the default value

In [1]:
def log_missing():
    """Default-value hook for defaultdict: log the miss, then supply 0."""
    default = 0
    print('Key added')
    return default

In [4]:
from collections import defaultdict
# Starting tallies; only 'green' and 'blue' exist up front.
current = {'green': 12, 'blue': 3}
# (key, amount) pairs to add; 'red' and 'orange' are not in `current`.
increments = [
    ('red', 5),
    ('blue', 17),
    ('orange', 9)
]
# log_missing is the default factory, so it runs on each missing-key lookup.
result = defaultdict(log_missing, current)

In [3]:
type(current)

dict

In [5]:
type(result)

collections.defaultdict

In [6]:
print('Before: ', dict(result))
# `+=` reads result[key] first, so a missing key triggers the default factory.
for key, amount in increments:
    result[key] += amount
print('After: ', dict(result))

Before:  {'green': 12, 'blue': 3}
Key added
Key added
After:  {'green': 12, 'blue': 20, 'red': 5, 'orange': 9}


In [7]:
def log_missing():
    """Variant default-value hook: log the miss, then supply 1 instead of 0."""
    default = 1
    print('Key added')
    return default

In [8]:
from collections import defaultdict
# Same data as before; only the hook's default value changed (now 1).
current = {'green': 12, 'blue': 3}
increments = [
    ('red', 5),
    ('blue', 17),
    ('orange', 9)
]
result = defaultdict(log_missing, current)
print('Before: ', dict(result))
# Missing keys now start from the hook's return value before `amount` is added.
for key, amount in increments:
    result[key] += amount
print('After: ', dict(result))

Before:  {'green': 12, 'blue': 3}
Key added
Key added
After:  {'green': 12, 'blue': 20, 'red': 6, 'orange': 10}


Using hooks makes APIs easy to test and build because it separates side effects from deterministic behavior.

Let's say you want the default value hook for defaultdict to count the total number of keys that were missing

In [9]:
def increment_with_report(current, increments):
    """Apply *increments* to a copy of *current* and count the new keys.

    Args:
        current: mapping of key -> starting amount.
        increments: iterable of (key, amount) pairs to add.

    Returns:
        A ``(result, added_count)`` tuple: the defaultdict holding the
        updated totals, and how many keys were missing from *current*.
    """
    added_count = 0

    def missing():
        # Stateful hook: the closure bumps the enclosing counter on each miss.
        nonlocal added_count
        added_count = added_count + 1
        return 0

    result = defaultdict(missing, current)
    for pair in increments:
        key, amount = pair
        result[key] += amount

    return result, added_count

In [10]:
current

{'green': 12, 'blue': 3}

In [11]:
increments

[('red', 5), ('blue', 17), ('orange', 9)]

In [12]:
# `count` is the number of keys that had to be created by the hook.
result, count = increment_with_report(current, increments)
print(count)

2


In [30]:
result

defaultdict(<function __main__.increment_with_report.<locals>.missing()>,
            {'green': 12, 'blue': 20, 'red': 5, 'orange': 9})

But a closure for a stateful hook is harder to read than a stateless function. 

Alternative is to define a small class that tracks state

In [14]:
class CountMissing:
    """Helper class whose bound ``missing`` method is a stateful default hook."""

    def __init__(self):
        # Running total of missing-key lookups seen so far.
        self.added = 0

    def missing(self):
        """Record one missing key and return 0 as its default value."""
        self.added = self.added + 1
        return 0

In [34]:
counter = CountMissing()
# The bound method counter.missing is passed as the default-value hook.
result = defaultdict(counter.missing, current)
for key, amount in increments:
    result[key] += amount
    
print(counter.added)

2


In [35]:
callable(counter)

False

A helper class like this is clearer than the increment_with_report function above

But what's the purpose of CountMissing? Who constructs CountMissing? Who calls missing method? Will the class need other public methods to be added in the future?

Python allows classes to define \_\_call\_\_

\_\_call\_\_ allows objects to be called like a function

It also makes the built-in callable function return True for instances of the class

In [26]:
class BetterCountMissing:
    """Callable counter: the instance itself acts as the default-value hook."""

    def __init__(self):
        # Running total of calls, i.e. missing keys seen so far.
        self.added = 0

    def __call__(self):
        """Record one missing key and return 0 as its default value."""
        self.added = self.added + 1
        return 0

counter = BetterCountMissing()
counter()  # calling the instance runs __call__, bumping counter.added to 1
assert callable(counter)

In [33]:
callable(counter)

True

In [27]:
type(counter)

__main__.BetterCountMissing

In [28]:
counter()

0

In [29]:
# Fresh counter; the instance itself is passed as the default-value hook.
counter = BetterCountMissing()
result = defaultdict(counter, current)
for key, amount in increments:
    result[key] += amount
# 'red' and 'orange' were the only keys missing from `current`.
assert counter.added == 2

In [30]:
counter()

0

In [32]:
counter.added

3

In [23]:
counter

<__main__.BetterCountMissing at 0x10c83adf0>

In [24]:
dir(counter)

['__call__',
 '__class__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 'added']

In [25]:
counter.added

2

# Item 25. Initialize Parent Classes with super

Summary
* Python's MRO (Method Resolution Order) solves superclass initialization order and diamond inheritance
    * MRO goes depth first, left-right
* Always use super to initialize parent classes

## How to use

In [8]:
class MyBaseClass:
    """Base class that simply stores the value it is constructed with."""

    def __init__(self, value):
        # Keep the caller's value as-is on the instance.
        self.value = value
        
class MyBaseClass2:
    """Second base class; its initializer ignores *value* and stores 3."""

    def __init__(self, value):
        # NOTE: the argument is deliberately unused here; value is always 3.
        self.value = 3
        
class MyChildClass(MyBaseClass, MyBaseClass2):
    """Child that initializes a parent the old way, by calling __init__ directly."""

    def __init__(self):
        # Only MyBaseClass.__init__ runs; MyBaseClass2.__init__ is never
        # called, so value ends up as 5.
        MyBaseClass.__init__(self, 5)

In [9]:
b = MyBaseClass(2)

In [10]:
b.value

2

In [11]:
c = MyChildClass()

In [12]:
c.value

5

## Problem 1

### 1. Issues with multi-inheritance order of calls
\_\_init\_\_ call order isn't specified across all subclasses

In [14]:
# Parent classes
class TimesTwo:
    """Parent whose initializer doubles an already-set ``self.value``."""

    def __init__(self):
        # Relies on another initializer having assigned self.value first.
        self.value *= 2
        
class PlusFive:
    """Parent whose initializer adds 5 to an already-set ``self.value``."""

    def __init__(self):
        # Relies on another initializer having assigned self.value first.
        self.value += 5

In [15]:
# Children class with multiple inheritance

In [16]:
class OneWay(MyBaseClass, TimesTwo, PlusFive):
    """Calls each parent initializer explicitly, in declaration order."""

    def __init__(self, value):
        # Store value, then double it, then add five: (value * 2) + 5.
        MyBaseClass.__init__(self, value)
        TimesTwo.__init__(self)
        PlusFive.__init__(self)

In [17]:
# The explicit calls run TimesTwo before PlusFive.
foo = OneWay(5)
print('First ordering is (5 * 2) + 5 =', foo.value)

First ordering is (5 * 2) + 5 = 15


In [18]:
# Children class with multiple inheritance another way
class AnotherWay(MyBaseClass, PlusFive, TimesTwo):
    """Declares PlusFive before TimesTwo but still initializes TimesTwo first."""

    def __init__(self, value):
        # The call order below, not the base-class order above, decides the
        # result, so this still computes (value * 2) + 5.
        MyBaseClass.__init__(self, value)
        TimesTwo.__init__(self)
        PlusFive.__init__(self)

In [19]:
bar = AnotherWay(5)
# Report bar, the AnotherWay instance just built, rather than the earlier
# OneWay instance `foo`; otherwise this cell demonstrates nothing.
print('Second ordering is still (5 * 2) + 5 =', bar.value)

Second ordering is still (5 * 2) + 5 = 15


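**Changing the order of the parent classes in the class statement does not change the result: the explicit \_\_init\_\_ calls still run in the order they are written, so the behavior no longer matches the declared inheritance order**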

## 2. Issues with diamond inheritance
* Diamond inheritance = when subclass inherits from two separate classes that have the same superclass somewhere in the hierarchy
* Diamond inheritance causes the common superclass's \_\_init\_\_ method to run multiple times, causing unexpected behavior

In [21]:
# Two child classes that inherit from MyBaseClass

class TimesFive(MyBaseClass):
    """Stores value through MyBaseClass, then multiplies it by 5."""

    def __init__(self, value):
        # Direct parent call: this (re)assigns self.value every time it runs.
        MyBaseClass.__init__(self, value)
        self.value *= 5

class PlusTwo(MyBaseClass):
    """Stores value through MyBaseClass, then adds 2."""

    def __init__(self, value):
        # Direct parent call: this (re)assigns self.value every time it runs.
        MyBaseClass.__init__(self, value)
        self.value += 2

In [22]:
# Child class that inherits from above two child classes
class ThisWay(TimesFive, PlusTwo):
    """Diamond example: both parents call MyBaseClass.__init__ directly."""

    def __init__(self, value):
        TimesFive.__init__(self, value)
        PlusTwo.__init__(self, value) # -> calls MyBaseClass.__init__ a second time,
                                      # which resets self.value to `value` and
                                      # discards the earlier multiplication by 5

In [23]:
# The second MyBaseClass.__init__ call wipes out the * 5, leaving 5 + 2.
foo = ThisWay(5)
print("Should be (5 * 5) + 2 = 27, but is", foo.value)

Should be (5 * 5) + 2 = 27, but is 7


* This is because the call to the second parent class's constructor, PlusTwo.\_\_init\_\_, causes self.value to be reset back to 5 when MyBaseClass.\_\_init\_\_ is called a second time
* To fix this issue Python 2.2 added super function
    * This defines MRO (Method Resolution Order)
    * It standardizes which superclasses are initialized before others (e.g. depth first, left to right)

In [37]:
# Solution to fix issue
# Python 2
class TimesFiveCorrect(MyBaseClass):
    """Multiplies value by 5 after delegating to the next class via super."""

    def __init__(self, value):
        # Python 2 style: super takes the current class and self explicitly.
        super(TimesFiveCorrect, self).__init__(value)
        self.value *= 5
        
class PlusTwoCorrect(MyBaseClass):
    """Adds 2 to value after delegating to the next class via super."""

    def __init__(self, value):
        # Python 2 style: super takes the current class and self explicitly.
        super(PlusTwoCorrect, self).__init__(value)
        self.value += 2
        
class GoodWay(TimesFiveCorrect, PlusTwoCorrect):
    """Diamond example done right: one super call walks the whole MRO."""

    def __init__(self, value):
        # A single super call; each class in the MRO then delegates onward.
        super(GoodWay, self).__init__(value)
        
# The + 2 is applied before the * 5, hence 5 * (5 + 2).
foo = GoodWay(5)
print("Should be 5 * (5 + 2) = 35 and is", foo.value)

Should be 5 * (5 + 2) = 35 and is 35


This is weird. Shouldn't TimesFiveCorrect.\_\_init\_\_ have run first? Shouldn't it be (5 * 5) + 2 = 27?

No because the ordering matches what the MRO defines. You can check MRO ordering via mro


In [38]:
from pprint import pprint
# mro() lists the classes in the order super follows.
pprint(GoodWay.mro())

[<class '__main__.GoodWay'>,
 <class '__main__.TimesFiveCorrect'>,
 <class '__main__.PlusTwoCorrect'>,
 <class '__main__.MyBaseClass'>,
 <class 'object'>]


1. GoodWay(5) calls TimesFiveCorrect.\_\_init\_\_
2. TimesFiveCorrect.\_\_init\_\_ calls MyBaseClass.\_\_init\_\_
    a. Once this reaches top of the diamond then the initialization methods do their work in the opposite order from how \_\_init\_\_ was called

### What happens
1. MyBaseClass.\_\_init\_\_ assigns value to 5
2. PlusTwoCorrect.\_\_init\_\_ adds 2 to make value equal to 7
3. TimesFiveCorrect.\_\_init\_\_ multiplies it by 5 to make value equal to 35

super works, but there are two issues with the Python 2 syntax
* It's verbose. You have to pass both the class you're in and self to super
* You have to specify the current class by name in the call to super, i.e. you have to update every call to super if the class is renamed

Python 3 fixes this by making super with no arguments the same as calling super with \_\_class\_\_ and self specified -> always use super

In [39]:
class Explicit(MyBaseClass):
    """Spells out the arguments that zero-argument super supplies implicitly."""

    def __init__(self, value):
        # __class__ here stands for the class this method is defined in.
        super(__class__, self).__init__(value * 2)

class Implicit(MyBaseClass):
    """Uses the Python 3 zero-argument form of super."""

    def __init__(self, value):
        # Same call as in Explicit, with the arguments filled in automatically.
        super().__init__(value * 2)
        
# Both spellings of super behave the same: each instance stores 10 * 2.
assert Explicit(10).value == Implicit(10).value

# Item 26. Use Multiple Inheritance Only for Mix-in Utility Classes

Summary
* Best to avoid multiple inheritance
* Use a mix-in (dynamic inspection) instead. 
    * Mix-ins are small classes that only define a set of additional methods that a class should provide.
    * Mix-ins don't define their own instance attributes or require \_\_init\_\_ to be called
* Use pluggable behaviors at the instance level to provide per-class customization
* Compose mix-ins to create complex functionality from simple behaviors

In [78]:
## How to use mix-ins
class ToDictMixin(object):
    """First look at the mix-in: only the public entry point is shown.

    The _traverse_dict helper it calls is not defined in this preview; the
    complete ToDictMixin definition appears in a later cell.
    """

    def to_dict(self):
        # Serialize starting from the instance's own attribute dictionary.
        return self._traverse_dict(self.__dict__)

Relies on dynamic attribute access using hasattr, dynamic type inspection with isinstance, and accessing instance dictionary with \_\_dict\_\_

In [41]:
type(d)

dict

In [40]:
d = {}
dir(d)

['__class__',
 '__class_getitem__',
 '__contains__',
 '__delattr__',
 '__delitem__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__getitem__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__ior__',
 '__iter__',
 '__le__',
 '__len__',
 '__lt__',
 '__ne__',
 '__new__',
 '__or__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__reversed__',
 '__ror__',
 '__setattr__',
 '__setitem__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 'clear',
 'copy',
 'fromkeys',
 'get',
 'items',
 'keys',
 'pop',
 'popitem',
 'setdefault',
 'update',
 'values']

In [43]:
class ToDictMixin:
    """Mix-in that serializes an object's attributes into plain dictionaries.

    Subclasses can override ``_traverse`` to customize how an individual
    attribute value is converted.
    """

    def to_dict(self):
        """Return this instance's attributes as a nested dictionary."""
        return self._traverse_dict(self.__dict__)

    def _traverse_dict(self, instance_dict):
        """Convert every value of *instance_dict*, keeping the same keys."""
        return {
            key: self._traverse(key, value)
            for key, value in instance_dict.items()
        }

    def _traverse(self, key, value):
        """Convert a single attribute value; *key* is the attribute's name."""
        # Other mix-in users know how to serialize themselves.
        if isinstance(value, ToDictMixin):
            return value.to_dict()
        if isinstance(value, dict):
            return self._traverse_dict(value)
        # List items are converted under the key of the list that holds them.
        if isinstance(value, list):
            return [self._traverse(key, item) for item in value]
        # Any other object with attributes is flattened via its __dict__.
        if hasattr(value, '__dict__'):
            return self._traverse_dict(value.__dict__)
        return value

class BinaryTree(ToDictMixin):
    """Binary tree node that gains to_dict() from ToDictMixin."""

    def __init__(self, value, left=None, right=None):
        # Assigned left to right, so attribute order stays value, left, right.
        self.value, self.left, self.right = value, left, right

# Small three-level tree; to_dict() converts the nested nodes recursively.
tree = BinaryTree(
        10,
        left=BinaryTree(7, right=BinaryTree(9)),
        right=BinaryTree(13, left=BinaryTree(11))
)

pprint(tree.to_dict())

{'left': {'left': None,
          'right': {'left': None, 'right': None, 'value': 9},
          'value': 7},
 'right': {'left': {'left': None, 'right': None, 'value': 11},
           'right': None,
           'value': 13},
 'value': 10}


Translating a lot of Python objects into a dictionary is now easier

In [45]:
class BinaryTreeWithParent(BinaryTree):
    """Binary tree node that also keeps a reference to its parent node."""

    def __init__(self, value, left=None, right=None, parent=None):
        super().__init__(value, left=left, right=right)
        self.parent = parent

    def _traverse(self, key, value):
        """Emit only the parent's value for the 'parent' attribute.

        Following the parent reference like any other child would recurse
        forever, because the parent links back to this node.
        """
        is_parent_link = (
            key == 'parent' and isinstance(value, BinaryTreeWithParent)
        )
        if is_parent_link:
            return value.value
        return super()._traverse(key, value)

In [46]:
# Each child points back at its parent, creating reference cycles.
root = BinaryTreeWithParent(10)
root.left = BinaryTreeWithParent(7, parent=root)
root.left.right = BinaryTreeWithParent(9, parent=root.left)
pprint(root.to_dict())

{'left': {'left': None,
          'parent': 10,
          'right': {'left': None, 'parent': 7, 'right': None, 'value': 9},
          'value': 7},
 'parent': None,
 'right': None,
 'value': 10}


By defining BinaryTreeWithParent.\_traverse any class with attribute of type BinaryTreeWithParent will automatically work with ToDictMixin

In [89]:
class NamedSubTree(ToDictMixin):
    """Pairs a name with a tree node that carries parent references."""

    def __init__(self, name, tree_with_parent):
        # Assigned left to right, so attribute order stays name, tree.
        self.name, self.tree_with_parent = name, tree_with_parent
        
# The wrapped node serializes through its own _traverse override.
my_tree = NamedSubTree('foobar', root.left.right)
pprint(my_tree.to_dict())

{'name': 'foobar',
 'tree_with_parent': {'left': None, 'parent': 7, 'right': None, 'value': 9}}


### Mix-in can also be composed together

In [50]:
class JsonMixin:
    """Mix-in adding JSON (de)serialization on top of a ``to_dict`` method.

    The host class must provide ``to_dict()`` and accept the decoded JSON
    object's keys as keyword arguments to ``__init__``.
    """

    @classmethod
    def from_json(cls, data):
        """Build an instance from a JSON object string."""
        return cls(**json.loads(data))

    def to_json(self):
        """Return the JSON encoding of ``self.to_dict()``."""
        as_dict = self.to_dict()
        return json.dumps(as_dict)

JsonMixin defines both instance methods and class methods
* Mix-ins let you add either behavior
* In this example only requirement of JsonMixin is that class has a to_dict method and its \_\_init\_\_ method takes keyword arguments
* Mix-in makes it simple to create hierarchy of utility classes to be serialized to and from JSON

In [51]:
class DatacenterRack(ToDictMixin, JsonMixin):
    """Rack made of one switch and a list of machines, JSON round-trippable.

    Args:
        switch: keyword arguments for Switch, or None for a rack without one.
        machines: iterable of keyword-argument dicts for Machine, or None
            for a rack without machines.
    """

    def __init__(self, switch=None, machines=None):
        # Both parameters default to None, but `**None` and iterating None
        # raise TypeError, so treat None as "absent" rather than crashing.
        self.switch = Switch(**switch) if switch is not None else None
        self.machines = [
            Machine(**kwargs) for kwargs in (machines or [])
        ]
        
class Switch(ToDictMixin, JsonMixin):
    """Network switch described by its port count and speed."""

    def __init__(self, ports, speed):
        # Assigned left to right, so attribute order stays ports, speed.
        self.ports, self.speed = ports, speed

class Machine(ToDictMixin, JsonMixin):
    """Machine described by its core count, RAM and disk size."""

    def __init__(self, cores, ram, disk):
        # Assigned left to right, so attribute order stays cores, ram, disk.
        self.cores, self.ram, self.disk = cores, ram, disk

In [55]:
import json
# JSON description of a rack: one switch and three machines.
serialized = """{
       "switch": {"ports": 5, "speed": 1e9},
       "machines": [
           {"cores": 8, "ram": 32e9, "disk": 5e12},
           {"cores": 4, "ram": 16e9, "disk": 1e12},
           {"cores": 2, "ram": 4e9, "disk": 500e9}
] }"""
# from_json builds the object graph; to_json serializes it back.
deserialized = DatacenterRack.from_json(serialized) 
roundtrip = deserialized.to_json()
# Compare parsed values rather than raw text, since the formatting differs.
assert json.loads(serialized) == json.loads(roundtrip)

In [56]:
json.loads(serialized)

{'switch': {'ports': 5, 'speed': 1000000000.0},
 'machines': [{'cores': 8, 'ram': 32000000000.0, 'disk': 5000000000000.0},
  {'cores': 4, 'ram': 16000000000.0, 'disk': 1000000000000.0},
  {'cores': 2, 'ram': 4000000000.0, 'disk': 500000000000.0}]}

In [57]:
json.loads(roundtrip)

{'switch': {'ports': 5, 'speed': 1000000000.0},
 'machines': [{'cores': 8, 'ram': 32000000000.0, 'disk': 5000000000000.0},
  {'cores': 4, 'ram': 16000000000.0, 'disk': 1000000000000.0},
  {'cores': 2, 'ram': 4000000000.0, 'disk': 500000000000.0}]}