# Item 22: Prefer Helper Classes Over Bookkeeping with Dictionaries and Tuples

Instead of a dictionary containing a dictionary:

In [16]:
class WeightedGradebook(object):
    """Gradebook that keeps weighted scores in nested dictionaries.

    ``_grades`` maps student name -> subject name -> list of
    ``(score, weight)`` tuples.
    """

    def __init__(self):
        self._grades = {}

    def add_student(self, name):
        """Register ``name`` with an empty record (replacing any existing one)."""
        self._grades[name] = {}

    def report_grade(self, name, subject, score, weight):
        """Record one ``(score, weight)`` pair for ``name`` in ``subject``.

        Raises:
            KeyError: if ``name`` was never passed to ``add_student``.
        """
        by_subject = self._grades[name]
        grade_list = by_subject.setdefault(subject, [])
        grade_list.append((score, weight))

    def average_grade(self, name):
        """Return the weighted mean of every score recorded for ``name``.

        Scores from all subjects are pooled:
        ``sum(score * weight) / sum(weight)``.

        Raises:
            KeyError: if ``name`` was never passed to ``add_student``.
            ZeroDivisionError: if the recorded weights sum to zero, which
                includes the case where no grades were reported.
        """
        # NOTE(review): Student.average_grade in the helper-class version
        # averages the per-subject means instead, so the two report different
        # numbers for the same data (79.26... vs 81.25). The unused
        # per-subject accumulators that used to sit in this loop suggest that
        # per-subject averaging may have been intended here -- confirm.
        by_subject = self._grades[name]
        score_sum, score_count = 0, 0
        for scores in by_subject.values():
            for score, weight in scores:
                score_sum += score * weight
                score_count += weight
        return score_sum / score_count
    
# Demo: record two Math and two Gym scores for one student, then report
# the student's average.
student = 'Albert Einstein'
book = WeightedGradebook()
book.add_student(student)
for subject, score, weight in [
    ('Math', 75, 1.0),
    ('Math', 65, 1.0),
    ('Gym', 90, 0.7),
    ('Gym', 95, 0.7),
]:
    book.report_grade(student, subject, score, weight)

print("Average for %s is %r" % (student, book.average_grade(student)))

Average for Albert Einstein is 79.26470588235294


Use helper classes for Grade, Subject and Student as well as Gradebook

In [13]:
import collections
# Immutable (score, weight) record used by Subject.
Grade = collections.namedtuple('Grade', ['score', 'weight'])

class Subject:
    """Weighted grades recorded for a single subject."""

    def __init__(self):
        self._grades = []

    def report_grade(self, score, weight):
        """Store one ``Grade(score, weight)`` record."""
        entry = Grade(score, weight)
        self._grades.append(entry)

    def average_grade(self):
        """Return ``sum(score * weight) / sum(weight)`` over the stored grades.

        Raises:
            ZeroDivisionError: if the weights sum to zero, which includes
                the case where nothing has been reported yet.
        """
        weighted_sum = 0
        weight_sum = 0
        # Each stored record is a (score, weight) named tuple.
        for score, weight in self._grades:
            weighted_sum += score * weight
            weight_sum += weight
        return weighted_sum / weight_sum
    
class Student:
    """A student's subjects, keyed by subject name."""

    def __init__(self):
        self._subjects = {}

    def subject(self, name):
        """Return the Subject stored under ``name``, creating it on first use."""
        known = self._subjects
        if name in known:
            return known[name]
        known[name] = fresh = Subject()
        return fresh

    def average_grade(self):
        """Return the unweighted mean of every subject's ``average_grade()``.

        Raises:
            ZeroDivisionError: if the student has no subjects.
        """
        running = 0
        for entry in self._subjects.values():
            running += entry.average_grade()
        return running / len(self._subjects)
    
class Gradebook:
    """Collection of Student records, keyed by student name."""

    def __init__(self):
        self._students = {}

    def student(self, name):
        """Return the Student stored under ``name``, creating it on first use."""
        roster = self._students
        if name in roster:
            return roster[name]
        roster[name] = added = Student()
        return added
    
# Demo: same data as the dictionary-based gradebook, via the helper classes.
name = 'Albert Einstein'
gradebook = Gradebook()
student = gradebook.student(name)

math = student.subject('Math')
for score in (75, 65):
    math.report_grade(score, 1.0)

gym = student.subject('Gym')
for score in (90, 95):
    gym.report_grade(score, 0.7)

# Look the student up again through the gradebook before averaging.
average = gradebook.student(name).average_grade()

print("Average for %s is %r" % (name, average))

Average for Albert Einstein is 81.25


# Item 23: Accept Functions for Simple Interfaces Instead of Classes

Here is an example using `collections.defaultdict`, which accepts a function that supplies the default value whenever a missing key is accessed.

In [19]:
import collections

def log_missing():
    """Announce that a key is being added and return 0 as its default."""
    default_value = 0
    print('Key added')
    return default_value

# Demo: apply increments on top of existing counts; log_missing runs each
# time an increment touches a key that is not present yet.
current = {'green': 12, 'blue': 3}
increments = [('red', 5), ('blue', 17), ('orange', 9)]
result = collections.defaultdict(log_missing, current)
print('Before:', dict(result))
for key, amount in increments:
    result[key] = result[key] + amount
print('After: ', dict(result))

Before: {'blue': 3, 'green': 12}
Key added
Key added
After:  {'orange': 9, 'blue': 20, 'red': 5, 'green': 12}


# Item 24: Use @classmethod Polymorphism to Construct Objects Generically

First we create a set of MapReduce objects for counting the total number of lines in a set of files.

In [20]:
class InputData(object):
    """Interface for a source of input text."""

    def read(self):
        """Return the input's contents; subclasses must override this."""
        raise NotImplementedError


class PathInputData(InputData):
    """InputData backed by the file at ``path``."""

    def __init__(self, path):
        super().__init__()
        self.path = path

    def read(self):
        """Return the entire contents of the file at ``self.path``."""
        # The context manager closes the handle as soon as the read finishes
        # instead of leaving that to garbage collection.
        with open(self.path) as handle:
            return handle.read()
    
class Worker:
    """Interface for a unit of work over one input.

    ``result`` starts as None; ``map`` and ``reduce`` must be supplied by
    subclasses.
    """

    def __init__(self, input_data):
        self.input_data, self.result = input_data, None

    def map(self):
        """Process ``input_data``; subclasses must override this."""
        raise NotImplementedError()

    def reduce(self, other):
        """Combine ``other`` into this worker; subclasses must override this."""
        raise NotImplementedError()
        
class LineCountWorker(Worker):
    """Worker that counts newline characters in its input."""

    def map(self):
        """Read the input and store its newline count in ``result``."""
        text = self.input_data.read()
        self.result = text.count('\n')

    def reduce(self, other):
        """Add ``other.result`` to this worker's ``result``."""
        self.result = self.result + other.result

To use these classes we can create some helper functions and call them to complete the implementation:

In [21]:
def generate_inputs(data_dir):
    """Yield a PathInputData for every directory entry in ``data_dir``."""
    # Imported locally because no visible cell imports os.
    import os

    for name in os.listdir(data_dir):
        yield PathInputData(os.path.join(data_dir, name))
        
def create_workers(input_list):
    """Return a list with one LineCountWorker per item of ``input_list``."""
    return [LineCountWorker(source) for source in input_list]

def execute(workers):
    """Run every worker's ``map`` on its own thread, then fold the results.

    After all threads have been joined, each remaining worker is reduced
    into the first one, whose ``result`` is returned.

    Raises:
        IndexError: if ``workers`` is empty.
    """
    # Imported locally because no visible cell imports Thread.
    from threading import Thread

    threads = [Thread(target=w.map) for w in workers]
    for thread in threads:
        thread.start()
    for thread in threads:
        thread.join()

    first, rest = workers[0], workers[1:]
    for worker in rest:
        first.reduce(worker)
    return first.result

def mapreduce(data_dir):
    """Build inputs from ``data_dir``, wrap them in workers, and execute them."""
    return execute(create_workers(generate_inputs(data_dir)))

The issue is the `mapreduce` function is not generic at all. If you want to write another `InputData` or `Worker` subclass, you would also have to rewrite the `generate_inputs`, `create_workers`, and `mapreduce` functions to match.

Here is a generic solution using `@classmethod`:

In [22]:
class GenericInputData(object):
    """Interface for input sources that can also construct themselves."""

    def read(self):
        """Return the input's contents; subclasses must override this."""
        raise NotImplementedError

    @classmethod
    def generate_inputs(cls, config):
        """Produce instances of ``cls`` from ``config``; subclasses must override this."""
        raise NotImplementedError


class PathInputData(GenericInputData):
    """GenericInputData backed by the file at ``self.path``."""

    # ... (constructor elided in this cell; ``read`` expects ``self.path``)
    def read(self):
        """Return the entire contents of the file at ``self.path``."""
        # The context manager closes the handle as soon as the read finishes
        # instead of leaving that to garbage collection.
        with open(self.path) as handle:
            return handle.read()

    @classmethod
    def generate_inputs(cls, config):
        """Yield ``cls(path)`` for every entry in ``config['data_dir']``."""
        # Imported locally because no visible cell imports os.
        import os

        data_dir = config['data_dir']
        for name in os.listdir(data_dir):
            yield cls(os.path.join(data_dir, name))

class GenericWorker:
    """Worker interface that can also build workers for a given input class."""

    # ...
    def map(self):
        """Process the input; subclasses must override this."""
        raise NotImplementedError()

    def reduce(self, other):
        """Combine ``other`` into this worker; subclasses must override this."""
        raise NotImplementedError()

    @classmethod
    def create_workers(cls, input_class, config):
        """Return ``cls(input_data)`` for each item of ``input_class.generate_inputs(config)``."""
        return [cls(source) for source in input_class.generate_inputs(config)]

Now with a small change to `LineCountWorker`

In [26]:
class LineCountWorker(GenericWorker):
    """GenericWorker that counts newline characters in its input."""

    def map(self):
        """Read the input and store its newline count in ``result``."""
        contents = self.input_data.read()
        self.result = contents.count('\n')

    def reduce(self, other):
        """Add ``other.result`` to this worker's ``result``."""
        self.result = self.result + other.result

We can use the new versions of the classes and interfaces.

In [None]:
def mapreduce(worker_class, input_class, config):
    """Create workers via ``worker_class.create_workers`` and execute them."""
    return execute(worker_class.create_workers(input_class, config))

# Item 25: Initialize Parent Classes with super

Instead of explicitly calling the base class `__init__` like:

In [27]:
class MyBaseClass:
    """Base class that stores the value it is constructed with."""

    def __init__(self, value):
        # Keep the constructor argument on the instance.
        self.value = value

class MyChildClass(MyBaseClass):
    """Child that initializes its parent by naming the base class explicitly."""
    def __init__(self):
        # Direct call to the parent's __init__, passing self by hand.
        MyBaseClass.__init__(self, 5)

Call `super`

In [None]:
class MyBaseClass1(object):
    """Base class that stores the value it is constructed with."""

    def __init__(self, value):
        self.value = value


class MyChildClass1(MyBaseClass1):
    """Child that initializes its parent through ``super()``."""

    def __init__(self):
        # super() already binds self, so only the real argument is passed;
        # passing self as well raises TypeError.
        super().__init__(5)

# Item 26: Use Multiple Inheritance Only for Mix-in Utility Classes

A mix-in is a small class that only defines a set of additional methods that a class should provide. Mix-in classes don’t define their own instance attributes nor require their __init__ constructor to be called.

In [44]:
class ToDictMixin:
    """Mix-in that converts an object's attributes into nested plain dicts."""

    def to_dict(self):
        """Return this instance's ``__dict__`` with every value traversed."""
        return self._traverse_dict(self.__dict__)

    def _traverse_dict(self, instance_dict):
        """Return a new dict with the same keys and traversed values."""
        return {
            key: self._traverse(key, value)
            for key, value in instance_dict.items()
        }

    def _traverse(self, key, value):
        """Convert one value; the checks run in order and the first match wins."""
        if isinstance(value, ToDictMixin):
            return value.to_dict()
        if isinstance(value, dict):
            return self._traverse_dict(value)
        if isinstance(value, list):
            return [self._traverse(key, element) for element in value]
        if hasattr(value, '__dict__'):
            # Objects that are not mix-in instances are converted from their
            # attribute dict.
            return self._traverse_dict(value.__dict__)
        return value

The ToDictMixin can be combined with another mixin to provide additional functionality.  In this case we create a JsonMixin that uses the to_dict function of the ToDictMixin to convert an object to/from json.

In [42]:
import json
class JsonMixin:
    """Mix-in adding JSON (de)serialization on top of a ``to_dict`` method."""

    @classmethod
    def from_json(cls, data):
        """Build an instance by passing the decoded JSON object as keyword arguments."""
        return cls(**json.loads(data))

    def to_json(self):
        """Return ``self.to_dict()`` encoded as a JSON string."""
        as_dict = self.to_dict()
        return json.dumps(as_dict)

Here is an example of using the mixins in a class DatacenterRack

In [45]:
class Switch:
    """Plain record holding a switch's ``ports`` and ``speed``."""

    def __init__(self, ports, speed):
        self.ports, self.speed = ports, speed
        
class Machine:
    """Plain record holding a machine's ``cores``, ``ram`` and ``disk``."""

    def __init__(self, cores, ram, disk):
        self.cores, self.ram, self.disk = cores, ram, disk
        
class DatacenterRack(ToDictMixin, JsonMixin):
    """Rack made of one Switch and a list of Machines, built from plain dicts.

    Args:
        switch: keyword arguments for ``Switch``, or None for no switch.
        machines: iterable of keyword-argument dicts for ``Machine``, or
            None for no machines.
    """

    def __init__(self, switch=None, machines=None):
        # Both parameters default to None, so guard before unpacking or
        # iterating; otherwise DatacenterRack() raises TypeError.
        self.switch = Switch(**switch) if switch is not None else None
        self.machines = [
            Machine(**kwargs) for kwargs in (machines or [])]

# Demo: build a rack from plain dicts and print its JSON form.
switch = {'ports': 5, 'speed': 1e9}
machines = [{'cores': 4, 'ram': 32e9, 'disk': 500e9}]
dc = DatacenterRack(switch, machines)
print(dc.to_json())

{"machines": [{"ram": 32000000000.0, "disk": 500000000000.0, "cores": 4}], "switch": {"speed": 1000000000.0, "ports": 5}}


# Item 27: Prefer Public Attributes Over Private Ones

In Python there are only two types of visibility for a class's attributes: public and private.

In [None]:
class MyObject:
    """Example object with one public and one private (name-mangled) field."""

    def __init__(self):
        self.public_field = 5
        # The double leading underscore makes this attribute private.
        self.__private_field = 10

    def get_private_field(self):
        """Return the value of the private field."""
        value = self.__private_field
        return value

# Item 28: Inherit from collections.abc for Custom Container Types

Say you want to create your own custom list type that has additional methods for counting the frequency of its members.

In [46]:
class FrequencyList(list):
    """A list that can also report how often each member occurs."""

    def __init__(self, members):
        super().__init__(members)

    def frequency(self):
        """Return a dict mapping each distinct member to its occurrence count."""
        tally = {}
        for member in self:
            tally[member] = tally.get(member, 0) + 1
        return tally

Here you can use all of the builtin list functionality as well as the new function.

In [47]:
# Demo: FrequencyList keeps the built-in list behaviour (len, pop, repr)
# and adds frequency().
foo = FrequencyList(['a', 'b', 'a', 'c', 'b', 'a', 'd'])
print('Length is', len(foo))
# pop() with no argument removes the last element.
foo.pop()
print('After pop:', repr(foo))
print('Frequency:', foo.frequency())

Length is 7
After pop: ['a', 'b', 'a', 'c', 'b', 'a']
Frequency: {'b': 2, 'a': 3, 'c': 1}


The built-in collections.abc module defines a set of abstract base classes that provide all of the typical methods for each container type. When you subclass from these abstract base classes and forget to implement required methods, the module will tell you something is wrong.

In [48]:
from collections.abc import Sequence

class BadType(Sequence):
    """Sequence subclass that deliberately implements none of the abstract methods."""

# Raises TypeError: BadType does not implement the abstract methods
# __getitem__ and __len__, so it cannot be instantiated.
foo = BadType()

TypeError: Can't instantiate abstract class BadType with abstract methods __getitem__, __len__