# The `reduce()` function

In [2]:
from functools import reduce

In [3]:
help(reduce)

Help on built-in function reduce in module _functools:

reduce(...)
    reduce(function, sequence[, initial]) -> value
    
    Apply a function of two arguments cumulatively to the items of a sequence,
    from left to right, so as to reduce the sequence to a single value.
    For example, reduce(lambda x, y: x+y, [1, 2, 3, 4, 5]) calculates
    ((((1+2)+3)+4)+5).  If initial is present, it is placed before the items
    of the sequence in the calculation, and serves as a default when the
    sequence is empty.



# What is the `reduce()` function

The `reduce()` function takes a `function` of a sequence, and then an inital optional value and it reduces teh sequence down to a single output value by applying the initial `function` repeatedly to tie items in the sequence.

In [33]:
import collections

Scientist = collections.namedtuple('Scientist', [
    'name',
    'field',
    'born',
    'nobel',
])

scientists = (
    Scientist(name='Ada Lovelace', field='math', born=1815, nobel=False),
    Scientist(name='Emmy Noether', field='math', born=1882, nobel=False),
    Scientist(name='Marie Curie', field='physics', born=1867, nobel=True),
    Scientist(name='Tu Youyou', field='chemistry', born=1930, nobel=True),
    Scientist(name='Ada Yonath', field='chemistry', born=1939, nobel=True),
    Scientist(name='Vera Rubin', field='astronomy', born=1928, nobel=False),
    Scientist(name='Sally Ride', field='physics', born=1951, nobel=False),
)

In [34]:
## previous derived list 
names_and_ages = tuple(map(
    lambda x:{'name': x.name,'age': 2017 - x.born}
    ,scientists))
names_and_ages

({'name': 'Ada Lovelace', 'age': 202},
 {'name': 'Emmy Noether', 'age': 135},
 {'name': 'Marie Curie', 'age': 150},
 {'name': 'Tu Youyou', 'age': 87},
 {'name': 'Ada Yonath', 'age': 78},
 {'name': 'Vera Rubin', 'age': 89},
 {'name': 'Sally Ride', 'age': 66})

In [35]:
total_age = reduce(
    lambda acc, val: acc + val['age'], names_and_ages, 0)


- loop over the age value in `name_and_ages` and sum it starting with `0`
- add it to variable`acc`, then return that value


In [36]:
# sum of all ages from the derived list
total_age

807

In [37]:
# This can be done using the `sum()` function as well

sum(x['age'] for x in names_and_ages)

807

# Why use the `reduce()` function

- grouping scientist by field

In [38]:
scientists

(Scientist(name='Ada Lovelace', field='math', born=1815, nobel=False),
 Scientist(name='Emmy Noether', field='math', born=1882, nobel=False),
 Scientist(name='Marie Curie', field='physics', born=1867, nobel=True),
 Scientist(name='Tu Youyou', field='chemistry', born=1930, nobel=True),
 Scientist(name='Ada Yonath', field='chemistry', born=1939, nobel=True),
 Scientist(name='Vera Rubin', field='astronomy', born=1928, nobel=False),
 Scientist(name='Sally Ride', field='physics', born=1951, nobel=False))

In [39]:
def reducer(acc,val):
    acc[val.field].append(val.name)
    return acc

scientists_by_field = reduce(
    reducer,
    scientists,
    {'math':[], 'physics': [], 'chemistry': [], 'astronomy': []})

In [40]:
scientists_by_field

{'math': ['Ada Lovelace', 'Emmy Noether'],
 'physics': ['Marie Curie', 'Sally Ride'],
 'chemistry': ['Tu Youyou', 'Ada Yonath'],
 'astronomy': ['Vera Rubin']}

In [41]:
# a better way to do it by using default dict
import collections

scientists_by_field_02 = reduce(
    reducer,
    scientists,
    collections.defaultdict(list))

In [42]:
scientists_by_field_02

defaultdict(list,
            {'math': ['Ada Lovelace', 'Emmy Noether'],
             'physics': ['Marie Curie', 'Sally Ride'],
             'chemistry': ['Tu Youyou', 'Ada Yonath'],
             'astronomy': ['Vera Rubin']})

In [43]:
# explore
dd = collections.defaultdict(list)
dd

defaultdict(list, {})

In [44]:
dd['does not exist']

[]

In [45]:
dd

defaultdict(list, {'does not exist': []})

In [46]:
dd['doesennt exist----2']

dd

defaultdict(list, {'does not exist': [], 'doesennt exist----2': []})

In [47]:
dd['xyz'].append(1)

In [48]:
dd

defaultdict(list,
            {'does not exist': [], 'doesennt exist----2': [], 'xyz': [1]})

In [49]:
dd['xyz'].append(2)
dd['xyz'].append(3)

In [50]:
dd

defaultdict(list,
            {'does not exist': [],
             'doesennt exist----2': [],
             'xyz': [1, 2, 3]})

# Grouping Data With `itertools.groupby()`

In [51]:
import itertools

scientists_by_field_03 = {
    item[0]: list(item[1])
    for item in itertools.groupby(scientists, lambda x:x.field)
}

In [52]:
scientists_by_field_03

{'math': [Scientist(name='Ada Lovelace', field='math', born=1815, nobel=False),
  Scientist(name='Emmy Noether', field='math', born=1882, nobel=False)],
 'physics': [Scientist(name='Sally Ride', field='physics', born=1951, nobel=False)],
 'chemistry': [Scientist(name='Tu Youyou', field='chemistry', born=1930, nobel=True),
  Scientist(name='Ada Yonath', field='chemistry', born=1939, nobel=True)],
 'astronomy': [Scientist(name='Vera Rubin', field='astronomy', born=1928, nobel=False)]}

In [57]:
help(itertools.groupby)

Help on class groupby in module itertools:

class groupby(builtins.object)
 |  groupby(iterable, key=None)
 |  
 |  make an iterator that returns consecutive keys and groups from the iterable
 |  
 |  iterable
 |    Elements to divide into groups according to the key function.
 |  key
 |    A function for computing the group category for each element.
 |    If the key function is not specified or is None, the element itself
 |    is used for grouping.
 |  
 |  Methods defined here:
 |  
 |  __getattribute__(self, name, /)
 |      Return getattr(self, name).
 |  
 |  __iter__(self, /)
 |      Implement iter(self).
 |  
 |  __next__(self, /)
 |      Implement next(self).
 |  
 |  __reduce__(...)
 |      Return state information for pickling.
 |  
 |  __setstate__(...)
 |      Set state information for unpickling.
 |  
 |  ----------------------------------------------------------------------
 |  Static methods defined here:
 |  
 |  __new__(*args, **kwargs) from builtins.type
 |      Cre