### Immutable data structures

Dont use dictionary, list because it's mutable. Use tuple!

In [1]:
import collections

In [2]:
Scientist = collections.namedtuple('Scientists', [
    'name',
    'field',
    'born',
    'nobel'
])

In [3]:
scientists = (Scientist(name='Ada Lovelace', field='math', born=1815, nobel=False),
              Scientist(name='Emmy Noether', field='math', born=1882, nobel=False),
              Scientist(name='Marie Curie', field='physics', born=1867, nobel=True),
              Scientist(name='Tu Youyou', field='chemistry', born=1930, nobel=True),
              Scientist(name='Ada Yonath', field='chemistry', born=1939, nobel=True),
              Scientist(name='Vera Rubin', field='astronomy', born=1928, nobel=False),
              Scientist(name='Sally Ride', field='physics', born=1951, nobel=True)
             )

In [4]:
scientists

(Scientists(name='Ada Lovelace', field='math', born=1815, nobel=False),
 Scientists(name='Emmy Noether', field='math', born=1882, nobel=False),
 Scientists(name='Marie Curie', field='physics', born=1867, nobel=True),
 Scientists(name='Tu Youyou', field='chemistry', born=1930, nobel=True),
 Scientists(name='Ada Yonath', field='chemistry', born=1939, nobel=True),
 Scientists(name='Vera Rubin', field='astronomy', born=1928, nobel=False),
 Scientists(name='Sally Ride', field='physics', born=1951, nobel=True))

In [5]:
from pprint import pprint

In [6]:
pprint(scientists)

(Scientists(name='Ada Lovelace', field='math', born=1815, nobel=False),
 Scientists(name='Emmy Noether', field='math', born=1882, nobel=False),
 Scientists(name='Marie Curie', field='physics', born=1867, nobel=True),
 Scientists(name='Tu Youyou', field='chemistry', born=1930, nobel=True),
 Scientists(name='Ada Yonath', field='chemistry', born=1939, nobel=True),
 Scientists(name='Vera Rubin', field='astronomy', born=1928, nobel=False),
 Scientists(name='Sally Ride', field='physics', born=1951, nobel=True))


### The "filter()" Function

In [7]:
#get new list off scientists that have won nobel prize
fs = tuple(filter(lambda x: x.nobel is True, scientists))
#lambda is a one line function, put arguments with one expression, no return. Will evaluate expression and return back.
#next(fs) can be used if tuple is not used above

In [8]:
pprint(fs)

(Scientists(name='Marie Curie', field='physics', born=1867, nobel=True),
 Scientists(name='Tu Youyou', field='chemistry', born=1930, nobel=True),
 Scientists(name='Ada Yonath', field='chemistry', born=1939, nobel=True),
 Scientists(name='Sally Ride', field='physics', born=1951, nobel=True))


In [9]:
## List comprehensions
[x for x in scientists if x.nobel is True]
## "pythonic" version of filter expression

[Scientists(name='Marie Curie', field='physics', born=1867, nobel=True),
 Scientists(name='Tu Youyou', field='chemistry', born=1930, nobel=True),
 Scientists(name='Ada Yonath', field='chemistry', born=1939, nobel=True),
 Scientists(name='Sally Ride', field='physics', born=1951, nobel=True)]

### The "map()" Function

In [10]:
## map takes a list of stuff, applies a function and assembles a new list based on that
names_and_ages = tuple(map(
    lambda x: {'name': x.name, 'age':2017-x.born},
    scientists
))

In [11]:
pprint(names_and_ages)

({'age': 202, 'name': 'Ada Lovelace'},
 {'age': 135, 'name': 'Emmy Noether'},
 {'age': 150, 'name': 'Marie Curie'},
 {'age': 87, 'name': 'Tu Youyou'},
 {'age': 78, 'name': 'Ada Yonath'},
 {'age': 89, 'name': 'Vera Rubin'},
 {'age': 66, 'name': 'Sally Ride'})


In [12]:
## make above more pythonic with list comprehension
pprint(tuple({'name': x.name, 'age': 2017-x.born}
for x in scientists))

({'age': 202, 'name': 'Ada Lovelace'},
 {'age': 135, 'name': 'Emmy Noether'},
 {'age': 150, 'name': 'Marie Curie'},
 {'age': 87, 'name': 'Tu Youyou'},
 {'age': 78, 'name': 'Ada Yonath'},
 {'age': 89, 'name': 'Vera Rubin'},
 {'age': 66, 'name': 'Sally Ride'})


### The "reduce()" function

In [13]:
from functools import reduce

In [14]:
#Reduces sequence to value by applying function to sequence
## Calculate total age of group
total_age = reduce(
    lambda acc, val: acc + val['age'],
    names_and_ages,
    0)
total_age

807

In [15]:
## pythonic way
sum(x['age'] for x in names_and_ages)

807

In [16]:
def reducer(acc, val):
    acc[val.field].append(val.name)
    return acc

scientists_by_field = reduce(
    reducer,
    scientists,
    {'math':[], 'physics':[], 'chemistry':[], 'astronomy':[]})

pprint(scientists_by_field)

{'astronomy': ['Vera Rubin'],
 'chemistry': ['Tu Youyou', 'Ada Yonath'],
 'math': ['Ada Lovelace', 'Emmy Noether'],
 'physics': ['Marie Curie', 'Sally Ride']}


In [17]:
##better way of above do not manually define accumulator
scientists_by_field = reduce(
    reducer,
    scientists,
    collections.defaultdict(list))

dd = collections.defaultdict(list)
dd['doesntexist']
dd['doesntexist---2']

[]

In [18]:
dd

defaultdict(list, {'doesntexist': [], 'doesntexist---2': []})

In [19]:
dd['xyz'].append(1)
dd

defaultdict(list, {'doesntexist': [], 'doesntexist---2': [], 'xyz': [1]})

In [20]:
## pythonic version of reducer
import itertools
scientists_by_field = {
    item[0]: list(item[1])
    for item in itertools.groupby(scientists, lambda x: x.field)
}
pprint(scientists_by_field)

{'astronomy': [Scientists(name='Vera Rubin', field='astronomy', born=1928, nobel=False)],
 'chemistry': [Scientists(name='Tu Youyou', field='chemistry', born=1930, nobel=True),
               Scientists(name='Ada Yonath', field='chemistry', born=1939, nobel=True)],
 'math': [Scientists(name='Ada Lovelace', field='math', born=1815, nobel=False),
          Scientists(name='Emmy Noether', field='math', born=1882, nobel=False)],
 'physics': [Scientists(name='Sally Ride', field='physics', born=1951, nobel=True)]}


In [21]:
## another version using lambda function instead of seperatly define function, probably not good method because hard to understand
import functools
scientists_by_field = functools.reduce(
    lambda acc, val: {**acc, **{val.field: acc[val.field] + [val.name]}},
    scientists,
    {'math':[], 'physics':[], 'chemistry':[], 'astronomy':[]}
)

pprint(scientists_by_field)

{'astronomy': ['Vera Rubin'],
 'chemistry': ['Tu Youyou', 'Ada Yonath'],
 'math': ['Ada Lovelace', 'Emmy Noether'],
 'physics': ['Marie Curie', 'Sally Ride']}
