### defaultdict

- accumulating (tabulating)

In [1]:
from collections import defaultdict
d = defaultdict(list)
d['raymond'].append('red')
d['rachel'].append('blue')
d['matthew'].append('yellow')
d['raymond'].append('mac')
d['rachel'].append('pc')
d['matthew'].append('vtech')
from pprint import pprint
pprint(d)

defaultdict(<class 'list'>,
            {'matthew': ['yellow', 'vtech'],
             'rachel': ['blue', 'pc'],
             'raymond': ['red', 'mac']})


In [2]:
pprint(dict(d))

{'matthew': ['yellow', 'vtech'],
 'rachel': ['blue', 'pc'],
 'raymond': ['red', 'mac']}


- Model one-to-many mapping: 
dict(one, list_of_many)

In [3]:
e2s = {
    'one': ['uno'],
    'two': ['dos'],
    'three': ['tres'],
    'trio': ['tres'],
    'free': ['lire', 'gratis'],
}

In [4]:
pprint(e2s, width=40)

{'free': ['lire', 'gratis'],
 'one': ['uno'],
 'three': ['tres'],
 'trio': ['tres'],
 'two': ['dos']}


In [5]:
s2e = defaultdict(list)
for eng, spanwords in e2s.items():
    for span in spanwords:
        s2e[span].append(eng)
pprint(s2e)

defaultdict(<class 'list'>,
            {'dos': ['two'],
             'gratis': ['free'],
             'lire': ['free'],
             'tres': ['three', 'trio'],
             'uno': ['one']})


- biijection (one-to-one mapping)

In [7]:
e2s = dict(one='uno', two='dos', three='tres')
{span: eng for eng, span in e2s.items()}

{'uno': 'one', 'dos': 'two', 'tres': 'three'}

### glob
global wildcard expansion

In [9]:
import glob 
glob.glob('*.ipynb')

['k-means.ipynb',
 'Typehinting & data manipulation.ipynb',
 'Simulations_and_Resampling.ipynb',
 'preparation_for_cluster_analysis.ipynb']

### reading files with encoding

In [12]:
with open('congress_data/congress_votes_114-2016_s20.csv', encoding='utf-8') as f:
    print(f.read())

Senate Vote #20 2016-02-10T17:11:00 - H.R. 757: North Korea Sanctions Enforcement Act of 2016
person,state,district,vote,name,party
300002,TN,,Yea,Sen. Lamar Alexander [R],Republican
300011,CA,,Yea,Sen. Barbara Boxer [D],Democrat
300018,WA,,Yea,Sen. Maria Cantwell [D],Democrat
300019,DE,,Yea,Sen. Thomas Carper [D],Democrat
300023,MS,,Yea,Sen. Thad Cochran [R],Republican
300025,ME,,Yea,Sen. Susan Collins [R],Republican
300027,TX,,Yea,Sen. John Cornyn [R],Republican
300030,ID,,Yea,Sen. Michael Crapo [R],Republican
300038,IL,,Not Voting,Sen. Richard Durbin [D],Democrat
300041,WY,,Yea,Sen. Michael Enzi [R],Republican
300043,CA,,Yea,Sen. Dianne Feinstein [D],Democrat
300047,SC,,Not Voting,Sen. Lindsey Graham [R],Republican
300048,IA,,Yea,Sen. Charles “Chuck” Grassley [R],Republican
300052,UT,,Yea,Sen. Orrin Hatch [R],Republican
300055,OK,,Yea,Sen. James “Jim” Inhofe [R],Republican
300065,VT,,Yea,Sen. Patrick Leahy [D],Democrat
300071,AZ,,Yea,Sen. John McCain [R],Republican
300072,KY,,Yea,Se

### next() / isclice()
to remove elements from iterator

In [13]:
it = iter('abcdef')
next(it)

'a'

In [14]:
next(it)


'b'

In [15]:
list(it)  #fully consumed

['c', 'd', 'e', 'f']

### csv.reader

In [16]:
import csv
with open('congress_data/congress_votes_114-2016_s20.csv', encoding='utf-8') as f:
    for row in csv.reader(f):   ##very very fast
        print(row)

['Senate Vote #20 2016-02-10T17:11:00 - H.R. 757: North Korea Sanctions Enforcement Act of 2016']
['person', 'state', 'district', 'vote', 'name', 'party']
['300002', 'TN', '', 'Yea', 'Sen. Lamar Alexander [R]', 'Republican']
['300011', 'CA', '', 'Yea', 'Sen. Barbara Boxer [D]', 'Democrat']
['300018', 'WA', '', 'Yea', 'Sen. Maria Cantwell [D]', 'Democrat']
['300019', 'DE', '', 'Yea', 'Sen. Thomas Carper [D]', 'Democrat']
['300023', 'MS', '', 'Yea', 'Sen. Thad Cochran [R]', 'Republican']
['300025', 'ME', '', 'Yea', 'Sen. Susan Collins [R]', 'Republican']
['300027', 'TX', '', 'Yea', 'Sen. John Cornyn [R]', 'Republican']
['300030', 'ID', '', 'Yea', 'Sen. Michael Crapo [R]', 'Republican']
['300038', 'IL', '', 'Not Voting', 'Sen. Richard Durbin [D]', 'Democrat']
['300041', 'WY', '', 'Yea', 'Sen. Michael Enzi [R]', 'Republican']
['300043', 'CA', '', 'Yea', 'Sen. Dianne Feinstein [D]', 'Democrat']
['300047', 'SC', '', 'Not Voting', 'Sen. Lindsey Graham [R]', 'Republican']
['300048', 'IA', '', 

### tuple unpacking

In [18]:
t = ('Raymond', 'Hettinger', 54, 'blabla@com')

In [19]:
type(t)

tuple

In [20]:
len(t)

4

In [21]:
fname, lname, age, email = t   #unpack

### Looing idoms
enumerate, zip, reversed, sorted, set

In [32]:
names = "Raymond Rachel Matthew".split()
colors = "red blue yellow".split()
cities = "austin dallas austin houston chicago dallas austin".split()

In [25]:
for i, name in enumerate(names, start=1):
    print(i, name)

1 Raymond
2 Rachel
3 Matthew


In [26]:
for color in reversed(colors):
    print(color)

yellow
blue
red


In [29]:
for name, color in zip(names,colors):
    print(name, color)

Raymond red
Rachel blue
Matthew yellow


In [30]:
for color in sorted(colors):
    print(color)

blue
red
yellow


In [31]:
for color in sorted(colors, key=len):
    print(color)

red
blue
yellow


In [34]:
for city in sorted(set(cities)):
    print(city)        # DISTINT = set; order by = sorted

austin
chicago
dallas
houston


In [35]:
for city in reversed(sorted(set(cities))):
    print(city) 

houston
dallas
chicago
austin


In [36]:
for i, city in enumerate(reversed(sorted(set(cities)))):
    print(i, city) 

0 houston
1 dallas
2 chicago
3 austin


In [37]:
# called functional programming: put output of one function into the next
for i, city in enumerate(map(str.upper, reversed(sorted(set(cities))))):
    print(i, city) 

0 HOUSTON
1 DALLAS
2 CHICAGO
3 AUSTIN


In [39]:
assert 5 + 3 == 10, "It is wrong"

AssertionError: It is wrong