In [1]:
import sys
# sys.path.append('C:\Anaconda3\Lib\site-packages')
sys.version

'3.5.2 |Anaconda 4.2.0 (x86_64)| (default, Jul  2 2016, 17:52:12) \n[GCC 4.2.1 Compatible Apple LLVM 4.2 (clang-425.0.28)]'

In [227]:
%%javascript
// Run this to set the table of contents
$.getScript('https://kmahelona.github.io/ipython_notebook_goodies/ipython_notebook_toc.js')

<IPython.core.display.Javascript object>

# Fluent Python by Luciano Ramalho
This notebook was created by Alex Galea.  
Start date: September 2016   
End date:

<strong id="tocheading">Table of Contents</strong>
&nbsp;
<div id="toc"></div>

## Chapter 1: Python data model

### Card deck

In [3]:
import collections
Card = collections.namedtuple('Card', ['rank', 'suit'])

In [4]:
my_card = Card('8', 'hearts')
my_card

Card(rank='8', suit='hearts')

The `len(a)` function in Python actually calls `a.__len__()`, which is a special method. Indexing (`a[i]`) uses the `__getitem__` special method.

In [5]:
class Deck:
    ''' A reduced card deck that behaves like a read-only sequence.

    Holds one Card for every (rank, suit) pair, grouped by suit and ordered
    by rank within each suit. Implementing __len__ and __getitem__ is enough
    for len(), indexing, slicing and iteration to work on a Deck.
    '''
    ranks = list('1234') + list('JQK')
    suits = ['hearts', 'diamonds']

    def __init__(self):
        # Suits form the outer loop so all cards of one suit are contiguous.
        cards = []
        for suit in self.suits:
            for rank in self.ranks:
                cards.append(Card(rank, suit))
        self._cards = cards

    def __len__(self):
        ''' Number of cards in the deck. '''
        return len(self._cards)

    def __getitem__(self, pos):
        ''' Delegate indexing and slicing to the underlying list. '''
        return self._cards[pos]

In [6]:
deck = Deck()
deck

<__main__.Deck at 0x1e27133fa58>

In [7]:
deck.ranks

['1', '2', '3', '4', 'J', 'Q', 'K']

In [8]:
len(deck)

14

In [9]:
deck[0], deck[-2]

(Card(rank='1', suit='hearts'), Card(rank='Q', suit='diamonds'))

In [10]:
import random
random.choice(deck)

Card(rank='1', suit='hearts')

In [11]:
deck[2::3]

[Card(rank='3', suit='hearts'),
 Card(rank='Q', suit='hearts'),
 Card(rank='2', suit='diamonds'),
 Card(rank='J', suit='diamonds')]

In [12]:
for card in deck[::-1]:
    print(card)

Card(rank='K', suit='diamonds')
Card(rank='Q', suit='diamonds')
Card(rank='J', suit='diamonds')
Card(rank='4', suit='diamonds')
Card(rank='3', suit='diamonds')
Card(rank='2', suit='diamonds')
Card(rank='1', suit='diamonds')
Card(rank='K', suit='hearts')
Card(rank='Q', suit='hearts')
Card(rank='J', suit='hearts')
Card(rank='4', suit='hearts')
Card(rank='3', suit='hearts')
Card(rank='2', suit='hearts')
Card(rank='1', suit='hearts')


In [13]:
Card('1', 'hearts') in deck

True

In [14]:
suit_values = dict(spades=3, hearts=2, diamonds=1, clubs=0)
suit_values

{'clubs': 0, 'diamonds': 1, 'hearts': 2, 'spades': 3}

In [15]:
def spades_high(card):
    ''' Sort key for a card: rank is compared first, suit breaks ties.

    The position of the rank in Deck.ranks is scaled by the number of suits
    in suit_values, then the suit's own value is added.
    '''
    rank_index = Deck.ranks.index(card.rank)
    suit_bonus = suit_values[card.suit]
    return rank_index * len(suit_values) + suit_bonus

print(Card('4', 'hearts').rank)
print(Deck.ranks.index)
spades_high(Card('J', 'hearts'))

4
<built-in method index of list object at 0x000001E27133C488>


18

In [16]:
for card in sorted(deck, key=spades_high):
    print(card)

Card(rank='1', suit='diamonds')
Card(rank='1', suit='hearts')
Card(rank='2', suit='diamonds')
Card(rank='2', suit='hearts')
Card(rank='3', suit='diamonds')
Card(rank='3', suit='hearts')
Card(rank='4', suit='diamonds')
Card(rank='4', suit='hearts')
Card(rank='J', suit='diamonds')
Card(rank='J', suit='hearts')
Card(rank='Q', suit='diamonds')
Card(rank='Q', suit='hearts')
Card(rank='K', suit='diamonds')
Card(rank='K', suit='hearts')


### Emulating numeric types

In [17]:
from math import hypot

class Vector:
    ''' Two-dimensional vector supporting repr, abs, bool, + and scalar *. '''

    def __init__(self, x=0, y=0):
        self.x, self.y = x, y

    def __repr__(self):
        # {!r} applies repr() to each component, same as %r.
        return 'Vector({!r}, {!r})'.format(self.x, self.y)

    def __abs__(self):
        ''' Euclidean length of the vector. '''
        return hypot(self.x, self.y)

    def __bool__(self):
        ''' A vector is falsy only when its length is zero. '''
        magnitude = abs(self)
        return bool(magnitude)

    def __add__(self, other):
        ''' Component-wise sum; returns a new Vector. '''
        return Vector(self.x + other.x, self.y + other.y)

    def __mul__(self, scalar):
        ''' Scale both components by scalar; returns a new Vector. '''
        scaled_x = self.x*scalar
        scaled_y = self.y*scalar
        return Vector(scaled_x, scaled_y)

In [18]:
v1 = Vector(2, 4)
v2 = Vector(2, 1)
v1 + v2

Vector(4, 5)

In [19]:
print(abs(Vector(1,2)))
print(abs(Vector(1,2)) == Vector(1,2))
bool(abs(Vector(1,2))) == bool(Vector(1,2))

2.23606797749979
False


True

### Special methods

In [20]:
print(bool(4 and 0))
bool(4 and 5)

False


True

In [21]:
a = [1, 3, 4, 8]
a.__delitem__(1)
a

[1, 4, 8]

In [22]:
print(a.index(8))

2


In [23]:
a = [1, 3, 4, 8]
a_gen = (ai for ai in a)
next(a_gen), next(a_gen), a_gen.__next__()

(1, 3, 4)

## Chapter 2: An array of sequences

### Listcomps and generators

In [24]:
# Unicode code-points from a string
[ord(i) for i in 'a*&#^$']

[97, 42, 38, 35, 94, 36]

A cartesian product of `a` and `b` gives a matrix of shape `(len(a), len(b))`. Below we calculate a cartesian product using a list comp.

In [25]:
a = ['red', 'green']
b = [1, 3, 2, 4]
[(ai, bi) for ai in a for bi in b]

[('red', 1),
 ('red', 3),
 ('red', 2),
 ('red', 4),
 ('green', 1),
 ('green', 3),
 ('green', 2),
 ('green', 4)]

In [26]:
for i in ('%s %s' % (ai, bi) for ai in a for bi in b):
    print(i)

red 1
red 3
red 2
red 4
green 1
green 3
green 2
green 4


### Tuples

In [27]:
# Tuple unpacking examples
a, b = 'zombie attack'.split()
print(a, b)

zombie attack


In [28]:
a, b, *shit = 'a b c d e f g'.split()
print(a, b)
print(shit)

a b
['c', 'd', 'e', 'f', 'g']


In [29]:
a, *shit, b, c = 'a b c d e f g h i j k'.split()
a, shit, b, c

('a', ['b', 'c', 'd', 'e', 'f', 'g', 'h', 'i'], 'j', 'k')

In [30]:
def add(a, b, c):
    return a+b+c

add_me = (5, 4, 2)
add(*add_me)

11

In [31]:
# Emoticon UTF hex strings
a = ('I',  u'\u2764', u'\U0001F40D')
print(*a)

I ❤ 🐍


In [32]:
print('{:10}|{:^4}'.format('', 'name'))
fmt = '{:10}|{:4.6f}'
for a, (b, c) in [('Adam', (48555.5928484559, 'Banana')),
                  ('Mary', (2.352634234, 'Chicken'))]:
    print(fmt.format(a, b))

          |name
Adam      |48555.592848
Mary      |2.352634


In [33]:
# Named tuples
from collections import namedtuple
Rating = namedtuple('Rating', ['male', 'female'])
City = namedtuple('CityTuple', 'name country population coordinates rating')
fake = City('Fake place', 'FK' 'Nowhere', 98374, (-23.4, 231.9), Rating(9.0, 9.5))
fake

CityTuple(name='Fake place', country='FKNowhere', population=98374, coordinates=(-23.4, 231.9), rating=Rating(male=9.0, female=9.5))

### Slices

Slices which commonly look like e.g. 1:5 can be held in variables using `slice`

In [34]:
data = '''
1909 Pimoroni PiBrella      $17.50 3 $52.50
1489 6mm Tactile Switch     x20 $4.95 2 $9.90
1510 Panavise Jr. - PV-201  $28.00 1 $28.00
1601 PiTFT Mini Kit 320x240 $34.95 1 $34.95
'''

# Define slice objects
print('Type:', type(slice(0, 1)))
print('E.g.')
print(data[slice(56, None)])

DESC = slice(5, 28)
for item in data.split('\n'):
    print(item[DESC])

Type: <class 'slice'>
E.g.
ctile Switch     x20 $4.95 2 $9.90
1510 Panavise Jr. - PV-201  $28.00 1 $28.00
1601 PiTFT Mini Kit 320x240 $34.95 1 $34.95


Pimoroni PiBrella      
6mm Tactile Switch     
Panavise Jr. - PV-201  
PiTFT Mini Kit 320x240 



In [35]:
# Get 2D list elements
data = [['The', 'only', 'one'],
        ['is', 'diagonal', 'this'],
        ['nonsense', 'group of', 'entries']]
print(data[2][0])
print(data.__getitem__(2).__getitem__(2))

nonsense
entries


In [36]:
try:
    data.__getitem__((2, 0))
except Exception as e:
    print('Error raised:', e)

Error raised: list indices must be integers or slices, not tuple


In [37]:
# Numpy modifies the __getitem__ special method to
# accept tuples
import numpy as np
data = np.array(data)
print(data.__getitem__((2, 0)))

nonsense


In [38]:
print(*data.diagonal())

The diagonal entries


### Augmented assignment

Some ways to change lists around

In [39]:
data = data.tolist()
data

[['The', 'only', 'one'],
 ['is', 'diagonal', 'this'],
 ['nonsense', 'group of', 'entries']]

In [40]:
# Unique ID for object
id(data)

2072069792584

In [41]:
# Same object with an extra row
data += [['extra', 'row']]
print(id(data))
data

2072069792584


[['The', 'only', 'one'],
 ['is', 'diagonal', 'this'],
 ['nonsense', 'group of', 'entries'],
 ['extra', 'row']]

In [42]:
# Extending a list within a tuple
data = tuple(data)
data[3].extend(['of', 'nothing!'])
data

(['The', 'only', 'one'],
 ['is', 'diagonal', 'this'],
 ['nonsense', 'group of', 'entries'],
 ['extra', 'row', 'of', 'nothing!'])

In [43]:
# Bytecode operations
from dis import dis
dis('s[a] += b')

  1           0 LOAD_NAME                0 (s)
              3 LOAD_NAME                1 (a)
              6 DUP_TOP_TWO
              7 BINARY_SUBSCR
              8 LOAD_NAME                2 (b)
             11 INPLACE_ADD
             12 ROT_THREE
             13 STORE_SUBSCR
             14 LOAD_CONST               0 (None)
             17 RETURN_VALUE


In [44]:
# A simple example
dis('a + b')

  1           0 LOAD_NAME                0 (a)
              3 LOAD_NAME                1 (b)
              6 BINARY_ADD
              7 RETURN_VALUE


### Managing ordered sequences with `bisect`

Searching with bisect

In [45]:
import bisect

HAYSTACK = [1, 4, 5, 6, 8, 12, 15, 20, 21, 23, 23, 26, 29, 30]

num = 3
insert_index = bisect.bisect(sorted(HAYSTACK), num)
print('insert number %d at index %d of sorted HAYSTACK' %
      (num, insert_index)) 

insert number 3 at index 1 of sorted HAYSTACK


Using bisect for numerical ID

In [46]:
def label(score, breakpoints=[25, 50, 70, 90],
          categories=['Crap', 'Bad', 'Average', 'Good', 'Great']):
    ''' Return the category whose score band contains score.

    breakpoints must be sorted. A score equal to a breakpoint falls into
    the higher category because the insertion point is taken to the right
    of equal entries.
    '''
    return categories[bisect.bisect_right(breakpoints, score)]
    
label(0), label(80), label(100)

('Crap', 'Good', 'Great')

Inserting with bisect

In [47]:
a = ['GPU', 'Computer', 'RAM']
a.sort()
print(a)
bisect.insort(a, 'HDD')
print(a)

['Computer', 'GPU', 'RAM']
['Computer', 'GPU', 'HDD', 'RAM']


### When list is not the answer

For storing numbers, `array.array` is more efficient than a list.

In [48]:
import array
from random import random
# d = double precision datatype
# (can also use e.g. b for integers from -128 to 127)
floats = array.array('d', (random() for i in range(10**7)))
%time sum(floats)

Wall time: 61.1 ms


4998686.863009888

In [49]:
# Numpy is faster
import numpy as np
floats = np.array(floats)
%time floats.sum()

Wall time: 15.6 ms


4998686.8630094891

In [50]:
# Sorting an array
a = array.array('b', (-100, 90, -50, 110))
print(a.typecode, sorted(a))
a_sorted = array.array(a.typecode, sorted(a))

b [-100, -50, 90, 110]


Using `memoryview` with `array.array`

In [51]:
memv = memoryview(a)
print('type =', memv)
print('length =', len(memv))
print('item 1 =', memv[1])

type = <memory at 0x000001E2715D3A08>
length = 4
item 1 = 90


In [52]:
print(memv.tolist())
# Cast as unsigned char
print(memv.cast('B').tolist())

[-100, 90, -50, 110]
[156, 90, 206, 110]


Using `numpy` to save binary files and load memory-mapped file

In [53]:
import os
a = np.random.random(size=(3, 3))
if not os.path.isfile('output/random-numbers.npy'): # random-numbers will not be re-written if exists
    np.save('output/random-numbers', a)
b = np.load('output/random-numbers.npy', 'r+')
b

memmap([[ 0.56988553,  0.93844959,  0.62341531],
       [ 0.33186081,  0.42509331,  0.65144824],
       [ 0.89561299,  0.86748856,  0.70790833]])

In [54]:
b[:, 2].mean()

memmap(0.6609239591365338)

Memory-mapped files can be loaded even if the array doesn't fit entirely into memory!

The `deque` (double-ended queue) can be efficiently loaded and unloaded from each end and can have a maximum length. It also has a cool `rotate` method.

In [55]:
from collections import deque
a = deque(np.linspace(0, 55, 5), maxlen=5)
print(a)

a.rotate(-3)
print(a)

a.extend([-1.0, -2.0])
print(a)

a.extendleft([-3.0, -4.0])
print(a)

deque([0.0, 13.75, 27.5, 41.25, 55.0], maxlen=5)
deque([41.25, 55.0, 0.0, 13.75, 27.5], maxlen=5)
deque([0.0, 13.75, 27.5, -1.0, -2.0], maxlen=5)
deque([-4.0, -3.0, 0.0, 13.75, 27.5], maxlen=5)


## Chapter 3: Dictionaries and sets

Dictionaries and sets are highly optimized hash table implementations in python.

Starting with __dictionaries__:

In [56]:
from collections import abc

a = {(('this', 'is'), 1): 'allowed dictionary'}

print(a[(('this', 'is'), 1)])

print('Set?', isinstance(a, abc.MutableSet))
print('Mapping?', isinstance(a, abc.MutableMapping))

allowed dictionary
Set? False
Mapping? True


Common hashable variables include str, int, float. Lists are not hashable.

In [57]:
a = [28, 5, 1991]
try:
    print(hash(a))
    print('We are able to hash a')
except TypeError:
    # Lists are unhashable, so hash(a) raises TypeError. Catching only that
    # exception keeps unrelated errors (e.g. KeyboardInterrupt) visible.
    print(hash(frozenset(a)))
    print("We can hash the list once it's converted to %s" % str(type(frozenset(a))))

461166807522139490
We can hash the list once it's converted to <class 'frozenset'>


`.get` can be used as a lookup method where the default can be defined

In [58]:
a = {'Einstein': 1, 'Bohr': 2}
a['Einstein'] == a.get('Einstein')

True

In [59]:
a.get('Dirac', 3)

3

In [60]:
# Dirac is not added
a

{'Bohr': 2, 'Einstein': 1}

In [61]:
a.setdefault('Dirac', 3)

3

In [62]:
# Dirac is added
a

{'Bohr': 2, 'Dirac': 3, 'Einstein': 1}

We can also select using `setdefault`

In [63]:
a.setdefault('Bohr', 99)

2

In [64]:
# Bohr remains 2
a

{'Bohr': 2, 'Dirac': 3, 'Einstein': 1}

### Mappings with flexible key lookup

Dictionaries can assign values for missing key searches using `defaultdict`.

In [65]:
a = collections.defaultdict(str)
a['Bear'] = 'Pet Dog'; a['Clara'] = 'Pet Dog'; a['Moses'] = 'Pet Cat'
a['Morty']
a

defaultdict(str,
            {'Bear': 'Pet Dog',
             'Clara': 'Pet Dog',
             'Morty': '',
             'Moses': 'Pet Cat'})

This can also be done with a custom class that contains a `__missing__` method.

In [66]:
# The class below will inherit from dict

class MFGuessDict(dict):
    ''' Assign the gender (Male or Female) to a name,
    if new name and no gender is specified we
    make a random guess.

    Looking up a missing name with d[name] or d.get(name) stores the
    guess, so the same name gives the same answer afterwards. Membership
    tests (name in d) never add an entry.
    '''

    def __missing__(self, key):
        # Called by dict.__getitem__ when key is absent: store a random
        # guess and return the stored value.
        self[key] = np.random.choice(['Male', 'Female'])
        return self[key]

    def get(self, key, default=None):
        ''' Like dict.get, but goes through self[key] so that missing
        names are guessed and stored instead of returning default. '''
        try:
            return self[key]
        except KeyError:
            # This exception will not occur because of how we defined __missing__
            print('Got a key error')
            return default

    def __contains__(self, key):
        # Was misspelt as __contatins__, which Python never calls.
        # Checking the keys view avoids triggering __missing__.
        return key in self.keys()

In [67]:
a = MFGuessDict()
a['Bob']

'Female'

In [68]:
# Notice how the 'Got a key error' print statement does
# not get executed
print(a.get('Bob'))
print(a.get('Cindy'))

Female
Female


In [69]:
'Judy' in a

False

### Dict variations

`collections.OrderedDict` seems useful for maintaining a dictionary where the items remain in insertion order. I think this is the new standard dict behaviour in the new release of Python (3.6).

`collections.Counter` counts things:

In [70]:
a = [5, 4, 6, 4, 6, 3, 4]
counts = collections.Counter(a)
counts

Counter({3: 1, 4: 3, 5: 1, 6: 2})

In [71]:
b = [4, 6, 3, 3, 3, 3, 7]
counts.update(b)
counts

Counter({3: 5, 4: 4, 5: 1, 6: 3, 7: 1})

In [72]:
counts.most_common(3)

[(3, 5), (4, 4), (6, 3)]

### Immutable Mappings

In [73]:
from types import MappingProxyType
a = dict(zip([1, 2, 3, 4, 5], 'abcde'))
a_proxy = MappingProxyType(a)
a_proxy

mappingproxy({1: 'a', 2: 'b', 3: 'c', 4: 'd', 5: 'e'})

In [74]:
a_proxy[3]

'c'

As desired, we are unable to edit a_proxy.

In [75]:
a_proxy[6] = 'f'

TypeError: 'mappingproxy' object does not support item assignment

We can still edit a and changes will be reflected in a_proxy

In [76]:
a[6] = 'f'
a_proxy[6]

'f'

### Set theory

"A set is a collection of unique objects" where elements must be hashable

In [77]:
set(['Bob', 'is', 'is', 5, 5, 'Bob'])

{'Bob', 'is', 5}

How does it perform compared to `np.unique`?

In [78]:
a = np.random.choice(list('abcdefghijk'), size=10**8)

In [79]:
%time set(a)

Wall time: 30.2 s


{'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k'}

In [80]:
%time np.unique(a)

Wall time: 8.83 s


array(['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k'], 
      dtype='<U1')

Can do set operations like these:

In [81]:
a = {0, 2, 4}
b = {2, 2, 3}

In [82]:
# OR
a | b

{0, 2, 3, 4}

In [83]:
# AND
a & b

{2}

In [84]:
# IS SUBSET?
a = {1, 2, 3}
b = {2, 3}
b <= a

True

There are a few ways to define sets:

In [85]:
a = set()
type(a)

set

In [86]:
dis('set(["apple"])')

  1           0 LOAD_NAME                0 (set)
              3 LOAD_CONST               0 ('apple')
              6 BUILD_LIST               1
              9 CALL_FUNCTION            1 (1 positional, 0 keyword pair)
             12 RETURN_VALUE


In [87]:
# Fewer operations are performed by building the set with a literal
dis('{"apple"}')

  1           0 LOAD_CONST               0 ('apple')
              3 BUILD_SET                1
              6 RETURN_VALUE


In [88]:
latin = {chr(i) for i in range(300, 310)}
latin

{'Ĭ', 'ĭ', 'Į', 'į', 'İ', 'ı', 'Ĳ', 'ĳ', 'Ĵ', 'ĵ'}

In [89]:
from unicodedata import name # Returns the name assigned to the character chr as a string
get_names = lambda x: name(x)
set(map(get_names, latin))

{'LATIN CAPITAL LETTER I WITH BREVE',
 'LATIN CAPITAL LETTER I WITH DOT ABOVE',
 'LATIN CAPITAL LETTER I WITH OGONEK',
 'LATIN CAPITAL LETTER J WITH CIRCUMFLEX',
 'LATIN CAPITAL LIGATURE IJ',
 'LATIN SMALL LETTER DOTLESS I',
 'LATIN SMALL LETTER I WITH BREVE',
 'LATIN SMALL LETTER I WITH OGONEK',
 'LATIN SMALL LETTER J WITH CIRCUMFLEX',
 'LATIN SMALL LIGATURE IJ'}

### `dict` and `set` under the hood
These are implemented using hash tables. The memory usage is large but the result is quick speeds for lookups. 

One particular takeaway note is that dictionary keys should not be edited while iterating through the dictionary. Instead we should keep track of the changes and update after.

## Chapter 4: Text vs bytes

The Unicode Glossary defines plain text as "Computer-encoded text that consists only of _a sequence of code points_ from a given standard"

### Byte essentials
Encoding bytes and decoding bytes to strings

In [104]:
s = '$5.67'
b = s.encode('utf8')
b

b'$5.67'

In [105]:
b.decode('utf8')

'$5.67'

The two basic built-in binary types are the immutable `bytes` and the mutable `bytearray`. Each item in the object is an integer from 0 to 255.

In [111]:
a = bytes('A+', encoding='utf_32')
a

b'\xff\xfe\x00\x00A\x00\x00\x00+\x00\x00\x00'

Note: \x00 is the null byte

In [114]:
print(a[0]) # this returns an integer
print(a[:1]) # this slice returns a byte

255
b'\xff'


In [112]:
a.decode('utf32')

'A+'

In [115]:
a_arr = bytearray(a)
a_arr

bytearray(b'\xff\xfe\x00\x00A\x00\x00\x00+\x00\x00\x00')

In [116]:
a_arr[:1] # this slice returns a bytearray

bytearray(b'\xff')

Binary sequences have a class method to convert hex digits:

In [123]:
bytes.fromhex('31 48 CE A9')

b'1H\xce\xa9'

Bytes can be initialized from arrays:

In [124]:
bytes(array.array('h', [-2, 1, 0, -1, 2])) # h for short integers (16 bit)

b'\xfe\xff\x01\x00\x00\x00\xff\xff\x02\x00'

### Structs and memory views

Structs can be used to parse packed bytes or convert to packed byte structure. Memory views provide shared memory access to binary sequences of data without copying the bytes.

Below we use them to get the width and height of a picture

In [36]:
import struct

# struct format:
# < little-endian;
# 3s3s two sequences of 3 bytes;
# HH two 16-bit integers
fmt = '<3s3sHH'

with open('input/karl-jilg.gif', 'rb') as f:
    img = memoryview(f.read())

header = img[:10]
bytes(header)

b'GIF89aX\x02/\x03'

In [37]:
struct.unpack(fmt, header)
# file type, ??, width, height

(b'GIF', b'89a', 600, 815)

In [38]:
# delete references to free up memory
del header
del img;

U+1D11E is an example of a __code point__ representation. Byte representations can include ascii, latin1, cp437, utf-8, utf-16le, ...

UTF encodings can handle every Unicode code point (but not every unicode character)

### Understanding encode / decode problems

`UnicodeEncodeError` handling

In [43]:
city = 'São Paulo'
city.encode('utf_16')

b'\xff\xfeS\x00\xe3\x00o\x00 \x00P\x00a\x00u\x00l\x00o\x00'

In [44]:
city.encode('cp437')

UnicodeEncodeError: 'charmap' codec can't encode character '\xe3' in position 1: character maps to <undefined>

In [45]:
city.encode('cp437', errors='ignore')

b'So Paulo'

In [46]:
city.encode('cp437', errors='replace')

b'S?o Paulo'

`UnicodeDecodeError` handling.

Using the wrong decoding can result in garbled characters known as _gremlins_.

In [50]:
team = b'Montr\xe9al Canadi\xe9ns'
team.decode('cp1252')

'Montréal Canadiéns'

In [53]:
# This produces a gremlin
team.decode('cp437')

'MontrΘal CanadiΘns'

In [54]:
# As does this
team.decode('utf-16')

'潍瑮\ue972污䌠湡摡\ue969獮'

In [55]:
team.decode('utf-8')

UnicodeDecodeError: 'utf-8' codec can't decode byte 0xe9 in position 5: invalid continuation byte

UTF-8 is the default encoding for Python 3 (ASCII was default for Py2)

In [67]:
ϵ = 'my epsilon'
print(len(ϵ))
print(ϵ)

10
my epsilon


There is really no way to detect the encoding of a byte sequence. You have to be told.

### Handling text files

Bytes should be decoded to strings as early as possible on input and encoded back to bytes only at the end, on output.

Below is an example of how reading in files without specifying the format can cause issues.

In [68]:
open('input/bug.txt', 'w', encoding='utf_8').write('ϵ')

1

In [69]:
# Oh no!
open('input/bug.txt').read()

'Ïµ'

In [71]:
# Better
open('input/bug.txt', encoding='utf_8').read()

'ϵ'

In [76]:
# Read the raw bytes; the with-block closes the file handle as soon as
# the read is done instead of leaving it open for the rest of the session.
with open('input/bug.txt', 'rb') as f:
    line = f.read()
print(line)
print(line.decode('utf_8'))

b'\xcf\xb5'
ϵ


This bug would not occur on a Mac or Linux machine where the default encoding is UTF-8 (silly windows :P). Also, it's not generally a good idea to open text files in binary mode.

Encoding defaults can be seen like this:

In [78]:
import locale

expressions = '''
        locale.getpreferredencoding()
        sys.stdout.encoding
        sys.stdin.encoding
        sys.stderr.encoding
'''

for e in expressions.split():
    print(e.rjust(30), '->', str(eval(e)))

 locale.getpreferredencoding() -> cp1252
           sys.stdout.encoding -> UTF-8
            sys.stdin.encoding -> cp1252
           sys.stderr.encoding -> UTF-8


The default encoding when opening a file is given by ` locale.getpreferredencoding()`.

### Normalizing Unicode for saner comparisons

In [88]:
# This works just fine
a = 'äho'
b = '\u00E4ho'
print(a, b)
a == b

äho äho


True

In [99]:
from unicodedata import name
name(b[0])

'LATIN SMALL LETTER A'

In [96]:
# This doesn't work
a = 'äho'
b = 'a\u0308ho'
print(a, b)
a == b

äho äho


False

In [98]:
name(b[1])

'COMBINING DIAERESIS'

There are different normalization forms that can be used. We use NFC below.

In [93]:
from unicodedata import normalize
a = normalize('NFC', a)
b = normalize('NFC', b)
print(a, b)
a == b

äho äho


True

__Translation dictionaries__ can be created to ... well ... translate!

In [104]:
a = 'this coffee z@* is crap'
dict_trans = str.maketrans({'z': 'b',
                            '@': 'a',
                            '*': 'r'})
a.translate(dict_trans)

'this coffee bar is crap'

### Sorting Unicode text

In [107]:
fruits = ['caju', 'atemoia', 'cajá', 'açaí']
sorted(fruits)

['atemoia', 'açaí', 'caju', 'cajá']

The order is wrong due to the accents and sorting can be done properly using the `pyuca` library

In [108]:
import pyuca
sorted(fruits, key=pyuca.Collator())

ImportError: No module named 'pyuca'

### The Unicode database

There exists a standard Unicode database that includes mappings of code points to character names and metadata. This information is what allows methods like `isnumeric` and `name` to work.

In [109]:
sample = '1\xbc\xb2\u0969\u136b\u216b\u2466\u2480\u3285'
sample

'1¼²३፫Ⅻ⑦⒀㊅'

In [121]:
import unicodedata
for char in sample:
    # Pad each field *after* substituting the value. Calling ljust on the
    # format string first pads the template, so 'True' and 'False' rows
    # end up with different widths and the columns drift.
    print('Code point = U+%04x' % ord(char),
          str(char).center(7),
          ('digit = %s' % char.isdigit()).ljust(13),
          ('numeric = %s' % char.isnumeric()).ljust(15),
          format(unicodedata.numeric(char), '4.2f').ljust(6),
          unicodedata.name(char))

Code point = U+0031    1    digit = True   numeric = True    1.00   DIGIT ONE
Code point = U+00bc    ¼    digit = False   numeric = True    0.25   VULGAR FRACTION ONE QUARTER
Code point = U+00b2    ²    digit = True   numeric = True    2.00   SUPERSCRIPT TWO
Code point = U+0969    ३    digit = True   numeric = True    3.00   DEVANAGARI DIGIT THREE
Code point = U+136b    ፫    digit = True   numeric = True    3.00   ETHIOPIC DIGIT THREE
Code point = U+216b    Ⅻ    digit = False   numeric = True    12.00  ROMAN NUMERAL TWELVE
Code point = U+2466    ⑦    digit = True   numeric = True    7.00   CIRCLED DIGIT SEVEN
Code point = U+2480    ⒀    digit = False   numeric = True    13.00  PARENTHESIZED NUMBER THIRTEEN
Code point = U+3285    ㊅    digit = False   numeric = True    6.00   CIRCLED IDEOGRAPH SIX


## Chapter 5: First-class functions

### Treating a function like an object

Functions automatically have doc attributes as seen below:

In [123]:
def Fallon_SNL(quote):
    ''' Return a Jimmy Fallon quote. '''
    # The docstring above is displayed by this cell, so it is kept verbatim.
    # Keyword/quote pairs are checked in order; the first match wins.
    known_quotes = (
        ('Jeopardy',
         'I like it, your mother likes it, your grandmother watches it every night on the VCR'),
        ('Barry Gibbs',
         'I will put you in the ground!'),
    )
    for keyword, line in known_quotes:
        if quote == keyword:
            return line

    # No keyword matched
    return 'Unknown keyword'
    
Fallon_SNL.__doc__

' Return a Jimmy Fallon quote. '

`map` is an example of a higher-order function:

In [125]:
list(map(Fallon_SNL, ['Barry Gibbs', 'Morning Report', 'Jeopardy']))

['I will put you in the ground!',
 'Unknown keyword',
 'I like it, your mother likes it, your grandmother watches it every night on the VCR']

Custom functions can be created as keys for higher-order functions!

In [6]:
def vowel_count(word):
    ''' Count the lowercase vowels (a, e, i, o, u) in word. '''
    vowels = ('a', 'e', 'i', 'o', 'u')
    # Add one for every character that is a vowel.
    return sum(1 for letter in word if letter in vowels)

sorted(['aaaeee', 'zoo', 'zoola', 'mmrrrnnzzzi', 'pbj'],
       key=vowel_count)

['pbj', 'mmrrrnnzzzi', 'zoo', 'zoola', 'aaaeee']

An anonymous function can be created with the `lambda` keyword but it must be one line and can not contain while, try, etc...

### User defined callable types

We can make a class that behaves like a function using the `__call__` special method

In [4]:
class rbg_picker():
    ''' Callable that takes an item and returns a string stating its color.

    The color is fixed at construction time. Every item passed in is
    remembered, in call order, in the _items list.
    '''

    def __init__(self, color):
        self._items = []
        self._color = color

    def return_color(self, item):
        ''' Record item in the history and describe it with the color. '''
        history = self._items
        history.append(item)
        return 'The %s is %s' % (history[-1], self._color)

    def __call__(self, item):
        # Calling the instance is shorthand for return_color.
        return self.return_color(item)
    
stuff = rbg_picker('green')
stuff('pickle')

'The pickle is green'

Using this idea we can keep a history of the calls

In [5]:
print(stuff('grass'), stuff('man'), stuff('apple'))
stuff._items

The grass is green The man is green The apple is green


['pickle', 'grass', 'man', 'apple']

### Function introspection

We can list the attributes of a function like this:

In [7]:
dir(vowel_count)

['__annotations__',
 '__call__',
 '__class__',
 '__closure__',
 '__code__',
 '__defaults__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__get__',
 '__getattribute__',
 '__globals__',
 '__gt__',
 '__hash__',
 '__init__',
 '__kwdefaults__',
 '__le__',
 '__lt__',
 '__module__',
 '__name__',
 '__ne__',
 '__new__',
 '__qualname__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__sizeof__',
 '__str__',
 '__subclasshook__']

Functions can be assigned attributes like this:

In [10]:
def numbers():
    ''' Return a fixed message; used below as a function to hang attributes on. '''
    message = 'Now you know everything...'
    return message

numbers.meaning_of_life = 42
numbers.dark_tower_number = 19
numbers.__dict__

{'dark_tower_number': 19, 'meaning_of_life': 42}

What attributes exist for functions but not for instances of a (bare) class?

In [12]:
class C: 
    pass
obj = C()

def func(): 
    pass

set(dir(func)) - set(dir(obj))

{'__annotations__',
 '__call__',
 '__closure__',
 '__code__',
 '__defaults__',
 '__get__',
 '__globals__',
 '__kwdefaults__',
 '__name__',
 '__qualname__'}

Of course these can be added to classes like we did for the `rbg_picker` object above with `__call__`

### From positional to keyword-only parameters

In [3]:
def make_me_feel_feelings(your_name, *friends_names, typ='happy', **bonus):
    ''' Generates "inspirational" quotes that fit your mood.

    your_name      -- name the quote is addressed to.
    *friends_names -- zero or more friend names; the wording switches
                      between "no friends", "your friend X thinks" and
                      "your friends X and Y think".
    typ            -- keyword-only mood, 'happy' or 'sad'. Any other value
                      returns an error message instead of a quote.
    **bonus        -- extra keyword arguments; their values (strings) are
                      appended as " and you're ...".

    Prints the received arguments, then returns the quote as a string.
    '''

    print('your name: %s' % your_name)
    print('friends names:', friends_names)
    print('type: %s' % typ)
    print('bonus:', bonus)

    if friends_names:
        # Plural wording needs at least two friends. The previous check
        # (> 0) was always true here, so the singular branch never ran.
        if len(friends_names) > 1:
            str1 = 'Dear %s, your friends ' % (your_name)
            str2 = ' and '.join(friends_names) + ' think you are'
        else:
            str1 = 'Dear %s, your friend ' % (your_name)
            str2 = ' and '.join(friends_names) + ' thinks you are'

        if typ == 'happy':
            str3 = ' great'
        elif typ == 'sad':
            str3 = ' crappy'
        else:
            return 'Invalid typ argument, no quote can be generated'

    else:
        str1 = 'Dear %s, you have no friends ' % (your_name)
        str2 = ''
        if typ == 'happy':
            str3 = 'but you are great'
        elif typ == 'sad':
            str3 = 'and you are crappy'
        else:
            return 'Invalid typ argument, no quote can be generated'

    if bonus:
        str4 = " and you're " + ' and '.join(bonus.values())
    else:
        str4 = ''

    return str1 + str2 + str3 + str4 + '.'

In [18]:
args = {'your_name': 'Alex', 'typ': 'sad', 'sad_truth': 'talking to yourself'}
make_me_feel_feelings(**args)

your name: Alex
friends names: ()
type: sad
bonus: {'sad_truth': 'talking to yourself'}


"Dear Alex, you have no friends and you are crappy and you're talking to yourself."

In [19]:
make_me_feel_feelings('Alex', 'Billy', 'Victoria', note_1='good at soccer', note_2='fun')

your name: Alex
friends names: ('Billy', 'Victoria')
type: happy
bonus: {'note_2': 'fun', 'note_1': 'good at soccer'}


"Dear Alex, your friends Billy and Victoria think you are great and you're fun and good at soccer."

In [20]:
make_me_feel_feelings('Alex', 'Kevin', 'Frank', 'Cory', 'Scott', typ='sad', note_1='an imbecile')

your name: Alex
friends names: ('Kevin', 'Frank', 'Cory', 'Scott')
type: sad
bonus: {'note_1': 'an imbecile'}


"Dear Alex, your friends Kevin and Frank and Cory and Scott think you are crappy and you're an imbecile."

### Retrieving Information About Parameters

In [4]:
from inspect import signature

sig = signature(make_me_feel_feelings)
sig

<Signature (your_name, *friends_names, typ='happy', **bonus)>

In [5]:
dict(sig.parameters)

{'bonus': <Parameter "**bonus">,
 'friends_names': <Parameter "*friends_names">,
 'typ': <Parameter "typ='happy'">,
 'your_name': <Parameter "your_name">}

We can bind a function signature from a dictionary of arguments.

In [15]:
args = {'your_name': 'Alex', 'typ': 'sad', 'sad_truth': 'talking to yourself'}
sig.bind(**args)

<BoundArguments (your_name='Alex', typ='sad', bonus={'sad_truth': 'talking to yourself'})>

If we remove mandatory parameters it will raise an error.

### Function annotations
Metadata that can be added to functions. As of python 3.5, no internal checks are done using these.

In [27]:
def do_stuff_to_things(a:'int < 5', b:str, c:list=['python', '3.5']) -> str:
    ''' Join the items of c with spaces, then append b repeated a times.

    The annotations are metadata only; nothing checks them at call time.
    '''
    prefix = ' '.join(c)
    repeated = a * b
    return prefix + repeated

do_stuff_to_things(3, ' is great')

'python 3.5 is great is great is great'

These can be seen and checked manually using `inspect.signature`.

In [32]:
signature(do_stuff_to_things).parameters

mappingproxy({'a': <Parameter "a:'int < 5'">,
              'b': <Parameter "b:str">,
              'c': <Parameter "c:list=['python', '3.5']">})

### The `operator` module

`itemgetter` can be used in place of lambdas

In [35]:
from operator import itemgetter
silly_things = [('Ziggy', 4, 2.1),
                ('Lotus', 2, 4.5),
                ('Nomad', 1, 6.5)]

print(sorted(silly_things, key=lambda x: x[0])[-1])
print(sorted(silly_things, key=itemgetter(0))[-1])

('Ziggy', 4, 2.1)
('Ziggy', 4, 2.1)


It can be used like a function

In [38]:
my_items = itemgetter(2, 1)
my_items(silly_things[0])

(2.1, 4)

`attrgetter` does similar things for attributes

In [65]:
# Setting up example
import pandas as pd
df = pd.DataFrame({'a': [1, 2], 'b': ['cat', 'dog'],
                   'c': [4, 3], 'd': [3.5, 5.5]})
from IPython.display import display
display(df)

from operator import attrgetter
info = attrgetter('shape', 'a.dtype', 'a.name')
print('Getting some attributes:', info(df))

Unnamed: 0,a,b,c,d
0,1,cat,4,3.5
1,2,dog,3,5.5


Getting some attributes: ((2, 4), dtype('int64'), 'a')


`methodcaller` does similar things for methods

In [71]:
from operator import methodcaller
my_sort = methodcaller('sort_values', 'c')
my_sort(df)

Unnamed: 0,a,b,c,d
1,2,dog,3,5.5
0,1,cat,4,3.5


### Freezing arguments with `functools.partial`

In [80]:
def print_5_inputs(a, b, c, d, e):
    ''' Print the five inputs on one line, separated by single spaces. '''
    # print() converts each argument with str() and joins them with ' ' by default
    print(a, b, c, d, e)
    
from functools import partial
print_2_inputs = partial(print_5_inputs, 'The', 'horse') # set first 2 args
print_2_inputs('ran', 'very', 'fast')

print_4_inputs = partial(print_5_inputs, b='cow') # set "b" arg
print_4_inputs(a='The', c='ate', d='grass', e='...')

The horse ran very fast
The cow ate grass ...


## Chapter 6: Design patterns with first-class functions

### Classic Strategy
design pattern e.g.

![](input/design-patterns/strategy.png)

Can call `globals` to get a dictionary of the current global symbol table.

Note: I restarted the kernel before running cells below.

In [1]:
class TestGlobals():
    def my_method_1():
        print('Design patterns')
    def my_method_2():
        print('are cool')

In [2]:
test = globals()['TestGlobals']

In [3]:
test.my_method_1()
test.my_method_2()

Design patterns
are cool


In [4]:
globals()['In']

['',
 "class TestGlobals():\n    def my_method_1():\n        print('Design patterns')\n    def my_method_2():\n        print('are cool')",
 "test = globals()['TestGlobals']",
 'test.my_method_1()\ntest.my_method_2()',
 "globals()['In']"]

Can use inspect to find functions inside a class.

In [5]:
import inspect
funcs = [func for name, func in
         inspect.getmembers(TestGlobals, inspect.isfunction)]
funcs

[<function __main__.TestGlobals.my_method_1>,
 <function __main__.TestGlobals.my_method_2>]

In [6]:
funcs[0]()
funcs[1]()

Design patterns
are cool


### Command

design pattern e.g.

![](input/design-patterns/command.png)

This design pattern is aimed at decoupling an object that invokes an operation (e.g. Menu) from the object that implements it (e.g. Application, Document) by putting a command object in between.

## Chapter 7: Function decorators and closures

### Decorators 101
A decorator is a callable that is called with the function defined directly below it as its argument; the function's name is then bound to whatever the decorator returns.

In [4]:
def myprint(myfunction):
    ''' Call myfunction, print what it returns, and hand back a fixed string. '''
    result = myfunction()
    print(result)
    return 'myprint function return'

@myprint
def thing_to_print():
    return "It's nearly Christmas 2016"

It's nearly Christmas 2016


In [5]:
thing_to_print

'myprint function return'

A decorator usually replaces a function. Here we replace `target()` with `inner()`.

In [6]:
def deco(func):
    ''' Decorator that ignores func and returns inner in its place. '''
    def inner():
        ''' Replacement function; it never calls the decorated function. '''
        print('running inner()')
    return inner

@deco
def target():
    print('running target()')

In [7]:
target()

running inner()


In [8]:
target

<function __main__.deco.<locals>.inner>

Decorators are executed at _import time_ - but the decorated functions are not run until _run time_ (and only if they are actually called).

A cool use case is appending a bunch of defined functions to a list.

In [11]:
animals = []

def animal(func):
    ''' Registration decorator: append func to the animals list and return it unchanged. '''
    animals.append(func)
    return func

@animal
def cow():
    print('I am cow')
    
@animal
def dog():
    print('I am dog')
    
animals

[<function __main__.cow>, <function __main__.dog>]

In [12]:
[a() for a in animals]

I am cow
I am dog


[None, None]

### Variable scope rules

This fails because b is recognized as a local variable:

In [18]:
b = 2
def f(a):
    ''' Print a, then attempt to print b (this fails: b is local to f). '''
    print(a)
    print(b)  # UnboundLocalError: the assignment below makes b a local name
    b = 5
    
f(3)

3


UnboundLocalError: local variable 'b' referenced before assignment

We can decide to treat b as a global:

In [19]:
b = 2
def f(a):
    ''' Print a and the module-level b, then rebind the module-level b to 5. '''
    global b  # b now refers to the global name, for reads and for the assignment
    print(a)
    print(b)
    b = 5
    
f(3)

3
2


But keep in mind that b's value can then be changed.

In [17]:
b

5

### Closures

Here is a functional approach to an average tracker (although the object oriented approach seems more intuitive - i.e. define an Averager class).

![](input/closure.png)

In [57]:
def averager():
    ''' Return a function that tracks the running average of the values it is given. '''
    history = []  # free variable of get_avg; survives between calls via the closure
    
    def get_avg(val):
        ''' Record val and return the mean of every value recorded so far. '''
        history.append(val)
        total = sum(history)
        return total / len(history)
    
    return get_avg

avg = averager()

In [25]:
avg(100)

100.0

In [26]:
avg(50)

75.0

In [27]:
avg(10)

53.333333333333336

In [30]:
[d for d in dir(avg.__code__) if not d.startswith('_')]

['co_argcount',
 'co_cellvars',
 'co_code',
 'co_consts',
 'co_filename',
 'co_firstlineno',
 'co_flags',
 'co_freevars',
 'co_kwonlyargcount',
 'co_lnotab',
 'co_name',
 'co_names',
 'co_nlocals',
 'co_stacksize',
 'co_varnames']

In [31]:
avg.__code__.co_varnames

('val', 'total')

In [32]:
avg.__code__.co_freevars

('history',)

The history values can be retrieved from the `__closure__` attribute.

In [37]:
len(avg.__closure__)

1

In [39]:
avg.__closure__[0].cell_contents

[100, 50, 10]

Roughly speaking, a closure holds the local variables of the enclosing function that the inner function refers to, as they are kept outside the global scope. Only a function can have a `__closure__` attribute.

Here is another example where we see that no closure is created if our returned function does not reference the variable `a`.

In [225]:
def f():
    ''' Return a lambda that adds the local a to its argument. '''
    a = 3
    return lambda x: x + a  # references a, so a is stored in the lambda's closure

func = f()

func.__closure__[0].cell_contents

3

In [226]:
def f():
    ''' Return an identity lambda that does not use the local a. '''
    a = 3
    return lambda x: x  # does not reference a, so __closure__ is None

func = f()

func.__closure__[0].cell_contents

TypeError: 'NoneType' object is not subscriptable

### The `nonlocal` declaration

This allows variables to be flagged as `free variables` even if they are reassigned inside the inner function. This is useful for immutable objects such as integers, which can only be updated by rebinding the name:

In [53]:
def f():
    ''' Return an update function that tries (and fails) to increment a. '''
    a = 0
    
    def update():
        ''' Attempt to add 2 to a; fails because a is treated as local here. '''
        a = a + 2  # the assignment makes a local, so reading it raises UnboundLocalError
        return a
    
    return update

f_ = f()()
f_

UnboundLocalError: local variable 'a' referenced before assignment

The `update` function, seeing that a is being assigned, treats it as local. But in doing so it forgets its out-of-scope value! We do not want to tell the function that `a` is global - because it isn't. So we say it's `nonlocal`.

In [55]:
def f():
    ''' Return a counter function; each call adds 2 to a private total and returns it. '''
    a = 0

    def update():
        ''' Increment the enclosing a by 2 and return the new value. '''
        nonlocal a  # rebind the a owned by f rather than creating a local
        a += 2
        return a

    return update

f_ = f()
f_()

2

In [56]:
f_()

4

### Implementing a simple decorator

A decorator that tracks total elapsed time of function calls

In [89]:
import time

def clock(f):
    ''' Decorator that prints how long each call to f takes.

    The wrapper forwards positional and keyword arguments to f, prints the
    result together with the elapsed time, and returns the result so the
    decorated function can still be used for its value.
    '''
    def time_func_call(*args, **kwargs):
        # perf_counter is the high-resolution clock intended for measuring durations
        t0 = time.perf_counter()
        val = f(*args, **kwargs)
        elapsed = time.perf_counter() - t0
        # %s instead of %d so results that are not integers (None, str, ...) do not raise
        print('Got result of "%s" in %.2f seconds' % (val, elapsed))
        return val

    return time_func_call

In [90]:
@clock
def f(a, b, c):
    ''' Sum i + j + k over every i in range(a), j in range(b), k in range(c). '''
    return sum(i + j + k
               for i in range(a)
               for j in range(b)
               for k in range(c))

f(398, 442, 453)

Got result of "51400016460" in 10.30 seconds


We do not get to keep the docstring or name of the original function:

In [91]:
f.__name__

'time_func_call'

In [93]:
f.__doc__

We can use `functools.wraps` to do this

In [96]:
from functools import wraps

def clock(f):
    ''' Decorator that prints how long each call to f takes.

    The wrapper keeps f's name and docstring (via functools.wraps), forwards
    positional and keyword arguments to f, prints the result together with
    the elapsed time, and returns the result.
    '''
    @wraps(f)
    def time_func_call(*args, **kwargs):
        # perf_counter is the high-resolution clock intended for measuring durations
        t0 = time.perf_counter()
        val = f(*args, **kwargs)
        elapsed = time.perf_counter() - t0
        # %s instead of %d so results that are not integers (None, str, ...) do not raise
        print('Got result of "%s" in %.2f seconds' % (val, elapsed))
        return val

    return time_func_call

@clock
def f(a, b, c):
    ''' Loopy lizard. '''
    return None

In [97]:
f.__name__

'f'

In [98]:
f.__doc__

' Loopy lizard. '

### Memoization with `functools.lru_cache`

This confuses the hell out of me. Below we see how we can use the decorator to cache the result of `f(5, 5, 5)` - so that the repeated calls made by the `%timeit` IPython magic run much faster.

In [168]:
def f(a, b, c):
    ''' Sum i + j + k over every i in range(a), j in range(b), k in range(c). '''
    return sum(i + j + k
               for i in range(a)
               for j in range(b)
               for k in range(c))

%timeit f(5, 5, 5)

10000 loops, best of 3: 22.7 µs per loop


In [169]:
# lru_cache is not imported by any earlier cell shown here; import it in this
# cell so it also runs on a freshly restarted kernel.
from functools import lru_cache

@lru_cache()
def f(a, b, c):
    ''' Sum i + j + k over every i in range(a), j in range(b), k in range(c).

    Results are memoized by lru_cache, so a repeated call with the same
    (hashable) arguments returns the cached value without re-running the loops.
    '''
    val = 0
    for i in range(a):
        for j in range(b):
            for k in range(c):
                val += (i + j + k)
    return val

%timeit f(5, 5, 5)

The slowest run took 169.37 times longer than the fastest. This could mean that an intermediate result is being cached.
10000000 loops, best of 3: 151 ns per loop


This can take a `maxsize` argument.

### Generic functions with single dispatch

An aside on `html.escape` - from the standard library - which escapes the characters that are special in HTML.

In [181]:
import html

thing = repr(abs)
print('%s -> %s' % (thing, html.escape(thing)))

thing = 'set(a) & set(b)'
print('%s -> %s' % (thing, html.escape(thing)))

<built-in function abs> -> &lt;built-in function abs&gt;
set(a) & set(b) -> set(a) &amp; set(b)


Using the `functools.singledispatch` decorator converts a function into a generic function that can behave differently depending on the type of the first argument.

In [199]:
from functools import singledispatch

@singledispatch
def merge_somehow(obj1, obj2):
    ''' Fallback implementation, used when no more specific type is registered. '''
    pair = (obj1, obj2)
    return '%s ^^^ %s' % pair

@merge_somehow.register(str)
def _(string1, string2):
    ''' Implementation chosen when the first argument is a str. '''
    pair = (string1, string2)
    return '%s --- %s' % pair

@merge_somehow.register(int)
def _(int1, int2):
    ''' Implementation chosen when the first argument is an int. '''
    product = int1 * int2
    return '%d' % product

In [200]:
merge_somehow('apple', 'pie')

'apple --- pie'

In [201]:
merge_somehow(4, 6)

'24'

In [202]:
class No1():
    ''' Example type with no registered merge_somehow implementation. '''
    def __repr__(self):
        ''' Fixed representation used when the object is formatted with %s. '''
        return 'pumpkin'
    
class No2():
    ''' Second example type with no registered merge_somehow implementation. '''
    def __repr__(self):
        ''' Fixed representation used when the object is formatted with %s. '''
        return 'pie'

obj1, obj2 = No1(), No2()
merge_somehow(obj1, obj2)

'pumpkin ^^^ pie'

### Stacked decorators
They are applied starting with the one directly above the function.

In [221]:
def d1(func):
    ''' Decorator that announces itself when applied and returns func unchanged. '''
    print('Calling d1')  # printed when the decorator is applied, not when func is called
    return func

def d2(func):
    ''' Decorator that announces itself when applied and returns func unchanged. '''
    print('Calling d2')  # printed when the decorator is applied, not when func is called
    return func

@d1
@d2
def f():
    return None

f()

Calling d2
Calling d1


### Parameterized decorators

Decorators can be nested within functions, which can then be called in order to pass arguments to the decorator, e.g.

In [224]:
def d(switch=True):
    ''' Decorator factory: the returned decorator prints a message chosen by switch. '''
    def deco(func):
        ''' Print the selected message at decoration time and return func unchanged. '''
        print('Doing stuff' if switch else 'Doing other stuff')
        return func
    return deco

@d()
def f():
    return None

f()

@d(switch=False)
def f():
    return None

f()

Doing stuff
Doing other stuff


## Chapter 8: Object references, mutability and recycling