In [1]:
import sys
# Raw string: the Windows path contains backslashes followed by letters
# ("\A", "\L", "\s") that are not valid escape sequences. Without the r-prefix
# newer Python versions warn about them; the resulting path text is unchanged.
# NOTE(review): this is a machine-specific absolute path.
ANACONDA_SITE_PACKAGES = r'C:\Anaconda3\Lib\site-packages'
# Only append once so that re-running the cell does not grow sys.path.
if ANACONDA_SITE_PACKAGES not in sys.path:
    sys.path.append(ANACONDA_SITE_PACKAGES)
sys.version

'3.5.1 |Continuum Analytics, Inc.| (default, Jan 29 2016, 15:01:46) [MSC v.1900 64 bit (AMD64)]'

In [2]:
%%javascript
// Run this to set the table of contents
$.getScript('https://kmahelona.github.io/ipython_notebook_goodies/ipython_notebook_toc.js')

<IPython.core.display.Javascript object>

# Fluent Python by Luciano Ramalho
This notebook was created by Alex Galea.  
Start date: September 2016   
End date:

<strong id="tocheading">Table of Contents</strong>
&nbsp;
<div id="toc"></div>

## Chapter 1: Python data model

### Card deck

In [3]:
import collections
Card = collections.namedtuple('Card', ['rank', 'suit'])

In [4]:
my_card = Card('8', 'hearts')
my_card

Card(rank='8', suit='hearts')

Calling `len(a)` in Python actually does something like `a.__len__()`, where `__len__` is a special method. Likewise, indexing with `a[i]` uses the `__getitem__` special method.

In [5]:
class Deck:
    """A small deck of cards that behaves like a read-only sequence.

    The deck holds one ``Card`` per (suit, rank) combination: 7 ranks times
    2 suits, i.e. 14 cards. Implementing ``__len__`` and ``__getitem__`` is
    enough for ``len()``, indexing, slicing, iteration, ``in`` and
    ``random.choice`` to work on instances.
    """
    ranks = ['1', '2', '3', '4', 'J', 'Q', 'K']
    suits = ['hearts', 'diamonds']

    def __init__(self):
        # Suit is the outer loop, so every card of the first suit comes
        # before any card of the second suit.
        cards = []
        for suit in self.suits:
            for rank in self.ranks:
                cards.append(Card(rank, suit))
        self._cards = cards

    def __len__(self):
        """Return the number of cards in the deck."""
        card_count = len(self._cards)
        return card_count

    def __getitem__(self, pos):
        """Return the card (or, for a slice, the list of cards) at ``pos``."""
        # Delegating to the underlying list gives slicing support for free.
        return self._cards[pos]

In [6]:
deck = Deck()
deck

<__main__.Deck at 0x1e27133fa58>

In [7]:
deck.ranks

['1', '2', '3', '4', 'J', 'Q', 'K']

In [8]:
len(deck)

14

In [9]:
deck[0], deck[-2]

(Card(rank='1', suit='hearts'), Card(rank='Q', suit='diamonds'))

In [10]:
import random
random.choice(deck)

Card(rank='1', suit='hearts')

In [11]:
deck[2::3]

[Card(rank='3', suit='hearts'),
 Card(rank='Q', suit='hearts'),
 Card(rank='2', suit='diamonds'),
 Card(rank='J', suit='diamonds')]

In [12]:
for card in deck[::-1]:
    print(card)

Card(rank='K', suit='diamonds')
Card(rank='Q', suit='diamonds')
Card(rank='J', suit='diamonds')
Card(rank='4', suit='diamonds')
Card(rank='3', suit='diamonds')
Card(rank='2', suit='diamonds')
Card(rank='1', suit='diamonds')
Card(rank='K', suit='hearts')
Card(rank='Q', suit='hearts')
Card(rank='J', suit='hearts')
Card(rank='4', suit='hearts')
Card(rank='3', suit='hearts')
Card(rank='2', suit='hearts')
Card(rank='1', suit='hearts')


In [13]:
Card('1', 'hearts') in deck

True

In [14]:
suit_values = dict(spades=3, hearts=2, diamonds=1, clubs=0)
suit_values

{'clubs': 0, 'diamonds': 1, 'hearts': 2, 'spades': 3}

In [15]:
def spades_high(card):
    """Return an integer sort key for ``card``: rank first, suit second.

    The rank's position in ``Deck.ranks`` is scaled by the number of suits
    in ``suit_values`` and the suit's own value is added, so cards of equal
    rank are ordered by suit value.
    """
    rank_position = Deck.ranks.index(card.rank)
    suit_weight = suit_values[card.suit]
    return rank_position * len(suit_values) + suit_weight

print(Card('4', 'hearts').rank)
print(Deck.ranks.index)
spades_high(Card('J', 'hearts'))

4
<built-in method index of list object at 0x000001E27133C488>


18

In [16]:
for card in sorted(deck, key=spades_high):
    print(card)

Card(rank='1', suit='diamonds')
Card(rank='1', suit='hearts')
Card(rank='2', suit='diamonds')
Card(rank='2', suit='hearts')
Card(rank='3', suit='diamonds')
Card(rank='3', suit='hearts')
Card(rank='4', suit='diamonds')
Card(rank='4', suit='hearts')
Card(rank='J', suit='diamonds')
Card(rank='J', suit='hearts')
Card(rank='Q', suit='diamonds')
Card(rank='Q', suit='hearts')
Card(rank='K', suit='diamonds')
Card(rank='K', suit='hearts')


### Emulating numeric types

In [17]:
from math import hypot

class Vector:
    """Minimal two-dimensional vector demonstrating numeric special methods.

    Supports ``repr()``, ``abs()`` (Euclidean length), truth testing,
    vector addition with ``+`` and scaling with ``*`` (vector on the left).
    """

    def __init__(self, x=0, y=0):
        self.x, self.y = x, y

    def __repr__(self):
        # !r shows each component using its own repr, like %r does.
        return 'Vector({!r}, {!r})'.format(self.x, self.y)

    def __abs__(self):
        """Return the magnitude of the vector."""
        return hypot(self.x, self.y)

    def __bool__(self):
        """A vector is falsy only when its magnitude is zero."""
        return abs(self) != 0.0

    def __add__(self, other):
        """Return a new Vector that is the component-wise sum."""
        return Vector(self.x + other.x, self.y + other.y)

    def __mul__(self, scalar):
        """Return a new Vector with both components multiplied by ``scalar``."""
        return Vector(self.x * scalar, self.y * scalar)

In [18]:
v1 = Vector(2, 4)
v2 = Vector(2, 1)
v1 + v2

Vector(4, 5)

In [19]:
print(abs(Vector(1,2)))
print(abs(Vector(1,2)) == Vector(1,2))
bool(abs(Vector(1,2))) == bool(Vector(1,2))

2.23606797749979
False


True

### Special methods

In [20]:
print(bool(4 and 0))
bool(4 and 5)

False


True

In [21]:
a = [1, 3, 4, 8]
a.__delitem__(1)
a

[1, 4, 8]

In [22]:
print(a.index(8))

2


In [23]:
a = [1, 3, 4, 8]
a_gen = (ai for ai in a)
next(a_gen), next(a_gen), a_gen.__next__()

(1, 3, 4)

## Chapter 2: An array of sequences

### Listcomps and generators

In [24]:
# Unicode code-points from a string
[ord(i) for i in 'a*&#^$']

[97, 42, 38, 35, 94, 36]

The Cartesian product of `a` and `b` contains every pair `(ai, bi)`, i.e. `len(a) * len(b)` pairs in total (they can be arranged as a grid of shape `(len(a), len(b))`). Below we calculate a Cartesian product using a list comprehension.

In [25]:
a = ['red', 'green']
b = [1, 3, 2, 4]
[(ai, bi) for ai in a for bi in b]

[('red', 1),
 ('red', 3),
 ('red', 2),
 ('red', 4),
 ('green', 1),
 ('green', 3),
 ('green', 2),
 ('green', 4)]

In [26]:
for i in ('%s %s' % (ai, bi) for ai in a for bi in b):
    print(i)

red 1
red 3
red 2
red 4
green 1
green 3
green 2
green 4


### Tuples

In [27]:
# Tuple unpacking examples
a, b = 'zombie attack'.split()
print(a, b)

zombie attack


In [28]:
# The starred target collects every item not taken by `a` and `b` into a list.
a, b, *rest = 'a b c d e f g'.split()
print(a, b)
print(rest)

a b
['c', 'd', 'e', 'f', 'g']


In [29]:
# The starred target may sit in the middle: `a` takes the first item, `b` and
# `c` take the last two, and `middle` receives everything in between.
a, *middle, b, c = 'a b c d e f g h i j k'.split()
a, middle, b, c

('a', ['b', 'c', 'd', 'e', 'f', 'g', 'h', 'i'], 'j', 'k')

In [30]:
def add(a, b, c):
    """Return ``a + b + c``, evaluated left to right."""
    partial = a + b
    return partial + c

add_me = (5, 4, 2)
add(*add_me)

11

In [31]:
# Emoticon UTF hex strings
a = ('I',  u'\u2764', u'\U0001F40D')
print(*a)

I ❤ 🐍


In [32]:
print('{:10}|{:^4}'.format('', 'name'))
fmt = '{:10}|{:4.6f}'
for a, (b, c) in [('Adam', (48555.5928484559, 'Banana')),
                  ('Mary', (2.352634234, 'Chicken'))]:
    print(fmt.format(a, b))

          |name
Adam      |48555.592848
Mary      |2.352634


In [33]:
# Named tuples
from collections import namedtuple
Rating = namedtuple('Rating', ['male', 'female'])
# The typename ('CityTuple') is what appears in the repr; it is independent of
# the variable name the class is bound to (City).
City = namedtuple('CityTuple', 'name country population coordinates rating')
# Adjacent string literals such as 'FK' 'Nowhere' are implicitly concatenated,
# which is easy to mistake for two separate arguments, so the country value is
# spelled as one literal. Keyword arguments make the field mapping explicit.
fake = City(
    name='Fake place',
    country='FKNowhere',
    population=98374,
    coordinates=(-23.4, 231.9),
    rating=Rating(male=9.0, female=9.5),
)
fake

CityTuple(name='Fake place', country='FKNowhere', population=98374, coordinates=(-23.4, 231.9), rating=Rating(male=9.0, female=9.5))

### Slices

Slices, which are usually written inline (e.g. `1:5`), can be stored in variables as `slice` objects.

In [34]:
data = '''
1909 Pimoroni PiBrella      $17.50 3 $52.50
1489 6mm Tactile Switch     x20 $4.95 2 $9.90
1510 Panavise Jr. - PV-201  $28.00 1 $28.00
1601 PiTFT Mini Kit 320x240 $34.95 1 $34.95
'''

# Define slice objects
print('Type:', type(slice(0, 1)))
print('E.g.')
print(data[slice(56, None)])

DESC = slice(5, 28)
for item in data.split('\n'):
    print(item[DESC])

Type: <class 'slice'>
E.g.
ctile Switch     x20 $4.95 2 $9.90
1510 Panavise Jr. - PV-201  $28.00 1 $28.00
1601 PiTFT Mini Kit 320x240 $34.95 1 $34.95


Pimoroni PiBrella      
6mm Tactile Switch     
Panavise Jr. - PV-201  
PiTFT Mini Kit 320x240 



In [35]:
# Get 2D list elements
data = [['The', 'only', 'one'],
        ['is', 'diagonal', 'this'],
        ['nonsense', 'group of', 'entries']]
print(data[2][0])
print(data.__getitem__(2).__getitem__(2))

nonsense
entries


In [36]:
try:
    data.__getitem__((2, 0))
except Exception as e:
    print('Error raised:', e)

Error raised: list indices must be integers or slices, not tuple


In [37]:
# Numpy modifies the __getitem__ special method to
# accept tuples
import numpy as np
data = np.array(data)
print(data.__getitem__((2, 0)))

nonsense


In [38]:
print(*data.diagonal())

The diagonal entries


### Augmented assignment

Some ways to change lists around

In [39]:
data = data.tolist()
data

[['The', 'only', 'one'],
 ['is', 'diagonal', 'this'],
 ['nonsense', 'group of', 'entries']]

In [40]:
# Unique ID for object
id(data)

2072069792584

In [41]:
# Same object with an extra row
data += [['extra', 'row']]
print(id(data))
data

2072069792584


[['The', 'only', 'one'],
 ['is', 'diagonal', 'this'],
 ['nonsense', 'group of', 'entries'],
 ['extra', 'row']]

In [42]:
# Extending a list within a tuple
data = tuple(data)
data[3].extend(['of', 'nothing!'])
data

(['The', 'only', 'one'],
 ['is', 'diagonal', 'this'],
 ['nonsense', 'group of', 'entries'],
 ['extra', 'row', 'of', 'nothing!'])

In [43]:
# Bytecode operations
from dis import dis
dis('s[a] += b')

  1           0 LOAD_NAME                0 (s)
              3 LOAD_NAME                1 (a)
              6 DUP_TOP_TWO
              7 BINARY_SUBSCR
              8 LOAD_NAME                2 (b)
             11 INPLACE_ADD
             12 ROT_THREE
             13 STORE_SUBSCR
             14 LOAD_CONST               0 (None)
             17 RETURN_VALUE


In [44]:
# A simple example
dis('a + b')

  1           0 LOAD_NAME                0 (a)
              3 LOAD_NAME                1 (b)
              6 BINARY_ADD
              7 RETURN_VALUE


### Managing ordered sequences with `bisect`

Searching with bisect

In [45]:
import bisect

HAYSTACK = [1, 4, 5, 6, 8, 12, 15, 20, 21, 23, 23, 26, 29, 30]

num = 3
insert_index = bisect.bisect(sorted(HAYSTACK), num)
print('insert number %d at index %d of sorted HAYSTACK' %
      (num, insert_index)) 

insert number 3 at index 1 of sorted HAYSTACK


Using bisect for numerical ID

In [46]:
def label(score, breakpoints=(25, 50, 70, 90),
          categories=('Crap', 'Bad', 'Average', 'Good', 'Great')):
    """Map a numeric ``score`` to a category label using bisection.

    Parameters
    ----------
    score : number
        Value to classify.
    breakpoints : sorted sequence of numbers
        Lower bounds of every category except the first. A score equal to a
        breakpoint falls into the higher category (``bisect.bisect`` is
        ``bisect_right``).
    categories : sequence
        Labels, one more than there are breakpoints.

    Returns
    -------
    The element of ``categories`` selected for ``score``.

    Raises
    ------
    IndexError
        If ``categories`` is too short for the position found.

    The defaults are tuples rather than lists so that no mutable object is
    shared between calls.
    """
    i = bisect.bisect(breakpoints, score)
    return categories[i]
    
label(0), label(80), label(100)

('Crap', 'Good', 'Great')

Inserting with bisect

In [47]:
a = ['GPU', 'Computer', 'RAM']
a.sort()
print(a)
bisect.insort(a, 'HDD')
print(a)

['Computer', 'GPU', 'RAM']
['Computer', 'GPU', 'HDD', 'RAM']


### When list is not the answer

For storing numbers, `array.array` is more efficient than a list.

In [48]:
import array
# NOTE(review): this rebinds the name `random` to the function random.random,
# replacing the module bound by `import random` in the card-deck section;
# re-running the earlier `random.choice(deck)` cell after this one would fail.
from random import random
# Typecode 'd' stores each item as a double-precision float
# (typecode 'b' would instead store integers from -128 to 127).
# Build an array of 10**7 values drawn from random().
floats = array.array('d', (random() for i in range(10**7)))
# IPython magic: report how long the built-in sum() takes over the array.
%time sum(floats)

Wall time: 61.1 ms


4998686.863009888

In [49]:
# Numpy is faster
import numpy as np
floats = np.array(floats)
%time floats.sum()

Wall time: 15.6 ms


4998686.8630094891

In [50]:
# Sorting an array
a = array.array('b', (-100, 90, -50, 110))
print(a.typecode, sorted(a))
a_sorted = array.array(a.typecode, sorted(a))

b [-100, -50, 90, 110]


Using `memoryview` with `array.array`

In [51]:
memv = memoryview(a)
print('type =', memv)
print('length =', len(memv))
print('item 1 =', memv[1])

type = <memory at 0x000001E2715D3A08>
length = 4
item 1 = 90


In [52]:
print(memv.tolist())
# Cast as unsigned char
print(memv.cast('B').tolist())

[-100, 90, -50, 110]
[156, 90, 206, 110]


Using `numpy` to save binary files and load memory-mapped file

In [53]:
import os
a = np.random.random(size=(3, 3))
# np.save does not create missing directories, so make sure the target
# directory exists before writing (no-op when it is already there).
os.makedirs('output', exist_ok=True)
if not os.path.isfile('output/random-numbers.npy'): # random-numbers will not be re-written if exists
    np.save('output/random-numbers', a)
# mmap_mode='r+' memory-maps the saved file for reading and writing instead of
# loading it into memory.
b = np.load('output/random-numbers.npy', mmap_mode='r+')
b

memmap([[ 0.56988553,  0.93844959,  0.62341531],
       [ 0.33186081,  0.42509331,  0.65144824],
       [ 0.89561299,  0.86748856,  0.70790833]])

In [54]:
b[:, 2].mean()

memmap(0.6609239591365338)

Memory-mapped files can be loaded even if the array doesn't fit entirely into memory!

The `deque` (double-ended queue) can be efficiently appended to and popped from either end, and can be given a maximum length. It also has a cool `rotate` method.

In [55]:
from collections import deque
a = deque(np.linspace(0, 55, 5), maxlen=5)
print(a)

a.rotate(-3)
print(a)

a.extend([-1.0, -2.0])
print(a)

a.extendleft([-3.0, -4.0])
print(a)

deque([0.0, 13.75, 27.5, 41.25, 55.0], maxlen=5)
deque([41.25, 55.0, 0.0, 13.75, 27.5], maxlen=5)
deque([0.0, 13.75, 27.5, -1.0, -2.0], maxlen=5)
deque([-4.0, -3.0, 0.0, 13.75, 27.5], maxlen=5)


## Chapter 3: Dictionaries and sets

Dictionaries and sets are highly optimized hash table implementations in python.

Starting with __dictionaries__:

In [56]:
from collections import abc

a = {(('this', 'is'), 1): 'allowed dictionary'}

print(a[(('this', 'is'), 1)])

print('Set?', isinstance(a, abc.MutableSet))
print('Mapping?', isinstance(a, abc.MutableMapping))

allowed dictionary
Set? False
Mapping? True


Common hashable types include `str`, `int` and `float`. Lists are not hashable.

In [57]:
a = [28, 5, 1991]
try:
    print(hash(a))
    print('We are able to hash a')
except TypeError:
    # hash() raises TypeError for unhashable objects such as lists. Catching
    # only that exception keeps unrelated errors (and KeyboardInterrupt)
    # from being silently swallowed.
    frozen_a = frozenset(a)
    print(hash(frozen_a))
    print("We can hash the list once it's converted to %s" % str(type(frozen_a)))

461166807522139490
We can hash the list once it's converted to <class 'frozenset'>


`.get` can be used as a lookup method where the default can be defined

In [58]:
a = {'Einstein': 1, 'Bohr': 2}
a['Einstein'] == a.get('Einstein')

True

In [59]:
a.get('Dirac', 3)

3

In [60]:
# Dirac is not added
a

{'Bohr': 2, 'Einstein': 1}

In [61]:
a.setdefault('Dirac', 3)

3

In [62]:
# Dirac is added
a

{'Bohr': 2, 'Dirac': 3, 'Einstein': 1}

`setdefault` can also be used for lookup: when the key already exists, it returns the stored value and leaves the dictionary unchanged.

In [63]:
a.setdefault('Bohr', 99)

2

In [64]:
# Bohr remains 2
a

{'Bohr': 2, 'Dirac': 3, 'Einstein': 1}

### Mappings with flexible key lookup

Dictionaries can assign values for missing key searches using `defaultdict`.

In [65]:
a = collections.defaultdict(str)
a['Bear'] = 'Pet Dog'; a['Clara'] = 'Pet Dog'; a['Moses'] = 'Pet Cat'
a['Morty']
a

defaultdict(str,
            {'Bear': 'Pet Dog',
             'Clara': 'Pet Dog',
             'Morty': '',
             'Moses': 'Pet Cat'})

This can also be done with a custom class that contains a `__missing__` method.

In [66]:
# The class below will inherit from dict

class MFGuessDict(dict):
    ''' Assign the gender (M or F) to a name,
    if new name and no gender is specified we
    make a random guess.

    Looking up a missing name with d[name] or d.get(name) stores a random
    guess ('Male' or 'Female') for that name and returns it. Membership
    tests (name in d) never add an entry.
    '''

    def __missing__(self, key):
        ''' Called by dict item lookup when key is absent: store a random
        guess under key and return it.
        '''
        self[key] = np.random.choice(['Male', 'Female'])
        return self[key]

    def get(self, key, default=None):
        ''' Return the value for key, going through item lookup so that a
        missing key triggers __missing__ (and is stored) rather than
        returning default.
        '''
        try:
            return self[key]
        except KeyError:
            # This exception will not occur because of how we defined __missing__
            print('Got a key error')
            return default

    def __contains__(self, key):
        ''' Return True only if key is already stored.

        The name was previously misspelled, so this method was never used
        by the `in` operator.
        '''
        # Checking against keys() does not go through item lookup, so it
        # cannot trigger __missing__ and insert the key.
        return key in self.keys()

In [67]:
a = MFGuessDict()
a['Bob']

'Female'

In [68]:
# Notice how the 'Got a key error' print statement does
# not get executed
print(a.get('Bob'))
print(a.get('Cindy'))

Female
Female


In [69]:
'Judy' in a

False

### Dict variations

`collections.OrderedDict` is useful for maintaining a dictionary whose items remain in insertion order. Plain `dict` also preserves insertion order in CPython 3.6 (as an implementation detail), and this became a language guarantee in Python 3.7.

`collections.Counter` counts things:

In [70]:
a = [5, 4, 6, 4, 6, 3, 4]
counts = collections.Counter(a)
counts

Counter({3: 1, 4: 3, 5: 1, 6: 2})

In [71]:
b = [4, 6, 3, 3, 3, 3, 7]
counts.update(b)
counts

Counter({3: 5, 4: 4, 5: 1, 6: 3, 7: 1})

In [72]:
counts.most_common(3)

[(3, 5), (4, 4), (6, 3)]

### Immutable Mappings

In [73]:
from types import MappingProxyType
a = dict(zip([1, 2, 3, 4, 5], 'abcde'))
a_proxy = MappingProxyType(a)
a_proxy

mappingproxy({1: 'a', 2: 'b', 3: 'c', 4: 'd', 5: 'e'})

In [74]:
a_proxy[3]

'c'

As desired, we are unable to edit a_proxy.

In [75]:
# The assignment is expected to fail: a mappingproxy is a read-only view.
# Catch and display the error so the notebook still runs top to bottom.
try:
    a_proxy[6] = 'f'
except TypeError as e:
    print('TypeError:', e)

TypeError: 'mappingproxy' object does not support item assignment

We can still edit a and changes will be reflected in a_proxy

In [76]:
a[6] = 'f'
a_proxy[6]

'f'

### Set theory

"A set is a collection of unique objects" where elements must be hashable

In [77]:
set(['Bob', 'is', 'is', 5, 5, 'Bob'])

{'Bob', 'is', 5}

How does it perform compared to `np.unique`?

In [78]:
a = np.random.choice(list('abcdefghijk'), size=10**8)

In [79]:
%time set(a)

Wall time: 30.2 s


{'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k'}

In [80]:
%time np.unique(a)

Wall time: 8.83 s


array(['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k'], 
      dtype='<U1')

Can do set operations like these:

In [81]:
a = {0, 2, 4}
b = {2, 2, 3}

In [82]:
# OR
a | b

{0, 2, 3, 4}

In [83]:
# AND
a & b

{2}

In [84]:
# IS SUBSET?
a = {1, 2, 3}
b = {2, 3}
b <= a

True

There are a few ways to define sets:

In [85]:
a = set()
type(a)

set

In [86]:
dis('set(["apple"])')

  1           0 LOAD_NAME                0 (set)
              3 LOAD_CONST               0 ('apple')
              6 BUILD_LIST               1
              9 CALL_FUNCTION            1 (1 positional, 0 keyword pair)
             12 RETURN_VALUE


In [87]:
# Fewer operations are performed by building sets this way
dis('{"apple"}')

  1           0 LOAD_CONST               0 ('apple')
              3 BUILD_SET                1
              6 RETURN_VALUE


In [88]:
latin = {chr(i) for i in range(300, 310)}
latin

{'Ĭ', 'ĭ', 'Į', 'į', 'İ', 'ı', 'Ĳ', 'ĳ', 'Ĵ', 'ĵ'}

In [89]:
from unicodedata import name  # name(ch) returns the name assigned to the character ch as a string

def get_names(x):
    """Return the Unicode name of the single character ``x``."""
    return name(x)

# Collect the distinct names of all characters in `latin`.
{get_names(ch) for ch in latin}

{'LATIN CAPITAL LETTER I WITH BREVE',
 'LATIN CAPITAL LETTER I WITH DOT ABOVE',
 'LATIN CAPITAL LETTER I WITH OGONEK',
 'LATIN CAPITAL LETTER J WITH CIRCUMFLEX',
 'LATIN CAPITAL LIGATURE IJ',
 'LATIN SMALL LETTER DOTLESS I',
 'LATIN SMALL LETTER I WITH BREVE',
 'LATIN SMALL LETTER I WITH OGONEK',
 'LATIN SMALL LETTER J WITH CIRCUMFLEX',
 'LATIN SMALL LIGATURE IJ'}

### `dict` and `set` under the hood
These are implemented using hash tables.

## Chapter 4: Text vs bytes