## Lists and arrays

In [4]:
## Don't add mutable sequences to immutable sequences
a = (1, 2, [30, 40])
try:
    a[2] += [50, 60]
except TypeError as e:
    print(e)
    print(a)
    

'tuple' object does not support item assignment
(1, 2, [30, 40, 50, 60])


In [7]:
## List comprehensions
## Cartesian product: [large, small] * [black, white, red] = [(large, black), (large, white)...]
colors = 'black white red'.split(' ')
sizes = 'large small'.split(' ')
shirts = [(size, color) for size in sizes
                        for color in colors]
print(shirts)


[('large', 'black'), ('large', 'white'), ('large', 'red'), ('small', 'black'), ('small', 'white'), ('small', 'red')]


In [8]:
## List comprehension with ifs
shirts = [(size, color) for size in sizes
                        for color in colors
                        if color not in ['red']]
print(shirts)

[('large', 'black'), ('large', 'white'), ('small', 'black'), ('small', 'white')]


In [11]:
# Method chaining
# Return self in the methods

class Poem(object):
    def __init__(self, content):
        self.content = content

    def indent(self, spaces):
        self.content = " " * spaces + self.content
        return self

    def suffix(self, content):
        self.content = self.content + " - " + content
        return self
    
Poem('Road less traveled').indent(4).suffix('by Cristobal').content


'    Road less traveled - by Cristobal'

In [26]:
## Sorting
## list.sort (sorts in place) or sorted() (returns new list -> can be used for immutable sequences)

## args: key: function that takes one argument, and returns the key that will be used to sort the list

s = 'AsZsc'
s2 = 'BdXs'
s3 = 'CdYssyfd'
s4 = 'DsWdswfdf'
sort_this = [s3, s2, s4, s]

# Declaration order
print('declared:', sort_this)

# Default sorting -> Same as first_char
print('default:', sorted(sort_this))

# Sort by first char
s1 = lambda x: x[0]
print('first char:', sorted(sort_this, key=s1))

# third char
s2 = lambda x: x[2]
print('third char:', sorted(sort_this, key=s2))

declared: ['CdYssyfd', 'BdXs', 'DsWdswfdf', 'AsZsc']
default: ['AsZsc', 'BdXs', 'CdYssyfd', 'DsWdswfdf']
first char: ['AsZsc', 'BdXs', 'CdYssyfd', 'DsWdswfdf']
third char: ['DsWdswfdf', 'BdXs', 'CdYssyfd', 'AsZsc']


In [62]:
## Searching with bisect
# bisect(haystack, needle) --> Binary search for a needle in a haystack (sorted seq.),
# returning the position
# bisect <=> bisect_right: if equal, insert to the right
# bisect_left: if equal, insert to the left

# Example: bucket debt_equity value into bins
BREAKPOINTS = (7, 4, 2, 1)
RANKS = (0, 0.25, 0.5, 0.75, 1)
DE = (2, 5, 1, 4, 8.1)
def bucket_de(de, breakpoints, ranks):
    position = bisect.bisect(breakpoints, de)
    return ranks[position]

# Not working.. still, can do this with numpy
for de in DE:
    print('D/E:', de, 'rank:', bucket_de(de, BREAKPOINTS, RANKS))

    
# insort(seq, item): inserts item into seq, keeping ascending order 

D/E: 2 rank: 1
D/E: 5 rank: 1
D/E: 1 rank: 0
D/E: 4 rank: 1
D/E: 8.1 rank: 1


In [63]:
## Arrays
# good for sequences of floats, mostly if it's going to be very (1M) large
# Use Numpy instead

In [74]:
# Deque
# Double ended queue. Lists allow to insert and remove from both ends too, 
# dequeues are much more efficient
# Also can be bounded, so p.e. it can hold only 10 items and discard olds
from collections import deque

seq = deque(range(10), maxlen=10)
print(seq)
# Rotate (in place)
seq.rotate(3)
print('rotated: ', seq)
seq.appendleft(-1)
print('append left:', seq)
seq.extend([99, 88, 77])
print('extend: ', seq)

deque([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], maxlen=10)
rotated:  deque([7, 8, 9, 0, 1, 2, 3, 4, 5, 6], maxlen=10)
append left: deque([-1, 7, 8, 9, 0, 1, 2, 3, 4, 5], maxlen=10)
extend:  deque([9, 0, 1, 2, 3, 4, 5, 99, 88, 77], maxlen=10)


## Dictionaries

- Keys must be hashable: an object is hashable if it has a value that never changes during its lifetime (it needs a \_\_hash__() method); and can be compared to other objects (has an 
\_\_eq__() method(). Hashable objects which compare equal **must** have the same hash value.
- hasable types: str, bytes, numerics, tuples (only if it's items are hashable


In [103]:
# Dict comprehensions
DIAL_CODES = [
    (86, 'China'),
    (91, 'India'),
    (1, 'United States'),
    (62, 'Indonesia'),
    ]
country_code = {country: code for code, country in DIAL_CODES}
country_code

{'China': 86, 'India': 91, 'Indonesia': 62, 'United States': 1}

In [134]:
# Handing missing keys
key='Chile'
new_value = '56'
try: 
    country_code[key] = '56'
except KeyError:
    print(f'{key} not in dict')

# Use dict.get to provide a default and not have to deal with KeyError
print(country_code.get(key, f'{key} not in dict'))
    
#defaultdict
# Can be instanciated with a default_factory argument, which is a type. this
# this is the type that is called to provide a default value when there is a missing key
from collections import defaultdict
import datetime
import time
import pprint

dd = defaultdict(list)
def def_val():
    return datetime.datetime.now()

dd = defaultdict(def_val)
#dd = defaultdict(list)
#print(dd['a'] == [])
#print(dd['b'] == [])
print(dd['a'])
time.sleep(0.01)
print(dd['b'])
pprint.pprint(dd)

56
2019-01-16 22:34:47.144914
2019-01-16 22:34:47.155882
defaultdict(<function def_val at 0x000001FEF3D83488>,
            {'a': datetime.datetime(2019, 1, 16, 22, 34, 47, 144914),
             'b': datetime.datetime(2019, 1, 16, 22, 34, 47, 155882)})


In [135]:
## OrderedDict
# Preserves keys in the order they were added to te dict


In [146]:
## Counter
# count occurences of items in a sequence; returns as dict
from collections import Counter
ct = Counter('abracadabra')
print(ct)

ct = Counter([1,3,3,3,52,3,4,5,5,2,3,5,5,3,5,5,3,5,5,2,5,3,5])
print(ct)
# Most common: (key, n_occurences)
ct.most_common(2)


Counter({'a': 5, 'b': 2, 'r': 2, 'c': 1, 'd': 1})
Counter({5: 10, 3: 8, 2: 2, 1: 1, 52: 1, 4: 1})


[(5, 10), (3, 8)]

## Sets

Set elements must be hashable
Sets are NOT ordered
Sets remove duplicates
Set operations:
- &: intersection (and)
- |: union (or)
- \-: difference
- ^: Symmetrical difference (xor)

#### Construction
set([1, 2, 3])

{1, 2, 3} <-- Set literal, fastest way

#### Comparisons:
a <= b --> a is a subset of b

b >= a --> b is a superset of a

#### Other methods
- a.discard(e): remove element `e` from set a *if* it is present
- a.remove(e): remove element `e`, raise KeyError if not present
- a.add(e): add element `e` to set a


In [149]:
a = {1,2,3,4,5}
b = {2,3}
print('union:', a & b)


union: {2, 3}
