# T1 Counting in today's Python

### 2000 version
```python
def top4(seq):
    """Return up to four of the most frequent items of seq, least frequent first."""
    # count items in sequence
    tally = {}
    for element in seq:
        tally[element] = tally.get(element, 0) + 1
    # pick top 4
    ranked = sorted((occurrences, element) for element, occurrences in tally.items())
    return [element for _, element in ranked[-4:]]
```

### 2003 improvement
```python
# pick top 4
top_4 = [(0, None)] * 4
for it in counts:
    heapq.heappush(top_4, (counts[it], it))
    heapq.heappop(top_4)
return [item for count, item in top_4 if count]
```

### 2004 simpler
```python
the_top_4 = heapq.nlargest(4, ((counts[it], it) for it in counts))
return [item for _, item in the_top_4]
```

### 2005 version
```python
# count items in sequence
counts = collections.defaultdict(int)
for it in seq:
    counts[it] += 1
```

### 2007 version
```python
import collections
def top4(seq):
    """Return up to four of the most common items of seq, most common first."""
    # count items in sequence, then pick top 4
    most_frequent = collections.Counter(seq).most_common(4)
    return [item for item, _count in most_frequent]
```

# T2 Cleaner code with named tuples

### Tuple example
What do those numbers mean?

```python
p = (170, 0.1, 0.6)
p[0] # 170
p[1] # 0.1
```

```python
import collections
Color = collections.namedtuple('Color', ['hue', 'saturation', 'luminosity'])
p = Color(170, 0.1, 0.6)
p.hue        # 170
p.saturation # 0.1
```

Clearer construction:
```python
p = Color(hue=170, saturation=0.1, luminosity=0.6)
print(p)
```

### Less code
```python
class Point(object):
    """Plain mutable point holding x, y and z attributes."""

    def __init__(self, x, y, z):
        self.x, self.y, self.z = x, y, z
```
to
```python
Point = collections.namedtuple('Point', 'x y z')
p = Point(x=1, y=3.5, z=9)
p.x = 5             # AttributeError: can't set attribute
p = p._replace(x=5) # Point(x=5, y=3.5, z=9)
```

```python
class Point(collections.namedtuple('Point', 'x y z')):
    """Immutable 3-D point with an added distance() method."""

    # Empty __slots__ (note the plural) keeps instances free of a per-instance
    # __dict__, so the subclass stays as lightweight as the namedtuple.
    __slots__ = ()

    def distance(self, other):
        """return the euclidean distance to another point."""
        return math.sqrt((self.x - other.x) ** 2 +
                         (self.y - other.y) ** 2 +
                         (self.z - other.z) ** 2)
```

### Default Values

```python
class Point(collections.namedtuple('Point', 'x y z')):
    """Immutable 3-D point whose z coordinate defaults to 0."""

    # Empty __slots__ (note the plural) prevents a per-instance __dict__.
    __slots__ = ()

    def __new__(cls, x, y, z=0):
        """Create the point; z may be omitted and then defaults to 0."""
        return super(Point, cls).__new__(cls, x, y, z)
    
print(Point(x=2, y=9))
```

### Caveats
1. It's iterable.
```python
p = Point(x=4, y=8, z=3)
for x in p:
    print(x)
```
4
8
3

2. It's not directly formattable via the % operator: wrap it in a one-element tuple, or use `str.format()`.
```python
print("My point is %s" % (p,))
print("{} is still my point".format(p))
```
3. They're summable and multipliable.
```python
p = Point(x=1, y=2, z=3)
p + ()
(1, 2, 3)
p * 2
(1, 2, 3, 1, 2, 3)
```
4. Tuple equality
```python
p == (1, 2, 3)  # True
```

a fix
```python
class Point(collections.namedtuple('Point', 'x y z')):
    """Point that compares equal only to other Points, never to plain tuples."""
    __slots__ = ()

    def __eq__(self, other):
        """Return True only if other has exactly this type and the same fields."""
        return type(self) is type(other) and super(Point, self).__eq__(other)

    def __ne__(self, other):
        """Return the negation of __eq__.

        tuple defines its own __ne__, which would otherwise be inherited and
        report Point(1, 2, 3) != (1, 2, 3) as False.
        """
        return not self == other

    # Overriding __eq__ sets __hash__ to None; restore the tuple hash so that
    # points remain usable in sets and as dict keys.
    __hash__ = tuple.__hash__
```
5. Lexicographic comparison
Find the point closest to the origin of coordinates (0, 0, 0):
```python
p1 = Point(x=2, y=5, z=4)
p2 = Point(x=1, y=9, z=3)
zero = Point(0, 0, 0)
min(p1, p2, key=lambda p: p.distance(zero))
```
6. _replace ignores \__new__()

```python
class Point(collections.namedtuple('Point', 'x y z')):
    """Immutable 3-D point that rejects a negative z at construction time."""

    __slots__ = ()

    def __new__(cls, x, y, z):
        """Build the point, raising ValueError when z is negative."""
        z_is_acceptable = not z < 0
        if z_is_acceptable:
            return super(Point, cls).__new__(cls, x, y, z)
        raise ValueError("z must be a non-negative number, got {}".format(z))

Point(x=1, y=2, z=-5)     # ValueError raised

p = Point(x=1, y=2, z=5)
p = p._replace(z=-5)      # no error: _replace does not call __new__
print(p)
Point(x=1, y=2, z=-5)
```

# T3 is versus equals

### `is` is not equal to `==`

### `==` tests for equality

```python
5 == 3 + 2
True

x = [2, 3]
y = [2, 3]

x == y
True
```

### `__eq__()`

If our class does not define `__eq__()`, Python 2 compares two objects using the old-style `__cmp__()`, which by default makes all objects compare unequal except with themselves. The rich comparison operators are called for comparison operators in preference to `__cmp__()`. (Python 3 has no `__cmp__()`; the default `__eq__()` compares by identity.)

### `is` tests for identity

```python
x = [2, 3]
y = [2, 3]
x is y

False
```
`id()` returns the "identity" of an object: an integer guaranteed to be unique and constant for each object during its lifetime.

### When this matters
```python
print(None == None)
print(None is None)
True
True
```

```python
class EqualToEverything(object):
    """Object whose == comparison returns True for any other operand."""

    def __eq__(self, other):
        # Ignores `other` entirely, so even `x == None` is True.
        return True
    
x = EqualToEverything()
print(x is None)
print(x == None)

False
True
```

### Up to 256... and beyond
```python
x = 23
y = 23

print(id(x))
print(id(y))
print(x is y)

370680616
370680616
True

a = 581
b = 581
print(id(a))
print(id(b))
print(a is b)

37351600
37351456
False

x = 'abc'
y = 'abc'

print(x is y)
True

x = "".join(["a", "b", "c"])
y = "abc"
x is y
False
```

```python
import sys
x = sys.intern("".join(["a", "b", "c"]))
y = "abc"
x is y
True
```

### Implicit boolean conversions


In [2]:
import time

def say(something, when=time.ctime()):
    """Print that `something` happened at `when`.

    The default for `when` is evaluated once, when the function is defined,
    so every call that omits it prints the same timestamp.
    """
    print(something, "happened at", when)
   
say("happiness")
time.sleep(10)
say("Sadness  ")


happiness happened at Wed Jul 29 00:42:49 2020
Sadness   happened at Wed Jul 29 00:42:49 2020


In [3]:
import time

def say(something, when=None):
    """Print that `something` happened at `when`, defaulting to the current time."""
    # NOTE(review): `not when` relies on implicit boolean conversion, so falsy
    # values such as 0 or "" are also replaced; `when is None` would test only
    # for the sentinel -- confirm which is intended.
    if not when:
        when = time.ctime()
    print(something, "happened at", when)
   
say("happiness")
time.sleep(10)
say("Sadness  ")

happiness happened at Wed Jul 29 00:43:39 2020
Sadness   happened at Wed Jul 29 00:43:49 2020


### Testing

self.assertEqual(x, None)  # No

self.assertIs(x, None)     # No

self.assertIsNone(x)       # Yes

self.assertTrue(x)

self.assertFalse(x)

# T4 Iterators

### What is an iterator?
list, dictionaries, sets

```python
for item in my_iterator:
    item.do_stuff
```

```python
# a generator expression
def get_bananas(fruits):
    """Return a lazy generator of the fruits for which is_banana(fruit) is true."""
    return (fruit for fruit in fruits if is_banana(fruit))

# a yield statement
def get_banana(fruits):
    """Yield, one at a time, each fruit for which is_banana(fruit) is true."""
    for fruit in fruits:
        if is_banana(fruit):
            yield fruit

# the filter built-in        
def get_banana(fruits):
    """Return filter(is_banana, fruits): the fruits for which is_banana(fruit) is true."""
    return filter(is_banana, fruits)
```

### Signs that you should be using iterators

```python
my_list.extend(...)
print("".join(...))
x = set(...)

all(expensive_evaluation_function(item) for item in items)
```

### Ways to make and use iterators
#### Generator functions
```python
def in_order_traversal(tree: Tree[ValueType]) -> Iterator[ValueType]:
    """Yield the values of tree in order: left subtree, the node's value, right subtree."""
    if tree.left:
        # Re-yield every value produced by the recursive generator.
        for v in in_order_traversal(tree.left):
            yield v
    yield tree.value
    if tree.right:
        for v in in_order_traversal(tree.right):
            yield v
```

Python 3.3
```python
def in_order_traversal(tree: Tree[ValueType]) -> Iterator[ValueType]:
    """Yield the values of tree in order: left subtree, the node's value, right subtree."""
    if tree.left:
        # `yield from` delegates to the sub-generator instead of looping over it.
        yield from in_order_traversal(tree.left)
    yield tree.value
    if tree.right:
        yield from in_order_traversal(tree.right)
```

#### Iterator protocol
```python
class FibonacciIterator(object):
    """Endless iterator over the Fibonacci numbers 0, 1, 1, 2, 3, 5, ..."""

    def __init__(self):
        # Seeded so that the first sum (1 + -1) yields 0, the first Fibonacci number.
        self.last, self.before_last = 1, -1

    def __next__(self) -> int:
        """Return the next Fibonacci number."""
        next_value = self.last + self.before_last
        self.last, self.before_last = next_value, self.last
        return next_value

    def __iter__(self) -> Iterator[int]:
        """Having __iter__ return self is the rest of the iterator protocol."""
        return self
    
#     def __iter__(self) -> Iterator[int]:
#         """Iterables"""
#         return in_order_traversal(self)
```

#### Reasons not to use iterators
```python
def mutate_dict(my_dict: dict) -> None:
    """Rewrite my_dict in place: mutate values whose key passes condition(), drop the rest."""
    # Iterate over a snapshot so that deleting entries does not disturb the loop.
    snapshot = tuple(my_dict.items())
    for key, value in snapshot:
        if not condition(key):
            del my_dict[key]
            continue
        my_dict[key] = mutate_value(value)
```

# T5 Comparison operator chaining

```python
False == False in [False]
```

It is True

```python
False == False in [False]  # which is
(False == False) and (False in [False])
True and True
```

```python
3 < 5 <= 7  # which is
(3 < 5) and (5 <= 7) 
True and True
```

### bool subclasses int
```python
issubclass(bool, int)  # True
isinstance(True, int)  # True
isinstance(False, int)  # True
```

# T6 Writing your own test assertions

### The fail() method

```python
def testSomething(self):
    """Fail the test, with the formatted traceback, if MyFunction raises SomeException."""
    try:
        MyFunction()
    except SomeException:
        # traceback.format_exc() returns the traceback of the exception being handled.
        self.fail("MyFunction raised SomeException: %s" % traceback.format_exc())
```

### Writing your own assertions
If you find yourself making the same non-trivial checks over and over again, you can refactor those out into a custom assert method. The fail() method comes in handy in these situations as well.

```python
def assertDictHasIntKeysAndStringValues(self, dictionary):
    """Fail unless every key of dictionary is an integer and every value a str.

    Args:
        dictionary: The mapping to check.

    The failure message lists the offending keys (checked first) or values.
    """
    bad_keys = [k for k in dictionary if not isinstance(k, numbers.Integral)]
    if bad_keys:
        self.fail("Found non-integer type keys: " +
                  ", ".join(str(k) for k in bad_keys))
    # Filter on the value itself, not on a key.
    bad_values = [v for v in dictionary.values() if not isinstance(v, str)]
    if bad_values:
        self.fail("Found non-string type values: " +
                  ", ".join(str(v) for v in bad_values))

def testMyFunction(self):
    """Check that MyFunction returns a dict with integer keys and string values."""
    dictionary = MyFunction()
    self.assertDictHasIntKeysAndStringValues(dictionary)
                  
```
                  

# T7 Make your regular expression more readable

`re.VERBOSE` lets you use whitespace and comments in your RE's pattern string to make it well-organized and self-documented.

you need to match strings corresponding to any kind of Python integer literal - decimal,octal, hexadecimal, binary...

19 -> '19' -> '0o23' -> '023' -> '0x13' -> '0b10011'

```python
import re
# Alternatives: zero | octal | hex | binary | decimal, then an optional trailing L.
# Because the bare `0` alternative comes first, use fullmatch() to test a whole string.
int_lit = re.compile(r'(0|0o?[0-7]+|0x[\dA-F]+|0b[01]+|[1-9]\d*)L?', re.I)
```

* ? -> optional
* [] -> range of characters
* \* -> zero or more
* \+ -> one or more
* \\d -> a digit
* | -> or

### Squaring the circle

```python
# Verbose form of the integer-literal pattern. The last alternative must not be
# followed by `|`, otherwise the group gains an empty alternative that matches
# the empty string.
int_lit = re.compile(r'''(
    0            |               # zero: the same in any basis
    0o?[0-7]+    |               # octal: 0<octal digits> or 0o<octal digits>
    0x[\dA-F]+   |               # hex: 0x<hexadecimal digits>
    0b[01]+      |               # binary: 0b<binary digits>
    [1-9]\d*                     # decimal: <decimal digits> (no leading 0)
    )L?                          # any basis: optional trailing L
    ''', re.IGNORECASE | re.VERBOSE)
```

### Specifying the flags
```python
# The inline (?ix) prefix sets IGNORECASE and VERBOSE inside the pattern itself,
# so no flags argument is passed to re.compile().
int_lit = re.compile(r'''(?ix)(  # make the RE case-insensitive and verbose
    0            |               # zero: the same in any basis
    0o?[0-7]+    |               # octal: 0<octal digits> or 0o<octal digits>
    0x[\dA-F]+   |               # hex: 0x<hexadecimal digits>
    0b[01]+      |               # binary: 0b<binary digits>
    [1-9]\d*                     # decimal: <decimal digits> (no leading 0)
    )L?                          # any basis: optional trailing L
    ''')
```

# T9 Mastering object comparisons

### Types
```python
class Point(object):
    """2-D point whose equality requires both operands to have exactly the same type."""

    def __init__(self, x, y):
        self.x = x
        self.y = y
    ...
    ...
    def __eq__(self, other):
        # Exact type comparison: an instance of a subclass is not accepted here.
        if not type(self) == type(other):
            return NotImplemented
        return self.x == other.x and self.y == other.y
    
PolitePoint(3, 4) == Point(3, 4)  # This is False
```

### SubClasses
```python
class Point(object):
    """2-D point that also compares equal to instances of its subclasses."""

    def __init__(self, x, y):
        self.x = x
        self.y = y
    ...
    ...
    def __eq__(self, other):
        """Compare coordinates; return NotImplemented for unrelated types."""
        # isinstance (rather than `type(self) == type(other)`) accepts subclasses.
        if not isinstance(other, type(self)):
            return NotImplemented
        return self.x == other.x and self.y == other.y


class PolitePoint(Point):
    """Point with a friendlier string representation."""

    def __str__(self):
        return "Please to meet you! I'm (%d, %d)" % (self.x, self.y)
    
PolitePoint(3, 4) == Point(3, 4)  # This is True; False when using the type() comparison
```

### Hashing
```python
class Point(object):
    """Point that is explicitly marked as unhashable."""
    ...
    ...
    __hash__ = None  # This tells Python that Point cannot be hashed
```

### Python3
If you implement `__eq__` and one of {`__lt__`, `__gt__`, `__ge__`, `__le__`}, then from Python 3.4 on you can simply decorate your class with `functools.total_ordering` and Python will add the missing comparison methods for you.

* prefer `isinstance` checks to direct type checks, especially when you intend for your class to be subclassed.
* Set `__hash__` equal to None when implementing a custom `__eq__` or provide your own `__hash__` method.
* Return `NotImplemented` in `__eq__` where possible. As the Zen of Python reminds us: "In the face of ambiguity, refuse the temptation to guess."

# T10 Flat is better than Nested

### Nesting hides Bugs
```python
response = stub.Call(request, rpc)

if rpc.status == pywraprpc.RPC.OK:
    if response.GetAuthorizedUser():
        if response.GetEnc() == 'utf-8':
            if response.GetRows():
                vals = [ParseRow(r) for r in response.GetRows()]
                avg = sum(vals) / len(vals)
                return avg, vals
            else:
                raise ValueError('no rows')
        else:
            raise AuthError('unauthorized')  # hidden bug: this is the wrong-encoding branch
    else:
        raise ValueError('wrong encoding')  # hidden bug: this is the unauthorized branch
else:
    raise RpcError(rpc.ErrorText())
```

### Refactoring with guard clauses
```python
response = stub.Call(request, rpc)

if rpc.status != pywraprpc.RPC.OK:
    raise RpcError(rpc.ErrorText())
    
if not response.GetAuthorizedUser():
    raise AuthError('unauthorized')

if response.GetEnc() != 'utf-8':
    raise ValueError('wrong encoding')
 
if not response.GetRows():
    raise ValueError('no rows')
    
vals = [ParseRow(r) for r in response.GetRows()]
avg = sum(vals) / len(vals)
return avg, vals
```

### Rules of thumb
* Keep conditional blocks short.
* Think about refactoring when your branches are 3 or more levels deep.
* Think about moving the inner logic into a separate function. For example, instead of looping through a list of protocol buffers and then looping over a repeated field(which itself has repeated fields that need looping), define a function to process a single protocol buffer, then apply it to each one.

# T11 Parameterized tests

[PIP](https://pypi.org/project/parameterized/)

### Common Antipatterns

1. Lots of repetitive tests
2. The Entire test is in a helper method
3. Tests with loops

### Solution: Parameterize it.

```python
class NumbersTests(parameterized.TestCase):
    """Parameterized tests for numbers.Formatter."""

    # Each tuple is (test name, lang, number, capitalize, expected); the test
    # names must be unique.
    @parameterized.named_parameters(
        ('english_1', 'en', 1, False, 'one'),
        ('english_1_capitalize', 'en', 1, True, 'One'),
        ('english_2', 'en', 2, False, 'two'),
        ('spanish_1', 'es', 1, False, 'uno'))
    def test_number_formatting(self, lang, number, capitalize, expected):
        """Format `number` in language `lang` and compare with `expected`."""
        obj = numbers.Formatter()
        obj.set_language(lang)
        self.assertEqual(obj.format(number, capitalize=capitalize), expected)
```

# T12 Better tests through better assertions.

### Type-specific assertions
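`assertEqual` dispatches to the type-specific `assertListEqual`, `assertDictEqual`, `assertSetEqual` or `assertTupleEqual` when both arguments are a list, dict, set or tuple, respectively.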

### Operator-based assertions
```python
self.assertTrue(myKey in myDict)
```
Better

```python
self.assertIn(myKey, myDict)
self.assertIs(myVal, MySingleton)
self.assertLess(valThatShouldBeLess, otherVal)
self.assertLessEqual(valThatShouldBeLessEqual, otherVal)
self.assertIsNone(val)
self.assertLen(my_list,  5)
```

### The msg argument

```python

self.assertEqual(-1, FooService(bar=None))

self.assertEqual(-1, FooService(bar=None),
                 'FooService should return -1 if bar is None')
```

### Clarify what you're testing

```python
expected = range(5, 7)
actual = find_unique(range(5), range(7))
self.assertEqual(expected, actual)

self.assertEqual(collections.Counter(expected), collections.Counter(actual))

# This checks that every item in expected is in actual, and vice-versa,
# regardless of order, even for sequences containing unhashable objects.
self.assertCountEqual(expected, actual)
```

# T13 Use dependencies to mock in tests, not patching

### What is Explicit Dependencies vs Patching?

Almost all objects depend on other objects and it's often necessary to replace these dependencies for testing via some kind of Dependency Injection (DI). One kind of DI is patching: replacing a function or module in the code under test dynamically during testing.

```python
from localweather import weatherdatabase

class WeatherProvider(object):
    """Helper for a local weather widget that creates its own temperature database."""

    def __init__(self):
        self._temperature_database = weatherdatabase.TemperatureDatabase()

    def GetFahrenheit(self):
        """Return the database's current Celsius reading converted to Fahrenheit."""
        return self._temperature_database.CurrentCelsiusTemp() * 1.8 + 32
```

```python
from localweather import weatherprovider

class WeatherProviderTest(TestCase):
    """Tests WeatherProvider by patching the weatherdatabase module it uses."""

    @mock.patch.object(weatherprovider, 'weatherdatabase')  # mock the module
    def testCanGetFahrenheit(self, mock_weather_database_module):
        """A 20 degree Celsius reading is reported as 68 degrees Fahrenheit."""
        # Calling a mock returns the same return_value mock every time, so this
        # is the object that WeatherProvider() will receive as well.
        temperature_database = mock_weather_database_module.TemperatureDatabase()
        temperature_database.CurrentCelsiusTemp.return_value = 20  # use the mock

        unit = weatherprovider.WeatherProvider()
        self.assertEqual(68, unit.GetFahrenheit())
```

An alternate style of DI is explicitly passing dependencies as arguments at initialization.

 

```python
from localweather import weatherdatabase

class WeatherProvider(object):
    """Helper for a local weather widget that receives its temperature database."""

    def __init__(self, temperature_database):
        self._temperature_database = temperature_database

    def GetFahrenheit(self):
        """Return the database's current Celsius reading converted to Fahrenheit."""
        return self._temperature_database.CurrentCelsiusTemp() * 1.8 + 32
```

```python
from localweather import weatherprovider
from localweather import weatherdatabase

class WeatherProviderTest(TestCase):
    """Tests WeatherProvider by passing in a mock temperature database."""

    def testCanGetFahrenheit(self):
        """A 20 degree Celsius reading is reported as 68 degrees Fahrenheit."""
        temperature_database = mock.create_autospec(weatherdatabase.TemperatureDatabase)  # explicitly create mock
        temperature_database.CurrentCelsiusTemp.return_value = 20. # use the mock
        
        # The dependency is handed to the object under test; nothing is patched.
        unit = weatherprovider.WeatherProvider(temperature_database)
        self.assertEqual(68, unit.GetFahrenheit())
```

### Advantages of Explicit Dependencies

1. Under test, someone can see the source of the dependency.
2. A consumer of the code under test immediately knows what it depends on.
3. The developer of the code under test is made more conscious of dependencies and coupling with explicit dependencies.

# T14 Pickle -- not even once

## Warning The pickle module is not secure. Only unpickle data you trust.

### Pickle is too powerful

Pickle is an easy-to-use and extremely powerful serialization format. Nearly any Python object can be pickled and the pickle format can encode nearly anything. In fact, pickle is so powerful that specially crafted pickled data can take over the Python process!
```python
pickle.loads(b"c__builtin__\neval\n(c__builtin__\ninput\n(S'py> '\ntRtR.")
```

### Pickle is too painful
Pickle automatically works with any type you want, with no effort required to define how to handle data following older formats, and works even on objects that were not meant to be serialized and have no forwards and backwards compatibility considerations at all. This generally means that pickles are not forwards or backwards compatible, unless you take special care -- but because pickles work on any object, there is no way to know if that object is taking special care or not!

#### Renaming a class breaks pickle-compatibility


In [4]:
import pickle
class A(object):
    pass

x = pickle.dumps(A())

del A
pickle.loads(x)

AttributeError: Can't get attribute 'A' on <module '__main__'>

#### Changing attributes breaks pickle-compatibility

Pickle serializes and deserializes the raw attributes of an object, and doesn't care if the code as-written isn't supposed to contain that attribute.

This has the effect of making all fields required.

In [6]:
class A(object):
    def __init__(self, x):
        self.x = x
        
x = pickle.dumps(A(1))

class A(object):
    def __init__(self, y):
        self.y = y

a2 = pickle.loads(x)
a2.x

a2.y

AttributeError: 'A' object has no attribute 'y'

#### Pickle is invisible

There are real workable solutions for using pickle successfully. With caution and careful thought, all compatibility issues can be evaded or worked around. YT has known all these tricks and yet still consistently encountered failure in production due to pickle.

### Protocol Buffers are better

#### But... but
There are circumstances when pickle is safe:
* If the pickle cannot come from an untrusted source, security problems go away (assuming that the signing key is never leaked).
* If the pickle is only transmitted between processes with identical (Python) source code, then compatibility problems go away.

# T15 collections.defaultdict

This is a useful tool if you ever find yourself checking whether a key is in a dictionary before updating it.

Use of collections.defaultdict
```python
my_dict = collections.defaultdict(list)
some_list = [('a', 1), ('b', 3), ('c', 4), ('a', 2)]

# Adding elements
for key, val in some_list:
    my_dict[key].append(val)
    
# Lookup
interesting_values = my_dict[interesting_key]
```

**Warning**: `my_defaultdict[key]` will actually insert the key into the dictionary on first lookup. This will change the length of the dictionary and the iteration order. To avoid side effects from key insertion during lookups, you can use the `get()` method.

In [9]:
import timeit

setup_code = """
import collections
numbers = range(10000)
"""

dict_native_code = """
my_dict = {}
for n in numbers:
  if n % 10 in my_dict:
    my_dict[n % 10].append(n)
  else:
    my_dict[n % 10] = [n]
"""

dict_setdefault_code = """
my_dict = {}
for n in numbers:
  my_dict.setdefault(n % 10, []).append(n)
"""

dict_exception_code = """
my_dict = {}
for n in numbers:
  try:
    my_dict[n%10].append(n)
  except KeyError:
    my_dict[n%10] = [n]
"""

defaultdict_code = """
my_dict = collections.defaultdict(list)
for n in numbers:
  my_dict[n % 10].append(n)
"""

timeit.timeit(setup=setup_code, stmt=dict_native_code, number=10000)

15.235491362996981

In [10]:
timeit.timeit(setup=setup_code, stmt=dict_setdefault_code, number=10000)

17.441203755995957

In [11]:
timeit.timeit(setup=setup_code, stmt=dict_exception_code, number=10000)

12.214481675997376

In [12]:
timeit.timeit(setup=setup_code, stmt=defaultdict_code, number=10000)

11.078827478995663

# T16 __init__() vs __new__()

An explanation of the difference between `__init__` and `__new__`

### A very crude attempt
```python
class Weight(tuple):
    pass

w = Weight([0.75, 0.25])
print(w)

(0.75, 0.25)
```

### Checking that input is valid
```python
import math

class Weights(tuple):
    """Tuple of weights that must add up to one."""

    def __init__(self, values):
        """Validate the already-built tuple.

        Args:
            values: The iterable the tuple was created from.

        Raises:
            ValueError: If the weights do not add up to one.
        """
        # tuple inherits object.__init__, which rejects extra arguments in
        # Python 3, so it is called without `values`.
        super(Weights, self).__init__()
        # Sum the stored tuple rather than `values`, which may be a one-shot
        # iterator already consumed when the tuple was created.
        if math.fsum(self) != 1:
            raise ValueError("weights must add up to one")
```

### Doing the work ourselves
we could scale the numbers so that they add up to one.
```python
import math

class Weights(tuple):
    """Attempt to normalize the weights in __init__ -- which cannot work."""

    def __init__(self, values):
        total = math.fsum(values)
        # The immutable tuple was already built from the raw values before
        # __init__ runs, so there is nowhere to store `normalized`.
        normalized = [v / total for v in values]
        # tuple inherits object.__init__, which accepts no extra arguments in
        # Python 3, so `normalized` cannot be passed along either.
        super(Weights, self).__init__()

w = Weights([3, 1])
print(w)
>> (3, 1)
```
**This does not work**

### Tuples are immutable

The reason why our code is not working is that we are inheriting from an immutable class, `tuple`. By the time we get to `__init__` the object has already been created and, immutable as it is, it's thus too late to modify anything.

###  `__init__()` is the initializer

### `__new__()` is the constructor

```python
import math

class Weights(tuple):
    """Tuple of weights scaled, at construction time, so that they add up to one."""

    def __new__(cls, values):
        """Build the tuple from values divided by their sum."""
        denominator = math.fsum(values)
        scaled = []
        for value in values:
            scaled.append(value / denominator)
        return super(Weights, cls).__new__(cls, scaled)
    
w = Weights([3, 1])
print(w)

>>(0.75, 0.25)
```

### Use a generator
```python
import math

class Weights(tuple):
    """Tuple of weights scaled, at construction time, so that they add up to one."""

    def __new__(cls, values):
        # `values` is iterated twice (by fsum, then by the generator below),
        # so it has to be re-iterable rather than a one-shot iterator.
        total = math.fsum(values)
        # A generator expression avoids building an intermediate list.
        normalized = (v / total for v in values)
        return super(Weights, cls).__new__(cls, normalized)
    
w = Weights([3, 1])
print(w)

>>(0.75, 0.25)
```

### Trivia `__init__()` is meaningless for immutable types


# T17 Optimizing your regular expressions

### Unnecessary Punctuation
`re.search()` searches for a match anywhere in the string.
`re.match()` only checks for a match at the beginning of the given string.

* if your pattern starts with `.*` remove it and use `re.search()`
* if your pattern starts with `^` remove it and use `re.match()`
* if your pattern starts with `^` and ends with `$` remove them and use `re.fullmatch()`

```python
hexchars = '1234567890abcdef'

re.match(r'.*\w+', hexchars)
re.search(r'\w+', hexchars)

re.search(r'^\d+', hexchars)
re.match(r'\d+', hexchars)

re.search(r'^\d+\w+$', hexchars)
re.fullmatch(r'\d+\w+', hexchars)
```

### Unnecessary Regexes

```python
# do not do this
re.search(r'foo', 'dogfood feedback')
re.match(r'Google', product_name)
re.search(r'py$', file_name)

# Do this
'foo' in 'dogfood feedback'
product_name.startswith('Google')
file_name.endswith('py')
```

# T18 Multiprocessing


### Parallelization in Python

There are several ways of writing parallel or concurrent programs in Python. In this one we will talk about parallelization using multithreading and multiprocessing.

Unlike in other languages, the performance benefit of threads in Python may be severely limited by the Global Interpreter Lock (GIL). In short, the GIL is a mutex that protects access to Python objects, preventing multiple threads from executing Python bytecode at once.

There are some operations that release the GIL, such as I/O operations and functions with custom C/C++ implementations like NumPy's. In such cases, threads may be a viable option for parallelization.

In order to avoid the limitations caused by the GIL one can use processes instead of threads. Python has the multiprocessing package that wraps the complexity of process management and synchronization. But it uses the `fork()` system call for process creation, and many libraries are not fork-safe.

#### threads

In [6]:
import multiprocessing.pool
from absl import app

INPUT_COUNT = 100
WORKER_COUNT = 10

def fib(n):
    """Return the n-th Fibonacci number, computed by naive recursion."""
    if n <= 1:
        return n
    return fib(n - 1) + fib(n - 2)

def main(argv=None):
    """Compute fib(30) INPUT_COUNT times on WORKER_COUNT threads and print the results.

    Args:
        argv: Command-line arguments supplied by app.run(); unused.
    """
    del argv  # unused; accepted because absl's app.run() calls main(argv)
    data = [30] * INPUT_COUNT
    # ThreadPool lives in multiprocessing.pool, not in multiprocessing itself.
    with multiprocessing.pool.ThreadPool(WORKER_COUNT) as pool:
        print(pool.map(fib, data))
    
if __name__ == '__main__':
    app.run(main)

FATAL Flags parsing error: Unknown command line flag 'f'
Pass --helpshort or --helpfull to see help on flags.


SystemExit: 1

#### The `concurrent.futures` package

The `concurrent.futures` module provides a high-level interface for asynchronously executing callables. This package uses `multiprocessing` underneath, so only the `ThreadPoolExecutor` should be used.

In [7]:
from concurrent import futures
from absl import app

INPUT_COUNT = 100
WORKER_COUNT = 10

def fib(n):
    """Return the n-th Fibonacci number, computed by naive recursion."""
    if n <= 1:
        return n
    return fib(n - 1) + fib(n - 2)

def main(argv=None):
    """Compute fib(30) INPUT_COUNT times on a thread pool executor and print the results.

    Args:
        argv: Command-line arguments supplied by app.run(); unused.
    """
    del argv  # unused; accepted because absl's app.run() calls main(argv)
    data = [30] * INPUT_COUNT
    with futures.ThreadPoolExecutor(max_workers=WORKER_COUNT) as executor:
        # executor.map() returns a lazy iterator; materialize it to print the values.
        print(list(executor.map(fib, data)))
    
if __name__ == '__main__':
    app.run(main)

FATAL Flags parsing error: Unknown command line flag 'f'
Pass --helpshort or --helpfull to see help on flags.


SystemExit: 1

# T19 Defining simple classes, simply

## Magic-free classes
The traditional way to define a class in Python was very manual.

```python
class Point(object):
    """Hand-written 3-D point with equality, hashing and repr."""

    def __init__(self, x, y, z=0):
        self.x = x
        self.y = y
        self.z = z

    def __eq__(self, other):
        """Compare all three coordinates; NotImplemented for non-Points."""
        if not isinstance(other, Point):
            return NotImplemented
        return self.x == other.x and self.y == other.y and self.z == other.z

    def __ne__(self, other):
        """Return the negation of __eq__; NotImplemented for non-Points."""
        if not isinstance(other, Point):
            return NotImplemented
        return not (self == other)

    def __hash__(self):
        # Hash the same fields __eq__ compares so equal points hash equally.
        return hash((self.x, self.y, self.z))

    def __repr__(self):
        # `self` must be passed by keyword for the {self.x} fields to resolve.
        return (
            '{self.__class__.__name__}(x={self.x}, y={self.y}, z={self.z})'
            .format(self=self))
```

## [attrs](https://www.attrs.org/en/stable/)

```python
@attr.s
class Point(object):
    """Point whose x, y and z fields are declared with attr.ib(); z defaults to 0."""
    x = attr.ib()
    y = attr.ib()
    z = attr.ib(default=0)
```
This fills in all the methods for you!

In python 3
```python
@attr.s(auto_attribs=True)
class Point:
    """Point whose fields are declared through annotations; z defaults to 0.0."""
    x: float
    y: float
    z: float = 0.0
```

## dataclasses

Python 3.7 adopted a subset of attrs into the standard library with the `dataclasses` module.
```python
@dataclasses.dataclass
class Point:
    """Dataclass point with float-annotated x, y and z fields; z defaults to 0.0."""
    x: float
    y: float
    z: float = 0.0
```

## Introspection

If you want to write a method that uses all the fields of Point, the most obvious thing to do is write them out.

```python
def norm(self):
    """Return the Euclidean norm of the point, spelling out each field."""
    sum_of_squares = self.x ** 2 + self.y ** 2 + self.z ** 2
    return math.sqrt(sum_of_squares)
```

`attrs` and `dataclasses` offer introspection APIs to list out and loop over all the fields of a class.
```python
# attrs
def norm(self):
    """Return the square root of the sum of squares of the values in attr.astuple(self)."""
    return math.sqrt(sum(axis ** 2 for axis in attr.astuple(self)))

# dataclasses
def norm(self):
    """Return the square root of the sum of squares of the values in dataclasses.astuple(self)."""
    squares = [axis ** 2 for axis in dataclasses.astuple(self)]
    return math.sqrt(sum(squares))

```

## Dynamic attributes
All of these libraries use `exec` under hood to generate a class and `attrs` and `dataclasses` make no effort to sanitize this input. If all you need is a bag of dynamically-defined attributes better options exist:

```python
d = {'a': 2}
ns = types.SimpleNamespace(**d)
ns.a  # 2
```



# T20 Mastering default arguments

### The unexpected behaviour

```python
def append_to(element, target_list=[]):
    """Adds an element to a list and returns the list.

    The default list is created once, when the function is defined, so calls
    that omit target_list keep appending to the same shared list.
    """
    target_list.append(element)
    return target_list

list_result = append_to(9, [])
print('list returned: {}'.format(list_result))
[9]
```
Later
```python
list_result = append_to(4)
print('list returned: {}'.format(list_result))
[4]
```

```python
list_result = append_to(2)
print('list returned: {}'.format(list_result))
[4, 2]
```

Every time that we call the function, we might expect that the default argument would be evaluated and a new list created if none was supplied as an argument, but that's not what is happening; we are getting the same list for a default argument each time. What would happen if the argument were mutable? Well, it would mutate the next time it was modified, and that's the behaviour that we just saw.

In summary , default arguments are evaluated and stored when the function is defined(not when it's called)
When dealing with time.
```python
def important_log(message, timestamp=time.time()):
    """Log message prefixed with timestamp.

    The default timestamp is evaluated once, when the function is defined, so
    every call that omits it logs the same time -- the pitfall shown here.
    """
    logging.info('[%s] Output log %s', timestamp, message)
    
important_log('This is now', time.time())
important_log('hello')
important_log('world')
```

### How to avoid the error: sentinels are our best friends
```python
def append_to_with_sentinel(element, target_list=None):
    """Append element to target_list and return it; start a new list when none is given."""
    if target_list is not None:
        target_list.append(element)
        return target_list
    return [element]
    
list_result = append_to_with_sentinel(4)
list_result = append_to_with_sentinel(9, [])
list_result = append_to_with_sentinel(2)
```

### Why on earth did this happen?
**The actual parameters (arguments) to a function call are introduced in the local symbol table of the called function when it is called; thus, arguments are passed using call by value (where the value is always an object reference, not the value of the object).**
footnote
**Actually, call by object reference would be a better description, since if a mutable object is passed the caller will see any changes the callee makes to it (items inserted into a list).**


### Caveats
Use case for this 
```python
def power_with_memory(key, cache={}):
    """Return 2**key, remembering results in the cache shared across calls."""
    # The mutable default is intentional here: it persists between calls.
    if key not in cache:
        cache[key] = 2**key
    return cache[key]
```

# T21 Managing resource with Context managers

### Built-in Context Manager

A context manager is any type that implements the context manager interface which automatically performs setup and teardown actions around a block of code when triggered by `with` keyword.

```python
def my_managed_function(lock) -> None:
    """Run the protected section while holding ``lock``.

    ``lock`` is used as a context manager: it is entered before the body
    runs and exited when the ``with`` block ends.
    """
    with lock:
        # use resource protected by lock
        ...  # placeholder statement: a block cannot consist of a comment only
```

The python file object.
```python
def foo_file(filename) -> None:
    """Open ``filename`` and pass the open file object to ``foo``.

    The file is opened through a ``with`` statement, so it is closed when
    the block is left.
    """
    with open(filename) as handle:
        foo(handle)
```
Using a context manager ensures that `opened_file.close()` is called even if `foo` throws an exception.

### Composition of context managers
```python
def find_first_foo(
    first_filename, second_filename) -> Optional[Tuple[Text, Text]]:
    """Return the first pair of parallel lines for which ``foo`` is truthy.

    Both files are opened in a single ``with`` statement and read in
    lockstep, one line from each per step. ``None`` is returned when no
    pair satisfies ``foo``.
    """
    with open(first_filename) as first_file, open(second_filename) as second_file:
        for line_pair in zip(first_file, second_file):
            if foo(*line_pair):
                return line_pair
    return None
```

### Creating your own context managers

#### Using contextlib
The easiest way to create your own context manager is to use the `contextlib.contextmanager` decorator.

Decorate a generator function (one that contains a `yield`) that:
*  Calls yield exactly once
*  Does any required setup before yielding and
*  Does any teardown after the yield returns control to the generator function.

```python
import contextlib
import datetime

@contextlib.contextmanager
def logging_open(filename, *args):
    """Open ``filename`` like ``open`` and log around the file's lifetime.

    Args:
        filename: path handed to ``open``.
        *args: extra positional arguments forwarded to ``open``.

    Yields:
        The opened file object; the inner ``with`` closes it when the
        caller's block exits.
    """
    # The timestamp is passed as a logging argument, so the message needs a
    # %s placeholder; without one the record cannot be formatted.
    logging.info("file opened at %s", datetime.datetime.now())
    try:
        with open(filename, *args) as f:
            yield f
    finally:
        # Logged on normal exit and when the caller's block raises.
        logging.info("File closed at %s", datetime.datetime.now())
```

#### Using context manager as decorators
```python
import contextlib
import datetime

@contextlib.contextmanager
def log_invocation(name):
    """Log when the managed block (or decorated function) starts and ends.

    Context managers built with ``contextlib.contextmanager`` can also be
    applied as decorators, which is how this one is used below.

    Args:
        name: label written at the start of both log messages.
    """
    logging.info("%s invoked at %s", name, datetime.datetime.now())
    try:
        yield
    finally:
        # Logged even when the wrapped code raises.
        logging.info("%s finished at %s", name, datetime.datetime.now())


@log_invocation("Suspicious Function")
def my_suspicious_function():
    ...
```

#### Using special methods
```python
from typing import BinaryIO

class BinaryOpen(object):
    """Context manager that opens a file for binary reading (mode 'rb').

    The file is opened on ``__enter__`` and closed on ``__exit__``.
    """

    def __init__(self, filename, *args) -> None:
        """Remember what to open; nothing is opened yet.

        Args:
            filename: path of the file to open.
            *args: extra positional arguments passed to ``open`` after the
                'rb' mode.
        """
        self._filename = filename
        self._args = args
        self._opened_file = None

    def __enter__(self) -> BinaryIO:
        """Open the file and return the file object bound by ``as``."""
        self._opened_file = open(self._filename, 'rb', *self._args)
        return self._opened_file

    def __exit__(self, exc_type, exc_val, exc_tb) -> bool:
        """Close the file; return False so exceptions are not suppressed."""
        # ``closed`` is a read-only flag; ``close()`` is the method that
        # actually releases the file.
        self._opened_file.close()
        self._opened_file = None
        return False
```

# T22 Data Manipulation with NumPy

### Quick Examples
#### Storing Data
```python
import numpy as np
x = np.random.random(1000)  # 1-D array of 1000 random floats
```
#### Element-wise Operations
```python
# pure Python: builds a new list with one addition per element
y = [val + 1 for val in x]
# NumPy: the same addition applied to the whole array in one expression
y = x + 1
```
#### Mathematical Operations
```python
y = np.sin(x)  # sine of every element of x
```

#### Filtering Values
```python
y = x[x > 0.5]  # boolean mask: keep only the elements greater than 0.5
```

#### Aggregations and Statistics
```python
mean = x.mean()  # call the method; bare `x.mean` would only bind the method object
```

#### Combining Operations
```python
# smallest element of x among those where sin(10*x) exceeds 0.99
x[np.sin(10*x) >0.99].min()
```

### Advantages of NumPy
#### NumPy provides efficient computations on arrays
Element-wise operations on NumPy arrays can be far more efficient than the same operations over lists in CPython.
This is because each repeated operation in CPython accrues a small overhead due to dynamic type-checking and function dispatch; with the NumPy API this overhead is paid only once per array rather than once per element in the array.

#### NumPy stores data efficiently
Compared with a list, NumPy saves roughly 5%-10% of storage overhead, and the saving grows with the size of the list.
```python
import sys
x = np.random.random(1000000)  # array of one million random floats
sys.getsizeof(x)  # size of the array object in bytes, as reported by sys.getsizeof
```

#### NumPy natively supports N-dimensional arrays
```python
# 5x3 array of random values
x = np.random.rand(5, 3)

# single element: row 2, column 1
x[2, 1]

# sub-array: rows 2-3, columns 1-2
x[2:4, 1:3]
```

NumPy arrays have methods such as `mean() sum() min() max()`

#### NumPy provides a general domain-specific language for array computing

> Given 1000 points on a two-dimensional plane, print the 5 points closest to the origin

```python
x = np.random.rand(1000, 2)  # 2D array of 1000 points in 2 dimensions
d = (x ** 2).sum(axis=1)     # squared distance between each point and the origin
i = np.argpartition(d, 5)[:5] # indices of the 5 smallest distances (not sorted among themselves)
print(x[i])                  # coordinates of those 5 points
```

### Disadvantages of NumPy

#### NumPy's computational efficiency often comes at the expense of memory efficiency


# T23 Pretty Code

### Use list comprehensions
```python
result = [x + 1 for x in numbers]  # new list with every number incremented by one
```

### Iterate without indices
```python
# loop over the values directly; no index bookkeeping needed
for x in numbers:
    print(x * 2)
```

### Enumerate when you need the index
```python
# enumerate yields (index, value) pairs
for index, frog in enumerate(frogs):
    print(f'{frog} is at index {index}')
```

### Zip everything
```python
# Walk the three sequences in lockstep. The iterable is `items` (plural):
# reusing the loop variable's name `item` would shadow the sequence.
for item, trinket, gizmo in zip(items, trinkets, gizmos):
    Juggle(item, trinket, gizmo)

# Pair keys with values positionally to build a mapping.
dict(zip(keys, values))
```

### Know the built-in functions
```python
def is_palindrome(sequence):
    """Tell whether ``sequence`` reads the same forwards and backwards.

    The input is copied into a list first, so any iterable is accepted.
    """
    items = list(sequence)
    mirrored = list(reversed(items))
    return items == mirrored
```

### Consolidate exception handlers
```python
try:
    DoSomething()
except (FooError, BarError, BuzzError):
    # One handler for all three exception types, listed as a tuple.
    logging.exception('Doing something failed.')
    return
```

# T24 Hashable classes done right

## Classes implementing custom `__eq__()` are not hashable in Python 3
refer to T9
Check whether a class is hashable by testing whether it implements `collections.abc.Hashable`
```python
import collections.abc

# The Hashable ABC lives in collections.abc; the old collections.Hashable
# alias was removed in Python 3.10.
print("Point is hashable: {}".format(issubclass(Point, collections.abc.Hashable)))
my_set = { Point(1, 2) }  # raises TypeError while Point is unhashable
```
```
Point is hashable: False
TypeError: unhashable type: 'Point'
```

## Implementing a `__hash__()` method
If two objects compare as equal, they must have the same hash.
Identical hashes do not guarantee equality.
```python
class Point(object):
    """A 2-D point whose equality and hash are both based on (x, y)."""

    def __init__(self, x, y):
        """Store the two coordinates as plain, writable attributes."""
        self.x = x
        self.y = y

    def __eq__(self, other):
        """Compare coordinates; only objects of the exact same type qualify."""
        if type(self) == type(other):
            return self.x == other.x and self.y == other.y
        # Let Python try the reflected comparison for foreign types.
        return NotImplemented

    def __ne__(self, other):
        """Negation of ``==``."""
        return not (self == other)

    def __hash__(self):
        """Hash the coordinate pair so equal points hash equally."""
        coordinates = (self.x, self.y)
        return hash(coordinates)
```
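The class above is mutable: changing `x` or `y` after an instance has been added to a set or used as a dict key changes its hash, so the container can no longer find it. A hashable object should therefore be immutable.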


Here is the final class that satisfies this requirement (except for Python not being able to completely disallow changing private values).
```python
class Point(object):
    """A 2-D point with read-only coordinates, usable in sets and as dict keys."""

    def __init__(self, x, y):
        """Keep the coordinates in private attributes."""
        self._x = x
        self._y = y

    @property
    def x(self):
        """Read-only view of the first coordinate."""
        return self._x

    @property
    def y(self):
        """Read-only view of the second coordinate."""
        return self._y

    def __eq__(self, other):
        """Compare coordinates; only objects of the exact same type qualify."""
        if type(self) == type(other):
            return self._x == other._x and self._y == other._y
        # Let Python try the reflected comparison for foreign types.
        return NotImplemented

    def __ne__(self, other):
        """Negation of ``==``."""
        return not (self == other)

    def __hash__(self):
        """Hash the coordinate pair so equal points hash equally."""
        coordinates = (self._x, self._y)
        return hash(coordinates)
```

# T25 The handy powerful standard library module heapq

```python
class LargestSoFar:
    """Keep the ``max_length`` largest items seen so far in a stream."""

    def __init__(self, max_length):
        """Start empty.

        Args:
            max_length: maximum number of items to retain.
        """
        self.max_length = max_length
        self.heap = []  # min-heap: heap[0] is the smallest retained item

    def incoming(self, item):
        """Offer ``item``; it is kept only if it ranks among the largest."""
        if len(self.heap) < self.max_length:
            heapq.heappush(self.heap, item)
        else:
            # Push the newcomer and drop whichever item is now smallest.
            heapq.heappushpop(self.heap, item)

    def top_so_far(self):
        """Return the retained items as a new list, largest first."""
        # The keyword accepted by sorted() is ``reverse``.
        return sorted(self.heap, reverse=True)
```

### Getting the smallest (or largest) K items