# ITERATION TOOLS

In [1]:
help(all)

Help on built-in function all in module builtins:

all(iterable, /)
    Return True if bool(x) is True for all values x in the iterable.

    If the iterable is empty, return True.



In [17]:
l = [1, 2, 3, 4, 100]

all(map(lambda x: x < 10, l)), any(map(lambda x: x < 10, l))

(False, True)

In [20]:
all(x<10 for x in l), any(x<10 for x in l)

(False, True)

### WARNING: be careful how you generate above

In [44]:
funcs = []

for x in range(5):
    funcs.append(lambda: x)

print(funcs[0].__closure__)

for f in funcs:
    print(f())

None
4
4
4
4
4


# all lambdas reference x which is 4 at end of the loop 

In [47]:
funcs

[<function __main__.<lambda>()>,
 <function __main__.<lambda>()>,
 <function __main__.<lambda>()>,
 <function __main__.<lambda>()>,
 <function __main__.<lambda>()>]

In [48]:
funcs = []

for x in range(5):
    funcs.append(lambda x=x: x)

for f in funcs:
    print(f())

0
1
2
3
4


In [51]:
# below we yield lambda expressions not a value hence all are truthy
l = [1, 2, 3, 4, 100]
all(lambda x=x: x<10 for x in l), any(lambda x=x: x<10 for x in l)

(True, True)

In [52]:
l = [1, 2, 3, 4, 100]
all((lambda x=x: x<10)() for x in l), any((lambda x=x: x<10)() for x in l)

(False, True)

# everything in python is true unless otherwise told so, it does so by looking `__bool__` or `__len__` methods 

In [59]:
def squares(n):
    for i in range(1, n+1):
        yield i**2

In [61]:
sq = squares(5)
list(sq)

[1, 4, 9, 16, 25]

In [65]:
# nothing left in sq generator object hence StopIteration
next(sq)

StopIteration: 

#### then why is below true

In [67]:
bool(sq)

True

In [69]:
class Person:
    pass

p = Person()
bool(p)

True

In [70]:
class Person:
    def __bool__(self):
        return False

p = Person()
bool(p)

False

In [71]:
class Person:
    def __len__(self):
        return 0

p = Person()
bool(p)

False

In [73]:
class Person:
    def __bool__(self): # first preference, 
        return True
        
    def __len__(self): # if bool not defined
        return 0

p = Person()
bool(p)

True

In [74]:
from itertools import islice

In [75]:
l = [1, 2, 3, 4, 5]

l[0:2]

[1, 2]

In [76]:
s = slice(0, 2)
l[s]

[1, 2]

In [81]:
import math

def factorials(n):
    for i in range(n):
        yield math.factorial(i)

In [82]:
facts = factorials(100)

In [83]:
facts[0:2]

TypeError: 'generator' object is not subscriptable

In [86]:
def slice_(iterable, start, stop):
    for _ in range(0, start):
        next(iterable)
    for _ in range(start, stop):
        yield next(iterable)

slice_(factorials(100), 0, 10)

<generator object slice_ at 0x10bc3fbc0>

In [88]:
list(slice_(factorials(100), 0, 10))

[1, 1, 2, 6, 24, 120, 720, 5040, 40320, 362880]

In [89]:
list(slice_(factorials(100), 2, 5))

[2, 6, 24]

In [90]:
from itertools import islice

In [91]:
help(islice)

Help on class islice in module itertools:

class islice(builtins.object)
 |  islice(iterable, stop) --> islice object
 |  islice(iterable, start, stop[, step]) --> islice object
 |
 |  Return an iterator whose next() method returns selected values from an
 |  iterable.  If start is specified, will skip all preceding elements;
 |  otherwise, start defaults to zero.  Step defaults to one.  If
 |  specified as another value, step determines how many values are
 |  skipped between successive calls.  Works like a slice() on a list
 |  but returns an iterator.
 |
 |  Methods defined here:
 |
 |  __getattribute__(self, name, /)
 |      Return getattr(self, name).
 |
 |  __iter__(self, /)
 |      Implement iter(self).
 |
 |  __next__(self, /)
 |      Implement next(self).
 |
 |  __reduce__(self, /)
 |      Return state information for pickling.
 |
 |  __setstate__(self, object, /)
 |      Set state information for unpickling.
 |
 |  -------------------------------------------------------------

In [92]:
list(islice(factorials(100), 2, 5))

[2, 6, 24]

In [93]:
list(islice(factorials(100), 1, 10, 2))

[1, 6, 120, 5040, 362880]

In [109]:
def factorials():
    index = 0 
    while True:
        print(f'yielding factorial of {index}')
        yield math.factorial(index)
        index += 1

In [96]:
facts = factorials()
for _ in range(0, 5):
    print(next(facts))

yielding factorial of 0
1
yielding factorial of 1
1
yielding factorial of 2
2
yielding factorial of 3
6
yielding factorial of 4
24


In [104]:
islice(factorials(), 3, 10)

<itertools.islice at 0x10b7edbc0>

In [110]:
sl = islice(factorial(), 3, 10)

In [111]:
list(sl)

[6, 24, 120, 720, 5040, 40320, 362880]

# filters

In [112]:
def gen_cubes(n):
    for i in range(n):
        print(f'yielding {i}')
        yield i**3

In [113]:
def is_odd(x):
    return x % 2 == 1

In [115]:
is_odd(4), is_odd(5)

(False, True)

In [117]:
filtered = filter(is_odd, gen_cubes(10))

In [118]:
type(filtered)

filter

In [119]:
list(filtered)

yielding 0
yielding 1
yielding 2
yielding 3
yielding 4
yielding 5
yielding 6
yielding 7
yielding 8
yielding 9


[1, 27, 125, 343, 729]

# all results yielded on consumption (lazy) and then filtered out

In [120]:
from itertools import filterfalse

In [121]:
filtered = filterfalse(is_odd, gen_cubes(10))
list(filtered)

yielding 0
yielding 1
yielding 2
yielding 3
yielding 4
yielding 5
yielding 6
yielding 7
yielding 8
yielding 9


[0, 8, 64, 216, 512]

# dropwhile and takewhile

In [122]:
from itertools import dropwhile, takewhile

In [125]:
from math import sin, pi

def sine_wave(n):
    start = 0
    max_ = 2 * pi
    step = (max_ - start) / (n-1)

    for _ in range(n):
        yield round(sin(start), 2)
        start += step

In [126]:
list(sine_wave(15))

[0.0,
 0.43,
 0.78,
 0.97,
 0.97,
 0.78,
 0.43,
 0.0,
 -0.43,
 -0.78,
 -0.97,
 -0.97,
 -0.78,
 -0.43,
 -0.0]

In [129]:
result = takewhile(lambda x: 0 <= x <= 0.9, sine_wave(15))
type(result)

itertools.takewhile

# takewhile - starts iterating from begining until predicate is false and then stops iterating

In [130]:
list(result)

[0.0, 0.43, 0.78]

# dropwhile - starts iterating when predicate turns false, and continues iteration all the way without checking predicate again

In [133]:
l = [1, 2, 3, 4, 5, 6, 7, 1, 2, 3]

list(dropwhile(lambda x: x < 5, l))

[5, 6, 7, 1, 2, 3]

In [134]:
list(takewhile(lambda x: x < 5, l))

[1, 2, 3, 4]

# compress

In [135]:
data = ['a', 'b', 'c', 'd', 'e']
selectors = [True, False, 1, 0]

list(zip(data, selectors))

[('a', True), ('b', False), ('c', 1), ('d', 0)]

In [137]:
[item for item, truth_value in zip(data, selectors) if truth_value]

['a', 'c']

In [138]:
from itertools import compress

In [139]:
list(compress(data, selectors))

['a', 'c']

In [140]:
help(compress)

Help on class compress in module itertools:

class compress(builtins.object)
 |  compress(data, selectors)
 |
 |  Return data elements corresponding to true selector elements.
 |
 |  Forms a shorter iterator from selected data elements using the selectors to
 |  choose the data elements.
 |
 |  Methods defined here:
 |
 |  __getattribute__(self, name, /)
 |      Return getattr(self, name).
 |
 |  __iter__(self, /)
 |      Implement iter(self).
 |
 |  __next__(self, /)
 |      Implement next(self).
 |
 |  __reduce__(self, /)
 |      Return state information for pickling.
 |
 |  ----------------------------------------------------------------------
 |  Static methods defined here:
 |
 |  __new__(*args, **kwargs)
 |      Create and return a new object.  See help(type) for accurate signature.



# Infinite iterators

In [141]:
from itertools import count, cycle, repeat, islice

## count

In [142]:
g = count(10)
list(islice(g, 0, 5))

[10, 11, 12, 13, 14]

In [143]:
g = count(1, 0.5)
list(islice(g, 0, 10))

[1, 1.5, 2.0, 2.5, 3.0, 3.5, 4.0, 4.5, 5.0, 5.5]

In [144]:
from decimal import Decimal

In [146]:
g = count(Decimal('0'), Decimal('0.15'))
list(islice(g, 0, 10))

[Decimal('0'),
 Decimal('0.15'),
 Decimal('0.30'),
 Decimal('0.45'),
 Decimal('0.60'),
 Decimal('0.75'),
 Decimal('0.90'),
 Decimal('1.05'),
 Decimal('1.20'),
 Decimal('1.35')]

## cycle

In [149]:
g = cycle([1, 2, 3])
list(islice(g, 0, 10))

[1, 2, 3, 1, 2, 3, 1, 2, 3, 1]

In [150]:
def colours():
    yield 'red'
    yield 'green'
    yield 'blue'

cols = colours()
list(cols)

['red', 'green', 'blue']

In [151]:
# exhausts 
next(cols)

StopIteration: 

In [153]:
# produces cycle even if iterator exhausts
cols = colours()
g = cycle(cols)
list(islice(g, 0, 10))

['red', 'green', 'blue', 'red', 'green', 'blue', 'red', 'green', 'blue', 'red']

## REPEAT

In [154]:
g = repeat('Python')
for _ in range(5):
    print(next(g))

Python
Python
Python
Python
Python


In [158]:
g = repeat('Python', 4)
list(g)

['Python', 'Python', 'Python', 'Python']

In [159]:
g = repeat('Python', 4)
l = list(g)
l

['Python', 'Python', 'Python', 'Python']

In [162]:
# same object
hex(id(l[0])), hex(id(l[1])), hex(id(l[2])), hex(id(l[3]))

('0x102a7dc80', '0x102a7dc80', '0x102a7dc80', '0x102a7dc80')

# CHAINING

In [164]:
l1 = (i**2 for i in range(4))
l2 = (i**2 for i in range(5, 9))
l3 = (i**2 for i in range(10, 15))

for gen in l1, l2, l3:
    for item in gen:
        print(item)

0
1
4
9
25
36
49
64
100
121
144
169
196


In [165]:
next(l1)

StopIteration: 

In [166]:
list(l2

_IncompleteInputError: incomplete input (1869239297.py, line 1)

In [167]:
l1 = (i**2 for i in range(4))
l2 = (i**2 for i in range(5, 9))
l3 = (i**2 for i in range(10, 15))

for gen in l1, l2, l3:
    for item in gen:
        print(item)

def chain_iterables(*iterables):
    for iterable in iterables:
        yield from iterable    
        # same as writing below
        # for item in iterable
        # yield item

for item in chain_iterables(l1, l2, l3):
    print(item)

0
1
4
9
25
36
49
64
100
121
144
169
196


In [168]:
from itertools import chain

In [169]:
l1 = (i**2 for i in range(4))
l2 = (i**2 for i in range(5, 9))
l3 = (i**2 for i in range(10, 15))

for item in chain(l1, l2, l3):
    print(item)

0
1
4
9
25
36
49
64
100
121
144
169
196


In [170]:
l1 = (i**2 for i in range(4))
l2 = (i**2 for i in range(5, 9))
l3 = (i**2 for i in range(10, 15))

l = [l1, l2, l3]

for item in chain(l):
    print(item)

<generator object <genexpr> at 0x10ba16260>
<generator object <genexpr> at 0x10ba15f20>
<generator object <genexpr> at 0x10ba15e50>


In [171]:
l1 = (i**2 for i in range(4))
l2 = (i**2 for i in range(5, 9))
l3 = (i**2 for i in range(10, 15))

l = [l1, l2, l3]

for item in chain(*l):
    print(item)

0
1
4
9
25
36
49
64
100
121
144
169
196


In [172]:
help(chain)

Help on class chain in module itertools:

class chain(builtins.object)
 |  chain(*iterables)
 |
 |  Return a chain object whose .__next__() method returns elements from the
 |  first iterable until it is exhausted, then elements from the next
 |  iterable, until all of the iterables are exhausted.
 |
 |  Methods defined here:
 |
 |  __getattribute__(self, name, /)
 |      Return getattr(self, name).
 |
 |  __iter__(self, /)
 |      Implement iter(self).
 |
 |  __next__(self, /)
 |      Implement next(self).
 |
 |  __reduce__(self, /)
 |      Return state information for pickling.
 |
 |  __setstate__(self, object, /)
 |      Set state information for unpickling.
 |
 |  ----------------------------------------------------------------------
 |  Class methods defined here:
 |
 |  __class_getitem__(object, /)
 |      See PEP 585
 |
 |  from_iterable(iterable, /)
 |      Alternative chain() constructor taking a single iterable argument that evaluates lazily.
 |
 |  --------------------------

## USECASES OF CHAIN AND TEE
| Tool                | What it does                        | Real-world use                                                                  |
| ------------------- | ----------------------------------- | ------------------------------------------------------------------------------- |
| `itertools.chain()` | Joins multiple iterables seamlessly | Combine data sources, flatten lists, merge files                                |
| `itertools.tee()`   | Duplicates an iterator              | Split stream for two consumers, preview without consuming, pairwise comparisons |


# TEE

In [173]:
from itertools import tee

In [186]:
def squares(n):
    for i in range(n):
        yield i**2

In [187]:
gen = squares(10)

In [188]:
gen

<generator object squares at 0x10ba15d80>

In [189]:
iters = tee(gen, 3)

In [190]:
iters

(<itertools._tee at 0x10b5c9a00>,
 <itertools._tee at 0x10b5c8f40>,
 <itertools._tee at 0x10b5c92c0>)

In [191]:
iters[0] is iters[1]

False

In [192]:
next(iters[0])

0

In [193]:
next(iters[0])

1

In [194]:
for _ in range(5):
    print(next(iters[0]))

4
9
16
25
36


In [196]:
next(iters[1])

0

# ZIP

In [198]:
list(zip([1, 2, 3], ['a' , 'b', 'c', 'd']))

[(1, 'a'), (2, 'b'), (3, 'c')]

In [200]:
from itertools import zip_longest
list(zip_longest([1, 2, 3], ['a' , 'b', 'c', 'd']))

[(1, 'a'), (2, 'b'), (3, 'c'), (None, 'd')]

# GROUPING

In [201]:
import itertools

In [203]:
with open('data/cars.csv') as f:
    for row in itertools.islice(f, 0, 10):
        print(row, end=' ')

make,model
 ACURA,ILX
 ACURA,MDX
 ACURA,RDX
 ACURA,RLX
 ACURA,TL
 ACURA,TSX
 ALFA ROMEO,4C
 ALFA ROMEO,GIULIETTA
 APRILIA,CAPONORD 1200
 

# we want to find out how many of each makes we have

In [205]:
help(itertools.groupby)

Help on class groupby in module itertools:

class groupby(builtins.object)
 |  groupby(iterable, key=None)
 |
 |  make an iterator that returns consecutive keys and groups from the iterable
 |
 |  iterable
 |    Elements to divide into groups according to the key function.
 |  key
 |    A function for computing the group category for each element.
 |    If the key function is not specified or is None, the element itself
 |    is used for grouping.
 |
 |  Methods defined here:
 |
 |  __getattribute__(self, name, /)
 |      Return getattr(self, name).
 |
 |  __iter__(self, /)
 |      Implement iter(self).
 |
 |  __next__(self, /)
 |      Implement next(self).
 |
 |  __reduce__(self, /)
 |      Return state information for pickling.
 |
 |  __setstate__(self, object, /)
 |      Set state information for unpickling.
 |
 |  ----------------------------------------------------------------------
 |  Static methods defined here:
 |
 |  __new__(*args, **kwargs)
 |      Create and return a new ob

In [206]:
with open('data/cars.csv') as f:
    next(f)
    groups = itertools.groupby(f, key=lambda x:x[0])

In [207]:
type(groups)

itertools.groupby

In [208]:
for group in groups:
    print(f'Group: {group}')
    while True:
        try:
            print(next(group))
        except StopIteration:
            break 

ValueError: I/O operation on closed file.

### its because everything returned by any function in itertools is a lazy iterator

In [216]:
from collections import defaultdict

makes = defaultdict(int)

with open('data/cars.csv') as f:
    next(f)
    for row in f:
        make, _ = row.strip('\n').split(',')
        makes[make] += 1

for key, value in makes.items():
    print(f'{key}: {value}')

ACURA: 6
ALFA ROMEO: 2
APRILIA: 4
ARCTIC CAT: 96
ARGO: 4
ASTON MARTIN: 5
AUDI: 27
BENTLEY: 2
BLUE BIRD: 1
BMW: 86
BUGATTI: 1
BUICK: 5
CADILLAC: 7
CAN-AM: 61
CHEVROLET: 33
CHRYSLER: 2
DODGE: 7
DUCATI: 4
FERRARI: 6
FIAT: 2
FORD: 34
FREIGHTLINER: 7
GMC: 12
HARLEY DAVIDSON: 29
HINO: 7
HONDA: 91
HUSABERG: 4
HUSQVARNA: 9
HYUNDAI: 13
INDIAN: 3
INFINITI: 8
JAGUAR: 9
JEEP: 5
JOHN DEERE: 19
KAWASAKI: 59
KENWORTH: 11
KIA: 10
KTM: 13
KUBOTA: 4
KYMCO: 28
LAMBORGHINI: 2
LAND ROVER: 6
LEXUS: 14
LINCOLN: 6
LOTUS: 1
MACK: 9
MASERATI: 3
MAZDA: 5
MCLAREN: 2
MERCEDES-BENZ: 60
MINI: 3
MITSUBISHI: 8
NISSAN: 24
PEUGEOT: 3
POLARIS: 101
PORSCHE: 4
RAM: 6
RENAULT: 4
ROLLS ROYCE: 3
SCION: 5
SEAT: 3
SKI-DOO: 67
SMART: 1
SRT: 1
SUBARU: 10
SUZUKI: 48
TESLA: 2
TOYOTA: 19
TRIUMPH: 10
VESPA: 4
VICTORY: 14
VOLKSWAGEN: 16
VOLVO: 8
YAMAHA: 110


# we can use groups for this

In [217]:
data = (1, 2, 2, 3, 3, 3)

list(itertools.groupby(data))

[(1, <itertools._grouper at 0x10b880ee0>),
 (2, <itertools._grouper at 0x10b8808e0>),
 (3, <itertools._grouper at 0x10b4ba1a0>)]

In [218]:
# iterator(data) needs to be sorted before groupby
data = (1, 2, 2, 3, 3, 3, 1)

list(itertools.groupby(data))

[(1, <itertools._grouper at 0x10b4ba2f0>),
 (2, <itertools._grouper at 0x10b4ba200>),
 (3, <itertools._grouper at 0x10b4ba230>),
 (1, <itertools._grouper at 0x10b4ba170>)]

In [224]:
data = (1, 2, 2, 3, 3, 3)

it = itertools.groupby(data)
for group in it:
    
    print(f'group name: {group[0]}, iterating in group: {list(group[1])}')

group name: 1, iterating in group: [1]
group name: 2, iterating in group: [2, 2]
group name: 3, iterating in group: [3, 3, 3]


In [225]:
data = (
    (1, 'abc'),
    (1, 'bcd'),
    (2, 'python'),
    (2, 'rust'),
    (2, 'zig'),
    (3, 'ice'),
)

data

((1, 'abc'), (1, 'bcd'), (2, 'python'), (2, 'rust'), (2, 'zig'), (3, 'ice'))

In [226]:
groups = itertools.groupby(data, key=lambda x: x[0])

In [227]:
list(groups)

[(1, <itertools._grouper at 0x10b904040>),
 (2, <itertools._grouper at 0x10b9041f0>),
 (3, <itertools._grouper at 0x10b904370>)]

In [228]:
# exhausted now 
list(groups)

[]

In [230]:
groups = itertools.groupby(data, key=lambda x: x[0])
for group_key, sub_iter in groups:
    print(group_key, list(sub_iter))

1 [(1, 'abc'), (1, 'bcd')]
2 [(2, 'python'), (2, 'rust'), (2, 'zig')]
3 [(3, 'ice')]


In [232]:
def gen_groups():
    # key = 1
    for key in range(1, 4):
        for i in range(3):
            yield(key, i)

gg_iter = gen_groups()

In [233]:
type(gg_iter)

generator

In [234]:
for i in gg_iter:
    print(i)

(1, 0)
(1, 1)
(1, 2)
(2, 0)
(2, 1)
(2, 2)
(3, 0)
(3, 1)
(3, 2)


In [235]:
next(gg_iter)

StopIteration: 

In [247]:
groups = itertools.groupby(gen_groups(), key=lambda x: x[0])

for group_key, iter_list in groups:
    print(f'group: {group_key}')
    for _key, _value in iter_list:
        print(f'key: {_key}, value: {_value}')
        

group: 1
key: 1, value: 0
key: 1, value: 1
key: 1, value: 2
group: 2
key: 2, value: 0
key: 2, value: 1
key: 2, value: 2
group: 3
key: 3, value: 0
key: 3, value: 1
key: 3, value: 2


In [252]:
with open('data/cars.csv') as f:
    next(f)
    groups = itertools.groupby(f, key=lambda x: x.split(',')[0])
    for group_key, iter_list in groups:
        print(f'{group_key}: {len(list(iter_list))}')

ACURA: 6
ALFA ROMEO: 2
APRILIA: 4
ARCTIC CAT: 96
ARGO: 4
ASTON MARTIN: 5
AUDI: 27
BENTLEY: 2
BLUE BIRD: 1
BMW: 86
BUGATTI: 1
BUICK: 5
CADILLAC: 7
CAN-AM: 61
CHEVROLET: 33
CHRYSLER: 2
DODGE: 7
DUCATI: 4
FERRARI: 6
FIAT: 2
FORD: 34
FREIGHTLINER: 7
GMC: 12
HARLEY DAVIDSON: 29
HINO: 7
HONDA: 91
HUSABERG: 4
HUSQVARNA: 9
HYUNDAI: 13
INDIAN: 3
INFINITI: 8
JAGUAR: 9
JEEP: 5
JOHN DEERE: 19
KAWASAKI: 59
KENWORTH: 11
KIA: 10
KTM: 13
KUBOTA: 4
KYMCO: 28
LAMBORGHINI: 2
LAND ROVER: 6
LEXUS: 14
LINCOLN: 6
LOTUS: 1
MACK: 9
MASERATI: 3
MAZDA: 5
MCLAREN: 2
MERCEDES-BENZ: 60
MINI: 3
MITSUBISHI: 8
NISSAN: 24
PEUGEOT: 3
POLARIS: 101
PORSCHE: 4
RAM: 6
RENAULT: 4
ROLLS ROYCE: 3
SCION: 5
SEAT: 3
SKI-DOO: 67
SMART: 1
SRT: 1
SUBARU: 10
SUZUKI: 48
TESLA: 2
TOYOTA: 19
TRIUMPH: 10
VESPA: 4
VICTORY: 14
VOLKSWAGEN: 16
VOLVO: 8
YAMAHA: 110


# but this persists the list what if i want an iterable out of it so i can get values as i need ?

In [256]:
with open('data/cars.csv') as f:
    next(f)
    groups = itertools.groupby(f, key=lambda x: x.split(',')[0])
    result = ((group_key, sum(1 for item in iter_list)) for group_key, iter_list in groups)

In [257]:
type(result)

generator

In [260]:
for _ in range(10):
    print(next(result))

ValueError: I/O operation on closed file.

# everything is a lazy iterator, so without persisting anything we have to keep the file open, actual file is read and compute done when result is asked for. 

In [262]:
with open('data/cars.csv') as f:
    next(f)
    groups = itertools.groupby(f, key=lambda x: x.split(',')[0])
    result = ((group_key, sum(1 for item in iter_list)) for group_key, iter_list in groups)
    for _ in range(10):
        print(next(result))

('ACURA', 6)
('ALFA ROMEO', 2)
('APRILIA', 4)
('ARCTIC CAT', 96)
('ARGO', 4)
('ASTON MARTIN', 5)
('AUDI', 27)
('BENTLEY', 2)
('BLUE BIRD', 1)
('BMW', 86)


# Cartesian product

In [265]:
l1 = [1, 2, 3]
l2 = ['a', 'b', 'c', 'd']

def cartesian_product(*args):
    for x in args[0]:
        for y in args[1]:
            yield(x, y)

list(cartesian_product(l1, l2))

[(1, 'a'),
 (1, 'b'),
 (1, 'c'),
 (1, 'd'),
 (2, 'a'),
 (2, 'b'),
 (2, 'c'),
 (2, 'd'),
 (3, 'a'),
 (3, 'b'),
 (3, 'c'),
 (3, 'd')]

In [268]:
p = itertools.product(l1, l2)
type(p)

itertools.product

In [269]:
list(p)

[(1, 'a'),
 (1, 'b'),
 (1, 'c'),
 (1, 'd'),
 (2, 'a'),
 (2, 'b'),
 (2, 'c'),
 (2, 'd'),
 (3, 'a'),
 (3, 'b'),
 (3, 'c'),
 (3, 'd')]

In [273]:
l1 = ['a', 'b', 'c', 'd']
list(itertools.permutations(l1))

[('a', 'b', 'c', 'd'),
 ('a', 'b', 'd', 'c'),
 ('a', 'c', 'b', 'd'),
 ('a', 'c', 'd', 'b'),
 ('a', 'd', 'b', 'c'),
 ('a', 'd', 'c', 'b'),
 ('b', 'a', 'c', 'd'),
 ('b', 'a', 'd', 'c'),
 ('b', 'c', 'a', 'd'),
 ('b', 'c', 'd', 'a'),
 ('b', 'd', 'a', 'c'),
 ('b', 'd', 'c', 'a'),
 ('c', 'a', 'b', 'd'),
 ('c', 'a', 'd', 'b'),
 ('c', 'b', 'a', 'd'),
 ('c', 'b', 'd', 'a'),
 ('c', 'd', 'a', 'b'),
 ('c', 'd', 'b', 'a'),
 ('d', 'a', 'b', 'c'),
 ('d', 'a', 'c', 'b'),
 ('d', 'b', 'a', 'c'),
 ('d', 'b', 'c', 'a'),
 ('d', 'c', 'a', 'b'),
 ('d', 'c', 'b', 'a')]

In [275]:
# combinatinos of length 2
list(itertools.combinations(l1, 2))

[('a', 'b'), ('a', 'c'), ('a', 'd'), ('b', 'c'), ('b', 'd'), ('c', 'd')]

In [276]:
list(itertools.combinations(l1, 3))

[('a', 'b', 'c'), ('a', 'b', 'd'), ('a', 'c', 'd'), ('b', 'c', 'd')]

# prob of pulling 4 Aces from a deck of cards

In [300]:
RANKS = tuple(map(str, range(2, 11))) + tuple('JQKA')
SUITS = 'SHDC'

deck = [rank + suit for suit in SUITS for rank in RANKS]
deck[0:5]

['2S', '3S', '4S', '5S', '6S']

In [301]:
card_dealt = itertools.combinations(deck, 4)

outcome, total = 0, 0

for card1, card2, card3, card4 in card_dealt:
    if card1[0] == card2[0] == card3[0] == card4[0] == 'A':
        outcome += 1
    else:
        total += 1
    
print(outcome/(total+outcome))

3.6937852063902484e-06


In [306]:
# cross check
import math
math.comb(52, 4) == (outcome + total)

True

# context managers

In [307]:
try:
    10 / 2
except ZeroDivisionError:
    print('zero division error')
finally:
    print('finally ran!')

finally ran!


In [308]:
try:
    10 / 0
except ZeroDivisionError:
    print('zero division error')
finally:
    print('finally ran!')

zero division error
finally ran!


In [311]:
def my_func():
    try:
        10 / 0
    except ZeroDivisionError:
        return
    finally:
        print('finally ran!')

my_func()

finally ran!


In [323]:
class MyContext:
    def __init__(self):
        self.obj = None

    def __enter__(self):
        print('entering context..')
        self.obj = 'the return object'
        return self.obj

    def __exit__(self, exc_type, exc_value, exc_traceback):
        print('exiting context')
        if exc_type:
            print(f'Error: {exc_type}: {exc_value}')
        return False

In [325]:
with MyContext() as obj:
    print(f'inside with block obj is: {obj}')
    raise ValueError('custom message')

entering context..
inside with block obj is: the return object
exiting context
Error: <class 'ValueError'>: custom message


ValueError: custom message

### `with` statement causes `__enter__` call, which optionally returns some obj, which is assigned to `obj` symbol. On Erorr or exit of context manager, `__exit__` is called.
### returning `False` from `__exit__` method tells python to bubble the error. Returning `True` silences the error

In [326]:
class MyContext:
    def __init__(self):
        self.obj = None

    def __enter__(self):
        print('entering context..')
        self.obj = 'the return object'
        return self.obj

    def __exit__(self, exc_type, exc_value, exc_traceback):
        print('exiting context')
        if exc_type:
            print(f'Error: {exc_type}: {exc_value}')
        return True


with MyContext() as obj:
    print(f'inside with block obj is: {obj}')
    raise ValueError('custom message')

entering context..
inside with block obj is: the return object
exiting context
Error: <class 'ValueError'>: custom message


### Context managers `do not` have its own scope. it creates objects in enclosing scope

In [327]:
obj

'the return object'

### One more example

In [334]:
class Resource:
    def __init__(self, name):
        self.name = name
        self.state = None

class ResourceManager:
    def __init__(self, name):
        self.name = name

    def __enter__(self):
        print(f'entering context')
        self.resource = Resource(self.name)
        self.resource.state = 'created'
        return self.resource

    def __exit__(self, exc_type, exc_value, exc_traceback):
        print(f'exiting context')
        self.resource.state = 'destroyed'
        
        if exc_type:
            print(f'Error: {exc_type}: {exc_value}')
        return False

with ResourceManager('spam') as rm:
    print(f'{rm.name}: {rm.state}')
    # raise ValueError('custom message')

# since rm object exists in global scope we can still access it
print(f'is symbol rm in globals? {'rm' in globals()}')
print(f'{rm.name}: {rm.state}')

entering context
spam: created
exiting context
is symbol rm in globals? True
spam: destroyed


# Caveat with lazy iterators

In [335]:
import csv

In [350]:
def read_data():
    with open('data/AAPL.csv', 'r') as f:
        return csv.reader(f, delimiter=',', quotechar='"')

reader = read_data()

In [351]:
type(reader)

_csv.reader

In [352]:
list(reader)

ValueError: I/O operation on closed file.

In [353]:
'__iter__' in dir(reader), '__next__' in dir(reader)

(True, True)

In [361]:
f.closed

True

### Csv reader is an iterator, but when we start iteration by that time context manager has run `__exit__` and closed the `file: f`

### instead of returning if we `yield` then the execution `halts and un-halts` until all values are yielded from the generator function

In [346]:
def read_data():
    with open('data/AAPL.csv', 'r') as f:
        yield from csv.reader(f, delimiter=',', quotechar='"')

reader = read_data()
type(reader)

generator

In [347]:
for _ in range(5):
    print(next(reader))

['Symbol', ' Date', ' Close', ' Volume', ' Open', ' High', ' Low']
['AAPL', ' 10/29/2020', ' 115.32', ' 146129200', ' 112.37', ' 116.93', ' 112.2']
['AAPL', ' 10/28/2020', ' 111.2', ' 143937800', ' 115.05', ' 115.43', ' 111.1']
['AAPL', ' 10/27/2020', ' 116.6', ' 92276770', ' 115.49', ' 117.28', ' 114.5399']
['AAPL', ' 10/26/2020', ' 115.05', ' 111850700', ' 114.01', ' 116.55', ' 112.88']


In [348]:
list(reader)

[['AAPL',
  ' 10/23/2020',
  ' 115.04',
  ' 82572650',
  ' 116.39',
  ' 116.55',
  ' 114.28'],
 ['AAPL',
  ' 10/22/2020',
  ' 115.75',
  ' 101988000',
  ' 117.45',
  ' 118.04',
  ' 114.59'],
 ['AAPL',
  ' 10/21/2020',
  ' 116.87',
  ' 89945980',
  ' 116.67',
  ' 118.705',
  ' 116.45'],
 ['AAPL',
  ' 10/20/2020',
  ' 117.51',
  ' 124423700',
  ' 116.2',
  ' 118.98',
  ' 115.63'],
 ['AAPL',
  ' 10/19/2020',
  ' 115.98',
  ' 120639300',
  ' 119.96',
  ' 120.419',
  ' 115.66'],
 ['AAPL',
  ' 10/16/2020',
  ' 119.02',
  ' 115393800',
  ' 121.28',
  ' 121.548',
  ' 118.81'],
 ['AAPL',
  ' 10/15/2020',
  ' 120.71',
  ' 112559200',
  ' 118.72',
  ' 121.2',
  ' 118.15'],
 ['AAPL',
  ' 10/14/2020',
  ' 121.19',
  ' 151062300',
  ' 121',
  ' 123.03',
  ' 119.62'],
 ['AAPL',
  ' 10/13/2020',
  ' 121.1',
  ' 262330500',
  ' 125.27',
  ' 125.39',
  ' 119.65'],
 ['AAPL',
  ' 10/12/2020',
  ' 124.4',
  ' 240226800',
  ' 120.06',
  ' 125.18',
  ' 119.2845'],
 ['AAPL',
  ' 10/09/2020',
  ' 116.97',
  ' 

In [349]:
next(reader)

StopIteration: 

# lets try writing a data reader class

In [362]:
class DataReader:
    def __init__(self, fname):
        self._fname = fname

    def __iter__(self):
        return self

    def __next__(self):
        return next(self._fname)

In [363]:
data = DataReader('data/AAPL.csv')

In [365]:
for row in data:
    print(data)

TypeError: 'str' object is not an iterator

In [383]:
class DataReader:
    def __init__(self, fname):
        self._fname = fname

    def __iter__(self):
        return self

    def __next__(self):
        return next(self._f)

    def __enter__(self):
        # we cant return open() directly - else how will we close it?
        self._f = open(self._fname, 'r')
        return self._f

    def __exit__(self, exc_type, exc_value, traceback):
        print('closing file')
        self._f.close()
        if exc_type:
            print(f'Error: {exc_type}: {exc_value}')
        return False
        

In [384]:
with DataReader('data/AAPL.csv') as f:
    print(f'File closed?: {f.closed}')
    for row in f:
        print(row)

File closed?: False
Symbol, Date, Close, Volume, Open, High, Low

AAPL, 10/29/2020, 115.32, 146129200, 112.37, 116.93, 112.2

AAPL, 10/28/2020, 111.2, 143937800, 115.05, 115.43, 111.1

AAPL, 10/27/2020, 116.6, 92276770, 115.49, 117.28, 114.5399

AAPL, 10/26/2020, 115.05, 111850700, 114.01, 116.55, 112.88

AAPL, 10/23/2020, 115.04, 82572650, 116.39, 116.55, 114.28

AAPL, 10/22/2020, 115.75, 101988000, 117.45, 118.04, 114.59

AAPL, 10/21/2020, 116.87, 89945980, 116.67, 118.705, 116.45

AAPL, 10/20/2020, 117.51, 124423700, 116.2, 118.98, 115.63

AAPL, 10/19/2020, 115.98, 120639300, 119.96, 120.419, 115.66

AAPL, 10/16/2020, 119.02, 115393800, 121.28, 121.548, 118.81

AAPL, 10/15/2020, 120.71, 112559200, 118.72, 121.2, 118.15

AAPL, 10/14/2020, 121.19, 151062300, 121, 123.03, 119.62

AAPL, 10/13/2020, 121.1, 262330500, 125.27, 125.39, 119.65

AAPL, 10/12/2020, 124.4, 240226800, 120.06, 125.18, 119.2845

AAPL, 10/09/2020, 116.97, 100506900, 115.28, 117, 114.92

AAPL, 10/08/2020, 114.97, 834

In [385]:
type(f)

_io.TextIOWrapper

In [386]:
f.closed

True

# Check out `contextlib` module for decorators which turn any function into a context manager. It also has a decorator to redirect stdio and put it back to what it was as context is exited.

# read : https://peps.python.org/pep-0343/