# iterables
Any object from which the iter built-in function can obtain an iterator. Objects
implementing an \__iter__ method returning an iterator are iterable. Sequences
are always iterable, as are objects implementing a \__getitem__ method that
accepts 0-based indexes.

In [307]:
#Some people, when confronted with a problem, think "I know, I'll use regular expressions." 
#Now they have two problems.
import re
import reprlib

RE_WORD = re.compile(r'\w+')
RE_WORD.findall('1 2 3')

['1', '2', '3']

In [308]:
class Sentence:
    def __init__(self, text):
        self.text = text
        self.words = RE_WORD.findall(text)
    
    def __getitem__(self, index):
         return self.words[index]
        
    def __len__(self):
        return len(self.words)
    
    def __repr__(self):
        return 'Sentence(%s)' % reprlib.repr(self.text) #for abbreviation

In [309]:
s = Sentence('"the time has come," the Walrus said,')
s

Sentence('"the time ha... Walrus said,')

In [310]:
#whenever python needs to iterate over an object x, it automatically calls iter(x)
#if __iter__ is missing iter() will call __getitem__()
for word in s:
    print(word)

the
time
has
come
the
Walrus
said


In [311]:
list(s)

['the', 'time', 'has', 'come', 'the', 'Walrus', 'said']

In [312]:
set(s)

{'Walrus', 'come', 'has', 'said', 'the', 'time'}

In [313]:
from collections import abc

class GooseSpam:
    def __iter__(self):
        pass


goose_spam_can = GooseSpam()

issubclass(GooseSpam, abc.Iterable), isinstance(goose_spam_can, abc.Iterable)

(True, True)

# Iterators

In [314]:
import random

def d6():
    return random.randint(1, 6)

In [315]:
d6_iter = iter(d6, 1) # __iter__ will create an iterator out of an itrable
next(d6_iter), type(d6_iter)

(5, callable_iterator)

In [316]:
for roll in d6_iter:
    print(roll)

3
4


In [317]:
s = 'ABC'
it = iter(s) 

while True:
    try:
        print(next(it)) 
    except StopIteration: 
        del it 
        break 

A
B
C


### Python’s standard interface for an iterator has two methods:
\__next__:
Returns the next item in the series, raising StopIteration if there are no more.

\__iter__:
Returns self; this allows iterators to be used where an iterable is expected, for
example, in a for loop.

In [318]:
s3 = Sentence('Life of Brian')
it = iter(s3)

In [319]:
next(it), next(it), next(it)

('Life', 'of', 'Brian')

In [320]:
list(it)

[]

In [321]:
list(iter(s3))

['Life', 'of', 'Brian']

In [322]:
class Sentence:
    def __init__(self, text):
        self.text = text
        self.words = RE_WORD.findall(text)
    
    def __repr__(self):
        return f'Sentence({reprlib.repr(self.text)})'

    
    def __iter__(self):
        return SentenceIterator(self.words)
    
class SentenceIterator:
    def __init__(self, words):
        self.words = words
        self.index = 0
        
    def __next__(self):
        try:
            word = self.words[self.index]
        except IndexError:
            raise StopIteration()
        self.index += 1
        return word
    
    def __iter__(self):
        return self

In [323]:
s4 = Sentence('hello good bye hi firend')
it = iter(s4)
next(it), list(it)

('hello', ['good', 'bye', 'hi', 'firend'])

A common cause of errors in building iterables and iterators is to confuse the two. To
be clear: iterables have an \__iter__ method that instantiates a new iterator every
time. Iterators implement a \__next__ method that returns individual items, and an
\__iter__ method that returns self.

To “support multiple traversals,” it must be possible to obtain multiple independent
iterators from the same iterable instance, and each iterator must keep its own internal
state, so a proper implementation of the pattern requires each call to iter(my_itera
ble) to create a new, independent, iterator. That is why we need the SentenceItera
tor class in this example

In [324]:
#just ignore this one
class Sentence:
    def __init__(self, text):
        self.text = text
        self.words = RE_WORD.findall(text)
    
    def __repr__(self):
        return f'Sentence({reprlib.repr(self.text)})'

    def __iter__(self):
        return iter(self.words)

In [325]:
s3 = Sentence('hallo good bye hi')
it = iter(s3)
next(it), list(it)

('hallo', ['good', 'bye', 'hi'])

In [326]:
class Sentence2:
    def __init__(self, text):
        self.text = text
        self.words = RE_WORD.findall(text)
    
    def __repr__(self):
        return f'Sentence({reprlib.repr(self.text)})'

    def __iter__(self):
        for word in self.words:
            yield word       

In [327]:
s = Sentence2('hallo good bye hi')
for w in s:
    print(w)
    
it = iter(s)
next(it), list(it)

hallo
good
bye
hi


('hallo', ['good', 'bye', 'hi'])

In [328]:
#generators implement the iterator interface, so they are also iterable

In [329]:
def gen_123():
    yield 1
    yield 2
    yield 3
    
gen_123, gen_123()

(<function __main__.gen_123()>,
 <generator object gen_123 at 0x000001C39433D7A0>)

In [330]:
for i in gen_123():
    print(i)

1
2
3


In [331]:
g = gen_123()
next(g), next(g), next(g)

(1, 2, 3)

In [332]:
#when the generator function returns it raises StopIteration
#next(g)

In [333]:
#the most lazy version of Sentence 
class Sentence:
    def __init__(self, text):
        self.text = text
        
    def __repr__(self):
        return f'Sentence({reprlib.repr(self.text)})'

    def __iter__(self):
        for match in RE_WORD.finditer(self.text):
            yield match.group()

In [334]:
class Sentence:
    def __init__(self, text):
        self.text = text
        
    def __repr__(self):
        return f'Sentence({reprlib.repr(self.text)})'

    def __iter__(self):
        return (match.group() for match in RE_WORD.finditer(self.text))

In [335]:
s = Sentence('1 2 3 hallo good bye')
it = iter(s)
next(it), list(it)

('1', ['2', '3', 'hallo', 'good', 'bye'])

In [336]:
for i in RE_WORD.finditer('1 2 3'):
    print(i)

<re.Match object; span=(0, 1), match='1'>
<re.Match object; span=(2, 3), match='2'>
<re.Match object; span=(4, 5), match='3'>


# Generators

In [337]:
def gen_AB():
    print('start')
    yield 'A'
    print('continue')
    yield 'B'
    print('end')

In [338]:
res1 = [3*x for x in gen_AB()]

start
continue
end


In [339]:
res1

['AAA', 'BBB']

In [340]:
res2 = (3*x for x in gen_AB())
res2

<generator object <genexpr> at 0x000001C394B4B5E0>

In [341]:
for i in res2:
    print('-->', i)

start
--> AAA
continue
--> BBB
end


In [342]:
#generator expression
(2 * x for x in [1,2,3])

<generator object <genexpr> at 0x000001C394BF5E50>

In [343]:
type(8.0)(6)

6.0

In [344]:
def aritprog_gen(begin, step, end=None):
    result = type(begin+step)(begin)
    index = 0
    while (end is None) or result < end:
        yield result
        index += 1
        result = begin + step * index

In [345]:
list(aritprog_gen(1, .2, 3))

[1.0, 1.2, 1.4, 1.6, 1.8, 2.0, 2.2, 2.4000000000000004, 2.6, 2.8]

In [346]:
from decimal import Decimal
list(aritprog_gen(1, Decimal('.2'), 3))

[Decimal('1'),
 Decimal('1.2'),
 Decimal('1.4'),
 Decimal('1.6'),
 Decimal('1.8'),
 Decimal('2.0'),
 Decimal('2.2'),
 Decimal('2.4'),
 Decimal('2.6'),
 Decimal('2.8')]

In [347]:
from fractions import Fraction
list(aritprog_gen(1, Fraction(1,3), 3))

[Fraction(1, 1),
 Fraction(4, 3),
 Fraction(5, 3),
 Fraction(2, 1),
 Fraction(7, 3),
 Fraction(8, 3)]

In [348]:
import itertools

In [349]:
gen = itertools.count(1, .5) #never stops so don't use list here
next(gen), next(gen), next(gen)

(1, 1.5, 2.0)

In [350]:
gen = itertools.takewhile(lambda n: n < 3, itertools.count(1, .5))
list(gen)

[1, 1.5, 2.0, 2.5]

In [351]:
def aritprog(begin, step, end=None):
    first = type(begin + step)(begin)
    ap_gen = itertools.count(first, step)
    if end is None:
        return ap_gen
    return itertools.takewhile(lambda n: n < end, ap_gen)

In [352]:
list(aritprog(0,1,10))

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]

## misc itertools generators

In [353]:
from itertools import compress, dropwhile, accumulate, starmap, filterfalse, islice, takewhile

In [354]:
def vowel(c):
    return c.lower() in 'aeiou'

In [355]:
list(filter(vowel, 'Aardvark'))

['A', 'a', 'a']

In [356]:
list(filterfalse(vowel, 'Aardvark'))

['r', 'd', 'v', 'r', 'k']

In [357]:
list(dropwhile(vowel, 'Aardvark'))

['r', 'd', 'v', 'a', 'r', 'k']

In [358]:
list(takewhile(vowel, 'Aardvark'))

['A', 'a']

In [359]:
list(compress('Aardvark', (1, 0, 1, 1, 0, 1)))

['A', 'r', 'd', 'a']

In [360]:
list(islice('Aardvark', 4))

['A', 'a', 'r', 'd']

In [361]:
list(islice('Aardvark', 4, 7))

['v', 'a', 'r']

In [362]:
list(islice('Aardvark', 1, 7, 2))

['a', 'd', 'a']

In [363]:
sample = [5, 4, 2, 8, 7, 6, 3, 0, 9, 1]

In [364]:
list(accumulate(sample))

[5, 9, 11, 19, 26, 32, 35, 35, 44, 45]

In [365]:
list(accumulate(sample, min))

[5, 4, 2, 2, 2, 2, 2, 0, 0, 0]

In [366]:
list(accumulate(sample, max))

[5, 5, 5, 8, 8, 8, 8, 8, 9, 9]

In [367]:
import operator
list(accumulate(sample, operator.mul))

[5, 20, 40, 320, 2240, 13440, 40320, 0, 0, 0]

In [368]:
list(accumulate(range(1,11), operator.mul)) #factorial

[1, 2, 6, 24, 120, 720, 5040, 40320, 362880, 3628800]

In [369]:
list(starmap(operator.mul, enumerate('albatroz', 1)))

['a', 'll', 'bbb', 'aaaa', 'ttttt', 'rrrrrr', 'ooooooo', 'zzzzzzzz']

In [370]:
list(starmap(operator.mul, enumerate('albatroz', 3)))

['aaa',
 'llll',
 'bbbbb',
 'aaaaaa',
 'ttttttt',
 'rrrrrrrr',
 'ooooooooo',
 'zzzzzzzzzz']

In [371]:
list(starmap(lambda a, b: b/a, enumerate(accumulate(sample), 1)))

[5.0,
 4.5,
 3.6666666666666665,
 4.75,
 5.2,
 5.333333333333333,
 5.0,
 4.375,
 4.888888888888889,
 4.5]

In [372]:
from itertools import chain, product, zip_longest

In [373]:
list(chain('ABC', range(3)))

['A', 'B', 'C', 0, 1, 2]

In [374]:
#chain.from_iterable takes each item from the iterable, and chains them in sequence,
#as long as each item is itself iterable
list(chain.from_iterable(enumerate('ABC')))

[0, 'A', 1, 'B', 2, 'C']

In [375]:
list(enumerate('ABC'))

[(0, 'A'), (1, 'B'), (2, 'C')]

In [376]:
list(product(range(3), range(3))) #cartesian product

[(0, 0), (0, 1), (0, 2), (1, 0), (1, 1), (1, 2), (2, 0), (2, 1), (2, 2)]

In [377]:
suits = 'spades diamonds hearts clubs'.split()
suits

['spades', 'diamonds', 'hearts', 'clubs']

In [378]:
list(product(suits, 'AK'))

[('spades', 'A'),
 ('spades', 'K'),
 ('diamonds', 'A'),
 ('diamonds', 'K'),
 ('hearts', 'A'),
 ('hearts', 'K'),
 ('clubs', 'A'),
 ('clubs', 'K')]

In [379]:
[(i, j) for i in range(3) for j in range(3)] #also cartesian product

[(0, 0), (0, 1), (0, 2), (1, 0), (1, 1), (1, 2), (2, 0), (2, 1), (2, 2)]

In [380]:
list(zip_longest('ABC', range(5)))

[('A', 0), ('B', 1), ('C', 2), (None, 3), (None, 4)]

In [381]:
list(zip_longest('ABC', range(5), fillvalue='?'))

[('A', 0), ('B', 1), ('C', 2), ('?', 3), ('?', 4)]

In [382]:
list(product('ABC', repeat=2))

[('A', 'A'),
 ('A', 'B'),
 ('A', 'C'),
 ('B', 'A'),
 ('B', 'B'),
 ('B', 'C'),
 ('C', 'A'),
 ('C', 'B'),
 ('C', 'C')]

In [383]:
list(product(range(2), repeat=3))

[(0, 0, 0),
 (0, 0, 1),
 (0, 1, 0),
 (0, 1, 1),
 (1, 0, 0),
 (1, 0, 1),
 (1, 1, 0),
 (1, 1, 1)]

In [384]:
list(product('AB', range(2), repeat=2))

[('A', 0, 'A', 0),
 ('A', 0, 'A', 1),
 ('A', 0, 'B', 0),
 ('A', 0, 'B', 1),
 ('A', 1, 'A', 0),
 ('A', 1, 'A', 1),
 ('A', 1, 'B', 0),
 ('A', 1, 'B', 1),
 ('B', 0, 'A', 0),
 ('B', 0, 'A', 1),
 ('B', 0, 'B', 0),
 ('B', 0, 'B', 1),
 ('B', 1, 'A', 0),
 ('B', 1, 'A', 1),
 ('B', 1, 'B', 0),
 ('B', 1, 'B', 1)]

In [385]:
from itertools import (combinations, 
                       combinations_with_replacement,
                       cycle,
                       pairwise,
                       permutations,
                       repeat)

In [386]:
from itertools import cycle as c, pairwise as p

In [387]:
cy = cycle('ABC')
list(islice(cy, 7))

['A', 'B', 'C', 'A', 'B', 'C', 'A']

In [388]:
list(pairwise(range(7)))

[(0, 1), (1, 2), (2, 3), (3, 4), (4, 5), (5, 6)]

In [389]:
rp = repeat(4, 5)
list(rp)

[4, 4, 4, 4, 4]

In [390]:
list(map(operator.mul, range(11), repeat(5)))

[0, 5, 10, 15, 20, 25, 30, 35, 40, 45, 50]

In [391]:
list(combinations('ABC', 2))

[('A', 'B'), ('A', 'C'), ('B', 'C')]

In [392]:
list(combinations_with_replacement('ABC', 2))

[('A', 'A'), ('A', 'B'), ('A', 'C'), ('B', 'B'), ('B', 'C'), ('C', 'C')]

In [393]:
list(permutations('ABC', 2))

[('A', 'B'), ('A', 'C'), ('B', 'A'), ('B', 'C'), ('C', 'A'), ('C', 'B')]

-------------

In [394]:
from itertools import groupby, tee

In [395]:
list(groupby('LLLLAAGGG'))

[('L', <itertools._grouper at 0x1c393d31600>),
 ('A', <itertools._grouper at 0x1c393d33370>),
 ('G', <itertools._grouper at 0x1c393d334f0>)]

In [396]:
for char, group in groupby('LLLLAAGGG'):
    print(char, list(group))

L ['L', 'L', 'L', 'L']
A ['A', 'A']
G ['G', 'G', 'G']


In [397]:
animals = ['duck', 'eagle', 'rat', 'giraffe', 'bear', 'bat', 'dolphin', 'shark', 'lion']
animals.sort(key=len)
animals

['rat', 'bat', 'duck', 'bear', 'lion', 'eagle', 'shark', 'giraffe', 'dolphin']

In [398]:
for length, group in groupby(animals, len):
    print(length, list(group))

3 ['rat', 'bat']
4 ['duck', 'bear', 'lion']
5 ['eagle', 'shark']
7 ['giraffe', 'dolphin']


In [399]:
for length, group in groupby(reversed(animals), len):
    print(length, list(group))

7 ['dolphin', 'giraffe']
5 ['shark', 'eagle']
4 ['lion', 'bear', 'duck']
3 ['bat', 'rat']


In [400]:
list(tee('ABC', 3))

[<itertools._tee at 0x1c393ccb6c0>,
 <itertools._tee at 0x1c394bddc80>,
 <itertools._tee at 0x1c394bdd2c0>]

In [401]:
g1, g2 = tee('ABC')
n = next

In [402]:
n(g1), n(g2), n(g2), list(g1), list(g2)

('A', 'A', 'B', ['B', 'C'], ['C'])

In [403]:
list(zip(*tee('ABC')))

[('A', 'A'), ('B', 'B'), ('C', 'C')]

In [404]:
g = (n for n in [0, .0, 7, 8]) # generator expression
any(g), next(g) #as any iterates over g looking for truthy values it will also change its state

(True, 8)

In [405]:
def chain(*iterables):
    for it in iterables:
        for i in it:
            yield i
            
def chain(*iterables):
    for i in iterables:
        yield from i

In [406]:
sr = 'ABC', range(3)

list(chain(*sr))

['A', 'B', 'C', 0, 1, 2]

In [407]:
def sub_gen():
    yield 1.1
    yield 1.2

def gen():
    yield 1
    for i in sub_gen():
        yield i
    yield 2
    
    
list(gen())

[1, 1.1, 1.2, 2]

In [408]:
#the yield from expression allows a generator to delegate work to a subgenerator

def sub_gen():
    yield 1.1
    yield 1.2

def gen():
    yield 1
    yield from sub_gen()
    yield 2

list(gen())

[1, 1.1, 1.2, 2]

### Traversing a Tree

In [409]:
def tree(cls):
    yield cls.__name__
    
def display(cls):
    for cls_name in tree(cls):
        print(cls_name)

display(BaseException)    

BaseException


In [410]:
def tree(cls):
    yield cls.__name__, 0
    for sub_cls in cls.__subclasses__():
        yield sub_cls.__name__, 1
        
def display(cls):
    for cls_name, level in tree(cls):
        indent = ' ' * 4 * level
        print('{}{}'.format(indent, cls_name))

display(BaseException)

BaseException
    BaseExceptionGroup
    Exception
    GeneratorExit
    KeyboardInterrupt
    SystemExit
    CancelledError
    AbortThread


In [411]:
def tree(cls):
    yield cls.__name__, 0
    yield from sub_tree(cls)
    
def sub_tree(cls):
    for sub_cls in cls.__subclasses__():
        yield sub_cls.__name__, 1
        for sub_sub_cls in sub_cls.__subclasses__():
            yield sub_sub_cls.__name__, 2
            
def display(cls):
    for cls_name, level in tree(cls):
        indent = ' ' * 4 * level
        print('{}{}'.format(indent, cls_name))
        
#display(BaseException)

In [412]:
def tree(cls):
    yield cls.__name__, 0
    yield from sub_tree(cls)
    
def sub_tree(cls):
    for sub_cls in cls.__subclasses__():
        yield sub_cls.__name__, 1
        for sub_sub_cls in sub_cls.__subclasses__():
            yield sub_sub_cls.__name__, 2
            for sub_sub_sub_cls in sub_sub_cls.__subclasses__():
                yield sub_sub_sub_cls.__name__, 3
            
def display(cls):
    for cls_name, level in tree(cls):
        indent = ' ' * 4 * level
        print('{}{}'.format(indent, cls_name))

#display(BaseException)

In [413]:
#can traverse trees of any depth

def tree(cls):
    yield cls.__name__, 0
    yield from sub_tree(cls, 1)
    
def sub_tree(cls, level):
    for sub_cls in cls.__subclasses__():
        yield sub_cls.__name__, level
        yield from sub_tree(sub_cls, level+1)
            
def display(cls):
    for cls_name, level in tree(cls):
        indent = ' ' * 4 * level
        print('{}{}'.format(indent, cls_name))

#display(BaseException)

In [8]:
def tree(cls, level=0):
    yield cls.__name__, level
    for sub_cls in cls.__subclasses__():
        yield from tree(sub_cls, level + 1)

def display(directory):
    for item_name, level in tree(directory):
        indent = ' ' * 4 * level
        print('{}{}'.format(indent, item_name))
        
display(ArithmeticError)

ArithmeticError
    FloatingPointError
    OverflowError
    ZeroDivisionError
        DivisionByZero
        DivisionUndefined
    DecimalException
        Clamped
        Rounded
            Underflow
            Overflow
        Inexact
            Underflow
            Overflow
        Subnormal
            Underflow
        DivisionByZero
        FloatOperation
        InvalidOperation
            ConversionSyntax
            DivisionImpossible
            DivisionUndefined
            InvalidContext


In [9]:
import collections as c
display(c.abc.Collection)

Collection
    Set
        MutableSet
        KeysView
            _OrderedDictKeysView
        ItemsView
            _OrderedDictItemsView
    Mapping
        MutableMapping
            _Environ
            ChainMap
            UserDict
            WeakValueDictionary
            WeakKeyDictionary
            SignalDict
            RawConfigParser
                ConfigParser
                    SafeConfigParser
                    CaseSensitiveConfigParser
            SectionProxy
            ConverterMapping
            PickleShareDB
        _SelectorMapping
        UsedNamesMapping
    ValuesView
        _OrderedDictValuesView
    Sequence
        ByteString
        MutableSequence
            UserList
        UserString
        _PathParents
        numeric_range
        SequenceView


In [17]:
display(builtins.object)

object
    type


TypeError: unbound method type.__subclasses__() needs an argument

In [416]:
import os

def tree(directory, level=0):
    yield directory, level
    if os.path.isdir(directory): 
        for item in os.listdir(directory):
            item_path = os.path.join(directory, item)
            yield from tree(item_path, level + 1)

def display(directory):
    for item_name, level in tree(directory):
        indent = ' ' * 4 * level
        print('{}{}'.format(indent, os.path.basename(item_name)))

        
#traverses directory path 
display(r'C:\Users\jensm\Desktop\local\GitHub\pycharm_projects\vanilla_javascript')

vanilla_javascript
    hallo
        regular_expr.js
        template.html
        vanilla_javascript.js
    p5
        code.js
        code2.js
        index.html
        p5.js


In [417]:
def tree_to_dict(cls):
    subclasses = cls.__subclasses__()
    if not subclasses:
        return {cls.__name__: None}
    else:
        tree_dict = {cls.__name__: {}}
        for sub_cls in subclasses:
            tree_dict[cls.__name__].update(tree_to_dict(sub_cls))
        return tree_dict


dict1 = tree_to_dict(BaseException)
dict1['BaseException']['Exception']['ArithmeticError']['ZeroDivisionError']

{'DivisionByZero': None, 'DivisionUndefined': None}

In [4]:
import builtins

In [22]:
builtins.object.__subclasses__()

[type,
 async_generator,
 bytearray_iterator,
 bytearray,
 bytes_iterator,
 bytes,
 builtin_function_or_method,
 callable_iterator,
 PyCapsule,
 cell,
 classmethod_descriptor,
 classmethod,
 code,
 complex,
 _contextvars.Token,
 _contextvars.ContextVar,
 _contextvars.Context,
 coroutine,
 dict_items,
 dict_itemiterator,
 dict_keyiterator,
 dict_valueiterator,
 dict_keys,
 mappingproxy,
 dict_reverseitemiterator,
 dict_reversekeyiterator,
 dict_reversevalueiterator,
 dict_values,
 dict,
 ellipsis,
 enumerate,
 filter,
 float,
 frame,
 frozenset,
 function,
 generator,
 getset_descriptor,
 PyHKEY,
 instancemethod,
 list_iterator,
 list_reverseiterator,
 list,
 longrange_iterator,
 int,
 map,
 member_descriptor,
 memoryview,
 method_descriptor,
 method,
 moduledef,
 module,
 odict_iterator,
 pickle.PickleBuffer,
 property,
 range_iterator,
 range,
 reversed,
 symtable entry,
 iterator,
 set_iterator,
 set,
 slice,
 staticmethod,
 stderrprinter,
 super,
 traceback,
 tuple_iterator,
 tuple,

In [25]:
import types

In [28]:
types.AsyncGeneratorType

async_generator