## Set intersection performance

In [86]:
from typing import Iterator, List

import numpy as np

def gen_random_set(set_len, choice_len=10000):
    """Draw `set_len` distinct integers from ``range(choice_len)`` as a set.

    Sampling is done without replacement, so the returned set always has
    exactly `set_len` members.

    Args:
        set_len: number of elements in the resulting set.
        choice_len: exclusive upper bound of the integer pool to draw from.

    Returns:
        A set of NumPy integers in ``[0, choice_len)``.
    """
    drawn = np.random.choice(choice_len, size=set_len, replace=False)
    return set(drawn)
    

def gen_sets(set_count: int, set_len=100, choice_len=10000) -> list:
    """Build a list of `set_count` independently generated random sets.

    Args:
        set_count: how many sets to generate (passed to ``range``, so it
            must be an integer).
        set_len: number of elements in each set.
        choice_len: exclusive upper bound of the integer pool each set is
            drawn from.

    Returns:
        A list with `set_count` sets, each produced by `gen_random_set`.
    """
    return [gen_random_set(set_len, choice_len)
            for _ in range(set_count)]


In [8]:
# Generate five sets of ten elements each and display them.
gen_sets(5, set_len=10)

[{227, 836, 2534, 3080, 4245, 5120, 6351, 7501, 7907, 8185},
 {2465, 2706, 3831, 4123, 4573, 4924, 5462, 5702, 5851, 6507},
 {1716, 1781, 1888, 2540, 4078, 4205, 4572, 5035, 7354, 9465},
 {56, 956, 2042, 2699, 3597, 3841, 4990, 6283, 6897, 7677},
 {1639, 2118, 4285, 4311, 4886, 5012, 5177, 6857, 7297, 8880}]

In [65]:
# Benchmark input: 100,000 sets of 100 distinct values drawn from range(500).
sets = gen_sets(100000, set_len=100, choice_len=500)
# Reference set, drawn the same way, that every generated set is intersected with.
test_set = gen_random_set(100, choice_len=500)

In [66]:
def process_sets(sets: list) -> list:
    """Intersect every set in `sets` with the global `test_set`.

    Only intersections with more than 20 elements are kept.

    Args:
        sets: sets to compare against the module-level `test_set`.

    Returns:
        The qualifying intersections, in the order of their source sets.
    """
    intersections = []
    for s in sets:
        intersection = s & test_set
        if len(intersection) > 20:
            intersections.append(intersection)
    # Hand the result back; without this the collected list is discarded.
    return intersections

In [80]:
%time process_sets(sets)

Wall time: 1.02 s


### With numpy matrices

In [60]:
# Matrix layout: 100,000 rows of 100 values each, drawn from range(500).
cols = 100
rows = 100000
# NOTE(review): np.random.choice samples WITH replacement unless replace=False
# is passed, so rows (and tst) may contain duplicate values, unlike the sets
# built by gen_random_set -- confirm this is acceptable for the comparison.
arr = np.random.choice(cols * 5, cols * rows).reshape(rows, cols)
tst = np.random.choice(cols * 5, cols)
arr

array([[218, 352, 207, ..., 150,   6,  61],
       [227, 326, 348, ...,  57, 448, 198],
       [127, 475, 439, ..., 161, 207,  89],
       ..., 
       [122,  32,  99, ..., 104, 489, 326],
       [231,  48, 337, ..., 474, 268, 271],
       [ 55, 252, 350, ..., 439, 322, 354]])

In [85]:
%%time

# Boolean mask: True wherever an element of arr also occurs in tst.
tf = np.isin(arr, tst)
# Number of matching elements in each row.
intersections = np.count_nonzero(tf, axis=1)
# Keep every row with more than 20 matches, converted to a set. Note that the
# whole row is stored, not only its matching elements.
active_sets = [set(row) for row in arr[intersections > 20]]

Wall time: 1.68 s


In [57]:
len(active_sets)

278

## Enums performance

In [3]:
from enum import Enum

# Plain integer constants; the baseline that the Enum member lookups are timed against.
COLOR_RED = 1
COLOR_GREEN = 2
COLOR_BLUE = 3

class Color(Enum):
    """Enum counterpart of the COLOR_* integer constants, with the same values."""
    red = 1
    green = 2
    blue = 3

In [17]:
%%timeit
# Timed body: three member lookups on the Color enum.
color1 = Color.red
color2 = Color.green
color3 = Color.blue

The slowest run took 25.82 times longer than the fastest. This could mean that an intermediate result is being cached.
1000000 loops, best of 3: 351 ns per loop


In [16]:
%%timeit
# Timed body: three reads of the plain integer constants.
color1 = COLOR_RED
color2 = COLOR_GREEN
color3 = COLOR_BLUE

The slowest run took 36.00 times longer than the fastest. This could mean that an intermediate result is being cached.
10000000 loops, best of 3: 48.8 ns per loop


## Iterators

In [4]:
def generator(count: int=10):
    """Yield the integers 0, 1, ..., count - 1, one at a time.

    Args:
        count: how many consecutive integers to produce, starting at 0.
    """
    numbers = range(count)
    for number in numbers:
        yield number

In [5]:
list(generator())

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]

In [8]:
def letter_to_note(letter):
    """Map a letter to its zero-based position in the alphabet, ignoring case.

    'a'/'A' -> 0, 'b'/'B' -> 1, ..., 'z'/'Z' -> 25. Characters outside
    a-z produce values outside the 0..25 range.
    """
    lowered = letter.lower()
    return ord(lowered) - ord('a')

def take_phrase_generator(text: str, phrase_len: int) -> Iterator[List[int]]:
    """Lazily yield every window of `phrase_len` consecutive letters of `text`.

    Windows start at offsets 0, 1, 2, ... and include the final window that
    ends on the last character of `text`. Each window is converted to a list
    of note numbers via `letter_to_note`.

    Args:
        text: the source string to slide over.
        phrase_len: number of letters per window.

    Yields:
        One list of `phrase_len` note numbers per window. Nothing is yielded
        when `text` is shorter than `phrase_len`.
    """
    # +1 so the window ending at the last character is not dropped.
    for i in range(len(text) - phrase_len + 1):
        yield [letter_to_note(letter)
               for letter in text[i:i + phrase_len]]

In [9]:
# Sample input: a lowercase Latin-letter string without spaces.
text = 'ucheniteotkrilicherazlikatasedlzhinamikrobitekoitosekriiatvnaskhorataspodeliatsamoedniisshchimikrobiavsichkiostanalisastrogoindividualni'
# Generator of 5-letter windows; nothing is computed until next() is called.
take_phrase = take_phrase_generator(text, 5)
# Pull and print only the first ten windows.
for i in range(10):
    print(next(take_phrase))

[20, 2, 7, 4, 13]
[2, 7, 4, 13, 8]
[7, 4, 13, 8, 19]
[4, 13, 8, 19, 4]
[13, 8, 19, 4, 14]
[8, 19, 4, 14, 19]
[19, 4, 14, 19, 10]
[4, 14, 19, 10, 17]
[14, 19, 10, 17, 8]
[19, 10, 17, 8, 11]
