## 8. Robustness and Performance

### 72 Consider Searching Sorted Sequences with `bisect`

In [1]:
# Sorted list of 100,000 consecutive integers, so each value equals its
# own position in the list.
data = [*range(10**5)]
# list.index scans from the front for the first element equal to the
# argument.
index = data.index(91234)
assert index == 91234

In [2]:
def find_closest(sequence, goal):
    """Return the index of the first element strictly greater than goal.

    The sequence is scanned front to back, one element at a time.

    Raises:
        ValueError: if no element of sequence is greater than goal
            (including when sequence is empty).
    """
    # None cannot be a real position, so it marks "nothing found".
    position = next(
        (i for i, item in enumerate(sequence) if goal < item),
        None,
    )
    if position is None:
        raise ValueError(f'{goal} is out of bounds')
    return position

In [3]:
# 91234.56 is not in the list, so find_closest returns the position of
# the first larger element, 91235.
index = find_closest(data, 91234.56)
assert index == 91235

In [4]:
# A goal larger than every element exhausts find_closest's loop, so the
# call must raise ValueError.
try:
    find_closest(data, 100000000)
except ValueError:
    pass  # Expected: no element is greater than the goal
else:
    assert False  # Reached only if no exception was raised

In [5]:
from bisect import bisect_left

In [6]:
# bisect_left binary-searches the sorted list and returns the leftmost
# position at which the value could be inserted while keeping it sorted.
index = bisect_left(data, 91234)     # Exact match: position of the value itself
assert index == 91234

index = bisect_left(data, 91234.56)  # Closest match: first element not less than it
assert index == 91235

In [7]:
import random

In [8]:
import timeit

In [9]:
size = 10**5
iterations = 1000

# Sorted search space: the integers 0 .. size - 1.
data = list(range(size))
# randrange(size) draws from 0 .. size - 1, so every lookup value is
# present in data. randint(0, size) would also include size itself,
# which is not in data and makes data.index() raise ValueError.
to_lookup = [random.randrange(size)
             for _ in range(iterations)]

In [10]:
def run_linear(data, to_lookup):
    """Look up each value of to_lookup in data via data.index.

    The positions found are discarded and nothing is returned.
    data.index raises ValueError for a value that is not present.
    """
    for target in to_lookup:
        data.index(target)

def run_bisect(data, to_lookup):
    """Look up each value of to_lookup in data via bisect_left.

    The insertion points found are discarded and nothing is returned.
    """
    for target in to_lookup:
        bisect_left(data, target)

In [11]:
# Time 10 executions of each search strategy over the same lookups.
# Passing globals() lets the stmt strings resolve run_linear, run_bisect,
# data and to_lookup from this module's namespace.
baseline = timeit.timeit(
    stmt='run_linear(data, to_lookup)',
    globals=globals(),
    number=10)
print(f'Linear search takes {baseline:.6f}s')

comparison = timeit.timeit(
    stmt='run_bisect(data, to_lookup)',
    globals=globals(),
    number=10)
print(f'Bisect search takes {comparison:.6f}s')

# Algebraically equal to baseline / comparison: how many times longer
# the linear search took than the bisect search.
slowdown = 1 + ((baseline - comparison) / comparison)
print(f'{slowdown:.1f}x time')

Linear search takes 7.155163s
Bisect search takes 0.006069s
1179.1x time


> - 리스트에 들어 있는 정렬된 데이터를 검색할 때 `index` 메서드를 사용하거나 `for` 루프에서 값을 하나씩 단순 비교하면 선형 시간이 걸린다.
> - `bisect` 내장 모듈의 `bisect_left` 함수는 정렬된 리스트에서 원하는 값을 찾는 데 로그 시간이 걸린다. 따라서 다른 접근 방법보다 훨씬 빠르다.