# 성능과 최적화

## dict의 키 존재 여부 확인 

In [3]:
## try/except로 KeyError를 처리하며 조회 (비효율)

def get_fruit(basket, fruit):
    """Look up ``fruit`` in ``basket`` and return its value.

    Args:
        basket: Mapping from fruit name to a value (a count in the example).
        fruit: Key to look up.

    Returns:
        The stored value, or ``None`` when ``fruit`` is not a key.
    """
    # EAFP lookup: a missing key surfaces as KeyError and is mapped to None.
    try:
        return basket[fruit]
    except KeyError:
        return None


basket = {'apple': 2}
# None is a singleton, so compare by identity rather than with ==.
assert get_fruit(basket, 'orange') is None

In [5]:
## .get 메소드 사용 (효율적)

basket = {'apple':2}
basket.get('orange', '3')    # 'orange' is not a key, so the supplied default '3' is returned

'3'

## set을 사용해서 존재여부 확인

In [6]:
### list 사용 (비효율)

def has_invalid_fields(fields):
    """Report whether ``fields`` holds anything other than 'foo' or 'bar'.

    Every membership test scans a list, which is the slow approach this
    example is meant to illustrate.

    Args:
        fields: Iterable of field names to check.

    Returns:
        True as soon as one name outside the allowed pair is met,
        False when every name is allowed (or ``fields`` is empty).
    """
    return any(name not in ['foo', 'bar'] for name in fields)

fields = ['a','foo','b','bar','c']
has_invalid_fields(fields)  # 'a' is not an allowed name, so the check stops at the first item

True

In [9]:
### set 사용 (효율적)

def has_invalid_fields(fields):
    """Report whether ``fields`` holds anything other than 'foo' or 'bar'.

    Args:
        fields: Iterable of hashable field names to check.

    Returns:
        True when at least one name is outside the allowed pair,
        False otherwise (including for empty input).
    """
    # The set difference keeps only the disallowed names; the set literal
    # avoids building a throwaway list just to feed set().
    return bool(set(fields) - {'foo', 'bar'})

fields = ['a','foo','b','bar','c']
has_invalid_fields(fields)  # 'a', 'b' and 'c' survive the set difference, so the result is True

True

## default dict 사용

In [10]:
import collections

def add_animal(species, animal, family):
    """Record ``animal`` as a member of ``family`` inside ``species``.

    Args:
        species: Mapping from family name to a set of animals. The family
            is indexed directly, so the mapping has to supply a set for
            families it has not seen (as a ``defaultdict(set)`` does).
        animal: Animal to add.
        family: Family the animal belongs to.
    """
    members = species[family]
    members.add(animal)

species = collections.defaultdict(set)   # unseen families start out as an empty set
add_animal(species, 'cat', 'felidea')    # no KeyError: species['felidea'] is created on first access

## 프로파일링으로 내부 동작 이해하기

In [None]:
python -m cProfile -o xxx.cprof XXX.py
pyprof2calltree -k -i xxx.cprof 

## dis — Disassembler for Python bytecode

In [11]:
def x():  # minimal function used to show what dis prints
    return 42

import dis
dis.dis(x)  # prints the bytecode of x: load the constant 42, then return it

  2           0 LOAD_CONST               1 (42)
              2 RETURN_VALUE


In [16]:
abc = ('a', 'b', 'c')  # module-level tuple, so the functions below reach it through a global lookup

def concat_a_1():
    for letter in abc:
        abc[0] + letter  # looks up the global abc and indexes it on every iteration

def concat_a_2():
    a = abc[0]   # more efficient: the global lookup and indexing happen once, before the loop
    for letter in abc:
        a + letter  # only local variables are read inside the loop

In [17]:
dis.dis(concat_a_1)  # the loop body repeats LOAD_GLOBAL + BINARY_SUBSCR on each iteration

  4           0 SETUP_LOOP              24 (to 26)
              2 LOAD_GLOBAL              0 (abc)
              4 GET_ITER
        >>    6 FOR_ITER                16 (to 24)
              8 STORE_FAST               0 (letter)

  5          10 LOAD_GLOBAL              0 (abc)
             12 LOAD_CONST               1 (0)
             14 BINARY_SUBSCR
             16 LOAD_FAST                0 (letter)
             18 BINARY_ADD
             20 POP_TOP
             22 JUMP_ABSOLUTE            6
        >>   24 POP_BLOCK
        >>   26 LOAD_CONST               0 (None)
             28 RETURN_VALUE


In [18]:
dis.dis(concat_a_2)  # abc[0] is stored in a once before the loop; the loop body uses only LOAD_FAST

  8           0 LOAD_GLOBAL              0 (abc)
              2 LOAD_CONST               1 (0)
              4 BINARY_SUBSCR
              6 STORE_FAST               0 (a)

  9           8 SETUP_LOOP              20 (to 30)
             10 LOAD_GLOBAL              0 (abc)
             12 GET_ITER
        >>   14 FOR_ITER                12 (to 28)
             16 STORE_FAST               1 (letter)

 10          18 LOAD_FAST                0 (a)
             20 LOAD_FAST                1 (letter)
             22 BINARY_ADD
             24 POP_TOP
             26 JUMP_ABSOLUTE           14
        >>   28 POP_BLOCK
        >>   30 LOAD_CONST               0 (None)
             32 RETURN_VALUE


### 함수 내의 함수 구현하면 비효율적 (호출할 때마다 MAKE_FUNCTION으로 내부 함수를 새로 만듦)

In [19]:
def x():
    def y():  # y uses nothing from x, yet it is rebuilt every time x() runs
        return 42
    return y()

In [20]:
dis.dis(x)  # MAKE_FUNCTION is part of x's body, so y is created anew on each call

  2           0 LOAD_CONST               1 (<code object y at 0x7fd05057bc90, file "<ipython-input-19-11b7e491a2dc>", line 2>)
              2 LOAD_CONST               2 ('x.<locals>.y')
              4 MAKE_FUNCTION            0
              6 STORE_FAST               0 (y)

  4           8 LOAD_FAST                0 (y)
             10 CALL_FUNCTION            0
             12 RETURN_VALUE


### 함수 내의 클로저 함수인 경우는 괜찮음 

In [21]:
def x(a):
    def y():  # y reads a from the enclosing call, so it is built per call as a closure
        return a
    return y()

In [22]:
dis.dis(x)  # LOAD_CLOSURE + BUILD_TUPLE package the cell for a; MAKE_FUNCTION flag 8 attaches it to y

  2           0 LOAD_CLOSURE             0 (a)
              2 BUILD_TUPLE              1
              4 LOAD_CONST               1 (<code object y at 0x7fd05425a030, file "<ipython-input-21-c8c13f2768ff>", line 2>)
              6 LOAD_CONST               2 ('x.<locals>.y')
              8 MAKE_FUNCTION            8
             10 STORE_FAST               1 (y)

  4          12 LOAD_FAST                1 (y)
             14 CALL_FUNCTION            0
             16 RETURN_VALUE


## 정렬된 리스트와 bisect

* 탐색 시간 : O(n) => O(log n)
* 항목을 삽입해야 하는 위치 리턴

In [38]:
import bisect

mylist = sorted(['b', 'a', 'c'])  # bisect only gives meaningful results on an already sorted list
mylist

['a', 'b', 'c']

In [39]:
bisect.bisect(mylist, 'b')   # same as bisect_right: insertion point after the existing 'b'

2

In [40]:
bisect.bisect_left(mylist, 'b')  # insertion point before the existing 'b', i.e. its index

1

In [41]:
bisect.bisect(mylist, 'c')  # 'c' is the last item, so the point after it is len(mylist)

3

In [42]:
bisect.bisect(mylist, 'd')  # 'd' is absent and sorts after every item: insert at the end

3

In [43]:
bisect.bisect_left(mylist, 'd')  # for an absent value bisect_left and bisect give the same index

3

## 네임드튜플과 슬롯

In [3]:
class Point:
    """Plain two-coordinate point.

    The class declares no ``__slots__``, so instances keep their
    attributes in a per-instance ``__dict__`` and accept new ones.
    """

    def __init__(self, x, y):
        """Store the ``x`` and ``y`` coordinates on the instance."""
        self.x, self.y = x, y
        
p = Point(1,2)
p.z = 3  # allowed because Point defines no __slots__: any attribute name can be added
p.__dict__  # all instance attributes, including the ad-hoc z, live in this per-instance dict

{'x': 1, 'y': 2, 'z': 3}

In [6]:
# 클래스 객체는 메모리 비효율적

!cat ch10/object.py

from memory_profiler import profile 
class Point(object):  # plain class without __slots__: every instance carries its own __dict__
    def __init__(self, x):
        self.x = x 

@profile  # memory_profiler reports line-by-line memory usage for main()
def main():
    p = [Point(42) for i in range(100000)]  # 100000 live instances make the per-object cost measurable

if __name__ == "__main__":
    main()
  


In [None]:
$ python -m memory_profiler ch10/object.py 
Filename: ch10/object.py

Line #    Mem usage    Increment  Occurences   Line Contents
============================================================
     6     36.9 MiB     36.9 MiB           1   @profile
     7                                         def main():
     8     54.3 MiB     17.4 MiB      100003       p = [Point(42) for i in range(100000)]


In [7]:
# __slots__ 속성 사용을 통한 메모리 절감

!cat ch10/slot.py

from memory_profiler import profile 
class Point(object):

    __slots__ = ('x', )  # only x is allowed; instances get no per-instance __dict__

    def __init__(self, x):
        self.x = x 

@profile  # memory_profiler reports line-by-line memory usage for main()
def main():
    p = [Point(42) for i in range(100000)]  # same instance count as the plain-class script, so the figures compare

if __name__ == "__main__":
    main()
  


In [None]:
$ python  -m memory_profiler ch10/slot.py 
Filename: ch10/slot.py

Line #    Mem usage    Increment  Occurences   Line Contents
============================================================
     9     37.2 MiB     37.2 MiB           1   @profile
    10                                         def main():
    11     43.1 MiB      5.9 MiB      100003       p = [Point(42) for i in range(100000)]


In [8]:
# namedtuple을 사용한 메모리 절감 
!cat ch10/namedtuple.py

from memory_profiler import profile 
import collections


@profile  # memory_profiler reports line-by-line memory usage for main()
def main():

    Point = collections.namedtuple('Point', ['x'])  # tuple subclass with a single named field

    p = [Point(42) for i in range(100000)]  # same instance count as the other Point scripts, so the figures compare

if __name__ == "__main__":
    main()
  


In [None]:
$ python  -m memory_profiler ch10/namedtuple.py 
Filename: ch10/namedtuple.py

Line #    Mem usage    Increment  Occurences   Line Contents
============================================================
     5     37.0 MiB     37.0 MiB           1   @profile
     6                                         def main():
     7                                         
     8     37.0 MiB      0.0 MiB           1       Point = collections.namedtuple('Point', ['x'])
     9                                         
    10     44.3 MiB      7.4 MiB      100003       p = [Point(42) for i in range(100000)]


## 메모이제이션 

* https://docs.python.org/3/library/functools.html

In [11]:
# 순수 함수일 경우에만 캐시

import math
_SIN_CACHE = {}  # argument -> previously computed math.sin(argument)


def memoized_sin(x):
    """Return ``math.sin(x)``, reusing an earlier result when one exists.

    Results are remembered in the module-level ``_SIN_CACHE`` dict, which
    is never trimmed. A repeated argument prints ``cache hit`` before the
    stored value is returned.

    Args:
        x: Hashable number to take the sine of.

    Returns:
        The sine of ``x`` as computed by ``math.sin``.
    """
    if x in _SIN_CACHE:
        print('cache hit')
        return _SIN_CACHE[x]
    value = math.sin(x)
    _SIN_CACHE[x] = value
    return value

In [12]:
memoized_sin(1)

0.8414709848078965

In [13]:
memoized_sin(1)

cache hit


0.8414709848078965

In [20]:
# functools.lru_cache를 사용한 메모이제이션

import functools
import math
@functools.lru_cache(maxsize=2)
def memoized_sin(x):
    """Return ``math.sin(x)``; the two most recently used results are cached.

    Args:
        x: Hashable number to take the sine of.

    Returns:
        The sine of ``x`` as computed by ``math.sin``.
    """
    sine = math.sin(x)
    return sine

In [21]:
memoized_sin(1)

0.8414709848078965

In [22]:
memoized_sin.cache_info()

CacheInfo(hits=0, misses=1, maxsize=2, currsize=1)

In [23]:
memoized_sin(1)

0.8414709848078965

In [24]:
memoized_sin.cache_info()

CacheInfo(hits=1, misses=1, maxsize=2, currsize=1)

In [25]:
memoized_sin.cache_clear()

In [26]:
memoized_sin.cache_info()

CacheInfo(hits=0, misses=0, maxsize=2, currsize=0)

## 버퍼 프로토콜로 제로 복사하기 

* memoryview

In [34]:
%%writefile ch10/memscript.py

from memory_profiler import profile 

@profile  # memory_profiler reports line-by-line memory usage for read_random()
def read_random():
    with open('/dev/urandom', 'rb') as source:
        content = source.read(1024*10000)
        content_to_write = content[1024:]   # <--- slicing bytes actually copies the data
    print('Content length: %d, content to write length: %d' % (len(content), len(content_to_write)))
    
    with open('/dev/null', 'wb') as target:
        target.write(content_to_write)
        
if __name__ == "__main__":
    read_random()        

Overwriting ch10/memscript.py


In [None]:
$> python -m memory_profiler ch10/memscript.py 
Content length: 10240000, content to write length: 10238976
Filename: ch10/memscript.py

Line #    Mem usage    Increment  Occurences   Line Contents
============================================================
     4     37.0 MiB     37.0 MiB           1   @profile
     5                                         def read_random():
     6     37.0 MiB      0.0 MiB           1       with open('/dev/urandom', 'rb') as source:
     7     46.5 MiB      9.5 MiB           1           content = source.read(1024*10000)
     8     56.3 MiB      9.8 MiB           1           content_to_write = content[1024:]   # <--- 실제는 복사
                 ....

### memoryview로 원본 버퍼를 참조한 뒤 슬라이싱하면 복사 없이(제로 복사) 일부 구간을 다룰 수 있음


In [35]:
s = b'abcdefgh'
view = memoryview(s)  # a view onto s's buffer; the bytes are not copied

In [36]:
view[1]  # indexing a byte view gives the integer value of that byte (98 is b'b')

98

In [37]:
limited = view[1:3]  # slicing a memoryview yields another view, not a copy
limited

<memory at 0x7fbecd7717c8>

In [39]:
bytes(limited)  # converting to bytes is the point where the two selected bytes are copied

b'bc'

In [40]:
%%writefile ch10/copy-memoryview.py

from memory_profiler import profile 

@profile  # memory_profiler reports line-by-line memory usage for read_random()
def read_random():
    with open('/dev/urandom', 'rb') as source:
        content = source.read(1024*10000)
        content_to_write = memoryview(content)[1024:]   # <--- zero copy: a view that skips the first 1024 bytes
    print('Content length: %d, content to write length: %d' % (len(content), len(content_to_write)))
    
    with open('/dev/null', 'wb') as target:
        target.write(content_to_write)
        
if __name__ == "__main__":
    read_random()      

Writing ch10/copy-memoryview.py


In [None]:
$> python -m memory_profiler ch10/copy-memoryview.py
Content length: 10240000, content to write length: 10238976
Filename: ch10/copy-memoryview.py

Line #    Mem usage    Increment  Occurences   Line Contents
============================================================
     4     37.0 MiB     37.0 MiB           1   @profile
     5                                         def read_random():
     6     37.0 MiB      0.0 MiB           1       with open('/dev/urandom', 'rb') as source:
     7     46.5 MiB      9.5 MiB           1           content = source.read(1024*10000)
     8     46.5 MiB      0.0 MiB           1           content_to_write = memoryview(content)[1024:]   # <--- 제로 복사
                      ......

In [45]:
%%writefile ch10/bytearray.py

from memory_profiler import profile 

@profile  # memory_profiler reports line-by-line memory usage for read_random()
def read_random():
    ba = bytearray(1024*10000)  # pre-allocated, writable, zero-filled buffer of the full size
    ba_at_1024 = memoryview(ba)[1024:]  # view starting at offset 1024; shares ba's memory
    
    with open('/dev/urandom', 'rb') as source:
        source.readinto(ba_at_1024)  # fills the buffer in place through the view; the first 1024 bytes of ba stay zero
        content = ba
        content_to_write = ba_at_1024   # <--- zero copy
    print('Content length: %d, content to write length: %d' % (len(content), len(content_to_write)))
    
    with open('/dev/null', 'wb') as target:
        target.write(content_to_write)
        
if __name__ == "__main__":
    read_random()  

Writing ch10/bytearray.py


In [None]:
$> python -m memory_profiler ch10/bytearray.py
Content length: 10240000, content to write length: 10238976
Filename: ch10/bytearray.py

Line #    Mem usage    Increment  Occurences   Line Contents
============================================================
     4     36.9 MiB     36.9 MiB           1   @profile
     5                                         def read_random():
     6     46.5 MiB      9.5 MiB           1       ba = bytearray(1024*10000)
     7     46.5 MiB      0.0 MiB           1       ba_at_1024 = memoryview(ba)[1024:]
     8                                             
     9     46.5 MiB      0.0 MiB           1       with open('/dev/urandom', 'rb') as source:
    10     46.5 MiB      0.0 MiB           1           source.readinto(ba_at_1024)
    11     46.5 MiB      0.0 MiB           1           content = ba
    12     46.5 MiB      0.0 MiB           1           content_to_write = ba_at_1024   # <--- 제로 복사
                  .........  