# Chapter 2. 프로파일링으로 병목 지점 찾기

## 2.3 전체 줄리아 집합 계산하기

**예제 2-1** 좌표공간을 위한 전역 상수 선언

In [2]:
""" Julia set generator without the PIL-based image drawing """
import time

# Region of the complex plane to compute
x1, x2, y1, y2 = -1.8, 1.8, -1.8, 1.8
# Real and imaginary parts of the constant c that is paired with every point
c_real, c_imag = -0.62772, -0.42193

**예제 2-2** 계산 함수에 입력으로 넘길 좌표 리스트 생성

In [3]:
def calc_pure_python(desired_width, max_iterations):
    """Build the complex coordinates (zs) and parameters (cs), then compute the Julia set.

    Args:
        desired_width: number of steps the x and y ranges are divided into.
        max_iterations: iteration cap passed to calculate_z_serial_purepython.

    Prints the grid size and the wall-clock time of the calculation, then
    asserts that the summed iteration counts match the known reference value.
    """
    step_x = (x2 - x1) / desired_width
    step_y = (y1 - y2) / desired_width

    # The axes are built by repeated addition (not multiplication) so the
    # floating-point values stay exactly the ones the checksum below expects.
    y = []
    position = y2
    while position > y1:
        y.append(position)
        position += step_y

    x = []
    position = x1
    while position < x2:
        x.append(position)
        position += step_x

    # Build the list of coordinates and the initial condition for each cell.
    # The initial condition is a constant and could easily be removed; it is
    # kept to simulate a real-world scenario with several inputs to the function.
    zs = [complex(real, imag) for imag in y for real in x]
    cs = [complex(c_real, c_imag) for _ in zs]

    print("Length of x:", len(x))
    print("Total elements:", len(zs))
    started = time.time()
    output = calculate_z_serial_purepython(max_iterations, zs, cs)
    secs = time.time() - started
    print(calculate_z_serial_purepython.__name__ + " took", secs, "seconds")

    # This sum is the expected value for a 1000^2 grid with 300 iterations;
    # it verifies that the calculation still behaves as intended.
    assert sum(output) == 33219980

**예제 2-3** CPU를 집중적으로 사용하는 계산 함수

In [4]:
def calculate_z_serial_purepython(maxiter, zs, cs):
    """Calculate the output list using the Julia update rule.

    Args:
        maxiter: maximum number of updates applied to any single point.
        zs: sequence of complex starting values.
        cs: sequence of complex constants, indexed in step with zs.

    Returns:
        A list with one entry per element of zs: the number of times
        z = z * z + c was applied while abs(z) < 2 and the count was below maxiter.
    """
    counts = [0] * len(zs)
    for idx, z in enumerate(zs):
        c = cs[idx]
        steps = 0
        # abs(z) is deliberately tested first, matching the baseline version
        while abs(z) < 2 and steps < maxiter:
            z = z * z + c
            steps += 1
        counts[idx] = steps
    return counts

**예제 2-4** 코드의 __main__

In [5]:
if __name__ == "__main__":
    # Compute the Julia set with the pure Python implementation, using
    # defaults that are reasonable for a laptop
    calc_pure_python(desired_width=1000, max_iterations=300)

Length of x: 1000
Total elements: 1000000
calculate_z_serial_purepython took 7.923752784729004 seconds


## 2.4 시간을 측정하는 간단한 방법: print와 데커레이터

**예제 2-5** 시간 측정 자동화를 위한 데커레이터 정의

In [9]:
"""Julia set generator with timing decorator"""
import time
from functools import wraps

# area of complex space to investigate
x1, x2, y1, y2 = -1.8, 1.8, -1.8, 1.8
# real and imaginary parts of the constant c that is paired with every point
c_real, c_imag = -0.62772, -.42193


def timefn(fn):
    """Decorator that prints the wall-clock duration of every call to fn.

    The wrapped function's return value is passed through unchanged. The
    timing line is printed only when the call returns normally.
    """
    @wraps(fn)
    def timed_call(*args, **kwargs):
        started = time.time()
        outcome = fn(*args, **kwargs)
        elapsed = time.time() - started
        print(f"@timefn: {fn.__name__} took {elapsed} seconds")
        return outcome
    return timed_call


@timefn
def calculate_z_serial_purepython(maxiter, zs, cs):
    """Calculate the output list using the Julia update rule.

    Args:
        maxiter: maximum number of updates applied to any single point.
        zs: sequence of complex starting values.
        cs: sequence of complex constants, indexed in step with zs.

    Returns:
        A list with one entry per element of zs: the number of times
        z = z * z + c was applied while abs(z) < 2 and the count was below maxiter.
    """
    counts = [0] * len(zs)
    for idx, z in enumerate(zs):
        c = cs[idx]
        steps = 0
        while abs(z) < 2 and steps < maxiter:
            z = z * z + c
            steps += 1
        counts[idx] = steps
    return counts


def calc_pure_python(draw_output, desired_width, max_iterations):
    """Build the complex coordinates (zs) and parameters (cs), then compute the Julia set.

    Args:
        draw_output: accepted for interface compatibility; this version never reads it.
        desired_width: number of steps the x and y ranges are divided into.
        max_iterations: iteration cap passed to calculate_z_serial_purepython.

    Prints the grid size and the wall-clock time of the calculation, then
    asserts that the summed iteration counts match the known reference value.
    """
    step_x = (x2 - x1) / desired_width
    step_y = (y1 - y2) / desired_width

    # The axes are built by repeated addition (not multiplication) so the
    # floating-point values stay exactly the ones the checksum below expects.
    y = []
    position = y2
    while position > y1:
        y.append(position)
        position += step_y

    x = []
    position = x1
    while position < x2:
        x.append(position)
        position += step_x

    # Build the list of coordinates and the initial condition for each cell.
    # The initial condition is a constant and could easily be removed; it is
    # kept to simulate a real-world scenario with several inputs to the function.
    zs = [complex(real, imag) for imag in y for real in x]
    cs = [complex(c_real, c_imag) for _ in zs]

    print("Length of x:", len(x))
    print("Total elements:", len(zs))
    started = time.time()
    output = calculate_z_serial_purepython(max_iterations, zs, cs)
    secs = time.time() - started
    print(calculate_z_serial_purepython.__name__ + " took", secs, "seconds")

    # This sum is the expected value for a 1000^2 grid with 300 iterations;
    # it verifies that the calculation still behaves as intended.
    assert sum(output) == 33219980


# Calculate the Julia set using a pure Python solution with
# reasonable defaults for a laptop
# set draw_output to True to use PIL to draw an image
# NOTE(review): calc_pure_python as defined in this cell never reads draw_output,
# so the flag can only matter in a variant that implements the drawing -- confirm
calc_pure_python(draw_output=False, desired_width=1000, max_iterations=300)

Length of x: 1000
Total elements: 1000000
@timefn: calculate_z_serial_purepython took 7.999523162841797 seconds
calculate_z_serial_purepython took 7.999523162841797 seconds


커맨드라인에서 timeit 모듈을 사용하면, 다음과 같이 출력된다.  
(julia1.calc_pure_python에 False 대신 True를 넣으면 이미지도 표시되어야 하는데, 우분투에서는 이미지가 뜨지 않았다. 원인을 아시는 분은 도움 부탁드립니다!)

<span style="font-family: monospace"> ~# python3 -m timeit -n 5 -r 1 -s "import julia1" "julia1.calc_pure_python(False, desired_width=1000, max_iterations=300)"<br>
Length of x: 1000<br>
Total elements: 1000000<br>
calculate_z_serial_purepython took 4.655411958694458 seconds <br>
Length of x: 1000<br>
Total elements: 1000000<br>
calculate_z_serial_purepython took 4.67050313949585 seconds<br>
Length of x: 1000<br>
Total elements: 1000000<br>
calculate_z_serial_purepython took 4.703853130340576 seconds<br>
Length of x: 1000<br>
Total elements: 1000000<br>
calculate_z_serial_purepython took 4.689031362533569 seconds<br>
Length of x: 1000<br>
Total elements: 1000000<br>
calculate_z_serial_purepython took 4.6835267543792725 seconds<br>
5 loops, best of 1: 4.96 sec per loop<br>
</span>

In [1]:
import julia1
%timeit julia1.calc_pure_python(True, desired_width=1000, max_iterations=300) # the first argument must be given as True or False

Length of x: 1000
Total elements: 1000000
calculate_z_serial_purepython took 8.46941590309143 seconds
300.0
Length of x: 1000
Total elements: 1000000
calculate_z_serial_purepython took 9.302115678787231 seconds
300.0
Length of x: 1000
Total elements: 1000000
calculate_z_serial_purepython took 9.113418102264404 seconds
300.0
Length of x: 1000
Total elements: 1000000
calculate_z_serial_purepython took 8.563599586486816 seconds
300.0
Length of x: 1000
Total elements: 1000000
calculate_z_serial_purepython took 8.551601886749268 seconds
300.0
Length of x: 1000
Total elements: 1000000
calculate_z_serial_purepython took 9.385418176651001 seconds
300.0
Length of x: 1000
Total elements: 1000000
calculate_z_serial_purepython took 9.170109033584595 seconds
300.0
Length of x: 1000
Total elements: 1000000
calculate_z_serial_purepython took 22.674567699432373 seconds
300.0
13.5 s ± 4.93 s per loop (mean ± std. dev. of 7 runs, 1 loop each)


## 2.5 유닉스 time 명령어를 이용한 간단한 시간 측정

<span style="font-family: monospace">~# /usr/bin/time -p python3 julia1_nopil.py<br>
Length of x: 1000<br>
Total elements: 1000000<br>
calculate_z_serial_purepython took 4.511674404144287 seconds<br>
real 4.79<br>
user 4.77<br>
sys 0.02</span>

<span style="font-family: monospace">~# /usr/bin/time --verbose python3 julia1_nopil.py<br>
Length of x: 1000<br>
Total elements: 1000000<br>
calculate_z_serial_purepython took 4.490870237350464 seconds<br>
&nbsp;&nbsp;&nbsp;&nbsp; Command being timed: "python3 julia1_nopil.py"<br>
&nbsp;&nbsp;&nbsp;&nbsp; User time (seconds): 4.73<br>
&nbsp;&nbsp;&nbsp;&nbsp; System time (seconds): 0.03<br>
&nbsp;&nbsp;&nbsp;&nbsp; Percent of CPU this job got: 100%<br>
&nbsp;&nbsp;&nbsp;&nbsp; Elapsed (wall clock) time (h:mm:ss or m:ss): 0:04.76<br>
&nbsp;&nbsp;&nbsp;&nbsp; Average shared text size (kbytes): 0<br>
&nbsp;&nbsp;&nbsp;&nbsp; Average unshared data size (kbytes): 0<br>
&nbsp;&nbsp;&nbsp;&nbsp; Average stack size (kbytes): 0<br>
&nbsp;&nbsp;&nbsp;&nbsp; Average total size (kbytes): 0<br>
&nbsp;&nbsp;&nbsp;&nbsp; Maximum resident set size (kbytes): 99540<br>
&nbsp;&nbsp;&nbsp;&nbsp; Average resident set size (kbytes): 0<br>
&nbsp;&nbsp;&nbsp;&nbsp; Major (requiring I/O) page faults: 0<br>
&nbsp;&nbsp;&nbsp;&nbsp; Minor (reclaiming a frame) page faults: 24087<br>
&nbsp;&nbsp;&nbsp;&nbsp; Voluntary context switches: 1<br>
&nbsp;&nbsp;&nbsp;&nbsp; Involuntary context switches: 6<br>
&nbsp;&nbsp;&nbsp;&nbsp; Swaps: 0<br>
&nbsp;&nbsp;&nbsp;&nbsp; File system inputs: 0<br>
&nbsp;&nbsp;&nbsp;&nbsp; File system outputs: 0<br>
&nbsp;&nbsp;&nbsp;&nbsp; Socket messages sent: 0<br>
&nbsp;&nbsp;&nbsp;&nbsp; Socket messages received: 0<br>
&nbsp;&nbsp;&nbsp;&nbsp; Signals delivered: 0<br>
&nbsp;&nbsp;&nbsp;&nbsp; Page size (bytes): 4096<br>
&nbsp;&nbsp;&nbsp;&nbsp; Exit status: 0</span>

## 2.6 cProfile 모듈 사용하기

In [8]:
~# python3 -m cProfile -s cumulative julia1_nopil.py
Length of x: 1000
Total elements: 1000000
calculate_z_serial_purepython took 7.267011880874634 seconds
         36221995 function calls in 7.703 seconds

   Ordered by: cumulative time

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
        1    0.000    0.000    7.703    7.703 {built-in method builtins.exec}
        1    0.026    0.026    7.703    7.703 julia1_nopil.py:1(<module>)
        1    0.321    0.321    7.677    7.677 julia1_nopil.py:23(calc_pure_python)
        1    5.299    5.299    7.267    7.267 julia1_nopil.py:9(calculate_z_serial_purepython)
 34219980    1.968    0.000    1.968    0.000 {built-in method builtins.abs}
  2002000    0.085    0.000    0.085    0.000 {method 'append' of 'list' objects}
        1    0.004    0.004    0.004    0.004 {built-in method builtins.sum}
        3    0.000    0.000    0.000    0.000 {built-in method builtins.print}
        4    0.000    0.000    0.000    0.000 {built-in method builtins.len}
        2    0.000    0.000    0.000    0.000 {built-in method time.time}
        1    0.000    0.000    0.000    0.000 {method 'disable' of '_lsprof.Profiler' objects}

IndentationError: unindent does not match any outer indentation level (<tokenize>, line 7)

커맨드라인에 다음을 입력하면 profile.stats라는 통계 파일이 생성된다.  
<span style="font-family: monospace">~# python3 -m cProfile -o profile.stats julia1_nopil.py</span>

In [4]:
import pstats
# Load the statistics file written by `python3 -m cProfile -o profile.stats ...`
p = pstats.Stats("profile.stats")

In [5]:
# Order the entries by cumulative time
p.sort_stats("cumulative")

<pstats.Stats at 0x207c5233f10>

In [6]:
# Print the statistics table using the current sort order
p.print_stats()

Mon Feb  7 13:51:51 2022    profile.stats

         36221995 function calls in 7.761 seconds

   Ordered by: cumulative time

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
        1    0.000    0.000    7.761    7.761 {built-in method builtins.exec}
        1    0.026    0.026    7.761    7.761 julia1_nopil.py:1(<module>)
        1    0.333    0.333    7.736    7.736 julia1_nopil.py:23(calc_pure_python)
        1    5.339    5.339    7.316    7.316 julia1_nopil.py:9(calculate_z_serial_purepython)
 34219980    1.977    0.000    1.977    0.000 {built-in method builtins.abs}
  2002000    0.082    0.000    0.082    0.000 {method 'append' of 'list' objects}
        1    0.004    0.004    0.004    0.004 {built-in method builtins.sum}
        3    0.000    0.000    0.000    0.000 {built-in method builtins.print}
        4    0.000    0.000    0.000    0.000 {built-in method builtins.len}
        2    0.000    0.000    0.000    0.000 {built-in method time.time}
      

<pstats.Stats at 0x207c5233f10>

In [9]:
# For each profiled function, list the functions that called it
p.print_callers()

   Ordered by: cumulative time

Function                                          was called by...
                                                      ncalls  tottime  cumtime
{built-in method builtins.exec}                   <- 
julia1_nopil.py:1(<module>)                       <-       1    0.026    7.761  {built-in method builtins.exec}
julia1_nopil.py:23(calc_pure_python)              <-       1    0.333    7.736  julia1_nopil.py:1(<module>)
julia1_nopil.py:9(calculate_z_serial_purepython)  <-       1    5.339    7.316  julia1_nopil.py:23(calc_pure_python)
{built-in method builtins.abs}                    <- 34219980    1.977    1.977  julia1_nopil.py:9(calculate_z_serial_purepython)
{method 'append' of 'list' objects}               <- 2002000    0.082    0.082  julia1_nopil.py:23(calc_pure_python)
{built-in method builtins.sum}                    <-       1    0.004    0.004  julia1_nopil.py:23(calc_pure_python)
{built-in method builtins.print}                  <-       3    0.0

<pstats.Stats at 0x207c5233f10>

In [10]:
# For each profiled function, list the functions it called
p.print_callees()

   Ordered by: cumulative time

Function                                          called...
                                                      ncalls  tottime  cumtime
{built-in method builtins.exec}                   ->       1    0.026    7.761  julia1_nopil.py:1(<module>)
julia1_nopil.py:1(<module>)                       ->       1    0.333    7.736  julia1_nopil.py:23(calc_pure_python)
julia1_nopil.py:23(calc_pure_python)              ->       1    5.339    7.316  julia1_nopil.py:9(calculate_z_serial_purepython)
                                                           2    0.000    0.000  {built-in method builtins.len}
                                                           3    0.000    0.000  {built-in method builtins.print}
                                                           1    0.004    0.004  {built-in method builtins.sum}
                                                           2    0.000    0.000  {built-in method time.time}
                                

<pstats.Stats at 0x207c5233f10>

## 2.7 SnakeViz로 cProfile 결과 시각화하기

<span style="font-family: monospace">~# pip install snakeviz<br>
~# python3 -m snakeviz profile.stats</span>

![profile](./profile.png)

## 2.8 line_profiler로 한 줄씩 측정하기

**예제 2-6** kernprof를 사용해 프로파일해서 각 줄의 CPU 비용 기록하기  
<span style="font-family: monospace">~# pip install line_profiler<br>
~# kernprof -l -v julia1_lineprofiler.py</span>

In [None]:
Length of x: 1000
Total elements: 1000000
calculate_z_serial_purepython took 59.30600833892822 seconds
Wrote profile results to julia1_lineprofiler.py.lprof
Timer unit: 1e-06 s

Total time: 33.2728 s
File: julia1_lineprofiler.py
Function: calculate_z_serial_purepython at line 9

Line #      Hits         Time  Per Hit   % Time  Line Contents
==============================================================
     9                                           @profile
    10                                           def calculate_z_serial_purepython(maxiter, zs, cs):
    11                                               """Calculate output list using Julia update rule"""
    12         1       1716.0   1716.0      0.0      output = [0] * len(zs)
    13   1000001     265213.0      0.3      0.8      for i in range(len(zs)):
    14   1000000     247479.0      0.2      0.7          n = 0
    15   1000000     285812.0      0.3      0.9          z = zs[i]
    16   1000000     267809.0      0.3      0.8          c = cs[i]
    17  34219980   12569481.0      0.4     37.8          while abs(z) < 2 and n < maxiter:
    18  33219980   10285877.0      0.3     30.9              z = z * z + c
    19  33219980    9063279.0      0.3     27.2              n += 1
    20   1000000     286160.0      0.3      0.9          output[i] = n
    21         1          1.0      1.0      0.0      return output

**예제 2-7** while 문을 나눠서 각각의 소요 시간 측정하기  
<span style="font-family: monospace">~# kernprof -l -v julia1_lineprofiler2.py</span>

In [None]:
Length of x: 1000
Total elements: 1000000
calculate_z_serial_purepython took 96.45154166221619 seconds
Wrote profile results to julia1_lineprofiler2.py.lprof
Timer unit: 1e-06 s

Total time: 51.9761 s
File: julia1_lineprofiler2.py
Function: calculate_z_serial_purepython at line 9

Line #      Hits         Time  Per Hit   % Time  Line Contents
==============================================================
     9                                           @profile
    10                                           def calculate_z_serial_purepython(maxiter, zs, cs):
    11                                               """Calculate output list using Julia update rule"""
    12         1       1185.0   1185.0      0.0      output = [0] * len(zs)
    13   1000001     272612.0      0.3      0.5      for i in range(len(zs)):
    14   1000000     263183.0      0.3      0.5          n = 0
    15   1000000     292065.0      0.3      0.6          z = zs[i]
    16   1000000     273171.0      0.3      0.5          c = cs[i]
    17                                                   while True:
    18  34219980   11745605.0      0.3     22.6              not_yet_escaped = abs(z) < 2
    19  34219980    9680679.0      0.3     18.6              iterations_left = n < maxiter
    20  34219980    9177336.0      0.3     17.7              if not_yet_escaped and iterations_left:
    21  33219980   10740037.0      0.3     20.7                  z = z * z + c
    22  33219980    9243374.0      0.3     17.8                  n += 1
    23                                                       else:
    24                                                           break
    25   1000000     286884.0      0.3      0.6          output[i] = n
    26         1          1.0      1.0      0.0      return output

Total time: 98.0547 s
File: julia1_lineprofiler2.py
Function: calc_pure_python at line 29

Line #      Hits         Time  Per Hit   % Time  Line Contents
==============================================================
    29                                           @profile
    30                                           def calc_pure_python(draw_output, desired_width, max_iterations):
    31                                               """Create a list of complex co-ordinates (zs) and complex parameters (cs), build Julia set and display"""
    32         1          3.0      3.0      0.0      x_step = (x2 - x1) / desired_width
    33         1          1.0      1.0      0.0      y_step = (y1 - y2) / desired_width
    34         1          1.0      1.0      0.0      x = []
    35         1          0.0      0.0      0.0      y = []
    36         1          0.0      0.0      0.0      ycoord = y2
    37      1001        423.0      0.4      0.0      while ycoord > y1:
    38      1000        378.0      0.4      0.0          y.append(ycoord)
    39      1000        404.0      0.4      0.0          ycoord += y_step
    40         1          1.0      1.0      0.0      xcoord = x1
    41      1001        410.0      0.4      0.0      while xcoord < x2:
    42      1000        421.0      0.4      0.0          x.append(xcoord)
    43      1000        384.0      0.4      0.0          xcoord += x_step
    44                                               # set width and height to the generated pixel counts, rather than the
    45                                               # pre-rounding desired width and height
    46                                               # build a list of co-ordinates and the initial condition for each cell.
    47                                               # Note that our initial condition is a constant and could easily be removed,
    48                                               # we use it to simulate a real-world scenario with several inputs to our function
    49         1          0.0      0.0      0.0      zs = []
    50         1          0.0      0.0      0.0      cs = []
    51      1001        541.0      0.5      0.0      for ycoord in y:
    52   1001000     439254.0      0.4      0.4          for xcoord in x:
    53   1000000     575411.0      0.6      0.6              zs.append(complex(xcoord, ycoord))
    54   1000000     581696.0      0.6      0.6              cs.append(complex(c_real, c_imag))
    55
    56         1        105.0    105.0      0.0      print("Length of x:", len(x))
    57         1          8.0      8.0      0.0      print("Total elements:", len(zs))
    58         1          6.0      6.0      0.0      start_time = time.time()
    59         1   96451531.0 96451531.0     98.4      output = calculate_z_serial_purepython(max_iterations, zs, cs)
    60         1          4.0      4.0      0.0      end_time = time.time()
    61         1          1.0      1.0      0.0      secs = end_time - start_time
    62         1         72.0     72.0      0.0      print(calculate_z_serial_purepython.__name__ + " took", secs, "seconds")
    63
    64         1       3598.0   3598.0      0.0      assert sum(output) == 33219980  # this sum is expected for 1000^2 grid with 300 iterations

timeit으로 각 명령의 개별 비용 검사

In [5]:
z = 0 + 0j  # sample complex value for timing the abs(z) < 2 check

In [6]:
%timeit abs(z) < 2

118 ns ± 2.44 ns per loop (mean ± std. dev. of 7 runs, 10000000 loops each)


In [8]:
# sample values for timing the n < maxiter comparison
n = 1
maxiter = 300

In [9]:
%timeit n < maxiter

62.6 ns ± 1.64 ns per loop (mean ± std. dev. of 7 runs, 10000000 loops each)


**예제 2-8** while 문의 조건 검사 순서를 바꿔 실행 속도를 약간 개선함  
<span style="font-family: monospace">~# kernprof -l -v julia1_lineprofiler3.py</span>

In [None]:
Length of x: 1000
Total elements: 1000000
calculate_z_serial_purepython took 62.51071810722351 seconds
Wrote profile results to julia1_lineprofiler3.py.lprof
Timer unit: 1e-06 s

Total time: 35.441 s
File: julia1_lineprofiler3.py
Function: calculate_z_serial_purepython at line 9

Line #      Hits         Time  Per Hit   % Time  Line Contents
==============================================================
     9                                           @profile
    10                                           def calculate_z_serial_purepython(maxiter, zs, cs):
    11                                               """Calculate output list using Julia update rule"""
    12         1       1383.0   1383.0      0.0      output = [0] * len(zs)
    13   1000001     291720.0      0.3      0.8      for i in range(len(zs)):
    14   1000000     261582.0      0.3      0.7          n = 0
    15   1000000     299939.0      0.3      0.8          z = zs[i]
    16   1000000     283320.0      0.3      0.8          c = cs[i]
    17  34219980   13466685.0      0.4     38.0          while n < maxiter and abs(z) < 2:
    18  33219980   10839830.0      0.3     30.6              z = z * z + c
    19  33219980    9686317.0      0.3     27.3              n += 1
    20   1000000     310269.0      0.3      0.9          output[i] = n
    21         1          1.0      1.0      0.0      return output

**예제 2-9** 설정 코드의 비용을 줄 단위로 테스트하기

In [None]:
Total time: 63.9461 s
File: julia1_lineprofiler3.py
Function: calc_pure_python at line 24

Line #      Hits         Time  Per Hit   % Time  Line Contents
==============================================================
    24                                           @profile
    25                                           def calc_pure_python(draw_output, desired_width, max_iterations):
    26                                               """Create a list of complex co-ordinates (zs) and complex parameters (cs), build Julia set and display"""
    27         1          3.0      3.0      0.0      x_step = (x2 - x1) / desired_width
    28         1          0.0      0.0      0.0      y_step = (y1 - y2) / desired_width
    29         1          0.0      0.0      0.0      x = []
    30         1          0.0      0.0      0.0      y = []
    31         1          1.0      1.0      0.0      ycoord = y2
    32      1001        452.0      0.5      0.0      while ycoord > y1:
    33      1000        402.0      0.4      0.0          y.append(ycoord)
    34      1000        430.0      0.4      0.0          ycoord += y_step
    35         1          0.0      0.0      0.0      xcoord = x1
    36      1001        457.0      0.5      0.0      while xcoord < x2:
    37      1000        448.0      0.4      0.0          x.append(xcoord)
    38      1000        444.0      0.4      0.0          xcoord += x_step
    39                                               # set width and height to the generated pixel counts, rather than the
    40                                               # pre-rounding desired width and height
    41                                               # build a list of co-ordinates and the initial condition for each cell.
    42                                               # Note that our initial condition is a constant and could easily be removed,
    43                                               # we use it to simulate a real-world scenario with several inputs to our function
    44         1          0.0      0.0      0.0      zs = []
    45         1          1.0      1.0      0.0      cs = []
    46      1001        459.0      0.5      0.0      for ycoord in y:
    47   1001000     391292.0      0.4      0.6          for xcoord in x:
    48   1000000     510770.0      0.5      0.8              zs.append(complex(xcoord, ycoord))
    49   1000000     526604.0      0.5      0.8              cs.append(complex(c_real, c_imag))
    50
    51         1         59.0     59.0      0.0      print("Length of x:", len(x))
    52         1          6.0      6.0      0.0      print("Total elements:", len(zs))
    53         1          4.0      4.0      0.0      start_time = time.time()
    54         1   62510708.0 62510708.0     97.8      output = calculate_z_serial_purepython(max_iterations, zs, cs)
    55         1          5.0      5.0      0.0      end_time = time.time()
    56         1          1.0      1.0      0.0      secs = end_time - start_time
    57         1         79.0     79.0      0.0      print(calculate_z_serial_purepython.__name__ + " took", secs, "seconds")
    58
    59         1       3513.0   3513.0      0.0      assert sum(output) == 33219980  # this sum is expected for 1000^2 grid with 300 iterations

## 2.9 memory_profiler로 메모리 사용량 진단하기

메모리를 프로파일하면 코드의 실행속도는 평소보다 10배에서 100배까지 느려진다. 보통 memory_profiler보다 line_profiler를 더 자주 사용한다.  
  
<span style="font-family: monospace">~# pip install memory_profiler<br>
~# pip install psutil</span>

**예제 2-10** calculate_z_serial_purepython에서 예상치 못한 메모리 사용을 보여주는 memory_profiler의 결과  
(2시간이나 걸린다고 하니 그냥 책을 보자...)

In [10]:
pip install memory_profiler

Collecting memory_profiler
  Downloading memory_profiler-0.60.0.tar.gz (38 kB)
Building wheels for collected packages: memory-profiler
  Building wheel for memory-profiler (setup.py): started
  Building wheel for memory-profiler (setup.py): finished with status 'done'
  Created wheel for memory-profiler: filename=memory_profiler-0.60.0-py3-none-any.whl size=31276 sha256=b669e12175626a5c47537ebccda570f24e4a32189f1eea6a7b5afaf9bb9581d6
  Stored in directory: c:\users\dydzm\appdata\local\pip\cache\wheels\01\ca\8b\b518dd2aef69635ad6fcab87069c9c52f355a2e9c5d4c02da9
Successfully built memory-profiler
Installing collected packages: memory-profiler
Successfully installed memory-profiler-0.60.0
Note: you may need to restart the kernel to use updated packages.


In [11]:
pip install psutil

Note: you may need to restart the kernel to use updated packages.


**그림 2-6** mprof를 이용한 memory_profiler 그래프

In [11]:
from subprocess import call

In [24]:
# Run the script under mprof, then write the resulting plot to mprof1.png
call(["mprof", "run", "julia1_memoryprofiler1.py"])
call(["mprof", "plot", "-o", "mprof1.png"])

0

![mprof1](./mprof1.png)

**예제 2-11** 컨텍스트 관리자를 사용해서 mprof 그래프에 라벨 추가하기

In [None]:
@profile
def calculate_z_serial_purepython(maxiter, zs, cs):
    """Calculate the output list using the Julia update rule, labelling each phase.

    The list allocation and the main calculation each run inside a
    profile.timestamp(...) context so they appear as named regions in mprof.

    Args:
        maxiter: maximum number of updates applied to any single point.
        zs: sequence of complex starting values.
        cs: sequence of complex constants, indexed in step with zs.

    Returns:
        A list with one iteration count per element of zs.
    """
    with profile.timestamp("create_output_list"):
        counts = [0] * len(zs)
    # presumably a pause to keep the two labelled phases apart on the plot
    time.sleep(1)
    with profile.timestamp("calculate_output"):
        for idx, z in enumerate(zs):
            c = cs[idx]
            steps = 0
            while steps < maxiter and abs(z) < 2:
                z = z * z + c
                steps += 1
            counts[idx] = steps
    return counts

**그림 2-7** 라벨과 mprof를 함께 이용한 memory_profiler 리포트

In [25]:
# After modifying julia1_memoryprofiler.py as shown above, run the following
call(["mprof", "run", "julia1_memoryprofiler2.py"])
call(["mprof", "plot", "-o", "mprof2.png"])

0

![mprof2](./mprof2.png)

**예제 2-12** 복소수 좌표를 즉시 만들어 RAM을 절약하기

In [None]:
@profile
def calculate_z_serial_purepython(maxiter, x, y):
    """Calculate the output list using the Julia update rule, creating coordinates on the fly.

    Instead of receiving pre-built zs/cs lists, each complex starting value is
    constructed from the x and y axis values right before it is iterated.

    Args:
        maxiter: maximum number of updates applied to any single point.
        x: iterable of real-axis coordinates (inner loop).
        y: iterable of imaginary-axis coordinates (outer loop).

    Returns:
        A list of iteration counts in row-major order (all x for the first y, ...).
    """
    # Fixed: the original read the misspelled name `c_imga`, which raised
    # NameError on the first iteration. c is the same for every point, so it
    # is built once rather than once per cell.
    c = complex(c_real, c_imag)
    output = []
    for ycoord in y:
        for xcoord in x:
            z = complex(xcoord, ycoord)
            n = 0
            while n < maxiter and abs(z) < 2:
                z = z * z + c
                n += 1
            output.append(n)
    return output

**그림 2-8** 큰 리스트 2개를 없앤 다음의 memory_profiler 리포트  

In [4]:
# After modifying julia1_memoryprofiler.py as shown above, run the following
call(["mprof", "run", "julia1_memoryprofiler3.py"])
call(["mprof", "plot", "-o", "mprof3.png"])

0

![mprof3](./mprof3.png)

In [4]:
import julia2
%load_ext memory_profiler
# %memit reports peak memory for each run that %timeit performs
%timeit %memit julia2.calc_pure_python(True, desired_width=1000, max_iterations=300)

The memory_profiler extension is already loaded. To reload it, use:
  %reload_ext memory_profiler
Length of x: 1000
Total elements: 1000000
calculate_z_serial_purepython took 8.327255487442017 seconds
300.0
peak memory: 73.70 MiB, increment: 20.07 MiB
Length of x: 1000
Total elements: 1000000
calculate_z_serial_purepython took 8.682730674743652 seconds
300.0
peak memory: 73.74 MiB, increment: 19.99 MiB
Length of x: 1000
Total elements: 1000000
calculate_z_serial_purepython took 8.599271297454834 seconds
300.0
peak memory: 73.71 MiB, increment: 19.96 MiB
Length of x: 1000
Total elements: 1000000
calculate_z_serial_purepython took 9.022754669189453 seconds
300.0
peak memory: 73.70 MiB, increment: 19.94 MiB
Length of x: 1000
Total elements: 1000000
calculate_z_serial_purepython took 8.990012407302856 seconds
300.0
peak memory: 73.71 MiB, increment: 19.96 MiB
Length of x: 1000
Total elements: 1000000
calculate_z_serial_purepython took 9.144343137741089 seconds
300.0
peak memory: 73.71 MiB,

## 2.10 PySpy로 기존 프로세스 살펴보기

**예제 2-13** 명령줄에서 PySpy 실행하기  
(이 친구도 왜 안될까요....? 도와주실분 구함~ help!!)

## 2.11 바이트코드: 내부 작동

**예제 2-14** dis 모듈로 스택 기반의 가상 머신 작동 방식 살펴보기

In [1]:
import dis

In [2]:
import julia1_nopil

In [4]:
# Disassemble the function to inspect its bytecode
dis.dis(julia1_nopil.calculate_z_serial_purepython)

 11           0 LOAD_CONST               1 (0)
              2 BUILD_LIST               1
              4 LOAD_GLOBAL              0 (len)
              6 LOAD_FAST                1 (zs)
              8 CALL_FUNCTION            1
             10 BINARY_MULTIPLY
             12 STORE_FAST               3 (output)

 12          14 LOAD_GLOBAL              1 (range)
             16 LOAD_GLOBAL              0 (len)
             18 LOAD_FAST                1 (zs)
             20 CALL_FUNCTION            1
             22 CALL_FUNCTION            1
             24 GET_ITER
        >>   26 FOR_ITER                74 (to 102)
             28 STORE_FAST               4 (i)

 13          30 LOAD_CONST               1 (0)
             32 STORE_FAST               5 (n)

 14          34 LOAD_FAST                1 (zs)
             36 LOAD_FAST                4 (i)
             38 BINARY_SUBSCR
             40 STORE_FAST               6 (z)

 15          42 LOAD_FAST                2 (cs)
          

**예제 2-15** 같은 문제를 해결하는 효율적인 방법과 그렇지 않은 방법

In [5]:
def fn_expressive(upper=1_000_000):  # sums 0..upper-1 with an explicit Python-level loop
    total = 0
    for n in range(upper):
        total += n  # one in-place addition per iteration, executed as bytecode
    return total

def fn_terse(upper=1_000_000):  # same result, delegating the loop to the built-in sum()
    return sum(range(upper))

assert fn_expressive() == fn_terse(), "Expect identical results from both functions"  # sanity check that both variants agree

**예제 2-16** %timeit으로 내장 함수를 사용한 코드가 더 빠를 것이라는 가설 검증

In [6]:
%timeit fn_expressive()

79.5 ms ± 4.93 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [7]:
%timeit fn_terse()

45.5 ms ± 2.21 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


**예제 2-17** dis모듈로 두 함수의 바이트코드 명령어 확인하기

In [None]:
import dis

In [8]:
# Bytecode of the explicit-loop version
dis.dis(fn_expressive)

  2           0 LOAD_CONST               1 (0)
              2 STORE_FAST               1 (total)

  3           4 LOAD_GLOBAL              0 (range)
              6 LOAD_FAST                0 (upper)
              8 CALL_FUNCTION            1
             10 GET_ITER
        >>   12 FOR_ITER                12 (to 26)
             14 STORE_FAST               2 (n)

  4          16 LOAD_FAST                1 (total)
             18 LOAD_FAST                2 (n)
             20 INPLACE_ADD
             22 STORE_FAST               1 (total)
             24 JUMP_ABSOLUTE           12

  5     >>   26 LOAD_FAST                1 (total)
             28 RETURN_VALUE


In [9]:
# Bytecode of the built-in sum() version
dis.dis(fn_terse)

  8           0 LOAD_GLOBAL              0 (sum)
              2 LOAD_GLOBAL              1 (range)
              4 LOAD_FAST                0 (upper)
              6 CALL_FUNCTION            1
              8 CALL_FUNCTION            1
             10 RETURN_VALUE


## 2.12 최적화 중에 단위 테스트하기

**예제 2-18** @profile을 사용할 간단한 함수와 테스트

In [10]:
import time

def test_some_fn():
    """Verify some_fn on a few representative inputs."""
    expectations = ((2, 4), (1, 1), (-1, 1))
    for given, expected in expectations:
        assert some_fn(given) == expected
    
    
@profile
def some_fn(useful_input):
    """Return useful_input squared after a simulated expensive step."""
    # stand-in for real heavy work: pause for one second
    time.sleep(1)
    squared = useful_input ** 2
    return squared


if __name__ == "__main__":
    # Example call, executed only when the file is run as a script
    print(f"Example call `some_fn(2)` == {some_fn(2)}")

NameError: name 'profile' is not defined

**예제 2-19** 데커레이터 누락으로 테스트가 중단됨  
<span style="font-family: monospace">~# python3 -m pytest utility1.py</span>

In [None]:
==================================== test session starts ====================================
platform linux -- Python 3.8.10, pytest-4.6.9, py-1.11.0, pluggy-0.13.0
rootdir: /root/hpp/high_performance_python_2e/02_profiling
collected 0 items / 1 errors

========================================== ERRORS ===========================================
_______________________________ ERROR collecting utility1.py ________________________________
utility1.py:10: in <module>
    ???
E   NameError: name 'profile' is not defined
!!!!!!!!!!!!!!!!!!!!!!!!!! Interrupted: 1 errors during collection !!!!!!!!!!!!!!!!!!!!!!!!!!
================================== 1 error in 0.06 seconds ==================================

**예제 2-20** 네임스페이스에 아무 일도 하지 않는 @profile 데커레이터를 추가해서 단위 테스트하기

In [None]:
# Check whether line_profiler or memory_profiler is present in the local
# scope; both are injected by their respective profiling tools.
# If neither tool is in use, substitute a do-nothing @profile decorator so
# that decorated code still imports and runs (e.g. under pytest).
if 'line_profiler' not in dir() and 'profile' not in dir():
    def profile(func):
        """No-op stand-in for the profilers' @profile decorator.

        Returns a wrapper that forwards every call to func unchanged.
        """
        # Imported here so the snippet stays self-contained wherever it is pasted.
        from functools import wraps

        @wraps(func)  # keep func's __name__/__doc__ visible to test runners and tracebacks
        def inner(*args, **kwargs):
            return func(*args, **kwargs)
        return inner

**예제 2-21** 아무 일도 하지 않는 데커레이터 추가로 테스트가 제대로 작동하며 프로파일러도 정상 작동함  
<span style="font-family: monospace">~# python3 -m pytest utility2.py</span>

In [None]:
==================================== test session starts ====================================
platform linux -- Python 3.8.10, pytest-4.6.9, py-1.11.0, pluggy-0.13.0
rootdir: /root/hpp/high_performance_python_2e/02_profiling
collected 1 item

utility2.py .                                                                         [100%]

================================= 1 passed in 3.03 seconds ==================================