# Helful tips

This notebook help you to understand how to build quality code
- write test first
- check performance (timeit and line_profiler)
- check memory usage (memory_profiler)

# Let's analyze the example of an algorithm
    
    We have a list with integers 
    and list of targets 
    find sum all elements not exceeding to all targets
    
    Example 1:
    Input: elements [10, 1, -1, 0], targets [-1, 100]
    for target -1 -> sum([-1]) = -1, for target 100 -> sum([10, 1, -1, 0]) = 10
    answer sum([-1, 10]) = 9
    Output: 9

In [None]:
%load_ext autoreload
%autoreload 2

# Write tests first

In [2]:
%%writefile test_find_closest_sum.py

def utest_find_closest_sum(func):
    assert func([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], [0]) == 0
    assert func([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], [-1]) == 0
    assert func([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], [11]) == 45
    assert func([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], [5]) == 15
    assert func([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], [-10, 10]) == 45
    assert func([-10, -9, -8, -7, -6, -5, -4, -3, -2, -1], [-11]) == -10
    assert func([-10, -9, -8, -7, -6, -5, -4, -3, -2, -1], [10]) == -55
    assert func([-10, -9, -8, -7, -6, -5, -4, -3, -2, -1], [-5]) == -45
    assert func([100, 0, 1], [100, -1]) == 101
    assert func([100, 0, 1], [100, 10]) == 102
    assert func([10, 1, -1, 0], [-1, 100]) == 9


Overwriting test_find_closest_sum.py


In [3]:
%%writefile find_closest_sum.py

import random

def generate_lists(n):
    return [random.randint(0, n) for _ in range(n)], [random.randint(-n - 1, n + 1) for _ in range(n)]


Overwriting find_closest_sum.py


In [4]:
from find_closest_sum import generate_lists

lst_values, lst_targets = generate_lists(int(1e4))

# find_closest1: Dummy solution 

In [5]:
%%writefile -a find_closest_sum.py

def find_closest_sum1(vlst, tlst):
    
    # sort
    vlist = sorted(vlst)
    
    targets = []
    for target in tlst:
        vmin = float('Inf')
        closest = None
        for i in range(len(vlist)):
            value = abs(vlist[i] - target)
            if value <= vmin:
                vmin = value
                closest = i

        targets.extend(vlist[:closest + 1])
        
    return sum(targets)


Appending to find_closest_sum.py


# Add test

In [6]:
%%writefile -a test_find_closest_sum.py

from find_closest_sum import find_closest_sum1

def test_find_closest_sum1():
    utest_find_closest_sum(find_closest_sum1)


Appending to test_find_closest_sum.py


# Check performance

In [7]:
%%writefile -a test_find_closest_sum.py

from find_closest_sum import generate_lists

lst_values, lst_targets = generate_lists(int(1e4))

from find_closest_sum import find_closest_sum1

def test_performance1():
    find_closest_sum1(lst_values, lst_targets)
    

Appending to test_find_closest_sum.py


In [8]:
!pytest -vv --durations=0 test_find_closest_sum.py

platform linux -- Python 3.6.8, pytest-5.3.1, py-1.8.0, pluggy-0.13.1 -- /home/dsysoev/.virtualenvs/lab/bin/python3
cachedir: .pytest_cache
rootdir: /media/data/devel/fun-with-python/cpu_profiling
collected 2 items                                                              [0m

test_find_closest_sum.py::test_find_closest_sum1 [32mPASSED[0m[32m                  [ 50%][0m
test_find_closest_sum.py::test_performance1 [32mPASSED[0m[32m                       [100%][0m

13.28s call     test_find_closest_sum.py::test_performance1
0.00s call     test_find_closest_sum.py::test_find_closest_sum1
0.00s setup    test_find_closest_sum.py::test_find_closest_sum1
0.00s teardown test_find_closest_sum.py::test_performance1
0.00s teardown test_find_closest_sum.py::test_find_closest_sum1
0.00s setup    test_find_closest_sum.py::test_performance1


# Conclusion: find_closest_sum1

Worst performance.


# find_closest_sum2: Speed up find element with O(n) -> O(log n)

In [9]:
%%writefile -a find_closest_sum.py

import bisect


def find_closest_sum2(vlst, tlst):
    
    # list should be sorted
    vlist = sorted(vlst)
    
    targets = []
    for target in tlst:
        # bisect.bisect_left will return the first value in the list
        # that is greater than or equal to the target
        i = bisect.bisect_left(vlist, target)
        
        if i == len(vlist):
            indx = i - 1
        elif vlist[i] == target:
            indx = i
        elif i > 0:
            j = i - 1
            # since we know value is larger than target 
            # (and vice versa for the value at j),
            # we don't need to use absolute values here
            if vlist[i] - target > target - vlist[j]:
                indx = j
        else:
            indx = i
        
        targets.extend(vlist[:indx + 1])
            
    return sum(targets)


Appending to find_closest_sum.py


In [10]:
%%writefile -a test_find_closest_sum.py

from find_closest_sum import find_closest_sum2

def test_find_closest_sum2():
    utest_find_closest_sum(find_closest_sum2)
    
def test_performance2():
    find_closest_sum2(lst_values, lst_targets)


Appending to test_find_closest_sum.py


In [11]:
!pytest -vv --durations=0 test_find_closest_sum.py

platform linux -- Python 3.6.8, pytest-5.3.1, py-1.8.0, pluggy-0.13.1 -- /home/dsysoev/.virtualenvs/lab/bin/python3
cachedir: .pytest_cache
rootdir: /media/data/devel/fun-with-python/cpu_profiling
collected 4 items                                                              [0m

test_find_closest_sum.py::test_find_closest_sum1 [32mPASSED[0m[32m                  [ 25%][0m
test_find_closest_sum.py::test_performance1 [32mPASSED[0m[32m                       [ 50%][0m
test_find_closest_sum.py::test_find_closest_sum2 [32mPASSED[0m[32m                  [ 75%][0m
test_find_closest_sum.py::test_performance2 [32mPASSED[0m[32m                       [100%][0m

12.71s call     test_find_closest_sum.py::test_performance1
0.42s call     test_find_closest_sum.py::test_performance2
0.00s call     test_find_closest_sum.py::test_find_closest_sum1
0.00s setup    test_find_closest_sum.py::test_find_closest_sum1
0.00s call     test_find_closest_sum.py::test_find_closest_sum2
0.00s setup   

# Run line profiler

In [12]:
%load_ext line_profiler

In [13]:
from test_find_closest_sum import find_closest_sum2

%lprun -f find_closest_sum2 find_closest_sum2(lst_values, lst_targets)

Timer unit: 1e-06 s

Total time: 0.407341 s
File: /media/data/devel/fun-with-python/cpu_profiling/find_closest_sum.py
Function: find_closest_sum2 at line 29

Line #      Hits         Time  Per Hit   % Time  Line Contents
    29                                           def find_closest_sum2(vlst, tlst):
    30                                               
    31                                               # list should be sorted
    32         1       2350.0   2350.0      0.6      vlist = sorted(vlst)
    33                                               
    34         1          1.0      1.0      0.0      targets = []
    35     10001       6010.0      0.6      1.5      for target in tlst:
    36                                                   # bisect.bisect_left will return the first value in the list
    37                                                   # that is greater than or equal to the target
    38     10000      12352.0      1.2      3.0          i = bisect.bisect_l

# Run memory profiler

In [14]:
%load_ext memory_profiler

In [15]:
from test_find_closest_sum import find_closest_sum2

%mprun -f find_closest_sum2 find_closest_sum2(lst_values, lst_targets)




Filename: /media/data/devel/fun-with-python/cpu_profiling/find_closest_sum.py

Line #    Mem usage    Increment   Line Contents
    29     52.2 MiB     52.2 MiB   def find_closest_sum2(vlst, tlst):
    30                                 
    31                                 # list should be sorted
    32     52.2 MiB      0.0 MiB       vlist = sorted(vlst)
    33                                 
    34     52.2 MiB      0.0 MiB       targets = []
    35    215.1 MiB      0.0 MiB       for target in tlst:
    36                                     # bisect.bisect_left will return the first value in the list
    37                                     # that is greater than or equal to the target
    38    214.9 MiB      0.0 MiB           i = bisect.bisect_left(vlist, target)
    39                                     
    40    214.9 MiB      0.0 MiB           if i == len(vlist):
    41                                         indx = i - 1
    42    214.9 MiB      0.0 MiB           elif

# Conclusion: find_closest_sum2

Good performance, but huge memory usage.


# find_closest_sum3: use hash for store computed values

In [16]:
%%writefile -a find_closest_sum.py

import bisect


def find_closest_sum3(vlst, tlst):
    
    # list should be sorted
    vlist = sorted(vlst)
    
    # create hash for store target results
    hash_map = {}
    
    vsum = 0
    targets = []
    for target in tlst:
        # bisect.bisect_left will return the first value in the list
        # that is greater than or equal to the target
        i = bisect.bisect_left(vlist, target)
        
        if i == len(vlist):
            indx = i - 1
        elif vlist[i] == target:
            indx = i
        elif i > 0:
            j = i - 1
            # since we know value is larger than target 
            # (and vice versa for the value at j),
            # we don't need to use absolute values here
            if vlist[i] - target > target - vlist[j]:
                indx = j
        else:
            indx = i
        
        if target not in hash_map:
            hash_map[target] = sum(vlist[:indx + 1])
        
        vsum += hash_map[target]
            
    return vsum


Appending to find_closest_sum.py


In [17]:
%%writefile -a test_find_closest_sum.py

from find_closest_sum import find_closest_sum3

def test_find_closest_sum3():
    utest_find_closest_sum(find_closest_sum3)
    
def test_performance3():
    find_closest_sum3(lst_values, lst_targets)

Appending to test_find_closest_sum.py


In [18]:
!pytest -vv --durations=0 test_find_closest_sum.py

platform linux -- Python 3.6.8, pytest-5.3.1, py-1.8.0, pluggy-0.13.1 -- /home/dsysoev/.virtualenvs/lab/bin/python3
cachedir: .pytest_cache
rootdir: /media/data/devel/fun-with-python/cpu_profiling
collected 6 items                                                              [0m

test_find_closest_sum.py::test_find_closest_sum1 [32mPASSED[0m[32m                  [ 16%][0m
test_find_closest_sum.py::test_performance1 [32mPASSED[0m[32m                       [ 33%][0m
test_find_closest_sum.py::test_find_closest_sum2 [32mPASSED[0m[32m                  [ 50%][0m
test_find_closest_sum.py::test_performance2 [32mPASSED[0m[32m                       [ 66%][0m
test_find_closest_sum.py::test_find_closest_sum3 [32mPASSED[0m[32m                  [ 83%][0m
test_find_closest_sum.py::test_performance3 [32mPASSED[0m[32m                       [100%][0m

12.66s call     test_find_closest_sum.py::test_performance1
0.44s call     test_find_closest_sum.py::test_performance2
0.21s call

In [19]:
from test_find_closest_sum import find_closest_sum3

%lprun -f find_closest_sum3 find_closest_sum3(lst_values, lst_targets)

Timer unit: 1e-06 s

Total time: 0.233755 s
File: /media/data/devel/fun-with-python/cpu_profiling/find_closest_sum.py
Function: find_closest_sum3 at line 61

Line #      Hits         Time  Per Hit   % Time  Line Contents
    61                                           def find_closest_sum3(vlst, tlst):
    62                                               
    63                                               # list should be sorted
    64         1       2339.0   2339.0      1.0      vlist = sorted(vlst)
    65                                               
    66                                               # create hash for store target results
    67         1          2.0      2.0      0.0      hash_map = {}
    68                                               
    69         1          1.0      1.0      0.0      vsum = 0
    70         1          1.0      1.0      0.0      targets = []
    71     10001       3736.0      0.4      1.6      for target in tlst:
    72                

In [20]:
from find_closest_sum import find_closest_sum3

%mprun -f find_closest_sum3 find_closest_sum3(lst_values, lst_targets)




Filename: /media/data/devel/fun-with-python/cpu_profiling/find_closest_sum.py

Line #    Mem usage    Increment   Line Contents
    61     53.1 MiB     53.1 MiB   def find_closest_sum3(vlst, tlst):
    62                                 
    63                                 # list should be sorted
    64     53.1 MiB      0.0 MiB       vlist = sorted(vlst)
    65                                 
    66                                 # create hash for store target results
    67     53.1 MiB      0.0 MiB       hash_map = {}
    68                                 
    69     53.1 MiB      0.0 MiB       vsum = 0
    70     53.1 MiB      0.0 MiB       targets = []
    71     53.1 MiB      0.0 MiB       for target in tlst:
    72                                     # bisect.bisect_left will return the first value in the list
    73                                     # that is greater than or equal to the target
    74     53.1 MiB      0.0 MiB           i = bisect.bisect_left(vlist, tar

# Conclusion: find_closest_sum3

Good performance and low memory usage.

# Comparison table

| Name              | Tests | Performance | Memory  |
|-------------------|-------|-------------|---------|
| find_closest_sum1 | Ok    | 12660  ms     | - |
| find_closest_sum2 | Ok    | 440 ms (28x faster) | 215 Mb |
| find_closest_sum3 | Ok    | 210 ms (2x faster) | 53 Mb (4x lower) |

# Dis module

In [21]:
import dis

from find_closest_sum import find_closest_sum3

dis.dis(find_closest_sum3)

 64           0 LOAD_GLOBAL              0 (sorted)
              2 LOAD_FAST                0 (vlst)
              4 CALL_FUNCTION            1
              6 STORE_FAST               2 (vlist)

 67           8 BUILD_MAP                0
             10 STORE_FAST               3 (hash_map)

 69          12 LOAD_CONST               1 (0)
             14 STORE_FAST               4 (vsum)

 70          16 BUILD_LIST               0
             18 STORE_FAST               5 (targets)

 71          20 SETUP_LOOP             158 (to 180)
             22 LOAD_FAST                1 (tlst)
             24 GET_ITER
        >>   26 FOR_ITER               150 (to 178)
             28 STORE_FAST               6 (target)

 74          30 LOAD_GLOBAL              1 (bisect)
             32 LOAD_ATTR                2 (bisect_left)
             34 LOAD_FAST                2 (vlist)
             36 LOAD_FAST                6 (target)
             38 CALL_FUNCTION            2
             40 STORE_F