Profiling and Optimizing Jupyter Notebooks - A Comprehensive Guide

https://towardsdatascience.com/speed-up-jupyter-notebooks-20716cbe2025

In [7]:
from random import random

def estimate_pi(n=1e7) -> "area":
    """Estimate pi with monte carlo simulation.
    
    Arguments:
        n: number of simulations
    """
    in_circle = 0
    total = n
    
    while n != 0:
        prec_x = random()
        prec_y = random()
        if pow(prec_x, 2) + pow(prec_y, 2) <= 1:
            in_circle += 1 # inside the circle
        n -= 1
        
    return 4 * in_circle / total

In [8]:
%time estimate_pi()

Wall time: 6.57 s


3.142022

In [9]:
%timeit -r 2 -n 5 estimate_pi()

6.59 s ± 40 ms per loop (mean ± std. dev. of 2 runs, 5 loops each)


In [10]:
%prun estimate_pi()

 

In [11]:
%prun -D pi.prof estimate_pi()

 
*** Profile stats marshalled to file 'pi.prof'. 


In [12]:
%prun -s cumulative estimate_pi()

 

In [13]:
!pip install line_profiler

Collecting line_profiler
  Downloading https://files.pythonhosted.org/packages/14/fc/ecf4e238bb601ff829068e5a72cd1bd67b0ee0ae379db172eb6a0779c6b6/line_profiler-2.1.2.tar.gz (83kB)
Building wheels for collected packages: line-profiler
  Running setup.py bdist_wheel for line-profiler: started
  Running setup.py bdist_wheel for line-profiler: finished with status 'error'
  Complete output from command d:\anaconda3\python.exe -u -c "import setuptools, tokenize;__file__='C:\\Users\\hp\\AppData\\Local\\Temp\\pip-install-5jw05n79\\line-profiler\\setup.py';f=getattr(tokenize, 'open', open)(__file__);code=f.read().replace('\r\n', '\n');f.close();exec(compile(code, __file__, 'exec'))" bdist_wheel -d C:\Users\hp\AppData\Local\Temp\pip-wheel-7b3xii6p --python-tag cp36:
  running bdist_wheel
  running build
  running build_py
  creating build
  creating build\lib.win-amd64-3.6
  copying line_profiler.py -> build\lib.win-amd64-3.6
  copying kernprof.py -> build\lib.win-amd64-3.6
  copying line_profi

  Failed building wheel for line-profiler
Command "d:\anaconda3\python.exe -u -c "import setuptools, tokenize;__file__='C:\\Users\\hp\\AppData\\Local\\Temp\\pip-install-5jw05n79\\line-profiler\\setup.py';f=getattr(tokenize, 'open', open)(__file__);code=f.read().replace('\r\n', '\n');f.close();exec(compile(code, __file__, 'exec'))" install --record C:\Users\hp\AppData\Local\Temp\pip-record-rsnt_mpb\install-record.txt --single-version-externally-managed --compile" failed with error code 1 in C:\Users\hp\AppData\Local\Temp\pip-install-5jw05n79\line-profiler\


In [14]:
%lprun -f estimate_pi estimate_pi()

ERROR:root:Line magic function `%lprun` not found.


优化

In [15]:
from random import random

def estimate_pi(n=1e7) -> "area":
    """Estimate pi with monte carlo simulation.
    
    Arguments:
        n: number of simulations
    """
    return 4 * sum(1 for _ in range(int(n)) if random()**2 + random()**2 <= 1) / n

In [16]:
%prun estimate_pi()

 

In [17]:
%timeit -r 2 -n 5 estimate_pi()

4.71 s ± 6.27 ms per loop (mean ± std. dev. of 2 runs, 5 loops each)


In [1]:
import numpy as np

def estimate_pi(n=10000000) -> "area":
    """Estimate pi with monte carlo simulation.
    
    Arguments:
        n: number of simulations
    """
    xy = np.random.rand(n, 2)
    inside = np.sum(xy[:, 0]**2 + xy[:, 1]**2 <= 1)
    return 4 * inside / n

In [2]:
%timeit estimate_pi()

321 ms ± 8.03 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [3]:
%memit estimate_pi()

ERROR:root:Line magic function `%memit` not found.


In [21]:
%mprun -f estimate_pi estimate_pi()

ERROR:root:Line magic function `%mprun` not found.


In [22]:
import numpy as np

def estimate_pi(n=10000000) -> "area":
    """Estimate pi with monte carlo simulation.
    
    Arguments:
        n: number of simulations
    """
    return np.sum(np.random.random(n)**2 + np.random.random(n)**2 <= 1) / n * 4

In [23]:
%timeit estimate_pi()

317 ms ± 29.7 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


Out of memory

In [None]:
import numpy as np

def estimate_pi_mem_block(n=10000000) -> "area":
    """Estimate pi with monte carlo simulation.
    
    Arguments:
        n: number of simulations
    """
    size = 10000000 # 1e7
    n_blocks, remainder = divmod(n, size) 
    memory_blocks = [size] * n_blocks + [remainder]
    inside = sum(np.sum(np.random.random(block)**2 + np.random.random(block)**2 <= 1) for block in memory_blocks)
    return 4 * inside / n

In [None]:
%timeit -r 1 -n 1000 pi_chudnovsky(10**100)