# Description

Demonstrate time and memory profiling tools on a toy example.

# Imports

In [None]:
# Reload modified modules automatically before executing each cell.
%load_ext autoreload
%autoreload 2
# Provide the `%memit` and `%mprun` magics used in the memory section.
%load_ext memory_profiler

import logging
import time

import IPython.display as dspl
import pandas as pd

import helpers.hdbg as hdbg
import helpers.henv as henv
import helpers.hprint as hprint

In [None]:
# Set up logging at INFO verbosity through the project helper.
hdbg.init_logger(verbosity=logging.INFO)

_LOG = logging.getLogger(__name__)

# Log the first element returned by `get_system_signature()` (presumably a
# text summary of the environment -- TODO confirm against `helpers.henv`).
_LOG.info("%s", henv.get_system_signature()[0])

# Apply the project's notebook configuration (details live in
# `helpers.hprint`).
hprint.config_notebook()

# Functions

In [None]:
def func1() -> pd.DataFrame:
    """
    Sleep for 2 seconds, then build a one-column dataframe.

    The sleep and the large allocation give the profilers something visible
    to report.

    :return: dataframe whose single column `0` holds 1e6 copies of "str1"
    """
    time.sleep(2)
    df = pd.DataFrame(["str1"] * int(1e6))
    return df


def func2(df: pd.DataFrame) -> pd.DataFrame:
    """
    Sleep for 3 seconds, then add column `1` derived from column `0`.

    The new column is written into `df` itself, so the caller's dataframe is
    modified and the very same object is returned.

    :param df: dataframe with a column `0` of strings (as built by `func1()`)
    :return: `df`, with column `1` equal to column `0` plus the "_str2" suffix
    """
    time.sleep(3)
    # Element-wise string concatenation; assigns into the input dataframe.
    df[1] = df[0] + "_str2"
    return df


def func3() -> pd.DataFrame:
    """
    Sleep for 1 second, then chain `func1()` and `func2()`.

    This is the entry point that the profiling cells run.

    :return: the dataframe built by `func1()` after `func2()` added column `1`
    """
    time.sleep(1)
    df = func1()
    df = func2(df)
    return df

# Profile time

## Profile overall time

In [None]:
%%time
# `func3()` sleeps 1 s itself and calls `func1()` (2 s) and `func2()` (3 s),
# so the reported wall time should be at least 6 s.
df = func3()

## Time by function

Although the docs do not mention it, `%prun` uses `cProfile` under the hood: https://github.com/ipython/ipython/blob/master/IPython/core/magics/execution.py#L22

In [None]:
# "-D" saves the profile stats to a file; adding "-q" would also suppress the
# output to the notebook.
%prun -D tmp.pstats df = func3()

In [None]:
# Convert the saved profile into a call graph: `gprof2dot` reads the pstats
# file and emits Graphviz dot source, which `dot` renders to a PNG.
!gprof2dot -f pstats tmp.pstats | dot -Tpng -o output.png
dspl.Image(filename="output.png")

`gprof2dot` supports node (`-n`) and edge (`-e`) thresholds that prune insignificant calls and make the output more readable: https://github.com/jrfonseca/gprof2dot#documentation

In [None]:
# Same rendering pipeline, but drop nodes (`-n`) and edges (`-e`) that fall
# below a 5% threshold.
!gprof2dot -n 5 -e 5 -f pstats tmp.pstats | dot -Tpng -o output.png
dspl.Image(filename="output.png")

# Profile memory

## Peak memory

In [None]:
%%memit
# Measure the memory usage of the whole cell (peak memory and increment).
df = func3()

## Memory by line

`%mprun` can only profile, line by line, functions that are defined in a file and imported, not functions defined in a notebook cell. With `func3` moved to a module, the profiling is executed like this:

In [None]:
# `-f func3` selects the function to profile line by line; the trailing
# statement is the code that is run under the profiler.
%mprun -f func3 df = func3()