# Code efficiency modules
    - time
    - timeit
    - profiling
    - line profiling
    - memory profiling

## time module

In [None]:
import time
import random

In [None]:
for i in range(20):
    starttime = time.time()
    myList_Sum = sum([random.randint(1,10)**2 for i in range(10000)])
    stoptime = time.time()
    timetaken_1 = stoptime-starttime
    print('Time to taken to run code, ', timetaken_1)

    starttime = time.time()
    randList = []
    for i in range(10000):
        randList.append(random.randint(1,10)**2)
    myList_Sum = sum(randList)    
    stoptime = time.time()
    timetaken_2 = stoptime-starttime
    print('Time to taken to run code, ', timetaken_2)

    if min(timetaken_2,timetaken_1) == timetaken_1:
        print('Comprehension took lesser time')
    else:
        print('Regular for loop took lesser time')
    print('\n')

In [None]:
def mymean(iterable):
    return sum(iterable)/len(iterable)

In [None]:
x = lambda iterable:sum(iterable)/len(iterable)

In [None]:

starttime = time.time()
myList_Average = mymean([random.randint(1,10) for i in range(10000)])
stoptime = time.time()
timetaken1 = stoptime-starttime
print('Time to taken to run code, ', timetaken1)


starttime = time.time()
myList_Average = x([random.randint(1,10) for i in range(10000)])
stoptime = time.time()
timetaken2 = stoptime-starttime
print('Time to taken to run code, ', timetaken2)

if min(timetaken2,timetaken1) == timetaken2:
    print('lambda took lesser time')
else:
    print('Regular function took lesser time')
print('\n')

## timeit module

In [None]:
import numpy as np
import timeit

In [None]:
a = np.arange(4).reshape((2,2))
a.sum()
sum(a)
np.sum(a)

The module function timeit.timeit(stmt, setup, timer, number) accepts four arguments: 

- stmt

- setup (some requirements like imports)

- number (number of executions) 

timeit.timeit() function returns the number of seconds it took to execute the code.

In [None]:
timeit.timeit("np.arange(4).reshape((2,2)).sum()",setup = "import numpy as np",number = 10000)

In [None]:
timeit.timeit("sum(np.arange(4).reshape((2,2)))",setup = "import numpy as np",number = 10000)

In [None]:
timeit.timeit("np.sum(np.arange(4).reshape((2,2)))",setup = "import numpy as np",number = 10000)

#### Proper profiling of codes can be done using cProfile and profile modules (inbuilt)

## cProfile module

primarily a timing module

cProfile - extension of c, low overhead and fast
profile - written in python, adds overhead 

Both add deterministic profiling - monitoring number of function calls, timeing of each, etc

In [12]:
import cProfile
import random

#### profiling single line code - `cProfile.run`

In [13]:
cProfile.run('sum([random.randint(1,10) for i in range(10000)])')

         56061 function calls in 0.045 seconds

   Ordered by: standard name

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
        1    0.007    0.007    0.045    0.045 <string>:1(<listcomp>)
        1    0.000    0.000    0.045    0.045 <string>:1(<module>)
    10000    0.014    0.000    0.028    0.000 random.py:200(randrange)
    10000    0.010    0.000    0.038    0.000 random.py:244(randint)
    10000    0.009    0.000    0.013    0.000 random.py:250(_randbelow_with_getrandbits)
        1    0.000    0.000    0.045    0.045 {built-in method builtins.exec}
        1    0.000    0.000    0.000    0.000 {built-in method builtins.sum}
    10000    0.002    0.000    0.002    0.000 {method 'bit_length' of 'int' objects}
        1    0.000    0.000    0.000    0.000 {method 'disable' of '_lsprof.Profiler' objects}
    16056    0.003    0.000    0.003    0.000 {method 'getrandbits' of '_random.Random' objects}




- run only runs with single line statements
- profile constrcutor has enable() disable() methods which will profile a set of lines in a code

#### profile constructor

In [18]:
def cube(x):
    """Cube of x."""
    return x * x * x

myprof = cProfile.Profile()

def my_map(func = cube, arg_list = list(range(10000))):
    myprof.enable()
    result = []
    for i in arg_list:
        result.append(func(i))
    sumresult = sum(result)    
    myprof.disable()
    return sumresult

In [19]:
my_map()

2499500025000000

In [20]:
myprof.print_stats()

         20002 function calls in 0.003 seconds

   Ordered by: standard name

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
    10000    0.002    0.000    0.002    0.000 1289081230.py:1(cube)
        1    0.000    0.000    0.000    0.000 {built-in method builtins.sum}
    10000    0.001    0.000    0.001    0.000 {method 'append' of 'list' objects}
        1    0.000    0.000    0.000    0.000 {method 'disable' of '_lsprof.Profiler' objects}




#### save to a profile file

In [21]:
myprof.dump_stats("mymap.prof")

#### open a profile file

In [23]:
import pstats

In [24]:
a = pstats.Stats('mymap.prof')

In [25]:
a.print_stats()

Tue May  3 18:30:49 2022    mymap.prof

         20002 function calls in 0.003 seconds

   Random listing order was used

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
    10000    0.001    0.000    0.001    0.000 {method 'append' of 'list' objects}
        1    0.000    0.000    0.000    0.000 {built-in method builtins.sum}
        1    0.000    0.000    0.000    0.000 {method 'disable' of '_lsprof.Profiler' objects}
    10000    0.002    0.000    0.002    0.000 /var/folders/64/86wm1bvj5tdbj8tcrs97120w0000gn/T/ipykernel_9256/1289081230.py:1(cube)




<pstats.Stats at 0x7fa879dcd190>

pstats module also helps modify results

In [27]:
a.sort_stats(pstats.SortKey.CUMULATIVE).print_stats(3);

Tue May  3 18:30:49 2022    mymap.prof

         20002 function calls in 0.003 seconds

   Ordered by: cumulative time
   List reduced from 4 to 3 due to restriction <3>

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
    10000    0.002    0.000    0.002    0.000 /var/folders/64/86wm1bvj5tdbj8tcrs97120w0000gn/T/ipykernel_9256/1289081230.py:1(cube)
    10000    0.001    0.000    0.001    0.000 {method 'append' of 'list' objects}
        1    0.000    0.000    0.000    0.000 {built-in method builtins.sum}




sort the results of profiling based on cumulative time and then have taken the first 3 results from it when printing.

In [28]:
a.sort_stats(pstats.SortKey.TIME, pstats.SortKey.CUMULATIVE).print_stats();

Tue May  3 18:30:49 2022    mymap.prof

         20002 function calls in 0.003 seconds

   Ordered by: internal time, cumulative time

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
    10000    0.002    0.000    0.002    0.000 /var/folders/64/86wm1bvj5tdbj8tcrs97120w0000gn/T/ipykernel_9256/1289081230.py:1(cube)
    10000    0.001    0.000    0.001    0.000 {method 'append' of 'list' objects}
        1    0.000    0.000    0.000    0.000 {built-in method builtins.sum}
        1    0.000    0.000    0.000    0.000 {method 'disable' of '_lsprof.Profiler' objects}




sort the results based on total time and cumulative

In [29]:
p = pstats.Stats("mymap.prof")

In [30]:
p.print_stats();

Tue May  3 18:30:49 2022    mymap.prof

         20002 function calls in 0.003 seconds

   Random listing order was used

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
    10000    0.001    0.000    0.001    0.000 {method 'append' of 'list' objects}
        1    0.000    0.000    0.000    0.000 {built-in method builtins.sum}
        1    0.000    0.000    0.000    0.000 {method 'disable' of '_lsprof.Profiler' objects}
    10000    0.002    0.000    0.002    0.000 /var/folders/64/86wm1bvj5tdbj8tcrs97120w0000gn/T/ipykernel_9256/1289081230.py:1(cube)




In [31]:
p.print_stats("append"); # only results containing "append" method

Tue May  3 18:30:49 2022    mymap.prof

         20002 function calls in 0.003 seconds

   Random listing order was used
   List reduced from 4 to 1 due to restriction <'append'>

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
    10000    0.001    0.000    0.001    0.000 {method 'append' of 'list' objects}




In [33]:
p = pstats.Stats("mymap.prof")

p.strip_dirs().print_stats();

Tue May  3 18:30:49 2022    mymap.prof

         20002 function calls in 0.003 seconds

   Random listing order was used

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
    10000    0.001    0.000    0.001    0.000 {method 'append' of 'list' objects}
        1    0.000    0.000    0.000    0.000 {built-in method builtins.sum}
        1    0.000    0.000    0.000    0.000 {method 'disable' of '_lsprof.Profiler' objects}
    10000    0.002    0.000    0.002    0.000 1289081230.py:1(cube)




In [36]:
p = pstats.Stats("mymap.prof")

p.strip_dirs().print_callers() # which function was called by which other function

   Random listing order was used

Function                                          was called by...
{method 'append' of 'list' objects}               <- 
{built-in method builtins.sum}                    <- 
{method 'disable' of '_lsprof.Profiler' objects}  <- 
1289081230.py:1(cube)                             <- 




In [38]:
p = pstats.Stats("mymap.prof")

p.print_callees()

   Random listing order was used

Function                                                                               called...
{method 'append' of 'list' objects}                                                    -> 
{built-in method builtins.sum}                                                         -> 
{method 'disable' of '_lsprof.Profiler' objects}                                       -> 
/var/folders/64/86wm1bvj5tdbj8tcrs97120w0000gn/T/ipykernel_9256/1289081230.py:1(cube)  -> 




<pstats.Stats at 0x7fa879dcd5b0>

#### recursive function profiling

In [50]:
def factorial(x):
    """This is a recursive function
    to find the factorial of an integer"""

    if x == 1:
        return 1
    else:
        return (x * factorial(x-1))

In [51]:
cProfile.run("factorial(20)")

         23 function calls (4 primitive calls) in 0.000 seconds

   Ordered by: standard name

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
     20/1    0.000    0.000    0.000    0.000 3861038260.py:1(factorial)
        1    0.000    0.000    0.000    0.000 <string>:1(<module>)
        1    0.000    0.000    0.000    0.000 {built-in method builtins.exec}
        1    0.000    0.000    0.000    0.000 {method 'disable' of '_lsprof.Profiler' objects}




In [52]:
sum(my_map(cube, list(range(1,50000))))
sum(list(map(cube,list(range(1,50000)))))

TypeError: 'int' object is not iterable

## Profile module

In [40]:
import profile

In [None]:
profile.run("mymean([random.randint(1,10) for i in range(10000)])")

In [None]:
profile.run("mymean([1,2,3,4])")

In [None]:
profile.run("mymean([random.randint(1,10) for i in range(10000)])", filename = 'out1.prof')

In [None]:
profile.run("mymean([1,2,3,4])", filename = 'out2.prof')

### `profile()` constructor

In [None]:
def square(x):
    """Square of x."""
    return x*x

In [None]:
def cube(x):
    """Cube of x."""
    return x * x * x

In [None]:
my_num = [4,3,4,6,8,1]


def myfunc(my_list):
    squarelist = []
    cubelist = []
    for i in my_list:
        if i %2 == 0:
            squarelist.append(square(i))
        else:
            cubelist.append(cube(i))
        
    print(squarelist)
    print(cubelist)

In [None]:
myprofile = profile.Profile()

myprofile.run('myfunc(my_num)')

In [None]:
myprofile.print_stats()

### Calibrated profiling

In [45]:
pr = profile.Profile()

cal = []
for i in range(5):
    cal.append(pr.calibrate(10000, verbose=2))

elapsed time without profiling = 0.027554999999999552
elapsed time with profiling = 0.10639499999999913
'CPU seconds' profiler reported = 0.060390999999881956
total # calls = 10001.0
mean stopwatch overhead per profile event = 1.6416358364104792e-06
elapsed time without profiling = 0.02788100000000071
elapsed time with profiling = 0.1069899999999997
'CPU seconds' profiler reported = 0.05953600000061954
total # calls = 10001.0
mean stopwatch overhead per profile event = 1.5825917408568556e-06
elapsed time without profiling = 0.02738899999999944
elapsed time with profiling = 0.10306899999999786
'CPU seconds' profiler reported = 0.05735299999971133
total # calls = 10001.0
mean stopwatch overhead per profile event = 1.4980501949660977e-06
elapsed time without profiling = 0.028126999999997793
elapsed time with profiling = 0.10902400000000156
'CPU seconds' profiler reported = 0.06072400000075007
total # calls = 10001.0
mean stopwatch overhead per profile event = 1.6296870313344803e-06
elapse

In [46]:
bias = sum(cal)/5
bias

1.5855514448607212e-06

This bias can be reduced from my profilinf events

In [47]:
prof = profile.Profile(bias=bias)

prof.run("my_map()")

<profile.Profile at 0x7fa879ebf8b0>

In [48]:
prof.print_stats()

         5 function calls in 0.017 seconds

   Ordered by: standard name

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
        1    0.000    0.000    0.016    0.016 1289081230.py:7(my_map)
        1    0.016    0.016    0.016    0.016 :0(enable)
        1    0.000    0.000    0.016    0.016 :0(exec)
        1    0.000    0.000    0.016    0.016 <string>:1(<module>)
        1    0.000    0.000    0.017    0.017 profile:0(my_map())
        0    0.000             0.000          profile:0(profiler)




In [53]:
%prun factorial(20)

 

# Line profiling

Previous modules only generate time taken information which cannot identify bottle necks in codes

`line_profiler` helps profile the code line by line

In [None]:
## lineprof1.py

def cube(x):
    """Cube of x."""
    return x * x * x

@profile
def my_map(func = cube, arg_list = list(range(1,50000))):
    result = []
    for i in arg_list:
        result.append(func(i))
    sumresult = sum(result)    
    return sumresult

my_map()

In [None]:
kernprof -l lineprof1.py

python -m line_profiler lineprof1.py.lprof

In [None]:
def cube(x):
    """Cube of x."""
    return x * x * x

def my_map(func = cube, arg_list = list(range(1,50000))):
    result = []
    for i in arg_list:
        result.append(func(i))
    sumresult = sum(result)    
    return sumresult

In [None]:
print(my_map(cube, list(range(1,50000))))
print(sum(list(map(cube,list(range(1,50000))))))

In [None]:
# conda install -c anaconda line_profiler

In [None]:
%load_ext line_profiler

In [None]:
%lprun -f my_map my_map()

## `pprofile`

In [None]:
pprofiler similar to line-profiler

deterministic profiling

In [None]:
## mypprofile1.py

def add(a, b):
    return a+b


def get_sum_of_list():
    final_list = []
    for i in range(100):
        out = add(i, i)
        final_list.append(out)
    return final_list


if __name__ == "__main__":
    l = get_sum_of_list()
    print(l)

In [None]:
pprofile mypprofile1.py

In [None]:
pprofile -o mypprofile1.out mypprofile.py

In [None]:
##mypprofile2.py

import random

def add(a, b):
    return a+b


def get_sum_of_list():
    final_list = []
    for i in range(10):
        rand1 = random.randint(1,100)
        rand2 = random.randint(1,100)
        out = add(rand1, rand2)
        final_list.append(out)
    return final_list


if __name__ == "__main__":
    l = get_sum_of_list()
    print(l)

In [None]:
pprofile -o mypprofilerand.out mypprofile2.py

In [None]:
## Excluding modules from being profiled

In [None]:
pprofile --exclude random example_1.py


# Memory profiling

Part of the code that take up lot of memory

- Fully written in python
- Monitors line by line memory usage

example1 - memprof_1.py:
    
    A simple deocrator @profile will give memory usage information
    Let us use the my_map function as always

In [None]:
## memprof_1.py

from memory_profiler import profile


def cube(x):
    """Cube of x."""
    return x * x * x

@profile
def my_map(func = cube, arg_list = list(range(1,50000))):
    result = []
    for i in arg_list:
        result.append(func(i))
    sumresult = sum(result)    
    return sumresult

if __name__ == "__main__":
    my_map()

In [None]:
python -m memory_profiler my_map.py

In [None]:
## memprof_2.py

from memory_profiler import profile


def cube(x):
    """Cube of x."""
    return x * x * x

@profile
def my_map(func = cube, arg_list = list(range(1,50000))):
    result = []
    myres = []
    for i in arg_list:
        result.append(func(i))
        myres.append(func(i)*2)
    sumresult = sum(result)  
    del result
    del myres
    return sumresult

if __name__ == "__main__":
    my_map()
    
## deleting will deallocate memory

In [None]:
## memprof_3.py

from memory_profiler import profile


def cube(x):
    """Cube of x."""
    return x * x * x

@profile(precision = 3)
def my_map(func = cube, arg_list = list(range(1,50000))):
    result = []
    for i in arg_list:
        result.append(func(i))
    sumresult = sum(result)    
    del result
    return sumresult

if __name__ == "__main__":
    my_map()

### `mprof` 

In [None]:
mprof is a executable from command line; comes with memory_profiler
gives information about memory usage over time
to identify times when memory usage peaks 
data is stored in a .dat file which can be accessed for plotting later


In [None]:
Streaming an output to a logfile

In [None]:
## memprof_4.py

from memory_profiler import profile

fp = open("report.log", "w+")
def cube(x):
    """Cube of x."""
    return x * x * x

@profile(stream = fp)
def my_map(func = cube, arg_list = list(range(1,50000))):
    result = []
    for i in arg_list:
        result.append(func(i))
    sumresult = sum(result)    
    return sumresult

if __name__ == "__main__":
    my_map()

In [None]:
python -m memory_profiler memprof_4.py

In [None]:
# conda install -c anaconda memory_profiler

In [None]:
import memory_profiler
import psutil
from memprof_1 import my_map

In [1]:
%load_ext memory_profiler

In [2]:
def cube(x):
    """Cube of x."""
    return x * x * x

def my_map(func = cube, arg_list = list(range(1,50000))):
    result = []
    for i in arg_list:
        result.append(func(i))
    sumresult = sum(result)    
    return sumresult

In [5]:
from memprof_1 import my_map

In [6]:
%%mprun -f my_map 

my_map()

Filename: /Users/siddhart/Documents/NMIMS/NMIMS-April2022/Advanced Python/memprof_1.py

Line #    Mem usage    Increment  Occurences   Line Contents
     8     55.4 MiB     55.4 MiB           1   @profile
     9                                         def my_map(func = cube, arg_list = list(range(1,50000))):
    10     55.4 MiB      0.0 MiB           1       result = []
    11     55.7 MiB      0.0 MiB       50000       for i in arg_list:
    12     55.7 MiB      0.2 MiB       49999           result.append(func(i))
    13     55.7 MiB      0.0 MiB           1       sumresult = sum(result)    
    14     55.7 MiB      0.0 MiB           1       return sumresult





In [7]:
%memit my_map()

Filename: /Users/siddhart/Documents/NMIMS/NMIMS-April2022/Advanced Python/memprof_1.py

Line #    Mem usage    Increment  Occurences   Line Contents
     8     55.7 MiB     55.7 MiB           1   @profile
     9                                         def my_map(func = cube, arg_list = list(range(1,50000))):
    10     55.7 MiB      0.0 MiB           1       result = []
    11     55.7 MiB      0.0 MiB       50000       for i in arg_list:
    12     55.7 MiB      0.0 MiB       49999           result.append(func(i))
    13     55.7 MiB      0.0 MiB           1       sumresult = sum(result)    
    14     55.7 MiB      0.0 MiB           1       return sumresult


peak memory: 55.71 MiB, increment: 0.02 MiB


## Memory management in Python

In [None]:
Python variables are references to objects
Variables do not have dat types attached to them
id() gives memory location

Every Python ojbects maintains 3 things internally.

Type - Type of object
Value - Value of Object
RefCount - Reference Count

In [1]:
x = 10
y = x
t = 10
print(id(x), id(y), id(t)) # 10 has three reference counts as three variables point to that object

4528999040 4528999040 4528999040


In [None]:
z = 'hello'
l = 'hello'
k = l
i = 'Hello World'
j = 'Hello World'
print(id(z), id(l), id(k), id(i), id(j)) # objects with space are created as two separate objects

In [None]:
lst = ['hello',10, 'Hello World']
print(id(lst[0]), id(lst[1]), id(lst[2]))

Decreasing reference count of an object

There are 3 ways to decrease the reference count of an object:

- Using del statement
- Assigning None to a variable name.
- Pointing reference variable to some other object.

In [None]:
del x

In [None]:
y, t, id(y), id(t) # object still accessible

In [10]:
y = None
y # again object accessible; but reference to obj removed

In [None]:
t, lst[1], id(t), id(lst[1])

only reference count for object 10 is reduced; similar to def scope

In [None]:
def test():
    title = 'Hello'
    print(title)

test()

In [None]:
Garbage collection:
    Reference counting

In [None]:
Reference counting:
    counts references to an object and deletes it when final reference count becomes empty

In [None]:
a = 40      # Create object <40>
b = a       # Increase ref. count  of <40>
c = [b]     # Increase ref. count  of <40>
del a       # Decrease ref. count  of <40>
b = 100     # Decrease ref. count  of <40>
c[0] = -1   # Decrease ref. count  of <40>

### guppy

In [2]:
from guppy import hpy
import numpy as np


heap = hpy()

print("Heap Status At Starting : ")
heap_status1 = heap.heap()
print("Heap Size : ", heap_status1.size, " bytes\n")
print(heap_status1)


heap.setref()

print("\nHeap Status After Setting Reference Point : ")
heap_status2 = heap.heap()
print("Heap Size : ", heap_status2.size, " bytes\n")
print(heap_status2)

a = [i for i in range(1000)]
b = "A"
c = np.random.randint(1,100, (1000,))

print("\nHeap Status After Creating Few Objects : ")
heap_status3 = heap.heap()
print("Heap Size : ", heap_status3.size, " bytes\n")
print(heap_status3)

print("\nMemory Usage After Creation Of Objects : ", heap_status3.size - heap_status2.size, " bytes")

Heap Status At Starting : 
Heap Size :  42611547  bytes

Partition of a set of 330443 objects. Total size = 42611859 bytes.
 Index  Count   %     Size   % Cumulative  % Kind (class / dict of class)
     0 103969  31 12795163  30  12795163  30 str
     1  75730  23  5450744  13  18245907  43 tuple
     2  21982   7  3904770   9  22150677  52 types.CodeType
     3  43615  13  3320871   8  25471548  60 bytes
     4   2844   1  2821400   7  28292948  66 type
     5  19894   6  2705584   6  30998532  73 function
     6   7467   2  2583096   6  33581628  79 dict (no owner)
     7   1020   0  1524440   4  35106068  82 dict of module
     8   2844   1  1405712   3  36511780  86 dict of type
     9    748   0   664864   2  37176644  87 set
<1007 more rows. Type e.g. '_.more' to view.>

Heap Status After Setting Reference Point : 
Heap Size :  3373  bytes

Partition of a set of 32 objects. Total size = 3373 bytes.
 Index  Count   %     Size   % Cumulative  % Kind (class / dict of class)
     0  