## Memory Management in Python

In [1]:
import psutil, os

In [2]:
def mem_profile(unit='mb'):
    """Return the resident memory (RSS) used by the current Python process.

    Args:
        unit (str): The desired output unit of memory, case-insensitive.
            One of 'b', 'kb', 'mb', 'gb' or 'tb', using binary multiples
            (1 kb = 1024 b). Defaults to megabytes.

    Returns:
        The amount of memory in use by the Python process, expressed in
        `unit`: the raw integer byte count for 'b', a float for every
        other unit.

    Raises:
        ValueError: If `unit` isn't a recognised memory unit (this includes
            non-string values such as None).

    """
    # Power of 1024 that converts a byte count into each supported unit.
    exponents = {'b': 0, 'kb': 1, 'mb': 2, 'gb': 3, 'tb': 4}

    # Validate before touching the process so a bad unit fails fast and
    # always surfaces as the documented ValueError.
    key = unit.lower() if isinstance(unit, str) else None
    if key not in exponents:
        raise ValueError("unit must be one of {'b','kb','mb','gb','tb'}")

    used_bytes = psutil.Process(os.getpid()).memory_info().rss

    power = exponents[key]
    if power == 0:
        # Bytes are returned untouched so the result stays an exact int.
        return used_bytes
    return used_bytes / (1024 ** power)

### Test Suite

To clear the memory held by variables from previous runs of this notebook, restart the kernel before running the tests:


 > Kernel
 
 > Restart and run all 
 
 

In [3]:
import unittest
import numpy as np
import pandas as pd


class TestMemory(unittest.TestCase):
    """Checks that mem_profile tracks the memory taken by large allocations.

    Each test reads mem_profile() before and after creating an object of a
    known size and asserts that the reported growth (in MB) matches that size.
    The readings are of the whole process, so the tests are sensitive to
    anything else allocated or freed between the two readings.
    """
    
    # Number of 8-byte values (float64, as produced by np.random.randn) that fill 1 MB
    one_mb_float = (1024 ** 2) // 8  # Number of floats that fill 1 MB of memory
    # Size, in MB, of the allocation each test makes
    n_mb = 100
    # Element count of an array occupying n_mb megabytes
    array_size = n_mb*one_mb_float
    
    def test_np_array(self) -> None:
        """Allocating an n_mb numpy array should grow reported memory by n_mb."""
        m0 = mem_profile()          # memory in use before variable creation
        # Bound to a name so the array is still alive when m1 is read
        _ = np.random.randn(self.array_size) 
        m1 = mem_profile()          # memory in use after variable creation
        # decimal=1: passes when |(m1 - m0) - n_mb| < 1.5 * 10**-1 MB
        np.testing.assert_almost_equal(m1 - m0, self.n_mb, decimal=1, err_msg=': incorrect for numpy memory usage', verbose=True)
        
        
    def test_pd_df(self) -> None:
        """Reported memory growth should match the size pandas reports for a DataFrame."""
        m0 = mem_profile('mb')
        df = pd.DataFrame(np.random.randn(self.array_size))
        # Rebinds df to a new frame produced by the addition.
        # NOTE(review): the "lazily" remark is the original author's observation - not verified here.
        df = df + 1 # pandas seems to be evaluated somewhat lazily
        m1 = mem_profile('mb')
        # memory_usage(index=True) lists the index first, so position 1 is the
        # single data column; floor division turns its byte count into whole MB.
        m_df = df.memory_usage(index=True).values[1] // (1024**2)   # convert to mb
        # Same tolerance as the numpy test: difference must be under 0.15 MB
        np.testing.assert_almost_equal(m1 - m0, m_df, decimal=1, err_msg=': incorrect for pandas memory usage', verbose=True)
        
        
        

### Run Tests

In [4]:
# Run every TestCase defined in this notebook without leaving the kernel.
if __name__ == '__main__':
    unittest.main(
        argv=[''],   # placeholder program name, so the kernel's own command-line arguments are not parsed
        exit=False,  # report results instead of calling sys.exit(), which would stop the notebook
    )

..
----------------------------------------------------------------------
Ran 2 tests in 1.542s

OK
