## IDIA Benchmarking Framework

Description: A testing framework for a variety of use cases such as benchmarking performance, building dashboards, etc. 

### Usage: 
1. Benchmark Testing - Run a benchmark and record parameters for comparing various computing environments
2. Runtime profiling - Debug a specific test in detail

### Examples: 
1. Benchmark a script 
2. Benchmark a terminal command from a notebook (no script)
3. Run code directly from a notebook

Github repository: forthcoming

### Example 1: Run a benchmark on a script 

Run a script that executes tclean using a singularity container

In [1]:
import benchmark

In [2]:
mybenchmark = benchmark.Benchmark()

In [5]:
# Small tclean

import subprocess
import utils
from datetime import datetime

omp = utils.OpenMPRuntime()


def run_tclean( filename, nthreads ):
    '''
    Run a small tclean imaging task on a measurement set and log its start
    and finish times.

    Image products are written as ``<filename>.tclean*``. Anything matching
    that prefix, plus any ``TempLattice*`` entries in the current working
    directory, is removed before the run and again after it.

    Note: another cell in this notebook defines a function with the same
    name; whichever cell was executed last is the one that gets benchmarked.

    Parameters
    ----------
    filename : str
        Path to the input measurement set. Must exist.
    nthreads : int
        Thread limit passed to the OpenMP runtime wrapper (``omp``) before
        imaging starts.

    Raises
    ------
    NameError
        If ``filename`` does not exist (exception type kept so existing
        callers are unaffected).
    subprocess.CalledProcessError
        If one of the ``rm -rf`` cleanup commands fails.
    '''
    # Local imports: no visible cell imports `os`, so the original
    # `os.path.exists` call only worked if the kernel pre-loaded it.
    import os
    try:
        from shlex import quote     # Python 3
    except ImportError:
        from pipes import quote     # Python 2

    print('Process ID:' + str(os.getpid()))
    cleanfiles = True

    # Check input file
    if not os.path.exists(filename):
        raise NameError("input file, {}, not found".format(filename))

    # Limit number of threads used by OpenMP
    omp.omp_set_num_threads(nthreads)
    print("Running tclean with max threads: {}".format(omp.omp_get_max_threads()))

    # Imported only after the input has been validated.
    from casatasks import tclean
    start_time = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    print("Start time {}".format(start_time))

    vis = filename
    niter = 100
    imsize = 1024
    cell = '4arcsec'
    specmode = 'mfs'
    threshold = -10

    image = vis + '.tclean'
    # The image prefix is shell-quoted so a path containing spaces or shell
    # metacharacters cannot make `rm -rf` act on unintended files; the
    # trailing `*` stays outside the quotes so the shell still expands it.
    rm_image = "rm -rf {}*".format(quote(image))
    rm_lattice = "rm -rf TempLattice*"

    # Remove products left over from a previous run.
    subprocess.check_call(rm_image, shell=True)
    subprocess.check_call(rm_lattice, shell=True)

    tclean(vis=vis, imagename=image, niter=niter, imsize=imsize, cell=cell, specmode=specmode, threshold=threshold)

    if cleanfiles:
        print("cleaning up")
        subprocess.check_call(rm_image, shell=True)
        subprocess.check_call(rm_lattice, shell=True)

    end_time = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    print("Finish time {}".format(end_time))


In [4]:
# Larger CLEAN Process (wide field, OpenMP processing)

import subprocess
from datetime import datetime
import utils

omp = utils.OpenMPRuntime()

def run_tclean( filename, nthreads ):
    '''
    Run the larger, wide-field tclean imaging task on a measurement set and
    report the elapsed wall-clock time.

    Uses the ``widefield`` gridder with the ``mtmfs`` deconvolver
    (``nterms=2``) and asks tclean to save the model (``savemodel='modelcolumn'``).
    Image products are written as ``<filename>.tclean*``. Anything matching
    that prefix, plus any ``TempLattice*`` entries in the current working
    directory, is removed before the run and again after it.

    Note: another cell in this notebook defines a function with the same
    name; whichever cell was executed last is the one that gets benchmarked.

    Parameters
    ----------
    filename : str
        Path to the input measurement set. Must exist.
    nthreads : int
        Thread limit passed to the OpenMP runtime wrapper (``omp``) before
        imaging starts.

    Raises
    ------
    NameError
        If ``filename`` does not exist (exception type kept so existing
        callers are unaffected).
    subprocess.CalledProcessError
        If one of the ``rm -rf`` cleanup commands fails.
    '''
    # Local imports: no visible cell imports `os`, so the original
    # `os.path.exists` call only worked if the kernel pre-loaded it.
    import os
    try:
        from shlex import quote     # Python 3
    except ImportError:
        from pipes import quote     # Python 2

    print('Function Process ID:' + str(os.getpid()))
    cleanfiles = True

    # Check input file
    if not os.path.exists(filename):
        raise NameError("input file, {}, not found".format(filename))

    # Limit number of threads used by OpenMP
    omp.omp_set_num_threads(nthreads)
    print("Running tclean with max threads: {}".format(omp.omp_get_max_threads()))

    # Imported only after the input has been validated.
    from casatasks import tclean

    start_time = datetime.now()
    print("Start time {}".format(start_time.strftime('%Y-%m-%d %H:%M:%S')))

    vis = filename
    nterms = 2
    niter = 100
    gain = 0.1
    deconvolver = 'mtmfs'
    threshold = '0.1mJy'
    imagedimension = 1024
    imagecell = '1.2arcsec'                # cell size for images
    gridder = 'widefield'
    specmode = 'mfs'

    image = vis + '.tclean'
    # The image prefix is shell-quoted so a path containing spaces or shell
    # metacharacters cannot make `rm -rf` act on unintended files; the
    # trailing `*` stays outside the quotes so the shell still expands it.
    rm_image = "rm -rf {}*".format(quote(image))
    rm_lattice = "rm -rf TempLattice*"

    # Remove products left over from a previous run.
    subprocess.check_call(rm_image, shell=True)
    subprocess.check_call(rm_lattice, shell=True)

    tclean( vis=vis, imagename = image, field = '', spw = '', specmode = specmode,
            nterms = nterms, niter = niter, gain = gain, deconvolver = deconvolver,
            threshold = threshold, imsize = [imagedimension,imagedimension],
            cell = imagecell, gridder = gridder,
            wprojplanes = -1 ,
            pblimit = -1,
            stokes = 'I',
            weighting = 'briggs',
            robust = 0.0,
            savemodel = 'modelcolumn' )

    if cleanfiles:
        print("cleaning up")
        subprocess.check_call(rm_image, shell=True)
        subprocess.check_call(rm_lattice, shell=True)

    end_time = datetime.now()
    print("Time Elapsed: {:.1f} s".format((end_time - start_time).total_seconds()))

In [6]:
# Input measurement set and OpenMP thread limit handed to run_tclean.
fin = "/users/jbochenek/data/3c391_ctm_mosaic_spw0.ms"
nth = 4
# PID of the notebook kernel itself, printed for comparison with the PID that
# run_tclean reports. Single-argument print(...) behaves the same on Python 2 and 3.
print('Supervisor Process ID:' + str(subprocess.os.getpid()))
# Run run_tclean(fin, nth) under the benchmark harness and keep the returned
# profiler object for the plotting cells below.
res = mybenchmark.execute_function(run_tclean, fin, nth)

Supervisor Process ID:28743
init rprof
Tracker PID: 29084
entering rprof
Process ID:29084
Running tclean with max threads: 4
loading parameter_dictionary
loading task_help
loading all modules
done loading modules
Start time 2018-06-04 10:42:45
cleaning up
Finsih time 2018-06-04 10:43:47
exiting rprof ...
exited rprof
Test finished - RunTime (s): 63


In [9]:
from bokeh.io import output_notebook
output_notebook()

In [12]:
res.visualize()

In [44]:
%matplotlib inline

In [41]:
from numpy import *
from matplotlib.pyplot import *
import time

In [17]:
first = res.results[0]

In [19]:
# NOTE(review): `subplots` is only in scope after the
# `from matplotlib.pyplot import *` cell has run, and `x` / `y` are not defined
# in any cell visible here, so this cell fails on a fresh kernel. Confirm intent.
fig, ax = subplots() 
line, = ax.plot(x,y) 
# Fixed axis limits: x in [0, 1], y in [-1, 1].
ax.set_xlim(0,1) 
ax.set_ylim(-1,1) 

NameError: name 'subplots' is not defined

In [40]:
first

ResourceData(time=1528045824.158017, mem=0.0, cpu=0, pmem=0, uss=0.0, rio=0.0, wio=0.0)

In [None]:
# Scratch cell (never executed: `In [None]`) that builds a Bokeh figure with
# % CPU on the left axis and memory on a second, right-hand axis.
# NOTE(review): `fix_bounds`, `cpu`, `mem`, `Range1d` and `LinearAxis` are not
# defined in any cell visible here; `bp`, `colors`, `t`, `left` and `right` come
# from other cells. Confirm before running.
defaults = dict(title="Profile Results",
                tools="save,reset,xwheel_zoom,xpan",
                toolbar_location='above',
                plot_width=800, plot_height=300)

# Figure with axis ranges derived from the data.
p1 = bp.figure(y_range=fix_bounds(0, max(cpu), 100),
               x_range=fix_bounds(0, right - left, 1),
               **defaults)
# NOTE(review): this second assignment replaces the figure created just above,
# so the data-derived ranges are discarded. Presumably the two calls were meant
# to be alternatives (data present / no data); confirm which one is wanted.
p1 = bp.figure(y_range=(0, 100), x_range=(0, 1), **defaults)
p1.line(t, cpu, color=colors[0], line_width=4, legend='% CPU')

p1.yaxis.axis_label = "% CPU"
# Separate y-range named 'memory'; falls back to 0 / 100 when `mem` is empty.
p1.extra_y_ranges = {'memory': Range1d(*fix_bounds(min(mem) if mem else 0, max(mem) if mem else 100, 100))}
p1.line(t, mem, color=colors[2], y_range_name='memory', line_width=4,
       legend='Memory')
p1.xaxis.axis_label = "Time (s)"
# Draw the axis for the 'memory' range on the right-hand side of the plot.
p1.add_layout(LinearAxis(y_range_name='memory', axis_label='Memory (MB)'), 'right')


In [36]:
pmem

[0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,


In [21]:
import numpy as np

In [24]:
# Sum of (time * pmem) products over all samples.
# The loop variables are deliberately not named `t`: under Python 2 a
# list-comprehension variable leaks into the enclosing scope, so the original
# `for v, t in zip(t, pmem)` replaced the global list `t` with a single value.
# NOTE(review): this is a plain sum of products, not a time integral of pmem
# (that would be e.g. np.trapz(pmem, t)) - confirm which quantity is wanted.
integral = np.sum([t_i * pmem_i for t_i, pmem_i in zip(t, pmem)])

In [25]:
integral

0.0

In [58]:
# Unpack the profiler samples in res.results into parallel per-field lists.
t = [r.time for r in res.results]
rmem = [r.rmem for r in res.results]
smem = [r.smem for r in res.results]
umem = [r.umem for r in res.results]
# Earliest and latest sample timestamps; min()/max() raise ValueError when
# there are no samples.
left, right = min(t), max(t)
# Re-express the timestamps relative to the first sample so the series starts at 0.
t = [i - left for i in t]

In [18]:
first

ResourceData(time=1528108965.063339, cpu=0.0, pmem=0.08772661385327238, rmem=217.239552, umem=10.203136, smem=28.270592, rio=0.393216, wio=0.0)

In [23]:
import bokeh.plotting as bp

In [28]:
from bokeh.plotting import figure, output_file, show

In [54]:
from bokeh import palettes
palette='Viridis'
colors = palettes.all_palettes[palette][6]

In [52]:
p = bp.figure(plot_width=800, plot_height=400,  x_range=[0., right - left], y_range=[0.,1.1*max(smem)] )

In [53]:
p.xaxis.axis_label = "Time (s)"

In [59]:
# Overlay the three memory series on figure `p`, each in its own palette colour
# and with its own legend entry.
# p.square(t, pmem, line_width=4, legend="Memory", line_color='green')
p.line(t, smem, color=colors[3], line_width=4, legend="Shared Memory")
p.line(t, umem, color=colors[2], line_width=4, legend="USS Memory")
p.line(t, rmem, color=colors[4], line_width=4, legend="Memory")

# Restrict the x axis to the sampled time span.
p.xaxis.bounds = (0, right-left)

In [60]:
show(p)

In [None]:


# Scratch cell (never executed: `In [None]`): plot `pmem` against relative time
# on a fresh figure with a fixed y range of [0, 1].
# NOTE(review): `pmem` is not assigned in any cell visible here - confirm where
# it comes from and that its values fit the [0, 1] range.
p = bp.figure(plot_width=600, plot_height=400,  x_range=[0., right - left], y_range=[0.,1.] )

p.xaxis.axis_label = "Time (s)"

p.line(t, pmem, line_width=4, legend="Memory")
# Restrict the x axis to the sampled time span.
p.xaxis.bounds = (0, right-left)
show(p)

In [29]:
len(pmem), len(t)

(1859, 1859)

In [8]:
# Show the repr of the profiler object returned by the benchmark run.
# Single-argument print(...) prints the same text on Python 2 and Python 3.
print(res)

<profile.ResourceProfiler object at 0x7f56d8076590>


In [38]:
res.results[100]

ResourceData(time=1528045733.817296, mem=0.0, cpu=0, pmem=0, uss=0.0, rio=0.0, wio=0.0)

In [None]:
memory

In [11]:
from multiprocessing import Process, Pipe, current_process

In [10]:
import psutil

In [13]:
parent = psutil.Process(current_process().pid)

In [14]:
parent.as_dict()

{'cmdline': ['/usr/bin/python',
  '-m',
  'casakern',
  '-f',
  '/users/jbochenek/.local/share/jupyter/runtime/kernel-ac24b9f8-681f-460e-97ba-2dca4d32c48e.json'],
 'connections': [pconn(fd=59, family=2, type=1, laddr=addr(ip='127.0.0.1', port=40723), raddr=addr(ip='127.0.0.1', port=60622), status='ESTABLISHED'),
  pconn(fd=35, family=2, type=1, laddr=addr(ip='127.0.0.1', port=35150), raddr=(), status='LISTEN'),
  pconn(fd=23, family=2, type=1, laddr=addr(ip='127.0.0.1', port=57609), raddr=(), status='LISTEN'),
  pconn(fd=26, family=2, type=1, laddr=addr(ip='127.0.0.1', port=58184), raddr=(), status='LISTEN'),
  pconn(fd=20, family=2, type=1, laddr=addr(ip='127.0.0.1', port=56753), raddr=(), status='LISTEN'),
  pconn(fd=58, family=2, type=1, laddr=addr(ip='127.0.0.1', port=40723), raddr=addr(ip='127.0.0.1', port=40242), status='ESTABLISHED'),
  pconn(fd=57, family=2, type=1, laddr=addr(ip='127.0.0.1', port=56753), raddr=addr(ip='127.0.0.1', port=39102), status='ESTABLISHED'),
  pconn(fd

In [16]:
parent.memory_percent()

0.10117746559554308

In [22]:
parent.memory_full_info()

pfullmem(rss=250830848, vms=1743331328, shared=114933760, text=3043328, lib=0, data=821460992, dirty=0, uss=244797440, pss=247534592, swap=0)

In [19]:
psutil.version_info

(5, 4, 5)

In [23]:
parent.memory_full_info().rss

250830848