In [None]:
# Select matplotlib's interactive 'notebook' backend for the figures below.
%matplotlib notebook
# Load numpy and matplotlib.pyplot names (array, dtype, log, figure, loglog, ...)
# directly into the notebook namespace; the cells below use them unqualified.
%pylab

<hr style="border-width:4px; border-color:coral; border-style:solid"/><br/>

## Create  timing data

<hr style="border-width:4px; border-color:coral; border-style:solid"/>

This notebook shows you one way to create data from MPI runs.  In this example, we create files named `<exec_file>_01.out`, `<exec_file>_02.out`, `<exec_file>_04.out` and so on (for example `trap_01.out`, `trap_02.out`, `trap_04.out` for an executable called `trap`) for runs on 1, 2, 4, etc. processors. You may need to modify this code so it works for your setup.

This code assumes that you are writing your output in binary.  The advantage of binary output is that we store the full precision of the data in the smallest possible file size.

For timing data, we need much larger values of `N` to see good scaling. So instead of providing `N` values, we specify a range of exponents `p` and define `N=2^p`. 

In [None]:
exec_file = 'integral'     # name of the MPI executable that gets launched
procs = [1, 2, 4, 8]       # processor counts to time
pvec = arange(5,10)        # exponents p; the problem size is N = 2^p

We create a Pandas DataFrame with a MultiIndex, so that we can store one timing for every combination of processor count and problem size $N$ (indexed by its exponent `p`).

In [None]:
import pandas

# Shorthand for slicing individual MultiIndex levels inside .loc[...]
idx = pandas.IndexSlice

# Data to store in the multi-index
cols = ['Time']

# One row for every (processor count, exponent p) combination
index = pandas.MultiIndex.from_product(iterables=[procs,pvec],
                                       names=['Proc','p'])

# Empty table; the timing loop fills in the 'Time' column
df_time = pandas.DataFrame(columns=cols, index=index)

In [None]:
import subprocess
import shlex
import timeit
import os

# Record layout used when reading the binary output files back:
# a 32-bit integer 'N' followed by a double-precision 'Error'.
dt = dtype([('N','int32'), ('Error','d')])  

# Output file name, e.g. 'integral_04.out' for exec_file='integral' on 4 processes
outfile = '{f:s}_{np:02}.out'.format

# mpirun command
shell_cmd = 'mpirun -n {np:d} {fexec:s} {p:d}'.format

# The loop variable is deliberately NOT called `np`: %pylab puts the numpy
# alias `np` into the namespace and rebinding it to an int would break any
# later `np.` call in the notebook.
for nproc in procs:
    output_fname = outfile(f=exec_file,np=nproc)

    # Start from a clean file. A missing file only means there is nothing
    # to delete; any other error (e.g. permissions) should be visible.
    try:
        os.remove(output_fname)
    except FileNotFoundError:
        pass

    tvec = []
    for p in pvec:
        cmd = shell_cmd(np=nproc,fexec=exec_file,p=p)
        arg_list = shlex.split(cmd)

        # Time exactly one launch. check=True raises CalledProcessError when
        # mpirun exits with a non-zero status, so a failed run is reported
        # instead of being recorded as a (meaningless) timing.
        T = timeit.repeat(lambda: subprocess.run(arg_list, check=True),
                          repeat=1,
                          number=1)
        tvec.append(min(T))

    # Read back what the runs wrote; the context manager closes the file even
    # if the read fails. `data` is not used further in the cells shown here.
    with open(output_fname,"rb") as fout:
        data = fromfile(fout,dtype=dt)

    df_time.loc[idx[nproc,:],'Time'] = tvec

# Replace the column outright: assigning the cast values through .loc writes
# into the existing object column and can leave its dtype unchanged.
df_time['Time'] = df_time['Time'].astype('float')
df_time

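For sufficiently large problems (e.g. $N=2^{28}$) we seem to get good scaling results, so let's plot those values. Note that the timing runs above only cover the exponents listed in `pvec`, so `pvec` needs to include $p=28$ to reproduce this plot.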

In [None]:
figure(1)
clf()

# Exponent of the problem size to plot (N = 2^p). Prefer p = 28, but only the
# exponents that were actually timed exist in df_time; asking .loc for a
# missing one raises KeyError, so fall back to the largest p that was run.
p_plot = 28
p_available = df_time.index.get_level_values('p')
if p_plot not in p_available:
    p_plot = p_available.max()
    print('p = 28 was not timed; plotting p = {:d} instead'.format(p_plot))

# One timing per processor count; cast to float so that numpy functions such
# as log() work on the values even if the column is stored with object dtype.
df_plot = df_time.loc[idx[:,p_plot],'Time'].astype('float')

loglog(procs,df_plot,'.-',markersize=15,label='Time')

In [None]:
# Overlay a straight-line least-squares fit computed in log-log space
figure(1)

# Degree-1 fit of log(time) against log(procs); the last processor count is
# left out of the fit. c[0] is the slope.
c = polyfit(log(procs[:-1]),log(df_plot.values[:-1]),1)

# Evaluate the fitted line at every processor count, including the last one
fit_times = exp(polyval(c,log(procs)))
fit_label = 'Best-fit line (slope={:6.2f})'.format(c[0])
loglog(procs,fit_times,'r*-',markersize=10,label=fit_label,linewidth=1)

In [None]:
# Add title, xlabel, ylabel, xticks and a legend
# Make figure 1 current again so the calls in this cell apply to the timing plot.
figure(1)

def fix_xticks(Nvec):
    """Place one integer-labelled x tick at every value in `Nvec`.

    Acts on the current axes through `xlim` and `xticks`. The x limits are
    set half a power of two below the first entry and half a power of two
    above the last entry.

    Parameters
    ----------
    Nvec : sequence of numbers
        Tick positions (list, tuple, numpy array or pandas Series). The first
        and last entries determine the axis limits, so the values are assumed
        to be positive and in increasing order.
    """
    # Convert once so that the positional [0] / [-1] lookups below work for
    # any container; a pandas Series would otherwise look -1 up as a label.
    Nvec = asarray(Nvec)

    p0 = log2(Nvec[0])
    p1 = log2(Nvec[-1])
    xlim([2**(p0-0.5), 2**(p1+0.5)])

    # Make nice tick marks
    pstr = ['{:d}'.format(int(N)) for N in Nvec]
    xticks(Nvec,pstr)

# Annotate the figure: heading and axis labels first
title("Timing in trapezoidal method",fontsize=18)
xlabel("Procs",fontsize=16)
ylabel("Time",fontsize=16)

# One tick per processor count. `procs` is a plain list, which supports the
# positional [0] / [-1] indexing that fix_xticks performs.
fix_xticks(procs)

# Legend last, so it lists every labelled curve drawn on the figure
legend()