In [None]:
import os.path
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
%matplotlib inline
pd.set_option("display.max.columns", None)

# Thread benchmarks results for "don" and "Fedora-i9"

## Initial exploration of data

Review the data to find the configurations to focus on: the core pairs and work counts for which the results are stable.

**NOTE: times are calculated per work unit, i.e. time/count, where count is the number of times the work was done.**

In [None]:
# TSC frequency (kHz) of each benchmark machine, read from the kernel with:
#   sudo bpftrace -e 'BEGIN { printf("%u\n", *kaddr("tsc_khz")); exit(); }'
# Tick counts are turned into wall-clock time as:
#   time_seconds = (tsc_count_end - tsc_count_start) / (tsc_frequency * 1000)
TSC = {
    "don": 2599950,
    "fedora_i9": 2400023,
}
# Result tables, one DataFrame per "<machine>-<cpu0>,<cpu1>-<work>-<type>" key.
results = {}


def timens(value, m):
    """Convert a TSC tick count taken on machine ``m`` into nanoseconds.

    ``TSC[m]`` holds the TSC frequency in kHz, so ``TSC[m] * 1000`` is the
    number of ticks per second. ``value`` may be a scalar or anything that
    supports elementwise multiplication (e.g. a pandas Series).
    """
    seconds_per_tick = 1.0 / (TSC[m] * 1000)
    return value * seconds_per_tick * 1000000000

def _summarizeTimesFile(FILE, MACHINE):
    """Reduce one ``.times`` CSV file to a single summary row (times in ns per work unit)."""
    data = pd.read_csv(FILE)
    # The first row's count is used to normalise every row.
    # NOTE(review): assumes all rows of one file share the same count — confirm.
    count = data['count'].iloc[0]
    # Convert TSC ticks to nanoseconds exactly once, on the per-work-unit values.
    pctime = timens(data['ctime'] / count, MACHINE)
    pwtime = timens(data['wtime'] / count, MACHINE)
    meanCtime = pctime.mean()
    meanWtime = pwtime.mean()
    return {
        'count': count,
        'ctime': meanCtime,
        'wtime': meanWtime,
        # Asymmetric error bars: [distance down to the minimum, distance up to the maximum].
        'cerr': np.array([meanCtime - pctime.min(), pctime.max() - meanCtime]),
        'werr': np.array([meanWtime - pwtime.min(), pwtime.max() - meanWtime]),
    }


def processData(WORK=["NULLLOOP", "NOPLOOP", "4NOPLOOP"],
                CPU0S=[0,1,3,5], CPU1S=[3,7,17,46],
                MACHINES=["don", "fedora_i9"],
                DATES=["current"],
                TYPES=["LOCAL_WORK", "LOCAL_WORK_NO_SERVERTHREAD","LOCAL_WORK_WITH_REMOTE_READ_SHARED",
                       "LOCAL_WORK_WITH_REMOTE_RW_SHARED","REMOTE_WORK_READ_SHARED","REMOTE_WORK_RW_SHARED"],
                COUNTS=[100, 200, 400, 800, 1600, 3200, 6400, 12800, 25600, 51200, 102400],
                VERBOSE=False):
    """Load benchmark timing files and store one summary table per configuration in ``results``.

    For every combination of work kind, core pair, machine, date and benchmark
    type, the files
    ``data/<machine>/<date>/bm.<work>_<type>_<count>_<cpu0>_<cpu1>.times``
    are read for each requested count. Files that do not exist are skipped.

    Each existing file contributes one row with the columns ``count``,
    ``ctime`` and ``wtime`` (mean time per work unit, in nanoseconds) and
    ``cerr`` / ``werr`` (``[mean - min, max - mean]``, also in nanoseconds).

    The table is stored in the module-level ``results`` dict under the key
    ``"<machine>-<cpu0>,<cpu1>-<work>-<type>"``. The entry is always replaced,
    so a configuration without any data file ends up with an empty table.
    The key does not contain the date, so with several DATES the last one wins.

    Parameters
    ----------
    WORK, CPU0S, CPU1S, MACHINES, DATES, TYPES, COUNTS : list
        Values to combine; see the file name pattern above.
    VERBOSE : bool
        When true, print the name of every file that is processed.
    """
    COLUMNS = ['count','ctime','wtime','cerr','werr']
    for W in WORK:
        for CPU0 in CPU0S:
            for CPU1 in CPU1S:
                for MACHINE in MACHINES:
                    for DATE in DATES:
                        for TYPE in TYPES:
                            KEY=MACHINE+"-"+str(CPU0)+","+str(CPU1)+"-"+W+"-"+TYPE
                            # Collect the rows first and build the frame once,
                            # instead of growing it with pd.concat per file.
                            rows = []
                            for COUNT in COUNTS:
                                FILE="data/"+MACHINE+"/"+DATE+"/bm."+W+"_"+TYPE+"_"+str(COUNT)+"_"+str(CPU0)+"_"+str(CPU1)+".times"
                                if not os.path.exists(FILE):
                                    continue
                                if VERBOSE:
                                    print("processing:" + FILE)
                                rows.append(_summarizeTimesFile(FILE, MACHINE))
                            results[KEY] = pd.DataFrame(rows, columns=COLUMNS)

In [None]:
def plot(KEYS, loff=0):
    """Plot per-work-unit times against work count for the given result keys.

    For every key in ``KEYS`` two error-bar series are drawn from
    ``results[key]``: ``ctime`` (solid line, 'x' markers) and ``wtime``
    (dotted line, 'o' markers). The error bars come from the ``cerr`` /
    ``werr`` columns, each cell holding a ``[lower, upper]`` pair.

    Parameters
    ----------
    KEYS : iterable of str
        Keys into the module-level ``results`` dict.
    loff : float
        Extra vertical offset for the legend anchor, so that longer legends
        can be moved further above the axes.
    """
    # plt.subplots() creates the figure itself; an additional plt.figure()
    # call beforehand would only leave an empty extra figure behind.
    fig, ax = plt.subplots()
    for key in KEYS:
        frame = results[key]
        # errorbar expects yerr with shape (2, N): row 0 = lower, row 1 = upper.
        ax.errorbar(frame["count"], frame["ctime"],
                    yerr=np.array(frame['cerr'].tolist()).T,
                    label=key+"_CTIME", marker='x')
        ax.errorbar(frame["count"], frame["wtime"],
                    yerr=np.array(frame['werr'].tolist()).T,
                    label=key+"_WTIME", marker='o', linestyle="dotted")
    ax.set(xlabel='work count', ylabel='time in nano-seconds')
    ax.legend(bbox_to_anchor=(0,1.4+loff), loc='upper center', ncol=1)
    plt.show()

In [None]:
def data(KEYS):
    """Show the result table and the mean ctime/wtime for each key in ``KEYS``."""
    for key in KEYS:
        print(key)
        frame = results[key]
        display(frame)
        ctime_mean = str(frame["ctime"].mean())
        wtime_mean = str(frame["wtime"].mean())
        print("mean ctime: " + ctime_mean + " mean wtime: " + wtime_mean)

In [None]:
# Load every default configuration (all work kinds, core pairs, machines,
# benchmark types and counts) into `results`.
processData()

In [None]:
# Machines under test and the "cpu0,cpu1" core pair used for each of them.
machines = ["don", "fedora_i9"]
cores = {
    "don": "3,17",
    "fedora_i9": "1,7",
}

In [None]:
# NOPLOOP / LOCAL_WORK for several core pairs on each machine.
plot(loff=.4, KEYS=[f"don-{pair}-NOPLOOP-LOCAL_WORK" for pair in ("3,17", "3,46", "5,17", "5,46")])
plot(loff=.4, KEYS=[f"fedora_i9-{pair}-NOPLOOP-LOCAL_WORK" for pair in ("0,3", "0,7", "1,3", "1,7")])

### Core Stability
Based on the plots above, the core pairs seem stable and equivalent.


In [None]:
# First compare local work with and without the server thread ...
for m in machines:
    plot(KEYS=[f"{m}-{cores[m]}-NOPLOOP-{kind}"
               for kind in ("LOCAL_WORK", "LOCAL_WORK_NO_SERVERTHREAD")])
# ... then local work against the read-shared variants.
for m in machines:
    plot(loff=.4,
         KEYS=[f"{m}-{cores[m]}-NOPLOOP-{kind}"
               for kind in ("LOCAL_WORK",
                            "LOCAL_WORK_WITH_REMOTE_READ_SHARED",
                            "REMOTE_WORK_READ_SHARED")])

###  Count Stability : Ignore smaller counts

There seems to be some noise when the process runs for too short a time, so we focus on the larger counts. We would also expect the timer to suffer more noise at very large counts, as interrupts and scheduling events will perturb the TSC values.

From here on we are looking only at the larger counts.

In [None]:
# Reprocess the results for only the large, stable counts.
# processData() replaces every entry of `results`, so all plots and tables
# below contain these counts only.
# NOTE(review): 51200 is not in this list although it lies between 25600 and
# 102400 — confirm the omission is intentional.
processData(COUNTS=[12800, 25600, 102400])

In [None]:
# Same core-pair comparison as before, now on the reprocessed results.
plot(loff=.6, KEYS=[f"don-{pair}-NOPLOOP-LOCAL_WORK" for pair in ("3,17", "3,46", "5,17", "5,46")])
plot(loff=.6, KEYS=[f"fedora_i9-{pair}-NOPLOOP-LOCAL_WORK" for pair in ("0,7", "0,3", "1,7", "1,3")])

In [None]:
# 4NOPLOOP / LOCAL_WORK on the chosen core pair of each machine.
for m in machines:
    plot(KEYS=["-".join([m, cores[m], "4NOPLOOP", "LOCAL_WORK"])])

In [None]:
# Compare the three work kinds for LOCAL_WORK on each machine.
for m in machines:
    plot(KEYS=[f"{m}-{cores[m]}-{work}-LOCAL_WORK"
               for work in ("NULLLOOP", "NOPLOOP", "4NOPLOOP")],
         loff=0.2)

In [None]:
# Local work with and without the server thread.
for m in machines:
    plot([f"{m}-{cores[m]}-NOPLOOP-{kind}"
          for kind in ("LOCAL_WORK", "LOCAL_WORK_NO_SERVERTHREAD")])

In [None]:
# Local work alone against local work with a remote read-shared line.
for m in machines:
    plot([f"{m}-{cores[m]}-NOPLOOP-{kind}"
          for kind in ("LOCAL_WORK", "LOCAL_WORK_WITH_REMOTE_READ_SHARED")])

In [None]:
# Local work against remote read-shared work: plot first, then the tables.
for m in machines:
    plot([f"{m}-{cores[m]}-NOPLOOP-{kind}"
          for kind in ("LOCAL_WORK", "REMOTE_WORK_READ_SHARED")],
         loff=0.2)
    data([f"{m}-{cores[m]}-NOPLOOP-{kind}"
          for kind in ("LOCAL_WORK", "REMOTE_WORK_READ_SHARED")])

## Summary of Results

The lowest overheads seem to be with the single WR line remote transport on both machines. These graphs and tables summarize the results.

In [None]:
# Local vs. remote read-shared work, both machines in one figure.
plot([f"{machine}-{cores[machine]}-NOPLOOP-{kind}"
      for machine in ("don", "fedora_i9")
      for kind in ("LOCAL_WORK", "REMOTE_WORK_READ_SHARED")],
     loff=0.29)

In [None]:
# Summary table for the read-shared remote transport.
# Every lookup below uses a key defined in this cell, so the cell runs on a
# fresh kernel and always reads the READ_SHARED results.
dlkey='don'+'-'+cores['don']+'-NOPLOOP-LOCAL_WORK'
dreadrkey='don'+'-'+cores['don']+'-NOPLOOP-REMOTE_WORK_READ_SHARED'
dreadlwork=results[dlkey]["wtime"].mean()
dreadlctime=results[dlkey]["ctime"].mean()
dreadrctime=results[dreadrkey]["ctime"].mean()
freadlkey='fedora_i9'+'-'+cores['fedora_i9']+'-NOPLOOP-LOCAL_WORK'
freadrkey='fedora_i9'+'-'+cores['fedora_i9']+'-NOPLOOP-REMOTE_WORK_READ_SHARED'
freadlwork=results[freadlkey]["wtime"].mean()
freadlctime=results[freadlkey]["ctime"].mean()
freadrctime=results[freadrkey]["ctime"].mean()
# Overheads are differences of means: local ctime minus wtime for the timing
# and loop overhead, remote ctime minus local ctime for the remote overhead.
summary = pd.DataFrame({"machine":["don-read", "fedora_i9-read"], 
                        "work (ns)":[dreadlwork,freadlwork], 
                        "local (ns)":[dreadlctime,freadlctime], 
                        "remote (ns)":[dreadrctime,freadrctime], 
                        "time and loop overhead (ns)":[dreadlctime-dreadlwork, freadlctime-freadlwork], 
                        "remote overhead (ns)":[dreadrctime-dreadlctime,freadrctime-freadlctime ]})
display(summary)

In [None]:
# Local vs. remote read/write-shared work, both machines in one figure.
plot([f"{machine}-{cores[machine]}-NOPLOOP-{kind}"
      for machine in ("don", "fedora_i9")
      for kind in ("LOCAL_WORK", "REMOTE_WORK_RW_SHARED")],
     loff=0.29)

In [None]:
# Summary table for the read/write-shared remote transport.
dlkey = f"don-{cores['don']}-NOPLOOP-LOCAL_WORK"
drkey = f"don-{cores['don']}-NOPLOOP-REMOTE_WORK_RW_SHARED"
dlwork = results[dlkey]["wtime"].mean()
dlctime = results[dlkey]["ctime"].mean()
drctime = results[drkey]["ctime"].mean()

flkey = f"fedora_i9-{cores['fedora_i9']}-NOPLOOP-LOCAL_WORK"
frkey = f"fedora_i9-{cores['fedora_i9']}-NOPLOOP-REMOTE_WORK_RW_SHARED"
flwork = results[flkey]["wtime"].mean()
flctime = results[flkey]["ctime"].mean()
frctime = results[frkey]["ctime"].mean()

# One row per machine; the overhead columns are differences of the means.
summary = pd.DataFrame({
    "machine": ["don-rw", "fedora_i9-rw"],
    "work (ns)": [dlwork, flwork],
    "local (ns)": [dlctime, flctime],
    "remote (ns)": [drctime, frctime],
    "time and loop overhead (ns)": [dlctime - dlwork, flctime - flwork],
    "remote overhead (ns)": [drctime - dlctime, frctime - flctime],
})
display(summary)