# Analyse kernel profiling

Sample command:
```
ck benchmark program:caffe --env.CK_CAFFE_BATCH_SIZE=1 \
  --deps.lib-caffe=cb3e77cde4b54140 --deps.caffemodel=ae96844061a5678d \
  --cmd_key=time_gpu --dvdt_prof --skip_stat_analysis \
  --tags=prof,alexnet --record --record_uoa=prof-training-alexnet \
  --repetitions=3
```

## Includes

### Standard

In [57]:
import os
import sys
import json
import time

### Scientific

In [58]:
import IPython as ip
import numpy as np
import scipy as sp
import pandas as pd
import matplotlib as mp

In [59]:
# Report the version of every scientific package used by this notebook.
for template, package in (
        ('IPython version: %s', ip),
        ('NumPy version: %s', np),
        ('SciPy version: %s', sp),
        ('Pandas version: %s', pd),
        ('Matplotlib version: %s', mp)):
    print(template % package.__version__)

IPython version: 5.3.0
NumPy version: 1.12.0
SciPy version: 0.18.1
Pandas version: 0.19.2
Matplotlib version: 2.0.0


In [60]:
import matplotlib.pyplot as plt
from matplotlib import cm
%matplotlib inline
# import scipy.stats as st

### Collective Knowledge

In [61]:
import ck.kernel as ck
print('CK version: %s' % ck.__version__)

CK version: 1.8.7


## Access experimental results

In [62]:
def get_experimental_results(tags):
    """Collect run characteristics of CK experiments into one DataFrame.

    Searches the 'local' CK repository for 'experiment' entries matching
    ``tags``, lists the points of every entry found and reads the
    'ckp-<point>.0001.json' file of each point. Only characteristics whose
    'run_success' value is non-empty are kept.

    Args:
        tags: Tag string handed to the CK 'search' action
            (e.g. 'alexnet,prof').

    Returns:
        A DataFrame indexed by (program, tags, repetition) with the columns
        'time (ms)' (from 'time_fw_ms'), 'per_layer_info' and 'dvdt_prof'.
        The 'tags' index level is always the placeholder 'unknown'.

    If a CK call fails, the CK error is printed and the interpreter exits
    with status 1.
    """
    repo_uoa = 'local'
    module_uoa = 'experiment'
    # Restrict the search to the same repository that list_points is asked
    # about below, so that both calls agree on where the entries live.
    r = ck.access({'action':'search', 'repo_uoa':repo_uoa, 'module_uoa':module_uoa, 'tags':tags})
    if r['return']>0:
        print("Error: %s" % r['error'])
        exit(1)
    experiments = r['lst']
    dfs = []
    for experiment in experiments:
        data_uoa = experiment['data_uoa']
        # The key must be spelled 'repo_uoa'; the previous 'repo_oua' was
        # silently ignored, so the repository was never taken into account.
        r = ck.access({'action':'list_points', 'repo_uoa':repo_uoa, 'module_uoa':module_uoa, 'data_uoa':data_uoa})
        if r['return']>0:
            print("Error: %s" % r['error'])
            exit(1)
        path = r['path']
        points = r['points']
        for point in points:
            with open(os.path.join(path, 'ckp-%s.0001.json' % point)) as point_file:
                point_data_raw = json.load(point_file)
            # DataFrame columns: one row per successful repetition.
            rows = [
                {
                    'time (ms)'   : np.float32(run_info['run'].get('time_fw_ms',0)),
                    'per_layer_info': run_info['run'].get('per_layer_info',[]),
                    'dvdt_prof'   : run_info['run'].get('dvdt_prof',[])
                }
                for run_info in point_data_raw['characteristics_list']
                if run_info['run'].get('run_success','')!=''
            ]
            df = pd.DataFrame(rows)
            df.columns.name = 'run characteristic'
            df.index.name = 'repetition'
            # DataFrame indices.
            df['program'] = point_data_raw['choices']['data_uoa']
            df['tags'] = 'unknown'
            df = df.set_index(['program','tags'], append=True)
            df = df.reorder_levels(('program', 'tags', 'repetition'))
            dfs.append(df)
    return pd.concat(dfs)

In [63]:
# Load the characteristics of all experiments found for the tags 'alexnet,prof'.
results = get_experimental_results('alexnet,prof')

## Show execution time

In [64]:
# Display every collected column; list-valued cells are abbreviated in the rendered table.
results

Unnamed: 0_level_0,Unnamed: 1_level_0,run characteristic,dvdt_prof,per_layer_info,time (ms)
program,tags,repetition,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
caffe,unknown,0,[{u'timestamp': {u'start': u'2017-03-03T15:52:...,"[{u'index': 0, u'direction': u'forward', u'tim...",23.0462
caffe,unknown,1,[{u'timestamp': {u'start': u'2017-03-03T15:52:...,"[{u'index': 0, u'direction': u'forward', u'tim...",19.5156
caffe,unknown,2,[{u'timestamp': {u'start': u'2017-03-03T15:52:...,"[{u'index': 0, u'direction': u'forward', u'tim...",18.2241
caffe,unknown,0,[],"[{u'index': 0, u'direction': u'forward', u'tim...",4.48102
caffe,unknown,1,[],"[{u'index': 0, u'direction': u'forward', u'tim...",4.82099
caffe,unknown,2,[],"[{u'index': 0, u'direction': u'forward', u'tim...",4.78851
caffe,unknown,0,[{u'timestamp': {u'start': u'2017-03-03T15:51:...,"[{u'index': 0, u'direction': u'forward', u'tim...",22.915199
caffe,unknown,1,[{u'timestamp': {u'start': u'2017-03-03T15:51:...,"[{u'index': 0, u'direction': u'forward', u'tim...",20.4767
caffe,unknown,2,[{u'timestamp': {u'start': u'2017-03-03T15:51:...,"[{u'index': 0, u'direction': u'forward', u'tim...",18.3388


In [65]:
# Display only the execution time column (double brackets keep the result a DataFrame).
results[['time (ms)']]

Unnamed: 0_level_0,Unnamed: 1_level_0,run characteristic,time (ms)
program,tags,repetition,Unnamed: 3_level_1
caffe,unknown,0,23.0462
caffe,unknown,1,19.5156
caffe,unknown,2,18.2241
caffe,unknown,0,4.48102
caffe,unknown,1,4.82099
caffe,unknown,2,4.78851
caffe,unknown,0,22.915199
caffe,unknown,1,20.4767
caffe,unknown,2,18.3388


In [66]:
# Display only the profiling trace column; an empty list means that run has no trace.
results[['dvdt_prof']]

Unnamed: 0_level_0,Unnamed: 1_level_0,run characteristic,dvdt_prof
program,tags,repetition,Unnamed: 3_level_1
caffe,unknown,0,[{u'timestamp': {u'start': u'2017-03-03T15:52:...
caffe,unknown,1,[{u'timestamp': {u'start': u'2017-03-03T15:52:...
caffe,unknown,2,[{u'timestamp': {u'start': u'2017-03-03T15:52:...
caffe,unknown,0,[]
caffe,unknown,1,[]
caffe,unknown,2,[]
caffe,unknown,0,[{u'timestamp': {u'start': u'2017-03-03T15:51:...
caffe,unknown,1,[{u'timestamp': {u'start': u'2017-03-03T15:51:...
caffe,unknown,2,[{u'timestamp': {u'start': u'2017-03-03T15:51:...


## Plot execution time

In [67]:
def plot(mean, std):
    """Draw ``mean`` as a bar chart, using ``std`` for the error bars.

    Args:
        mean: Object whose ``plot`` method is called (a pandas
            DataFrame/Series is assumed - TODO confirm with callers).
        std: Value passed to ``plot`` as ``yerr``.

    The legend is placed in the upper left corner. Nothing is returned.
    """
    bar_options = dict(
        yerr=std,
        title='Execution time (ms)',
        kind='bar',
        colormap=cm.autumn,
        figsize=[16, 8],
        rot=0,
        grid=True,
        legend=True,
    )
    axes = mean.plot(**bar_options)
    axes.legend(loc='upper left')

## Show profiling info

In [68]:
# Pick the first repetition of the first experiment for now.
# `trace` holds that row's 'dvdt_prof' value and `trace_layer` its
# 'per_layer_info' value; both names are reused by the cells below.
trace = results['dvdt_prof'].iloc[0]
trace_layer = results['per_layer_info'].iloc[0]
# An empty trace means the selected run carries no profiling data, so stop here.
if not trace:
    raise Exception("No OpenCL profiling information!")
# What's that experiment, by the way?
# (Shows the (program, tags, repetition) index tuple of the selected row.)
results['dvdt_prof'].index[0]


(u'caffe', 'unknown', 0)

In [69]:
# Ask CK for the environment entries tagged 'tool,opencl,dvdt,prof'.
r=ck.access({'action':'show', 'module_uoa':'env', 'tags':'tool,opencl,dvdt,prof'})
if r['return']>0:
    print ("Error: %s" % r['error'])
    exit(1)
# Get the path from the first returned environment entry.
# NOTE(review): raises IndexError if r['lst'] is empty - confirm that an
# entry is always registered before this cell runs.
dvdt_prof_dir=r['lst'][0]['meta']['env']['CK_ENV_TOOL_DVDT_PROF']
# Make the Python sources under <dvdt_prof_dir>/src/python importable.
dvdt_prof_src_python=os.path.join(dvdt_prof_dir,'src','python')
sys.path.append(dvdt_prof_src_python)
# Import the helper modules; each test() call prints the module's file name
# in the recorded cell output, which confirms the import worked.
import prof_wrangler as pw
pw.test()
import prof_common as pc
pc.test()

prof_wrangler.py
prof_common.py


In [70]:
# Pass both traces through pw.index_calls; the results replace the originals.
# (presumably this attaches a call index to every entry - confirm in prof_wrangler)
trace = pw.index_calls(trace)
# Time unit handed to pw.df_kernel_enqueues in the cells below.
unit = 'ms'
trace_layer = pw.index_calls(trace_layer)
# NOTE(review): unit2 is not referenced in the visible part of this notebook.
unit2 = 'ms'


In [71]:
# Print the number of entries in the OpenCL call trace, then in the per-layer trace.
for calls in (trace, trace_layer):
    print(len(calls))

2239
48


### Kernel enqueues

In [72]:
# Partial trace only containing kernel enqueues.
kernel_enqueues = pw.filter_calls(trace, ['clEnqueueNDRangeKernel'])
# Kernel enqueues as a DataFrame.
df_kernel_enqueues = pw.df_kernel_enqueues(kernel_enqueues, unit)
df_kernel_enqueues

Unnamed: 0_level_0,Unnamed: 1_level_0,lws0,lws1,lws2,gws0,gws1,gws2,gwo0,gwo1,gwo2,t1 - t0 (ms),p3 - p0 (ms),p3 - p2 (ms),p2 - p1 (ms),p1 - p0 (ms)
call_index,name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
11,fillbuffer_float,128,1,1,16384,1,1,0,0,0,0.856,0.026368,0.016384,0.005376,0.004608
17,fillbuffer_float,128,1,1,16384,1,1,0,0,0,0.042,0.017408,0.012032,0.002816,0.002560
23,fillbuffer_float,128,1,1,16384,1,1,0,0,0,0.866,0.043520,0.035840,0.003584,0.004096
41,im2col_float,128,1,1,16384,1,1,0,0,0,0.068,0.051968,0.046080,0.003072,0.002816
62,XgemmDirectTN,8,8,1,24,760,1,0,0,0,0.136,0.116480,0.108544,0.003584,0.004352
85,XgemmDirectTN,8,8,1,24,760,1,0,0,0,0.068,0.050432,0.043776,0.003072,0.003584
90,relu_forward_float,128,1,1,16384,1,1,0,0,0,0.031,0.016896,0.011264,0.002816,0.002816
96,fillbuffer_float,128,1,1,16384,1,1,0,0,0,0.045,0.017152,0.011264,0.003328,0.002560
102,fillbuffer_float,128,1,1,16384,1,1,0,0,0,0.858,0.020480,0.012288,0.004352,0.003840
115,lrn_full_float,0,0,0,3025,1,1,0,0,0,0.083,0.066560,0.061184,0.002816,0.002560


In [73]:
# Add a constant column so that summing per kernel also yields the number of enqueues.
df_kernel_enqueues['count'] = 1

In [74]:
# Sum every column per kernel name. The timing and 'count' totals are meaningful;
# the summed work-size columns (lws*/gws*/gwo*) are only a by-product.
df_kernel_enqueues.groupby(level='name').sum()

Unnamed: 0_level_0,lws0,lws1,lws2,gws0,gws1,gws2,gwo0,gwo1,gwo2,t1 - t0 (ms),p3 - p0 (ms),p3 - p2 (ms),p2 - p1 (ms),p1 - p0 (ms),count
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
Xaxpy,512,2,2,2048,2,2,0,0,0,0.065,0.031232,0.018432,0.006144,0.006656,2
XaxpyFast,1024,4,4,16384,4,4,0,0,0,0.147,0.065536,0.035072,0.014592,0.015872,4
XgemmDirectNN,24,24,3,2304,4352,3,0,0,0,4.442,4.378368,4.354816,0.01024,0.013312,3
XgemmDirectTN,208,208,26,1376,4912,26,0,0,0,2.934,2.470656,2.295808,0.082688,0.09216,26
XgemmDirectTT,64,64,8,344,3008,8,0,0,0,1.079,0.918272,0.858112,0.030464,0.029696,8
Xgemv,320,10,10,12640,10,10,0,0,0,0.989,0.79616,0.725504,0.035072,0.035584,10
XgemvFastRot,128,4,4,16384,4,4,0,0,0,2.537,2.448896,2.418432,0.015104,0.01536,4
dropout_forward_float,512,4,4,65536,4,4,0,0,0,0.117,0.054272,0.029952,0.013312,0.011008,4
fill_float,384,3,3,49152,3,3,0,0,0,0.084,0.038656,0.02048,0.009984,0.008192,3
fillbuffer_float,7040,55,55,901120,55,55,0,0,0,17.86,2.756864,2.37696,0.20096,0.178944,55


In [75]:
# df_kernel_enqueues.info(memory_usage=True)

In [76]:
# Number the enqueues cyclically (0 .. num_enqueues_per_repetition-1) and
# add that number to the index as the 'kernel_index' level.
num_enqueues_total = len(kernel_enqueues)
# NOTE(review): hard-coded period of 4 - confirm that it matches the number
# of kernel enqueues per repetition for this trace.
num_enqueues_per_repetition = 4
# .values drops the Series index, so the numbers are assigned by position.
df_kernel_enqueues['kernel_index'] = (pd.Series(range(num_enqueues_total)) % num_enqueues_per_repetition).values
df_kernel_enqueues = df_kernel_enqueues \
    .set_index('kernel_index', append=True) \
    .reorder_levels(['call_index','kernel_index','name'])

In [77]:
# Summary statistics (count, mean, std, min, quartiles, max) of every column per kernel_index.
df_kernel_enqueues_stats = df_kernel_enqueues.groupby(level='kernel_index').describe()
df_kernel_enqueues_stats

Unnamed: 0_level_0,Unnamed: 1_level_0,lws0,lws1,lws2,gws0,gws1,gws2,gwo0,gwo1,gwo2,t1 - t0 (ms),p3 - p0 (ms),p3 - p2 (ms),p2 - p1 (ms),p1 - p0 (ms),count
kernel_index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
0,count,44.0,44.0,44.0,44.0,44.0,44.0,44.0,44.0,44.0,44.0,44.0,44.0,44.0,44.0,44.0
0,mean,98.0,2.75,1.0,10103.272727,91.295455,1.0,0.0,0.0,0.0,0.19025,0.061661,0.054906,0.003409,0.003345,1.0
0,std,69.015671,3.066132,0.0,7710.317064,233.187975,0.0,0.0,0.0,0.0,0.307862,0.091004,0.090583,0.000602,0.000682,0.0
0,min,8.0,1.0,1.0,24.0,1.0,1.0,0.0,0.0,0.0,0.024,0.011776,0.006144,0.00256,0.002304,1.0
0,25%,26.0,1.0,1.0,216.0,1.0,1.0,0.0,0.0,0.0,0.038,0.015808,0.008768,0.003072,0.002816,1.0
0,50%,128.0,1.0,1.0,16384.0,1.0,1.0,0.0,0.0,0.0,0.0445,0.02176,0.015872,0.003328,0.003328,1.0
0,75%,128.0,2.75,1.0,16384.0,12.75,1.0,0.0,0.0,0.0,0.14025,0.062208,0.05664,0.003648,0.00384,1.0
0,max,256.0,8.0,1.0,16384.0,1024.0,1.0,0.0,0.0,0.0,1.306,0.4608,0.452352,0.005376,0.005376,1.0
1,count,44.0,44.0,44.0,44.0,44.0,44.0,44.0,44.0,44.0,44.0,44.0,44.0,44.0,44.0,44.0
1,mean,101.090909,2.227273,0.954545,12134.045455,75.909091,1.0,0.0,0.0,0.0,0.194045,0.099299,0.092881,0.003334,0.003084,1.0


# KERNEL XgemmDirectNN

In [86]:
# Partial trace only containing kernel enqueues.
kernel_enqueues = pw.filter_calls(trace, ['clEnqueueNDRangeKernel'])
# Kernel enqueues as a DataFrame.
df_kernel_enqueues = pw.df_kernel_enqueues(kernel_enqueues, unit)
# Keep only the XgemmDirectNN rows. .loc is the label-based replacement for
# the deprecated .ix, and the explicit copy ensures the column assignments
# below act on an independent frame instead of a slice.
df_xgemm_directnn_enqueues = df_kernel_enqueues.swaplevel().loc['XgemmDirectNN'].copy()
# Placeholders, overwritten with the real sizes at the end of the cell.
df_xgemm_directnn_enqueues['kSizeM'] = 'N/A'
df_xgemm_directnn_enqueues['kSizeN'] = 'N/A'
df_xgemm_directnn_enqueues['kSizeK'] = 'N/A'

# For every XgemmDirectNN enqueue, collect the values of kernel arguments
# 0, 1 and 2 that were set on the same kernel object; they are stored as
# the kSizeM, kSizeN and kSizeK columns.
setkernel_enqueues = pw.filter_calls(trace, ['clSetKernelArg'])
kernel_entries = kernel_enqueues
xgemm_entries = []
for kernel_count in kernel_entries:
    if kernel_count['name'] == 'XgemmDirectNN':
        xgemm_index = kernel_count['kernel']
        xgemm_matrix_sizes = []
        for k in setkernel_enqueues:
            if k['kernel'] == xgemm_index and k['arg_index'] in (0, 1, 2):
                xgemm_matrix_sizes.append(pc.hex_str_as_int(k['arg_value']))
        xgemm_entries.append(tuple(xgemm_matrix_sizes))

df_xgemm_directnn_enqueues[['kSizeM', 'kSizeN', 'kSizeK']] = xgemm_entries
# df_xgemm_directnn_enqueues

In [79]:
# GFLOPS = 2*M*N*K operations divided by the 'p3 - p2 (ms)' time;
# the factor 1000 converts milliseconds to seconds and 1.0e-9 scales to giga.
df_xgemm_directnn_enqueues['GFLOPS'] = 2 * 1.0e-9 * 1000 *df_xgemm_directnn_enqueues['kSizeM'] * df_xgemm_directnn_enqueues['kSizeN'] * df_xgemm_directnn_enqueues['kSizeK']/ df_xgemm_directnn_enqueues['p3 - p2 (ms)']

In [80]:
# Display the XgemmDirectNN enqueues together with their matrix sizes and GFLOPS.
df_xgemm_directnn_enqueues

Unnamed: 0_level_0,lws0,lws1,lws2,gws0,gws1,gws2,gwo0,gwo1,gwo2,t1 - t0 (ms),p3 - p0 (ms),p3 - p2 (ms),p2 - p1 (ms),p1 - p0 (ms),kSizeM,kSizeN,kSizeK,GFLOPS
call_index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
1476,8,8,1,256,1024,1,0,0,0,0.309,0.288768,0.2816,0.003328,0.00384,1000,4096,1,29.090909
1541,8,8,1,1024,1024,1,0,0,0,1.121,1.09824,1.09056,0.003584,0.004096,4096,4096,1,30.768075
1606,8,8,1,1024,2304,1,0,0,0,3.012,2.99136,2.982656,0.003328,0.005376,4096,9216,1,25.312162


# KERNEL XgemmDirectTN


In [87]:
# Partial trace only containing kernel enqueues.
kernel_enqueues = pw.filter_calls(trace, ['clEnqueueNDRangeKernel'])
# Kernel enqueues as a DataFrame.
df_kernel_enqueues = pw.df_kernel_enqueues(kernel_enqueues, unit)
# Keep only the XgemmDirectTN rows (the variable name is shared with the
# XgemmDirectNN cell). .loc is the label-based replacement for the
# deprecated .ix, and the explicit copy ensures the column assignments below
# act on an independent frame instead of a slice.
df_xgemm_directnn_enqueues = df_kernel_enqueues.swaplevel().loc['XgemmDirectTN'].copy()
# Placeholders, overwritten with the real sizes below.
df_xgemm_directnn_enqueues['kSizeM'] = 'N/A'
df_xgemm_directnn_enqueues['kSizeN'] = 'N/A'
df_xgemm_directnn_enqueues['kSizeK'] = 'N/A'

# For every XgemmDirectTN enqueue, collect the values of kernel arguments
# 0, 1 and 2 that were set on the same kernel object; they are stored as
# the kSizeM, kSizeN and kSizeK columns.
setkernel_enqueues = pw.filter_calls(trace, ['clSetKernelArg'])
kernel_entries = kernel_enqueues
xgemm_entries = []
for kernel_count in kernel_entries:
    if kernel_count['name'] == 'XgemmDirectTN':
        xgemm_index = kernel_count['kernel']
        xgemm_matrix_sizes = []
        for k in setkernel_enqueues:
            if k['kernel'] == xgemm_index and k['arg_index'] in (0, 1, 2):
                xgemm_matrix_sizes.append(pc.hex_str_as_int(k['arg_value']))
        xgemm_entries.append(tuple(xgemm_matrix_sizes))
# Number of XgemmDirectTN enqueues found.
print(len(xgemm_entries))

df_xgemm_directnn_enqueues[['kSizeM', 'kSizeN', 'kSizeK']] = xgemm_entries
# GFLOPS = 2*M*N*K operations divided by the 'p3 - p2 (ms)' time;
# the factor 1000 converts milliseconds to seconds and 1.0e-9 scales to giga.
df_xgemm_directnn_enqueues['GFLOPS'] = (
    2 * 1.0e-9 * 1000
    * df_xgemm_directnn_enqueues['kSizeM']
    * df_xgemm_directnn_enqueues['kSizeN']
    * df_xgemm_directnn_enqueues['kSizeK']
    / df_xgemm_directnn_enqueues['p3 - p2 (ms)'])
df_xgemm_directnn_enqueues


26


Unnamed: 0_level_0,lws0,lws1,lws2,gws0,gws1,gws2,gwo0,gwo1,gwo2,t1 - t0 (ms),p3 - p0 (ms),p3 - p2 (ms),p2 - p1 (ms),p1 - p0 (ms),kSizeM,kSizeN,kSizeK,GFLOPS
call_index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
62,8,8,1,24,760,1,0,0,0,0.136,0.11648,0.108544,0.003584,0.004352,96,3025,363,1942.349646
85,8,8,1,24,760,1,0,0,0,0.068,0.050432,0.043776,0.003072,0.003584,96,3025,1,13.267544
191,8,8,1,32,184,1,0,0,0,0.146,0.128256,0.121856,0.003072,0.003328,128,729,1200,1837.815126
210,8,8,1,32,184,1,0,0,0,0.151,0.1344,0.128,0.002816,0.003584,128,729,1200,1749.6
233,8,8,1,64,184,1,0,0,0,0.051,0.033792,0.027392,0.002816,0.003584,256,729,1,13.626168
339,8,8,1,96,48,1,0,0,0,0.207,0.189696,0.183296,0.002816,0.003584,384,169,2304,1631.463687
362,8,8,1,96,48,1,0,0,0,0.038,0.021504,0.01536,0.003072,0.003072,384,169,1,8.45
412,8,8,1,48,48,1,0,0,0,0.144,0.126976,0.119808,0.003328,0.00384,192,169,1728,936.0
431,8,8,1,48,48,1,0,0,0,0.141,0.123136,0.116736,0.003072,0.003328,192,169,1728,960.631579
454,8,8,1,96,48,1,0,0,0,0.041,0.02304,0.016384,0.003328,0.003328,384,169,1,7.921875


# KERNEL XgemmDirectTT


In [88]:
# Partial trace only containing kernel enqueues.
kernel_enqueues = pw.filter_calls(trace, ['clEnqueueNDRangeKernel'])
# Kernel enqueues as a DataFrame.
df_kernel_enqueues = pw.df_kernel_enqueues(kernel_enqueues, unit)
# Keep only the XgemmDirectTT rows (the variable name is shared with the
# XgemmDirectNN cell). .loc is the label-based replacement for the
# deprecated .ix, and the explicit copy ensures the column assignments below
# act on an independent frame instead of a slice.
df_xgemm_directnn_enqueues = df_kernel_enqueues.swaplevel().loc['XgemmDirectTT'].copy()
# Placeholders, overwritten with the real sizes below.
df_xgemm_directnn_enqueues['kSizeM'] = 'N/A'
df_xgemm_directnn_enqueues['kSizeN'] = 'N/A'
df_xgemm_directnn_enqueues['kSizeK'] = 'N/A'

# For every XgemmDirectTT enqueue, collect the values of kernel arguments
# 0, 1 and 2 that were set on the same kernel object; they are stored as
# the kSizeM, kSizeN and kSizeK columns.
setkernel_enqueues = pw.filter_calls(trace, ['clSetKernelArg'])
kernel_entries = kernel_enqueues
xgemm_entries = []
for kernel_count in kernel_entries:
    if kernel_count['name'] == 'XgemmDirectTT':
        xgemm_index = kernel_count['kernel']
        xgemm_matrix_sizes = []
        for k in setkernel_enqueues:
            if k['kernel'] == xgemm_index and k['arg_index'] in (0, 1, 2):
                xgemm_matrix_sizes.append(pc.hex_str_as_int(k['arg_value']))
        xgemm_entries.append(tuple(xgemm_matrix_sizes))
# Number of XgemmDirectTT enqueues found.
print(len(xgemm_entries))

# The sizes are assigned once; the original cell repeated this statement.
df_xgemm_directnn_enqueues[['kSizeM', 'kSizeN', 'kSizeK']] = xgemm_entries
# GFLOPS = 2*M*N*K operations divided by the 'p3 - p2 (ms)' time;
# the factor 1000 converts milliseconds to seconds and 1.0e-9 scales to giga.
df_xgemm_directnn_enqueues['GFLOPS'] = (
    2 * 1.0e-9 * 1000
    * df_xgemm_directnn_enqueues['kSizeM']
    * df_xgemm_directnn_enqueues['kSizeN']
    * df_xgemm_directnn_enqueues['kSizeK']
    / df_xgemm_directnn_enqueues['p3 - p2 (ms)'])

df_xgemm_directnn_enqueues

8


Unnamed: 0_level_0,lws0,lws1,lws2,gws0,gws1,gws2,gwo0,gwo1,gwo2,t1 - t0 (ms),p3 - p0 (ms),p3 - p2 (ms),p2 - p1 (ms),p1 - p0 (ms),kSizeM,kSizeN,kSizeK,GFLOPS
call_index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
1751,8,8,1,32,432,1,0,0,0,0.077,0.05888,0.052224,0.003072,0.003584,128,1728,169,1431.529412
1770,8,8,1,32,432,1,0,0,0,0.088,0.06016,0.052224,0.004608,0.003328,128,1728,169,1431.529412
1849,8,8,1,48,432,1,0,0,0,0.1,0.080128,0.072704,0.003584,0.00384,192,1728,169,1542.422535
1868,8,8,1,48,432,1,0,0,0,0.099,0.081408,0.074752,0.003072,0.003584,192,1728,169,1500.164384
1947,8,8,1,96,576,1,0,0,0,0.172,0.153344,0.145408,0.003584,0.004352,384,2304,169,2056.56338
2080,8,8,1,32,304,1,0,0,0,0.141,0.120576,0.11264,0.004352,0.003584,128,1200,729,1988.181818
2099,8,8,1,32,304,1,0,0,0,0.139,0.119808,0.111616,0.004608,0.003584,128,1200,729,2006.422018
2232,8,8,1,24,96,1,0,0,0,0.263,0.243968,0.236544,0.003584,0.00384,96,363,3025,891.294643


### Kernel per layer
 


In [83]:
def convert_time(t):
    """Convert a time-of-day string to seconds since midnight.

    Args:
        t: Time formatted as 'hh:mm:ss' with an optional fractional part
           after a dot, e.g. '15:52:25.271373'.

    Returns:
        The number of seconds as a float, including the fractional part.

    Raises:
        ValueError: If the part before the dot is not three ':'-separated
            integers, or the fractional part is not numeric.
    """
    # Split off the decimals; `fraction` is '' when the string has no dot,
    # which previously caused an IndexError.
    clock, _, fraction = t.partition(".")
    h, m, s = clock.split(":")
    tsec = (int(h)*3600) + (int(m)*60) + int(s)
    decimals = float("0." + fraction) if fraction else 0.0
    return float(tsec) + decimals

In [84]:
# Sizes of the OpenCL call trace and of the per-layer trace.
print(len(trace))
print(len(trace_layer))

# Time of day (in seconds) of every per-layer entry except the first one.
# The layer timestamp has the form '<date> <hh:mm:ss.ffffff>'.
epoch_layer = []
for tlc in trace_layer[1:]:
    epoch_layer.append(convert_time(tlc['timestamp'].split(" ")[1]))

# Show the last layer timestamp next to the end of the last traced call.
print("````````````````````````````````````")
print(tlc['timestamp'].split(" "))
print("````````````````````````````````````")
print(trace[len(trace)-1]['timestamp']['end'])
print("````````````````````````````````````")
p = len(epoch_layer)
last_trace = 0  # not used in this cell

# Walk the calls in order and attribute each one to the current layer while
# it ends before that layer's timestamp; otherwise move on to the next layer.
lc = 0
for i, call in enumerate(trace):
    if lc >= p:
        # Every layer timestamp has been passed: stop instead of indexing
        # past the end of epoch_layer.
        print("%s trace entries end after the last layer timestamp" % (len(trace) - i))
        break
    # The call timestamp has the form '<date>T<hh:mm:ss.ffffff>'.
    t = call['timestamp']['end'].split("T")[1]
    nt = convert_time(t)

    if nt < epoch_layer[lc]:
        print("%s (%s) belongs to %s" % (nt, t, epoch_layer[lc]))
    else:
        print("##############################################")
        print("new layer %s ( how many trace %s)" % (lc, i))
        lc = lc + 1


t = trace[len(trace)-1]['timestamp']['end'].split("T")[1]
t = convert_time(t)
print("last converted %s last epoch layer %s" % (t, epoch_layer[p-1]))
    

2239
48
````````````````````````````````````
[u'0303', u'15:52:25.271373']
````````````````````````````````````
2017-03-03T15:52:25.271458
````````````````````````````````````
57145.014973 (15:52:25.014973) belongs to 57145.27118
57145.023487 (15:52:25.023487) belongs to 57145.27118
57145.035422 (15:52:25.035422) belongs to 57145.27118
57145.036137 (15:52:25.036137) belongs to 57145.27118
57145.115602 (15:52:25.115602) belongs to 57145.27118
57145.116714 (15:52:25.116714) belongs to 57145.27118
57145.116745 (15:52:25.116745) belongs to 57145.27118
57145.116772 (15:52:25.116772) belongs to 57145.27118
57145.116785 (15:52:25.116785) belongs to 57145.27118
57145.116797 (15:52:25.116797) belongs to 57145.27118
57145.116809 (15:52:25.116809) belongs to 57145.27118
57145.117681 (15:52:25.117681) belongs to 57145.27118
57145.117714 (15:52:25.117714) belongs to 57145.27118
57145.117729 (15:52:25.117729) belongs to 57145.27118
57145.11774 (15:52:25.117740) belongs to 57145.27118
57145.117751 (1