In [1]:
from IPython.display import HTML
# Render a button that shows/hides the notebook's input (code) cells.
# The script selects elements matching 'div.input' through `$` (presumably
# jQuery supplied by the notebook front end -- TODO confirm). code_toggle is
# also run once when the document is ready, so the inputs start out hidden.
HTML('''<script>
code_show=true; 
function code_toggle() {
 if (code_show){
 $('div.input').hide();
 } else {
 $('div.input').show();
 }
 code_show = !code_show
} 
$( document ).ready(code_toggle);
</script>
<form action="javascript:code_toggle()"><input type="submit" value="Click here to toggle on/off the raw code."></form>''')

In [2]:
# Import Collective Knowledge (http://github.com/ctuning/ck)
import ck.kernel as ck
# Print the version reported by the CK kernel module
print (ck.__version__)

In [3]:
# Version string defined by this notebook (presumably the notebook's own
# version -- TODO confirm what, if anything, reads it)
__version__ = '4.2'

from IPython.display import display
import matplotlib
import matplotlib.pyplot as plt
import seaborn
%matplotlib inline

import os
import json
import pandas
import numpy
import collections
import enum
import re


# UXPERF - CPUParallelism

###### Requires wa result_processor to have 'cpustates' enabled

Detailed breakdown of Parallelism of the device

CPU Parallelism - Per Cluster
* A Chart per `workload` per `device`
* Chart represents per cluster the % time spent with N cores active,
  where N is the number of cores. 0 is IDLE/OFF

The available averaging methods are defined in `AVG_METHODS`; the one actually used is selected by `avg_method`.
Valid methods are `median` and `mean`.

* `workload` = the wa workload provided in the agenda file
* `device` = the physical device the workload was run on

In [4]:
# Root directory holding the wa data to process.
# Expected layout:
#   BASE_DIR/
#       WORKLOAD_NAME/        (one or more)
#           DEVICE_NAME/      (one or more)
#               WA_DATA       (detected by a folder called `__meta`)
#
# This follows the structure from wa-per-device.py script v2.0 onwards.
#
# To generate new results, you should only need to change this line.
BASE_DIR = ''

# Averaging methods that the helper functions know how to apply
AVG_METHODS = enum.Enum('AVG_METHODS', ['MEDIAN', 'MEAN'])
# Averaging method used throughout the notebook; change it here
avg_method = AVG_METHODS['MEDIAN']

In [5]:
### CHART FORMATTING CODE ###

# Font sizes (S=ticks, M=everything else, L=title) and figure dimensions
font_size = dict(S=14, M=18, L=22)
seaborn.set_context("notebook", rc={"figure.figsize": [16, 9]})
seaborn.set_style('whitegrid', {"font.family": ["Gill Sans MT"]})

# ARM Template Colour Scheme, stored as (red, green, blue) integer tuples
arm_colour = (18, 140, 171)
arm_green  = (0, 169, 96)
arm_blue   = (0, 195, 220)
arm_purple = (118, 95, 151)
arm_red    = (207, 54, 74)
arm_grey   = (144, 147, 147)

# Helper Functions
def rgb_to_hex(rgb):
    """Format the first three integer items of `rgb` as a '#rrggbb' string."""
    red, green, blue = rgb[0], rgb[1], rgb[2]
    # Pack the three channels into one integer, red in the highest byte
    packed = (red << 16) | (green << 8) | blue
    return '#' + format(packed, '06x')

def temperature_palette(n_colors, grey_end=True):
    """Return a temperature-style palette built from seaborn's "RdYlGn" palette.

    n_colors: number of colours in the returned palette.
    grey_end: when true, the last colour of the palette is grey (arm_grey) and
              only n_colors - 1 colours are taken from "RdYlGn".
    """
    ramp_size = n_colors - 1 if grey_end else n_colors
    palette = seaborn.color_palette("RdYlGn", n_colors=ramp_size)
    # Flip the order in which seaborn returned the colours
    palette.reverse()
    if grey_end:
        palette.append(rgb_to_hex(arm_grey))
    return palette

def cluster_palette(color, n_colors, grey_end=True, grey_start=False):
    """Build a palette of progressively lighter tints of one colour.

    color:      base colour as an (r, g, b) tuple of 0-255 integers.
    n_colors:   total number of colours in the returned palette, including
                any grey entries requested below.
    grey_end:   make the last colour of the palette grey (arm_grey).
    grey_start: make the first colour of the palette grey (arm_grey).

    Returns a seaborn colour palette with n_colors entries.
    """
    def tint(rgb, scale):
        # Move each channel `scale` of the way from its value towards white
        return (int(rgb[0] + (255 - rgb[0]) * scale),
                int(rgb[1] + (255 - rgb[1]) * scale),
                int(rgb[2] + (255 - rgb[2]) * scale))

    grey = rgb_to_hex(arm_grey)

    # Each grey entry takes one of the n_colors slots. Previously n_colors
    # tints were always generated, so seaborn's truncation to n_colors dropped
    # the trailing grey and grey_end had no effect.
    col_count = n_colors
    if grey_start:
        col_count -= 1
    if grey_end:
        col_count -= 1

    palette = [grey] if grey_start else []
    # The tint step stays relative to n_colors so the tints themselves are the
    # same shades as before; range() of a non-positive count yields nothing.
    for i in range(col_count):
        palette.append(rgb_to_hex(tint(color, float(i) / n_colors)))
    if grey_end:
        palette.append(grey)
    return seaborn.color_palette(palette, n_colors=n_colors)

def arm_plot_formatting(title='', xlabel='', ylabel='', xrot=0, ncol=1, legtitle='', revleg_ax=None):
    """Apply the common legend, tick, title and axis-label formatting to a chart.

    title:     the title of the chart
    xlabel:    the xlabel of the chart
    ylabel:    the ylabel of the chart
    xrot:      rotation of the xticks
    ncol:      number of columns for the legend to use
    legtitle:  the legend title
    revleg_ax: axis of the chart whose legend entries should be listed in
               reverse order; leave as None to keep the normal order
    """
    # Legend sits outside the plot area, centred on its right-hand edge
    legend_options = dict(title=legtitle, loc='center left', bbox_to_anchor=(1, 0.5),
                          fontsize=font_size['M'], ncol=ncol)
    if not revleg_ax:
        plt.legend(**legend_options)
    else:
        handles, labels = revleg_ax.get_legend_handles_labels()
        revleg_ax.legend(reversed(handles), reversed(labels), **legend_options)

    plt.tick_params(labelsize=font_size['S'])
    plt.xticks(rotation=xrot)
    # The extra newlines add spacing between the text and the plot
    plt.title(title + '\n', fontsize=font_size['L'])
    plt.xlabel('\n' + xlabel, fontsize=font_size['M'], rotation=0, ha='center')
    plt.ylabel(ylabel + '\n', fontsize=font_size['M'], ha='center', va='center')
    
def arm_plot_save(tag, args):
    """Save the current plot as a PNG inside BASE_DIR.

    tag:  denotes the type of chart generated; first part of the file name.
    args: a value or list of values appended to the file name. Empty entries
          are skipped; the remaining parts are joined to `tag` with hyphens.
    """
    parts = args if isinstance(args, list) else [args]
    parts = [part for part in parts if part]
    image_name = '-'.join([tag] + parts) + '.png'
    plt.savefig(os.path.join(BASE_DIR, image_name), bbox_inches='tight')

# Build the default palette from the six ARM Template colours and activate it
arm_palette = seaborn.color_palette(
    [rgb_to_hex(rgb) for rgb in (arm_colour, arm_green, arm_blue,
                                 arm_purple, arm_red, arm_grey)],
    n_colors=6)
seaborn.set_palette(arm_palette)

In [6]:
### FUNCTIONS ###
# common to all uxperf scripts #

# Helper Functions
def dropzero(x):
    """Return only the rows of `x` that hold at least one non-zero value."""
    row_has_nonzero = (x.T != 0).any()
    return x[row_has_nonzero]

def normalise_data(x):
    """Subtract the value at row 0, column 0 of `x` from every element."""
    origin = x.iloc[0, 0]
    return x - origin

def get_clusters(file_path):
    """Read the core-cluster information from a JSON file in a meta folder.

    file_path: directory to search for a file whose name ends in 'json'.
               Candidates are taken in sorted name order so the choice is
               deterministic when several exist.

    Returns the value of data['device_config']['core_clusters'] from that
    file, or None (after printing a message) when the directory or JSON file
    cannot be found or the cluster entry is missing.
    """
    # A missing directory is reported the same way as a missing file, instead
    # of letting os.listdir raise.
    if not os.path.isdir(file_path):
        print ('Error could not find file:', file_path)
        return None

    json_file = ''
    for f in sorted(os.listdir(file_path)):
        if f.endswith('json'):
            json_file = os.path.join(file_path, f)
            break
    if not os.path.isfile(json_file):
        print ('Error could not find file:', file_path)
        return None

    with open(json_file, 'r') as j:
        data = json.load(j)
    try:
        clusters = data['device_config']['core_clusters']
        print ('Number of clusters:', len(set(clusters)))
    except (KeyError, IndexError, TypeError):
        # Only lookup/shape problems mean "no cluster info"; anything else
        # (e.g. KeyboardInterrupt) is no longer swallowed.
        print ('Unable to find core_clusters information... Will assume one cluster...')
        clusters = None
    return clusters

def get_iterations(file_path):
    """Collect the iteration numbers of successful runs from a status file.

    Every line containing 'OK' (other than a 'Run status' line) is split on
    spaces and its third non-empty field is read as the iteration number.

    Returns the list of iteration numbers, or None (after printing a message)
    when file_path is not an existing file.
    """
    if not os.path.isfile(file_path):
        print ('Error could not find file:', file_path)
        return None

    iterations = []
    with open(file_path, 'r') as status:
        for line in status:
            if 'OK' not in line or 'Run status' in line:
                continue
            fields = [field for field in line.strip().split(' ') if field]
            iterations.append(int(fields[2]))
    return iterations

def load_csv(file_path, drop=None):
    """Load a csv file into a DataFrame, optionally dropping unnecessary columns.

    file_path: path of the csv file to read.
    drop:      column label(s) to remove from the loaded data; nothing is
               removed when this is empty or None.

    Returns the DataFrame, or None (after printing a message) when file_path
    is not an existing file.
    """
    if not os.path.isfile(file_path):
        print ('Error could not find file:', file_path)
        return None
    frame = pandas.read_csv(file_path)
    return frame.drop(drop, axis=1) if drop else frame
    
def process_iterations(data, iterations):
    """Keep only the rows of `data` that belong to successful iterations.

    data:       DataFrame with an 'iteration' column, or None.
    iterations: collection of iteration numbers to keep, or None.

    Returns the filtered rows, or None when either argument is None.
    """
    if data is None or iterations is None:
        return None
    is_successful = data['iteration'].map(lambda x: x in iterations)
    return data[is_successful]

def avg_iterations_by(data, group):
    """Group the results and average each group, dropping the iteration column.

    data:  DataFrame of results containing an 'iteration' column, or None.
    group: column label(s) to group by; group order follows first appearance
           (sort=False).

    Returns the averaged DataFrame with its index name cleared, or None when
    data is None.
    Raises ValueError when avg_method is not one of the AVG_METHODS members
    handled here (previously this surfaced as an UnboundLocalError).
    """
    if data is None:
        return None

    groups = data.groupby(group, sort=False)
    if avg_method == AVG_METHODS.MEDIAN:
        avg = groups.median()
    elif avg_method == AVG_METHODS.MEAN:
        avg = groups.mean()
    else:
        raise ValueError('Unsupported averaging method: {!r}'.format(avg_method))

    avg = avg.drop('iteration', axis=1)
    avg.index.name = None
    return avg
    
def merge_idles(data):
    """Replace all idle-state rows with one combined 'IDLE' row.

    Rows whose index label matches 'MWAIT|CPUIDLE|WFI|sleep' are dropped. A
    new row labelled 'IDLE' is appended holding, for each column, 100 minus
    the sum of the remaining rows.

    Returns the new DataFrame, or None when data is None.
    """
    if data is None:
        return None

    noidle = data[~data.index.str.contains('MWAIT|CPUIDLE|WFI|sleep')]
    idle = pandas.DataFrame([100-x for x in noidle.sum()], index=noidle.columns, columns=['IDLE']).T
    # DataFrame.append was removed in pandas 2.0; concat is the supported
    # equivalent for stacking rows.
    return pandas.concat([noidle, idle])

def khz_to_mhz(data):
    """Convert the KHz index labels of `data` to MHz values, in place.

    Every index label except 'IDLE' has its 'KHz' suffix removed, is parsed
    as an integer and divided by 1000; 'IDLE' is kept unchanged. The index of
    `data` is replaced with the converted labels. Nothing is returned, and
    nothing happens when data is None.
    """
    if data is None:
        return

    mhz = []
    for state in data.index:
        # Compare by value: `is not` tests object identity, which is not
        # guaranteed to hold for equal strings.
        if state != 'IDLE':
            khz = int(state.replace('KHz', ''))
            mhz.append(khz/1000)
        else:
            mhz.append(state)
    data.index = mhz

def avg_columns(data_list):
    """Average each DataFrame across its columns and combine the results.

    data_list: iterable of (device, data) pairs; `device` becomes the column
               title and `data` is the DataFrame averaged row by row.

    Returns one DataFrame with a column per device, missing values filled
    with 0 and rows of only zeros removed. An empty data_list yields an empty
    DataFrame (previously it failed on None).
    Raises ValueError when avg_method is not one of the AVG_METHODS members
    handled here.
    """
    averaged = []
    for device, data in data_list:
        if avg_method == AVG_METHODS.MEDIAN:
            avg = data.median(axis=1)
        elif avg_method == AVG_METHODS.MEAN:
            avg = data.mean(axis=1)
        else:
            raise ValueError('Unsupported averaging method: {!r}'.format(avg_method))
        avg.name = device
        averaged.append(avg)

    if not averaged:
        return pandas.DataFrame()

    # Join all per-device columns in one concat rather than growing the
    # frame one column at a time inside the loop.
    results = pandas.concat(averaged, axis=1).fillna(0)
    return dropzero(results)

In [7]:
### FUNCTIONS ###
# unique to this uxperf script #

## Process raw input data

input: `BASE_DIR`

output: `DATA_SET`

In [10]:
# Obtain ALL WA results entries with meta via CK
r = ck.access({
    'action': 'search',
    'module_uoa': 'wa-result',
    'add_meta': 'yes',
})
# A positive 'return' value is handed to ck.jerr (presumably CK's error
# reporter -- TODO confirm)
if r['return'] > 0:
    ck.jerr(r)
lst = r['lst']
ck.out('Number of CK entries: ' + str(len(lst)))

In [12]:
# Walk every CK wa-result entry in `lst` and collect its averaged parallelism
# data into DATA_SET, keyed by workload name. Each value is a list of
# [device, data] pairs.
# (The entries come from the CK search results; the previous
# BASE_DIR/WORKLOAD/DEVICE directory walk is no longer used here.)
dir_count = 0
DATA_SET = {}

for l in lst:
    folder=os.path.join(l['path'],'results')
    meta=l['meta']['meta']

    workload=meta['workload_name']
    device=meta['local_device_uoa']

    # Valid wa data folder. The folder's existence is checked first so that an
    # entry without a results directory takes the skip branch below instead of
    # raising from os.listdir and aborting the whole loop.
    if os.path.isdir(folder) and '__meta' in os.listdir(folder):
        print ('Processing folder:', folder)
        print ('Workload:', workload)
        print ('Device: ', device)
        dir_count += 1
        iterations = get_iterations(os.path.join(folder, 'status.txt'))
        # 1 or more successful iterations
        if iterations is not None and len(iterations) > 0:
            print ('Number of successful iterations:', len(iterations))
            raw_data = load_csv(os.path.join(folder, 'parallel.csv'), ['id', 'workload'])
            valid_data = process_iterations(raw_data, iterations)
            data = avg_iterations_by(valid_data, ['cluster', u'number_of_cores'])
            if data is not None:
                DATA_SET.setdefault(workload, []).append([device, data])
            else:
                print ('Warning!!! No data found. Skipping folder...')
        else:
            print ('Warning!!! No successful iterations found. Skipping folder...')
    else:
        print ('Warning!!! Not a valid wa data folder. Skipping folder...')
    print()
print()
print ('Processed {} directories'.format(dir_count))
print ('Obtained {} data sets'.format(sum([len(DATA_SET[key]) for key in DATA_SET])))

## CPU Parallelism - Per Cluster

input: `DATA_SET`

In [13]:
# Build one plottable frame per (workload, device) pair and show it
plist = []
for workload in DATA_SET:
    print (workload)
    for device, data in DATA_SET[workload]:
        # Turn the group index back into ordinary columns
        pdata = data.reset_index()
        clusters = collections.Counter(pdata['cluster'].values).keys()
        # One of the distinct 'cluster' values is not counted as a cluster
        # (presumably an aggregate entry covering all cores -- TODO confirm)
        cluster_count = len(clusters) - 1
        # With at most one counted cluster, only the rows of cluster '0' are kept
        pdata = (pdata if (cluster_count > 1) else pdata[pdata['cluster'] == '0'])
        print (device)
        display(pdata)
        plist.append([workload, device, pdata])
    # Blank line between workloads; a bare `print` is a no-op in Python 3
    print()

In [14]:
# Draw and save one bar chart per (workload, device) pair
for workload, device, data in plist:
    # Row count of the most frequent 'cluster' value, used as the palette
    # size. Series.value_counts() sorts by count, so .iloc[0] is that count;
    # this replaces the deprecated top-level pandas.value_counts and the
    # positional `[0]` lookup on a label-indexed Series.
    ncols = data['cluster'].value_counts().iloc[0]
    plt.figure()
    # Grey comes first in the palette, followed by tints of the ARM colour
    seaborn.set_palette(cluster_palette(arm_colour, ncols, grey_end=False, grey_start=True))
    ax = seaborn.barplot(data=data, x='cluster', y='%time', hue='number_of_cores')
    ax.set_ylim(0,100)
    arm_plot_formatting(title='CPU Parallelism - Per Cluster\nNumber of active cores', xlabel='Workload: '+workload, ylabel='Time with N cores active (%)', legtitle=device)
    arm_plot_save('CP', [workload, device])