In [1]:
#!/usr/bin/python3

import os
import matplotlib.pyplot as plt
import matplotlib.dates as matdates
from matplotlib2tikz import save as tikz_save
import numpy as np
import pandas as pd
import locale
import statistics
from scipy.stats.mstats import gmean

In [2]:
# Collect the comerge run logs (``*run-comerge*.log``) found in the current directory.
comerge_run_list = [
    entry
    for entry in os.listdir('.')
    if entry.endswith('.log') and 'run-comerge' in entry
]
print(comerge_run_list)

['run-comerge-2019_07_20__11_51_50-run-1.log', 'run-comerge-2019_07_20__11_51_50-run-2.log', 'run-comerge-2019_07_20__11_51_50-run-3.log', 'run-comerge-2019_07_20__11_51_50-run-4.log', 'run-comerge-2019_07_20__11_51_50-run-5.log']


In [3]:
# The first line of every run log is split on single spaces; its second token
# is recorded as that run's results directory.
results_dirs = []
for log in comerge_run_list:
    with open(log, 'rt') as fd:
        line = fd.readline().strip().split(' ')
    results_dirs.append(line[1])
print(results_dirs)

['./comerge-pb-results-2019_07_20__11_51_50', './comerge-pb-results-2019_07_20__14_31_07', './comerge-pb-results-2019_07_20__17_10_53', './comerge-pb-results-2019_07_20__19_51_06', './comerge-pb-results-2019_07_20__22_33_51']


In [4]:
# Map each results directory, minus its first two characters (the './' prefix
# visible in the printed list), to the names of the files it contains.
data = dict(
    (results_dir[2:], os.listdir(results_dir))
    for results_dir in results_dirs
)
print(data)

{'comerge-pb-results-2019_07_20__11_51_50': ['pb-dram0-intelsdp1044-2019_07_20__11_51_50.log', 'pb-dram1-intelsdp1044-2019_07_20__11_51_50.log', 'pb-make-intelsdp1044-2019_07_20__11_51_50.log', 'pb-pmem1-intelsdp1044-2019_07_20__11_51_50.log', 'pb-pmem7-intelsdp1044-2019_07_20__11_51_50.log'], 'comerge-pb-results-2019_07_20__14_31_07': ['pb-dram0-intelsdp1044-2019_07_20__14_31_07.log', 'pb-dram1-intelsdp1044-2019_07_20__14_31_07.log', 'pb-make-intelsdp1044-2019_07_20__14_31_07.log', 'pb-pmem1-intelsdp1044-2019_07_20__14_31_07.log', 'pb-pmem7-intelsdp1044-2019_07_20__14_31_07.log'], 'comerge-pb-results-2019_07_20__17_10_53': ['pb-dram0-intelsdp1044-2019_07_20__17_10_53.log', 'pb-dram1-intelsdp1044-2019_07_20__17_10_53.log', 'pb-make-intelsdp1044-2019_07_20__17_10_53.log', 'pb-pmem1-intelsdp1044-2019_07_20__17_10_53.log', 'pb-pmem7-intelsdp1044-2019_07_20__17_10_53.log'], 'comerge-pb-results-2019_07_20__19_51_06': ['pb-dram0-intelsdp1044-2019_07_20__19_51_06.log', 'pb-dram1-intelsdp1044-

In [5]:
def get_data_key(logname):
    """Given a log file name, figure out what kind of memory was being used and return the correct key.

    The name is searched for the substrings 'dram0', 'dram1', 'pmem1',
    'pmem7' and 'make', in that order; the first one found is returned.

    Raises:
        ValueError: if none of the known substrings occurs in ``logname``.
    """
    for key in ('dram0', 'dram1', 'pmem1', 'pmem7', 'make'):
        if key in logname:
            return key
    raise ValueError('Unrecognized log file name: {!r}'.format(logname))

In [6]:
class PerfDataTypes:
    """
    Parsers for the individual counter lines of a ``perf stat`` report.

    Every public parser takes one line of text and returns a one-entry dict
    ``{counter_name: tuple_of_fields}``; the parsers for counters reported as
    ``<not supported>`` return ``None``. Passing ``None`` instead of a line
    makes a parser use its built-in sample line (this is what ``test`` does).

    Sample of the report these parsers were written against:

             11,266.70 msec task-clock                #    1.000 CPUs utilized
                     4      context-switches          #    0.355 M/sec
                     0      cpu-migrations            #    0.000 K/sec
                22,030      page-faults               # 1955.441 M/sec
        41,292,991,932      cycles                    # 3665275.336 GHz                   (30.75%)
        57,539,842,386      instructions              #    1.39  insn per cycle           (38.44%)
         6,774,876,761      branches                  # 601356005.770 M/sec               (38.45%)
             4,832,518      branch-misses             #    0.07% of all branches          (38.46%)
        13,526,507,060      L1-dcache-loads           # 1200648594.000 M/sec              (38.47%)
         9,012,615,601      L1-dcache-load-misses     #   66.63% of all L1-dcache hits    (38.47%)
           849,715,461      LLC-loads                 # 75422994.941 M/sec                (30.78%)
             4,911,229      LLC-load-misses           #    0.58% of all LL-cache hits     (30.78%)
       <not supported>      L1-icache-loads
             3,776,205      L1-icache-load-misses                                         (30.78%)
        13,547,867,184      dTLB-loads                # 1202544575.182 M/sec              (30.78%)
         2,066,428,912      dTLB-load-misses          #   15.25% of all dTLB cache hits   (30.77%)
                   338      iTLB-loads                #   30.002 M/sec                    (30.76%)
                 5,183      iTLB-load-misses          # 1533.43% of all iTLB cache hits   (30.75%)
       <not supported>      L1-dcache-prefetches
       <not supported>      L1-dcache-prefetch-misses
    """

    # Sample lines used when a parser is called with ``line=None`` (self-test).
    _SAMPLES = {
        'task_clock': "             11,266.70 msec task-clock                #    1.000 CPUs utilized",
        'context_switches': "                     4      context-switches          #    0.355 M/sec",
        'cpu_migrations': "0      cpu-migrations            #    0.000 K/sec",
        'page_faults': "                22,030      page-faults               # 1955.441 M/sec    ",
        'cycles': "        41,292,991,932      cycles                    # 3665275.336 GHz                   (30.75%)",
        'instructions': "        57,539,842,386      instructions              #    1.39  insn per cycle           (38.44%)",
        'branches': "         6,774,876,761      branches                  # 601356005.770 M/sec               (38.45%)",
        'branch_misses': "             4,832,518      branch-misses             #    0.07% of all branches          (38.46%)",
        'l1_dcache_loads': "        13,526,507,060      L1-dcache-loads           # 1200648594.000 M/sec              (38.47%)        ",
        'l1_dcache_load_misses': "         9,012,615,601      L1-dcache-load-misses     #   66.63% of all L1-dcache hits    (38.47%)        ",
        'llc_loads': "           849,715,461      LLC-loads                 # 75422994.941 M/sec                (30.78%)",
        'llc_load_misses': "             4,911,229      LLC-load-misses           #    0.58% of all LL-cache hits     (30.78%)",
        'l1_icache_loads': "       <not supported>      L1-icache-loads",
        'l1_icache_load_misses': "             3,776,205      L1-icache-load-misses                                         (30.78%)",
        'dtlb_loads': "        13,547,867,184      dTLB-loads                # 1202544575.182 M/sec              (30.78%)",
        'dtlb_load_misses': "         2,066,428,912      dTLB-load-misses          #   15.25% of all dTLB cache hits   (30.77%)",
        'itlb_loads': "                   338      iTLB-loads                #   30.002 M/sec                    (30.76%)",
        'itlb_load_misses': "                 5,183      iTLB-load-misses          # 1533.43% of all iTLB cache hits   (30.75%)",
        'l1_dcache_prefetches': "       <not supported>      L1-dcache-prefetches",
        'l1_dcache_prefetch_misses': "       <not supported>      L1-dcache-prefetch-misses",
    }

    # ------------------------------------------------------------------ #
    # Number conversion
    # ------------------------------------------------------------------ #

    @staticmethod
    def convert_float(value):
        """Convert a string such as '11,266.70' to a float.

        The ',' digit grouping is removed explicitly, so the result does not
        depend on (and the call does not modify) the process locale.
        """
        return float(value.replace(',', ''))

    @staticmethod
    def convert_int(value):
        """Convert a string such as '41,292,991,932' to an int.

        The ',' digit grouping is removed explicitly, so the result does not
        depend on (and the call does not modify) the process locale.
        """
        return int(value.replace(',', ''))

    # ------------------------------------------------------------------ #
    # Private building blocks shared by the per-counter parsers
    # ------------------------------------------------------------------ #

    @staticmethod
    def _split_fields(line):
        """Split a line at '#' and return (tokens before it, tokens after it)."""
        fields = line.strip().split('#')
        return fields[0].split(), fields[1].split()

    @staticmethod
    def _coverage(token):
        """Return the number inside a '(NN.NN%)' token, or None for any other token.

        NOTE(review): the sample report carries this trailing column on most
        lines but not all; presumably perf omits it in some runs - confirm.
        """
        if '(' not in token or '%' not in token:
            return None
        return PerfDataTypes.convert_float(token[token.index('(') + 1:token.index('%')])

    @staticmethod
    def _rate_counter(line, with_coverage):
        """Parse '<count> <name> # <rate> <unit> [(NN%)]' into {name: (count, rate, unit[, coverage])}."""
        head, tail = PerfDataTypes._split_fields(line)
        values = [PerfDataTypes.convert_int(head[0]),
                  PerfDataTypes.convert_float(tail[0]),
                  tail[1]]
        if with_coverage:
            values.append(PerfDataTypes._coverage(tail[-1]))
        return {head[-1]: tuple(values)}

    @staticmethod
    def _share_counter(line, words, with_percent):
        """Parse '<count> <name> # <pct>% <description> [(NN%)]'.

        ``words`` is how many tokens after the percentage form the description.
        The percentage itself is only included in the tuple when
        ``with_percent`` is true.
        """
        head, tail = PerfDataTypes._split_fields(line)
        values = [PerfDataTypes.convert_int(head[0])]
        if with_percent:
            values.append(PerfDataTypes.convert_float(tail[0][:tail[0].index('%')]))
        values.append(" ".join(tail[1:1 + words]))
        values.append(PerfDataTypes._coverage(tail[-1]))
        return {head[-1]: tuple(values)}

    @staticmethod
    def _unsupported_counter(line):
        """Return None for a '<not supported>' line; anything else is printed and rejected."""
        if 'not supported' in line: return None
        print(line)
        raise NotImplementedError

    # ------------------------------------------------------------------ #
    # Public per-counter parsers
    # ------------------------------------------------------------------ #

    @staticmethod
    def task_clock(line):
        """Return {'task-clock': (value, unit, cpus, 'CPUs utilized')}."""
        if line is None: line = PerfDataTypes._SAMPLES['task_clock'] # Testing
        head, tail = PerfDataTypes._split_fields(line)
        # head: ['11,266.70', 'msec', 'task-clock']; tail: ['1.000', 'CPUs', 'utilized']
        return {head[-1]: (PerfDataTypes.convert_float(head[0]),
                           head[1],
                           PerfDataTypes.convert_float(tail[0]),
                           " ".join(tail[1:]))}

    @staticmethod
    def context_switches(line):
        """Return {'context-switches': (count, rate, unit)}."""
        if line is None: line = PerfDataTypes._SAMPLES['context_switches']
        return PerfDataTypes._rate_counter(line, with_coverage=False)

    @staticmethod
    def cpu_migrations(line):
        """Return {'cpu-migrations': (count, rate, unit)}."""
        if line is None: line = PerfDataTypes._SAMPLES['cpu_migrations']
        return PerfDataTypes._rate_counter(line, with_coverage=False)

    @staticmethod
    def page_faults(line):
        """Return {'page-faults': (count, rate, unit)}."""
        if line is None: line = PerfDataTypes._SAMPLES['page_faults']
        return PerfDataTypes._rate_counter(line, with_coverage=False)

    @staticmethod
    def cycles(line):
        """Return {'cycles': (count, frequency, unit, coverage)}."""
        if line is None: line = PerfDataTypes._SAMPLES['cycles']
        return PerfDataTypes._rate_counter(line, with_coverage=True)

    @staticmethod
    def instructions(line):
        """Return {'instructions': (count, insn-per-cycle, 'insn per', coverage)}."""
        if line is None: line = PerfDataTypes._SAMPLES['instructions']
        head, tail = PerfDataTypes._split_fields(line)
        # tail: ['1.39', 'insn', 'per', 'cycle', '(38.44%)']
        # NOTE(review): the [1:3] slice yields 'insn per' (without 'cycle');
        # kept as is because consumers of this tuple may expect that text.
        return {head[-1]: (PerfDataTypes.convert_int(head[0]),
                           PerfDataTypes.convert_float(tail[0]),
                           " ".join(tail[1:3]),
                           PerfDataTypes._coverage(tail[-1]))}

    @staticmethod
    def branches(line):
        """Return {'branches': (count, rate, unit, coverage)}."""
        if line is None: line = PerfDataTypes._SAMPLES['branches']
        return PerfDataTypes._rate_counter(line, with_coverage=True)

    @staticmethod
    def branch_misses(line):
        """Return {'branch-misses': (count, 'of all branches', coverage)}."""
        if line is None: line = PerfDataTypes._SAMPLES['branch_misses']
        return PerfDataTypes._share_counter(line, words=3, with_percent=False)

    @staticmethod
    def l1_dcache_loads(line):
        """Return {'L1-dcache-loads': (count, rate, unit, coverage)}."""
        if line is None: line = PerfDataTypes._SAMPLES['l1_dcache_loads']
        return PerfDataTypes._rate_counter(line, with_coverage=True)

    @staticmethod
    def l1_dcache_load_misses(line):
        """Return {'L1-dcache-load-misses': (count, 'of all L1-dcache hits', coverage)}."""
        if line is None: line = PerfDataTypes._SAMPLES['l1_dcache_load_misses']
        return PerfDataTypes._share_counter(line, words=4, with_percent=False)

    @staticmethod
    def llc_loads(line):
        """Return {'LLC-loads': (count, rate, unit, coverage)}."""
        if line is None: line = PerfDataTypes._SAMPLES['llc_loads']
        return PerfDataTypes._rate_counter(line, with_coverage=True)

    @staticmethod
    def llc_load_misses(line):
        """Return {'LLC-load-misses': (count, 'of all LL-cache hits', coverage)}."""
        if line is None: line = PerfDataTypes._SAMPLES['llc_load_misses']
        return PerfDataTypes._share_counter(line, words=4, with_percent=False)

    @staticmethod
    def l1_icache_loads(line):
        """Return None for a '<not supported>' L1-icache-loads line; other input is not implemented."""
        if line is None: line = PerfDataTypes._SAMPLES['l1_icache_loads']
        return PerfDataTypes._unsupported_counter(line)

    @staticmethod
    def l1_icache_load_misses(line):
        """Return {'L1-icache-load-misses': (count, coverage)}; this line has no '#' section."""
        if line is None: line = PerfDataTypes._SAMPLES['l1_icache_load_misses']
        fields = line.strip().split() # ['3,776,205', 'L1-icache-load-misses', '(30.78%)']
        return {fields[1]: (PerfDataTypes.convert_int(fields[0]),
                            PerfDataTypes._coverage(fields[-1]))}

    @staticmethod
    def dtlb_loads(line):
        """Return {'dTLB-loads': (count, rate, unit, coverage)}."""
        if line is None: line = PerfDataTypes._SAMPLES['dtlb_loads']
        return PerfDataTypes._rate_counter(line, with_coverage=True)

    @staticmethod
    def dtlb_load_misses(line):
        """Return {'dTLB-load-misses': (count, percent, 'of all dTLB cache hits', coverage)}."""
        if line is None: line = PerfDataTypes._SAMPLES['dtlb_load_misses']
        return PerfDataTypes._share_counter(line, words=5, with_percent=True)

    @staticmethod
    def itlb_loads(line):
        """Return {'iTLB-loads': (count, rate, unit, coverage)}."""
        if line is None: line = PerfDataTypes._SAMPLES['itlb_loads']
        return PerfDataTypes._rate_counter(line, with_coverage=True)

    @staticmethod
    def itlb_load_misses(line):
        """Return {'iTLB-load-misses': (count, percent, 'of all iTLB cache hits', coverage)}."""
        if line is None: line = PerfDataTypes._SAMPLES['itlb_load_misses']
        return PerfDataTypes._share_counter(line, words=5, with_percent=True)

    @staticmethod
    def l1_dcache_prefetches(line):
        """Return None for a '<not supported>' L1-dcache-prefetches line; other input is not implemented."""
        if line is None: line = PerfDataTypes._SAMPLES['l1_dcache_prefetches']
        return PerfDataTypes._unsupported_counter(line)

    @staticmethod
    def l1_dcache_prefetch_misses(line):
        """Return None for a '<not supported>' L1-dcache-prefetch-misses line; other input is not implemented."""
        if line is None: line = PerfDataTypes._SAMPLES['l1_dcache_prefetch_misses']
        return PerfDataTypes._unsupported_counter(line)

    @staticmethod
    def test():
        """Run every parser on its built-in sample line and return the list of results."""
        parsers = (PerfDataTypes.branch_misses,
                   PerfDataTypes.branches,
                   PerfDataTypes.context_switches,
                   PerfDataTypes.cpu_migrations,
                   PerfDataTypes.cycles,
                   PerfDataTypes.dtlb_load_misses,
                   PerfDataTypes.dtlb_loads,
                   PerfDataTypes.instructions,
                   PerfDataTypes.itlb_load_misses,
                   PerfDataTypes.itlb_loads,
                   PerfDataTypes.l1_dcache_load_misses,
                   PerfDataTypes.l1_dcache_loads,
                   PerfDataTypes.l1_dcache_prefetch_misses,
                   PerfDataTypes.l1_dcache_prefetches,
                   PerfDataTypes.l1_icache_load_misses,
                   PerfDataTypes.l1_icache_loads,
                   PerfDataTypes.llc_load_misses,
                   PerfDataTypes.llc_loads,
                   PerfDataTypes.page_faults,
                   PerfDataTypes.task_clock)
        return [parser(None) for parser in parsers]

class PerfData:
    """Parsed form of one ``perf stat`` report.

    Attributes:
        stats: maps a counter name from ``counters`` to whatever the matching
            ``PerfDataTypes`` parser returned for that counter's line.
        times: maps the last word of each line of the trailing timing section
            to the leading number on that line, as a float.
    """

    # (counter name as it appears in the report, parser for that line)
    counters = (('task-clock', PerfDataTypes.task_clock),
            ('context-switches', PerfDataTypes.context_switches),
            ('cpu-migrations', PerfDataTypes.cpu_migrations),
            ('page-faults', PerfDataTypes.page_faults),
            ('cycles', PerfDataTypes.cycles),
            ('instructions', PerfDataTypes.instructions),
            ('branches', PerfDataTypes.branches),
            ('branch-misses', PerfDataTypes.branch_misses), 
            ('L1-dcache-loads', PerfDataTypes.l1_dcache_loads),
            ('L1-dcache-load-misses', PerfDataTypes.l1_dcache_load_misses),
            ('LLC-loads', PerfDataTypes.llc_loads),
            ('LLC-load-misses', PerfDataTypes.llc_load_misses),
            ('L1-icache-loads', PerfDataTypes.l1_icache_loads),
            ('L1-icache-load-misses', PerfDataTypes.l1_icache_load_misses),
            ('dTLB-loads', PerfDataTypes.dtlb_loads),
            ('dTLB-load-misses', PerfDataTypes.dtlb_load_misses),
            ('iTLB-loads', PerfDataTypes.itlb_loads),
            ('iTLB-load-misses', PerfDataTypes.itlb_load_misses),
            ('L1-dcache-prefetches', PerfDataTypes.l1_dcache_prefetches),
            ('L1-dcache-prefetch-misses', PerfDataTypes.l1_dcache_prefetch_misses))

    def __init__(self, lines):
        """Parse ``lines`` (a list of strings) into ``self.stats`` and ``self.times``.

        Counter lines are consumed until the first line containing 'elapsed';
        that line and everything after it is handed to ``parse_time``.
        Lines shorter than 10 characters are ignored in both sections, and
        lines containing 'not supported' are ignored in the counter section.

        Raises:
            AssertionError: if the same counter or timing label appears twice.
        """
        self.times = {}
        self.stats = {}
        index = 0
        while index < len(lines):
            line = lines[index]
            if len(line) < 10: # short lines are usually empty
                index = index + 1
                continue
            if 'not supported' in line: # means that the CPU doesn't support whatever this was - skip
                index = index + 1
                continue
            if 'elapsed' in line: break
            # Only the text before '#' may name the counter. str.find returns
            # -1 when there is no '#'; slicing with that would silently drop
            # the last character, so fall back to the whole line instead.
            hash_index = line.find('#')
            name_part = line if hash_index < 0 else line[:hash_index]
            for ctr, ctr_func in self.counters:
                if ctr in name_part:
                    assert ctr not in self.stats, 'Duplicate perf type {}'.format(ctr)
                    self.stats[ctr] = ctr_func(line)
                    break
            index = index + 1
        # at this point we should have the execution times
        for line in lines[index:]:
            if len(line) < 10:
                continue
            self.parse_time(line)

    def parse_time(self, line):
        """Record one timing line: first token is the value, last token is the label.

        Raises:
            AssertionError: if the label was already recorded.
        """
        data = line.split()
        t = data[0]
        tt = data[-1]
        assert tt not in self.times, 'Duplicate time {}'.format(tt)
        self.times[tt] = float(t)
        return None
    



In [38]:
class PerfLogData:
    """Store information about Polybench Data, with performance logs"""
    
    data_log_types = ('dram0', 'dram1', 'pmem1', 'pmem7')
    
    def __init__(self, data_dir='.', verbose=False):
        # look for log files to parse further
        self.verbose = verbose
        self.data_dir = data_dir
        self.data_frame = None
        if self.verbose: print('PerfLogData: verbose enabled')
        self.run_list = [log for log in os.listdir(data_dir) if 'run-comerge' in log and log.endswith('.log')]
        if self.verbose: print('PerfLogData (init): run_list {}'.format(self.run_list))
        self.results_dirs = []
        for log in self.run_list:
            with open(log, 'rt') as fd: self.results_dirs.append(fd.readline().strip().split()[1][2:])
        if self.verbose: print('PerfLogData (init): results_dirs {}'.format(self.results_dirs))
        self.run_data = {}
        for resdir in self.results_dirs:
            logs = [l for l in os.listdir('{}/{}'.format(data_dir, resdir))]
            timestamp = resdir.split('-')[-1]
            if timestamp not in self.run_data: self.run_data[timestamp] = {}
            for l in logs:
                datakey = self.get_data_key(l)
                assert datakey not in self.run_data[timestamp], 'Unexpected duplicate data'
                self.run_data[timestamp][datakey] = self.__load_log__('{}/{}/{}'.format(data_dir, resdir, l))
        pass
    

    def __load_log__(self, log_file_name):
        with open(log_file_name, 'rt') as fd:
            return [x.strip() for x in fd.readlines()]
        
    def parse_perf_data(self, perfdata):
        """Wrap the given perf report lines in a ``PerfData`` object and return it."""
        parsed = PerfData(perfdata)
        return parsed

    def parse_single_test(self, lines):
        # first line of a block is the test we ran - extract the name
        index = 0
        line = lines[index].strip().split('/')
        assert line[-1:][0].endswith('_time') # if not, this script isn't going to work right
        testname = line[-1:][0][:-5]
        # next line is CPU binding
        index = index + 1
        assert 'binding' in lines[index] # if not, this script isn't going to work right
        binding = lines[index].strip().split(' ')[-1:][0].split(',')
        # next line is allocation information
        index = index + 1
        allocation = 0
        if 'VMMALLOC' in lines[index]: # this is a PMEM run
            # TODO: do we need to track what was done here? For now we skip
            index = index + 1
        while 'allocate' in lines[index]:
            line = lines[index].strip().split(' ')[-1]
            allocation = allocation + int(line)
            index = index + 1
        # between allocation and Performance, Polybench will report the run time
        pb_run_time = None
        while 'Performance' not in lines[index]:
            if len(lines[index]) > 3: 
                assert pb_run_time is None, 'Already have a runtime {}'.format(pb_run_time)
                pb_run_time = float(lines[index])
            index = index + 1
        # next thing we are looking for is Performance
        assert pb_run_time is not None, 'Did not find a runtime'
        performance_data = self.parse_perf_data(lines[index:])
        return (testname, binding, allocation, pb_run_time, performance_data)

        
    def parse_mem_log(self, log_type, logdata):
        assert log_type in self.data_log_types, 'Invalid log type {}, options are {}'.format(log_type, self.data_log_types)
        line = 0
        data = []
        while line < len(logdata):
            start = line
            end = start
            while 'finished' not in logdata[line]: line = line + 1
            end = line
            data.append(self.parse_single_test(logdata[start:end]))            
            line = line + 1
        return data
    
    def parse_dram0_log(self, log_data):
        return self.parse_mem_log('dram0', log_data)
    
    def parse_dram1_log(self, log_data):
        return self.parse_mem_log('dram1', log_data)
    
    def parse_pmem1_log(self, log_data):
        return self.parse_mem_log('pmem1', log_data)

    def parse_pmem7_log(self, log_data):
        return self.parse_mem_log('pmem7', log_data)
    
    def parse_make_log(self, log_data):
        return None
       
    @staticmethod
    def get_data_key(logname):
        """Given a log file name, figure out what kind of memory was being used and return the correct key"""
        if 'dram0' in logname: return 'dram0'
        if 'dram1' in logname: return 'dram1'
        if 'pmem1' in logname: return 'pmem1'
        if 'pmem7' in logname: return 'pmem7'
        if 'make' in logname: return 'make'
        raise ValueException
        
    def get_dataframe(self):
        if self.data_frame is None: self.data_frame = self.__build_dataframe__()
        return self.data_frame

    
    def __build_dataframe__(self):
        row_data = []
        for run in pld.run_data:
            for rdtype in pld.run_data[run]:
                parse_func = getattr(pld, 'parse_{}_log'.format(rdtype), None)
                if parse_func is None:
                    print('Unknown log type {}'.format(rdtype))
                    continue
                parsed_data = parse_func(pld.run_data[run][rdtype])
                if parsed_data is None: continue # skip the unparsed make logs       
                for td in parsed_data:
                    row = {}
                    row['run'] = run
                    row['type'] = rdtype
                    row['test'] = td[0]
                    row['polybench-time'] = td[3]
                    row['alloc'] = td[2]
                    for property in td[-1].stats:
                        property_data = td[-1].stats[property][property]
                        if 'task-clock' == property:
                            # task-clock {'task-clock': (11266.7, 'msec', 1.0, 'CPUs utilized')}
                            row['task-clock'] = property_data[0]
                            row['cpus-utilized'] = property_data[2]
                        elif 'context-switches' == property:
                            # context-switches {'context-switches': (4, 0.355, 'M/sec')}
                            row['context-switches'] = property_data[0]
                            row['context-switches-M-per-sec'] = property_data[1]
                        elif 'cpu-migrations' == property:
                            # cpu-migrations {'cpu-migrations': (0, 0.0, 'K/sec')}
                            row['cpu-migrations'] = property_data[0]
                            row['cpu-migrations-K-per-sec'] = property_data[1]
                        elif 'page-faults' == property:
                            # page-faults {'page-faults': (22030, 1955.441, 'M/sec')}
                            row['page-faults'] = property_data[0]
                            row['page-faults-M-per-sec'] = property_data[1]
                        elif 'cycles' == property:
                            # cycles {'cycles': (41292991932, 3665275.336, 'GHz', 30.75)}
                            row['cycles'] = property_data[0]
                            row['frequency (GHz)'] = property_data[1]
                            assert property_data[2] == 'GHz', 'CPU frequency is not in GHz'
                        elif 'instructions' == property:
                            # instructions {'instructions': (57539842386, 1.39, 'insn per', 38.44)
                            row['instructions'] = property_data[0]
                            row['instructions-per-cycle'] = property_data[1]
                        elif 'branches' == property:
                            # branches {'branches': (6774876761, 601356005.77, 'M/sec', 38.45)}
                            row['branches'] = property_data[0]
                            row['branches-M-per-sec'] = property_data[1]
                        elif 'branch-misses' == property:
                            # branch-misses {'branch-misses': (4832518, 'of all branches', 38.46)}
                            row['branch-misses'] = property_data[0]
                        elif 'L1-dcache-loads' == property:
                            # L1-dcache-loads {'L1-dcache-loads': (13526507060, 1200648594.0, 'M/sec', 38.47)}
                            row['L1-dcache-loads'] = property_data[0]
                            row['L1-dcache-loads-M-per-sec'] = property_data[1]
                        elif 'L1-dcache-load-misses' == property:
                            # L1-dcache-load-misses {'L1-dcache-load-misses': (9012615601, 'of all L1-dcache hits', 38.47)}
                            row[property] = property_data[0]
                        elif 'LLC-loads' == property:
                            # LLC-loads {'LLC-loads': (849715461, 75422994.941, 'M/sec', 30.78)}
                            row[property] = property_data[0]
                            row['LLC-loads-M-per-sec'] = property_data[1]
                        elif 'LLC-load-misses' == property:
                            # LLC-load-misses {'LLC-load-misses': (4911229, 'of all LL-cache hits', 30.78)}
                            row[property] = property_data[0]
                        elif 'L1-icache-load-misses' == property:
                            # L1-icache-load-misses {'L1-icache-load-misses': (3776205, 30.78)}
                            row[property] = property_data[0]
                        elif 'dTLB-loads' == property:
                            # dTLB-loads {'dTLB-loads': (13547867184, 1202544575.182, 'M/sec', 30.78)}
                            row[property] = property_data[0]
                            row['dTLB-loads-M-per-sec'] = property_data[1]
                        elif 'dTLB-load-misses' == property:
                            # dTLB-load-misses {'dTLB-load-misses': (2066428912, 15.25, 'of all dTLB cache hits', 30.77)}
                            row[property] = property_data[0]
                            row['dTLB-load-misses-of-all-dTLB-cache-hits'] = property_data[1]
                        elif 'iTLB-loads' == property:
                            # iTLB-loads {'iTLB-loads': (338, 30.002, 'M/sec', 30.76)}
                            row[property] = property_data[0]
                            row['iTLB-loads-M-per-sec'] = property_data[1]
                        elif 'iTLB-load-misses' == property:
                            # iTLB-load-misses {'iTLB-load-misses': (5183, 1533.43, 'of all iTLB cache hits', 30.75)}
                            row[property] = property_data[0]
                            row['iTLB-load-misses-of-all-iTLB-cache-hits'] = property_data[1]
                        else:
                            assert False, 'Unknown property type {}'.format(property)
                    row_data.append(row)
        labels = [label for label in row_data[0]]
        # validate my labels
        for row in row_data:
            test_labels = [label for label in row]
            assert test_labels == labels, 'mismatched labels {} - {}'.format(labels, test_labels)
        # build a flat row
        flat_data = []
        for row in row_data:
            flat_data.append([row[label] for label in labels])
        return pd.DataFrame(flat_data, columns = labels)
    
    def __build_average_dataframe__(self):
        df = self.get_dataframe()
        labels = []
        # we drop the timestamp
        for column in df.columns[1:3]: # type and test
            labels.append(column)
        for column in df.columns[3:]: # numeric values
            if 'alloc' == column:
                labels.append('alloc')
                continue
            labels.append('{} arithmetic mean'.format(column))
            labels.append('{} harmonic mean'.format(column))
            labels.append('{} geometric mean'.format(column))
            labels.append('{} standard deviation'.format(column))
        average_data = []
        for type in df.type.unique():
            for test in df.test.unique():
                row = []
                row.append(type)
                row.append(test)
                seldata = select_data(df, type, test)
                for column in df.columns[3:]:
                    if 'alloc' == column:
                        row.append(statistics.mean(seldata[column]))
                        continue
                    test_mean = statistics.mean(seldata[column])
                    if test_mean > 0.0:
                        test_hmean = statistics.harmonic_mean(seldata[column])
                        test_gmean = gmean(seldata[column])
                        test_stdev = statistics.stdev(seldata[column], test_mean)
                    else:
                        test_hmean = 0.0
                        test_gmean = 0.0
                        test_stdev = 0.0
                    row.append(test_mean)
                    row.append(test_hmean)
                    row.append(test_gmean)
                    row.append(test_stdev)
                average_data.append(row)
        return pd.DataFrame(average_data, columns = labels)
    
    def get_average_dataframe(self):
        if getattr(self, 'average_df', None) is None: 
            self.average_df = self.__build_average_dataframe__()
        return self.average_df


        


Now I need to convert these to useful dataframes.

In [108]:
# Build the log reader once, then pull both views of the data from it:
# the averaged dataframe (adf) and the per-run dataframe (df).
pld = PerfLogData()
adf = pld.get_average_dataframe()
df = pld.get_dataframe()

# List every column of the averaged dataframe, one name per line.
for col in adf.columns:
    print(col)
    

type
test
polybench-time arithmetic mean
polybench-time harmonic mean
polybench-time geometric mean
polybench-time standard deviation
alloc
task-clock arithmetic mean
task-clock harmonic mean
task-clock geometric mean
task-clock standard deviation
cpus-utilized arithmetic mean
cpus-utilized harmonic mean
cpus-utilized geometric mean
cpus-utilized standard deviation
context-switches arithmetic mean
context-switches harmonic mean
context-switches geometric mean
context-switches standard deviation
context-switches-M-per-sec arithmetic mean
context-switches-M-per-sec harmonic mean
context-switches-M-per-sec geometric mean
context-switches-M-per-sec standard deviation
cpu-migrations arithmetic mean
cpu-migrations harmonic mean
cpu-migrations geometric mean
cpu-migrations standard deviation
cpu-migrations-K-per-sec arithmetic mean
cpu-migrations-K-per-sec harmonic mean
cpu-migrations-K-per-sec geometric mean
cpu-migrations-K-per-sec standard deviation
page-faults arithmetic mean
page-faults 

In [59]:
def select_data(df, type, test):
    """Return the rows of ``df`` whose 'type' and 'test' columns match.

    An earlier variant of this helper, keyed on 'Dataset'/'Type'/'Test'
    columns, used to be defined immediately above and was silently replaced
    by this definition; it has been removed so the cell contains exactly one
    ``select_data``.

    Args:
        df: pandas.DataFrame with 'type' and 'test' columns.
        type: value to match in the 'type' column.
        test: value to match in the 'test' column.

    Returns:
        pandas.DataFrame: the matching rows with their original index;
        empty when nothing matches.
    """
    return df.loc[(df['type'] == type) & (df['test'] == test)]

# Show which benchmarks and which memory configurations the per-run
# dataframe covers, each followed by its count.
print(df['test'].unique(), len(df['test'].unique()))
print(df['type'].unique(), len(df['type'].unique()))

# NOTE(review): this calls a free function get_average_dataframe(df); the only
# definition visible nearby is a method of the same name -- confirm that a
# module-level version still exists before relying on this cell.
average_df = get_average_dataframe(df)


['2mm' '3mm' 'atax' 'bicg' 'cholesky' 'doitgen' 'gemm' 'gemver' 'gesummv'
 'mvt' 'symm' 'syr2k' 'syrk' 'trisolv' 'trmm' 'durbin' 'dynprog'
 'gramschmidt' 'lu' 'ludcmp' 'correlation' 'covariance' 'floyd-warshall'
 'reg_detect' 'adi' 'fdtd-2d' 'fdtd-apml' 'jacobi-1d-imper'
 'jacobi-2d-imper' 'seidel-2d'] 30
['dram0' 'dram1' 'pmem1' 'pmem7'] 4


In [109]:
# Print a fixed-width summary table: one line per (test, memory type) pair,
# taken from the arithmetic-mean columns of the averaged dataframe.
print('Test             Memory   Task Clock  PB Time       Instructions  LLC Load Misses    IPC  DTLB-LM  Page Faults')

# Averaged columns shown in the table, in the same order as the header.
report_columns = [
    'task-clock arithmetic mean',
    'polybench-time arithmetic mean',
    'instructions arithmetic mean',
    'LLC-load-misses arithmetic mean',
    'instructions-per-cycle arithmetic mean',
    'dTLB-load-misses-of-all-dTLB-cache-hits arithmetic mean',
    'page-faults-M-per-sec arithmetic mean',
]
report_row_format = '{:16} {:8} {:10.2f} {:8.2f} {:18.2f} {:16.2f} {:6.2f} {:8.2f} {:8.2f}'

for test in df.test.unique():
    for dftype in df.type.unique():
        seldata = select_data(average_df, dftype, test)
        # Each selection is expected to hold one row; take its first value.
        print(report_row_format.format(
            test, dftype,
            *(seldata[column_name].values[0] for column_name in report_columns)))



Test             Memory   Task Clock  PB Time       Instructions  LLC Load Misses    IPC  DTLB-LM  Page Faults
2mm              dram0      11667.14    11.63     57545426065.20       6334574.60   1.35    16.07  1889.26
2mm              dram1      11486.71    11.45     57524149814.80       6475998.20   1.36    15.54  1918.87
2mm              pmem1      23542.77    21.01     59597380843.80         69420.60   0.69    16.62   947.60
2mm              pmem7      17329.73    15.94     59563873474.60        136920.40   0.94    15.59  1287.49
3mm              dram0      17731.50    17.69     81207578951.60      11048485.80   1.25    16.42  1739.62
3mm              dram1      17340.93    17.30     81214361307.60       8911634.60   1.27    15.99  1777.81
3mm              pmem1      33748.41    31.21     83322856720.80         95886.80   0.67    15.66   921.53
3mm              pmem7      24158.93    22.77     83270758268.80        246186.00   0.94    16.07  1287.32
atax             dram0       1221

reg_detect       dram1      30503.58    30.50    151790930805.80      36812270.40   1.36     0.00  1626.13
reg_detect       pmem1     333314.89   330.90    155098341454.60       1631421.20   0.13     0.00   149.65
reg_detect       pmem7      93172.21    91.82    154086824825.80       2054305.80   0.45     0.00   535.37
adi              dram0      46650.87    45.91    123685246969.60     155641516.20   0.72     0.07  8040.16
adi              dram1      46904.53    46.16    123730369324.80     153085134.40   0.72     0.07  7996.71
adi              pmem1     259819.49   254.77    127224344489.80       4141685.80   0.14     0.07  1444.61
adi              pmem7     136203.29   133.80    126741156664.80       5334855.60   0.25     0.07  2755.78
fdtd-2d          dram0      33575.67    32.83    110796133537.40     356753098.60   0.90     0.02 11171.20
fdtd-2d          dram1      33988.83    33.25    110789547552.20     344924138.80   0.89     0.02 11035.41
fdtd-2d          pmem1     278714.45 