## Log Extraction

### Field Extraction & DataFrame Conversion

In [None]:
from collections import deque

import pandas as pd
from google.protobuf.json_format import MessageToDict

import perf_log_packet_pb2
from perf_logger import PerfLogger

# Render DataFrame.plot through plotly; switch the value to "matplotlib" to
# fall back to the default pandas backend.
pd.set_option("plotting.backend", "plotly")
# pd.set_option("plotting.backend", "matplotlib")


# Repeated (per-instance) protobuf fields that packet_parse expands.
# Maps field name -> (id_tags, msg_list_fields):
#   id_tags: (attribute, prefix) pairs appended to the column name when the
#       attribute is set, e.g. socket_id=0 and core_id=1 give "top-S0C1".
#   msg_list_fields: repeated attributes copied out of an attached log
#       message, or None when messages are not collected for that field.
_REPEATED_FIELDS = {
    "top": (
        (("socket_id", "S"), ("core_id", "C"), ("thread_id", "T")),
        ("int_regs", "fp_regs", "mem_vals"),
    ),
    # TODO: multiprocessing?
    # Disabled for now: ("thread_id", "T"), ("event_base", "EB"),
    # ("event_label", "EL")
    "updown": (
        (("updown_id", "U"), ("lane_id", "L")),
        ("int_regs", "mem_vals"),
    ),
    "dram_per_ctrlr_stats": ((("ctrlr_id", "C"),), None),
}


def _record_msg(msg_dict, name, msg, timestamp, list_fields=()):
    """Append one log message to ``msg_dict[name][msg.id]``."""
    entry = {"timestamp": timestamp, "msg": msg.msg}
    for list_field in list_fields:
        entry[list_field] = list(getattr(msg, list_field))
    msg_dict.setdefault(name, {}).setdefault(msg.id, []).append(entry)


def packet_parse(pkt, fields_dict, msg_dict):
    """Flatten the requested fields of one packet into a single-row dict.

    The packet is walked breadth-first, following the nested ``fields_dict``.
    Leaf values are stored under dotted names; elements of the repeated
    fields listed in ``_REPEATED_FIELDS`` get an instance suffix built from
    their id attributes (for example ``top-S0C0.core_stats.cycles``).

    Args:
        pkt: Packet object exposing ``HasField`` and attribute access for the
            requested fields.
        fields_dict: Nested dict of field names; a value of ``None`` marks a
            leaf whose value is copied into the result.
        msg_dict: Updated in place. When ``"msg"`` is requested under
            ``top``, ``updown`` or ``system`` and the element carries a
            message, an entry is appended to ``msg_dict[name][msg.id]``.

    Returns:
        Dict mapping flattened field names to the extracted values.

    Raises:
        KeyError: If a message is found before ``sim_timestamp`` has been
            extracted from the packet (it stamps every message entry).
    """
    out_dict = {}
    queue = deque([(pkt, fields_dict, "")])
    while queue:
        cur_obj, cur_fields_dict, cur_name = queue.popleft()
        if cur_name:
            cur_name += "."

        for subf, next_dict in cur_fields_dict.items():
            # A leaf request (next_dict is None) can never ask for messages;
            # checking this first avoids `"msg" in None` raising TypeError.
            wants_msg = next_dict is not None and "msg" in next_dict

            if subf in _REPEATED_FIELDS:
                # deal with fields with repeated attributes
                id_tags, msg_list_fields = _REPEATED_FIELDS[subf]
                for elem in getattr(cur_obj, subf):
                    name = cur_name + subf + "-"
                    for id_field, tag in id_tags:
                        if elem.HasField(id_field):
                            name += "{}{}".format(tag, getattr(elem, id_field))

                    if (
                        msg_list_fields is not None
                        and wants_msg
                        and elem.HasField("msg")
                    ):
                        _record_msg(
                            msg_dict,
                            name,
                            elem.msg,
                            out_dict["sim_timestamp"],
                            msg_list_fields,
                        )

                    if next_dict is not None:
                        queue.append((elem, next_dict, name))
                    else:
                        out_dict[name] = elem
            elif subf == "msg" and next_dict is None:
                # Messages are collected by the parent level into msg_dict,
                # never stored as a column.
                continue
            elif cur_obj.HasField(subf):
                obj = getattr(cur_obj, subf)
                name = cur_name + subf

                # System-level messages carry only the text, no register or
                # memory snapshots.
                if subf == "system" and wants_msg and obj.HasField("msg"):
                    _record_msg(msg_dict, name, obj.msg, out_dict["sim_timestamp"])

                if next_dict is not None:
                    queue.append((obj, next_dict, name))
                else:
                    out_dict[name] = obj
    return out_dict


def extract_fields(logger, fields):
    """Read every packet from ``logger`` and tabulate the requested fields.

    The log header is read and printed first. Packets are then read until
    ``logger.read`` returns ``None``; each one is flattened with
    ``packet_parse`` and kept as a row when it produced more than one value
    (i.e. something besides the timestamp).

    Args:
        logger: Object whose ``read(message)`` returns the next decoded
            message, or ``None`` once no packets remain.
        fields: Iterable of dotted field paths such as
            ``'top.core_stats.cycles'``. ``'sim_timestamp'`` is added
            automatically when missing.

    Returns:
        Tuple ``(df, msg_dict)``: a DataFrame indexed by ``sim_timestamp``
        with one column per flattened field, and the dict of log messages
        filled in by ``packet_parse``.
    """
    # Header parsing
    hdr = logger.read(perf_log_packet_pb2.PerfLogHeader())
    hdr_dict = MessageToDict(hdr, preserving_proto_field_name=True)  # convert message to python dictionary
    print(hdr_dict)

    if 'sim_timestamp' not in fields:
        # Keep the timestamp first: packet_parse must extract it before any
        # message that gets stamped with it.
        fields = ['sim_timestamp', *fields]

    # Convert list of fields to parse to nested dicts
    fields_dict = dict()
    for field in fields:
        *parents, leaf = field.split('.')
        cur_level = fields_dict
        for subf in parents:
            cur_level = cur_level.setdefault(subf, dict())
        cur_level[leaf] = None

    # Packet parsing
    rows = []
    msg_dict = dict()
    pkt_cnt = 0
    while True:
        pkt = logger.read(perf_log_packet_pb2.PerfLogPacket())
        if pkt is None:
            break
        pkt_cnt += 1
        row_dict = packet_parse(pkt, fields_dict, msg_dict)
        if pkt_cnt % 1000 == 0:
            # .get: a packet without a timestamp must not abort the progress log.
            print('Processed packet # {}, timestamp {}'.format(pkt_cnt, row_dict.get('sim_timestamp')))

        if len(row_dict) > 1:
            rows.append(row_dict)

    # Build the frame once; concatenating inside the loop re-copies all
    # previously accumulated rows for every packet.
    df = pd.DataFrame(rows)
    if 'sim_timestamp' not in df.columns:
        # No rows (or none with a timestamp): still provide the index column.
        df = df.reindex(columns=['sim_timestamp', *df.columns])

    return df.set_index('sim_timestamp'), msg_dict

### Logger Instantiation

In [None]:
# Open the binary performance log for extraction.
# NOTE(review): write=False presumably selects read mode - confirm in PerfLogger.
logger = PerfLogger("gem5_perf_log.bin", write=False)

### Gem5 Log Extraction

In [None]:
# Pull the selected statistics out of the log. The order of the paths is kept
# as-is because it determines the column order of the resulting DataFrame.
df, msg_dict = extract_fields(
    logger,
    [
        # TOP log messages
        "top.msg",
        # TOP core counters
        "top.core_stats.cycles",
        "top.core_stats.num_insts",
        "top.core_stats.num_insts_issued",
        "top.core_stats.num_insts_committed",
        # Cache accesses
        "top.l1_cache_stats.dcache_total_accesses",
        "top.l1_cache_stats.icache_total_accesses",
        "top.l2_cache_stats.total_accesses",
        "system.l3_cache_stats.total_accesses",
        # Cache misses
        "top.l1_cache_stats.dcache_total_misses",
        "top.l1_cache_stats.icache_total_misses",
        "top.l2_cache_stats.total_misses",
        "system.l3_cache_stats.total_misses",
        # DRAM controllers
        "system.dram_per_ctrlr_stats.bytes_read",
        "system.dram_per_ctrlr_stats.bytes_write",
        # "system.dram_per_ctrlr_stats.bw_read",
        # "system.dram_per_ctrlr_stats.bw_write",
        # UpDown cycle counters
        "updown.cycle_stats.busy_cycles",
        "updown.cycle_stats.exec_cycles",
        "updown.cycle_stats.idle_cycles",
        "updown.cycle_stats.stall_cycles",
        # UpDown log messages
        "updown.msg",
        # UpDown actions and transitions
        "updown.action_stats.total_actions",
        "updown.action_stats.yield_actions",
        "updown.trans_stats.total_trans",
        # UpDown queues
        "updown.queue_stats.event_q_len",
        "updown.queue_stats.operand_q_len",
        # UpDown memory traffic
        "updown.local_mem_stats.read_bytes",
        "updown.local_mem_stats.write_bytes",
        "updown.mem_intf_stats.read_bytes",
        "updown.mem_intf_stats.write_bytes",
    ],
)
logger.reset()
# print(msg_dict)
df

## Plotting

In [None]:
# Instruction counters of the TOP instance S0C0, plotted against the
# sim_timestamp index.
fig = df.plot.scatter(
    title="TOP Instruction Stats",
    x=df.index,
    y=[
        "top-S0C0.core_stats." + stat
        for stat in ("num_insts", "num_insts_issued", "num_insts_committed")
    ],
)
fig.show()

In [None]:
# Access and miss counters for the L1 data/instruction caches and L2 cache of
# TOP instance S0C0, followed by the system-level L3 cache.
fig = df.plot.scatter(
    title="TOP Cache Stats",
    x=df.index,
    y=(
        [
            "top-S0C0.l1_cache_stats.{}_total_{}".format(cache, kind)
            for cache in ("dcache", "icache")
            for kind in ("accesses", "misses")
        ]
        + [
            "{}.total_{}".format(level, kind)
            for level in ("top-S0C0.l2_cache_stats", "system.l3_cache_stats")
            for kind in ("accesses", "misses")
        ]
    ),
)
fig.show()

In [None]:
# Bytes read and written per DRAM controller (C0-C7), followed by the memory
# interface traffic of UpDown lanes U0L0-U0L3.
# The column names are generated so that every controller gets exactly one
# bytes_read and one bytes_write series; the hand-written list repeated
# C0.bytes_read and C1.bytes_write and left out C0.bytes_write and
# C1.bytes_read.
fig = df.plot.scatter(
    title='DRAM Stats',
    x=df.index,
    y=(
        [
            'system.dram_per_ctrlr_stats-C{}.{}'.format(ctrlr, stat)
            for ctrlr in range(8)
            for stat in ('bytes_read', 'bytes_write')
        ]
        + [
            'updown-U0L{}.mem_intf_stats.{}'.format(lane, stat)
            for lane in range(4)
            for stat in ('read_bytes', 'write_bytes')
        ]
    ),
)
fig.show()

In [None]:

# Cycle counters of UpDown lanes U0L0-U0L3.
# The extraction cell requests only busy/exec/idle/stall cycle counters, so a
# 'cycle_stats.cycles' column never exists in df; busy_cycles is plotted next
# to exec_cycles instead.
# NOTE(review): confirm busy_cycles is the intended replacement for 'cycles'.
fig = df.plot.scatter(
    title='UpDown Lane Cycles',
    x=df.index,
    y=[
        'updown-U0L{}.cycle_stats.{}'.format(lane, stat)
        for lane in range(4)
        for stat in ('busy_cycles', 'exec_cycles')
    ],
)
fig.show()

In [None]:
# Local-memory bytes read and written by UpDown lanes U0L0-U0L3.
fig = df.plot.scatter(
    title="UpDown Lane Local Memory Stats",
    x=df.index,
    y=[
        "updown-U0L{}.local_mem_stats.{}".format(lane, stat)
        for lane in range(4)
        for stat in ("read_bytes", "write_bytes")
    ],
)
fig.show()

In [None]:
# Event queue length of UpDown lanes U0L0-U0L3.
fig = df.plot.scatter(
    title="UpDown Lane Event Queue Stats",
    x=df.index,
    y=["updown-U0L{}.queue_stats.event_q_len".format(lane) for lane in range(4)],
)
fig.show()

In [None]:
# Operand queue length of UpDown lanes U0L0-U0L3.
fig = df.plot.scatter(
    title="UpDown Lane Operand Queue Stats",
    x=df.index,
    y=["updown-U0L{}.queue_stats.operand_q_len".format(lane) for lane in range(4)],
)
fig.show()