# Dynamic Critical Section Size
- Compute the average size of a critical section, weighting each critical section by how frequently it executes.
- This gives an "upper bound" on the potential speedup from instruction fusion when fusion is restricted to instructions within the same critical section.

In [88]:
import os

# per-benchmark results, keyed by benchmark name (filled in by later cells)
files = dict()

# directory holding the cleaned per-benchmark CSV files
path = '../cleaned data'

# keep only the CSV entries of the directory listing
csv_files = list(filter(lambda entry: entry.endswith('.csv'), os.listdir(path)))
# benchmark name = everything before the first dot; same order as csv_files
file_names = [csv_name.partition(".")[0] for csv_name in csv_files]

print(f"csv_files={csv_files}")
print(f"file_names={file_names}")

csv_files=['hmmer.csv', 'omnetpp.csv', 'mcf.csv', 'xalancbmk.csv', 'gobmk.csv', 'libquantum.csv', 'sjeng.csv', 'astar.csv', 'bzip2.csv', 'h264ref.csv']
file_names=['hmmer', 'omnetpp', 'mcf', 'xalancbmk', 'gobmk', 'libquantum', 'sjeng', 'astar', 'bzip2', 'h264ref']


In [89]:
# read the csv and import into dataframe objects
import pandas as pd

# each entry of `files` maps a benchmark name (text before the first dot of
# the CSV file name) to a dict holding its DataFrame under the 'data' key
for csv_name in csv_files:
    benchmark = csv_name.partition(".")[0]
    frame = pd.read_csv(os.path.join(path, csv_name))
    files[benchmark] = dict(data=frame)

In [93]:
# Mnemonics treated as branches: a row whose 'instruction' value is in this
# list closes the current section in the analysis loop.
# NOTE(review): these look like RISC-V (plus capability-extension) mnemonics;
# confirm the spelling against the mnemonics that appear in the CSV data.
branch_instructions = [
    'beq', 'bne', 'blt', 'bge', 'bltu', 'bgeu',
    'cjal', 'cjalr', 'j', 'jalr',
    'beqz', 'bnez', 'blez', 'bgez', 'bltz', 'bgtz',
    'c.bnez', 'c.beqz',
    'jr', 'ret',
]

# Non-branch mnemonics that also close the current section in the analysis
# loop.
# NOTE(review): presumably instructions that may raise an exception; confirm.
exception_instructions = [
    'cld', 'csw', 'clh', 'csc',
    'csd', 'csh', 'clhu', 'csb',
]

In [96]:
# For every benchmark, compute the execution-weighted average size of a
# section and store it under file['avg_block_size'].
#
# A section starts on the first row after the previous section was closed
# and is closed by a row that is a branch, is in exception_instructions, or
# whose 'count' differs from the section's starting count by more than 1.
# The size of a section is the closing row's address minus the section's
# start address, and each section is weighted by its starting 'count'.
#
# NOTE(review): the size is an address difference, not a number of rows, and
# the closing row is never used as the start of the next section (the next
# row is). Both are kept as in the original so the reported numbers do not
# change; confirm that this is the intended definition.

# one set lookup per row instead of scanning two lists
section_enders = frozenset(branch_instructions) | frozenset(exception_instructions)

# mnemonics seen immediately before a drop in execution count (diagnostics)
instructions = []
for file_name, file in files.items():
    print(f"file_name={file_name}")
    df = file['data']
    branches = df[df['instruction'].isin(branch_instructions)]
    addresses = branches['addr'].apply(lambda x: int(x, 16)).tolist()
    print(addresses)

    # plain Python lists so that "previous row" is a positional lookup and
    # does not depend on the DataFrame's index labels
    raw_addrs = df['addr'].tolist()
    mnemonics = df['instruction'].tolist()
    counts = df['count'].tolist()

    total_instructions = 0
    critical_section_num = 0

    # both are assigned on the first row because new_critical_section is True
    start_address = None
    count = None
    new_critical_section = True
    for pos, (raw_addr, mnemonic, row_count) in enumerate(zip(raw_addrs, mnemonics, counts)):
        if new_critical_section:
            start_address = int(raw_addr, 16)
            count = row_count
            new_critical_section = False

        if mnemonic in section_enders or abs(row_count - count) > 1:
            total_instructions += count * (int(raw_addr, 16) - start_address)
            critical_section_num += count
            new_critical_section = True

        # diagnostic only: report rows whose count dropped by more than 1
        # relative to the section's starting count (one-sided, unlike the
        # abs() test above). This cannot fire on the first row, where count
        # was just set to row_count, so pos - 1 is always a valid position.
        if count - row_count > 1: # treat as new critical section - don't want to fuse across them
            prev_address = raw_addrs[pos - 1]
            prev_instruction = mnemonics[pos - 1]
            print(f"count of address {raw_addr} not the same as count of address {prev_address}: "
                f"{row_count} != {count}, previous instruction is {prev_instruction}")
            if prev_instruction not in instructions:
                instructions.append(prev_instruction)

    # a trace with no closed section (or an empty one) has no defined
    # average; store NaN instead of raising ZeroDivisionError
    if critical_section_num:
        file['avg_block_size'] = total_instructions / critical_section_num
    else:
        file['avg_block_size'] = float('nan')

print(instructions)

file_name=hmmer
[331052, 331100, 331116, 331120, 331124, 331128, 331140, 331144, 331148, 331156, 331168, 331180, 331184, 331190, 331194, 331222, 331232, 331240, 331258, 331278, 331282, 331284, 331304, 331354, 331358, 331366, 331374, 331378, 331400, 331406, 331408, 331414, 331422, 331426, 331432, 331440, 331444, 331450, 331462, 331474, 331492, 331510, 331518, 331526, 331534, 331556, 331574, 331580, 331586, 331608, 331634, 331660, 331670, 331698, 331748, 331780, 331788, 331802, 331808, 331816, 331820, 331828, 331840, 331852, 331860, 331874, 331928, 331952, 331976, 332068, 332076, 332084, 332090, 332120, 332132, 332136, 332176, 332228, 332238, 332244, 332274, 332276, 332280, 332308, 343510, 343534, 343540, 343574, 343628, 343814, 343846, 343880, 343914, 343950, 343982, 344014, 344050, 344082, 344114, 344148, 344202, 344232, 344262, 344292, 344322, 344352, 344382, 344424, 344466, 344508, 344538, 344568, 344598, 344612, 346020, 346310, 346346, 346398, 346428, 346432, 346448, 346452, 346496,

In [97]:
# report each benchmark's average block size, rounded to 3 decimal places
for benchmark, info in files.items():
    rounded = round(info['avg_block_size'], 3)
    print(f"{benchmark}: {rounded}")

hmmer: 11.323
omnetpp: 6.639
mcf: 4.493
xalancbmk: 5.846
gobmk: 8.908
libquantum: 5.339
sjeng: 11.229
astar: 7.641
bzip2: 10.91
h264ref: 8.853
