In [30]:
import os
import re
import matplotlib.pyplot as plt

def extract_data_from_nohup_out(file_path):
    """Pull the two histogram tables out of a simulation log.

    The whole file at ``file_path`` is read as text and scanned with two
    regular expressions.

    Returns
    -------
    tuple of (list, list)
        ``(block_access_data, row_utilization_data)``. Each list holds
        3-tuples of strings, one per regex match, in file order:

        * block access: ``(percent_of_all_blocks, accesses, blocks)``
        * row utilization: ``(percent_of_all_rows, percent_of_row, num_rows)``

        Either list is empty when the log contains no matching lines.
    """
    # Lines shaped like "<pct>% of ALL BLOCKS <n> Accesses BLOCKS <count>"
    block_access_pattern = re.compile(
        r'(\d+\.\d+)%\s+of\s+ALL\s+BLOCKS\s+(\d+)\s+Accesses\s+BLOCKS\s+(\d+)'
    )
    # Lines shaped like "<pct>% of ALL ROWS <pct>% of ROW NUM_ROWS <count>"
    row_utilization_pattern = re.compile(
        r'(\d+\.\d+)%\s+of\s+ALL\s+ROWS\s+(\d+\.\d+)%\s+of\s+ROW\s+NUM_ROWS\s+(\d+)'
    )

    with open(file_path, 'r') as log_file:
        log_text = log_file.read()

    return (
        block_access_pattern.findall(log_text),
        row_utilization_pattern.findall(log_text),
    )

def generate_graphs(block_access_data, row_utilization_data, name, output_dir):
    """Save the block-access and row-utilization bar charts for one trace.

    Parameters
    ----------
    block_access_data : list of 3-tuples
        Each entry is ``(_, accesses, blocks)``; ``accesses`` and ``blocks``
        are converted with ``int``. Entries whose access count is 0 are
        not plotted.
    row_utilization_data : list of 3-tuples
        Each entry is ``(_, utilization, rows)``; ``utilization`` is
        converted with ``float`` and ``rows`` with ``int``. Entries whose
        utilization is 0.0 are not plotted.
    name : str
        Trace name. The full name is used in the output file names; only
        the first 30 characters appear in the chart titles.
    output_dir : str
        Directory the PNG files are written to; created if missing.

    Returns
    -------
    None
        Writes ``{name}_block_access_count.png`` and
        ``{name}_row_utilization.png`` into ``output_dir``. When a histogram
        has nothing to plot, an ``ERROR:`` line is printed instead of
        raising.
    """
    # Titles use a shortened name so long trace names do not overflow the figure.
    shorten_name = name[:30]

    # exist_ok avoids the check-then-create race of os.path.exists + makedirs.
    os.makedirs(output_dir, exist_ok=True)

    # Both histograms need more than one entry before anything is drawn.
    # NOTE(review): presumably a zero bucket is always present in the log,
    # so a single entry means "nothing accessed" - confirm against the logs.
    if len(block_access_data) <= 1 or len(row_utilization_data) <= 1:
        print(f"ERROR: {name} has 0 BLOCKS ACCESSED")
        return

    # ---- Block access count graph -------------------------------------
    block_points = [
        (int(access), int(blocks))
        for _, access, blocks in block_access_data
        if int(access) != 0
    ]
    if block_points:
        block_access_counts, block_access_blocks = zip(*block_points)
        fig, ax = plt.subplots()
        try:
            ax.bar(block_access_counts, block_access_blocks, width=0.5)  # half-unit wide bars
            # One tick per integer access count, always showing at least 1..5.
            ax.set_xticks(list(range(1, max(max(block_access_counts), 5) + 1)))
            ax.set_xlabel('Number of Accesses')
            ax.set_ylabel('Number of Blocks')
            ax.set_title(f'{shorten_name}\nBlock Access Count')

            # Print each bar's value just above it.
            for accesses, blocks in block_points:
                ax.text(accesses, blocks, str(blocks), ha='center', va='bottom')

            fig.savefig(os.path.join(output_dir, f'{name}_block_access_count.png'))
        finally:
            # Always release the figure, even if drawing or saving failed.
            plt.close(fig)
    else:
        # Every entry had an access count of 0; unpacking zip() of an empty
        # list would raise ValueError, so report it instead.
        print(f"ERROR: {name} has 0 BLOCKS ACCESSED")

    # ---- Row utilization graph ----------------------------------------
    row_points = [
        (float(utilization), int(rows))
        for _, utilization, rows in row_utilization_data
        if float(utilization) != 0.0
    ]
    if row_points:
        row_utilizations, row_counts = zip(*row_points)
        fig, ax = plt.subplots()
        try:
            ax.bar(row_utilizations, row_counts)
            ax.set_xlim(0, 100)  # utilization is a percentage
            ax.set_xlabel('Utilization (%)')
            ax.set_ylabel('Number of Rows')
            ax.set_title(f'{shorten_name}\nRow Utilization')
            fig.savefig(os.path.join(output_dir, f'{name}_row_utilization.png'))
        finally:
            plt.close(fig)
    else:
        # Same empty-after-filtering guard as for the block histogram.
        print(f"ERROR: {name} has 0 ROWS UTILIZED")

def process_nohup_out_files(directory, output_dir):
    """Walk ``directory`` and chart every ``nohup.out`` log found beneath it.

    For each folder containing a file named exactly ``nohup.out``, the log
    is parsed with ``extract_data_from_nohup_out`` and the results are
    passed to ``generate_graphs``. The folder's base name is used as the
    graph name, and a progress line is printed per log.
    """
    log_filename = 'nohup.out'
    for current_dir, _subdirs, filenames in os.walk(directory):
        # File names are unique within a folder, so a membership test
        # visits the same logs as looping over every file name.
        if log_filename not in filenames:
            continue

        log_path = os.path.join(current_dir, log_filename)
        block_rows, utilization_rows = extract_data_from_nohup_out(log_path)
        print(f"Generating graph for {log_path}")
        trace_name = os.path.basename(current_dir)
        generate_graphs(block_rows, utilization_rows, trace_name, output_dir)

# User-specified directories (relative paths, resolved against the
# notebook's current working directory):
#   user_directory   - root folder searched recursively for nohup.out logs
#   graph_output_dir - folder the generated PNG charts are written to
user_directory = "log_results/1C_16WLLC"
graph_output_dir = "graphs"

# Generate graphs for each nohup.out file found under user_directory.
# One "Generating graph for ..." line is printed per log processed.
process_nohup_out_files(user_directory, graph_output_dir)


Generating graph for log_results/1C_16WLLC/657.xz_s-3167B.champsimtrace.xz/nohup.out
Generating graph for log_results/1C_16WLLC/638.imagick_s-824B.champsimtrace.xz/nohup.out
Generating graph for log_results/1C_16WLLC/parsec_2.1.raytrace.simlarge.prebuilt.drop_23500M.length_250M.champsimtrace.xz/nohup.out
Generating graph for log_results/1C_16WLLC/602.gcc_s-1850B.champsimtrace.xz/nohup.out
Generating graph for log_results/1C_16WLLC/ligra_BFSCC.com-lj.ungraph.gcc_6.3.0_O3.drop_22000M.length_250M.champsimtrace.xz/nohup.out
Generating graph for log_results/1C_16WLLC/ligra_Components.com-lj.ungraph.gcc_6.3.0_O3.drop_22750M.length_250M.champsimtrace.xz/nohup.out
Generating graph for log_results/1C_16WLLC/628.pop2_s-17B.champsimtrace.xz/nohup.out
Generating graph for log_results/1C_16WLLC/648.exchange2_s-1247B.champsimtrace.xz/nohup.out
Generating graph for log_results/1C_16WLLC/649.fotonik3d_s-10881B.champsimtrace.xz/nohup.out
Generating graph for log_results/1C_16WLLC/623.xalancbmk_s-592B.c