# Fusion lengths
Plot a bar chart of dynamic count against fusion length for each combination of user-defined key, file, and rule title.

In [1]:
import numpy as numpy
import matplotlib.pyplot as plt
import pandas as pd
import math
import json
import os

In [2]:
# Resolve the experiment directory to analyse.
# config.json supplies 'dirPath' (the parent directory of all experiments) and
# 'dirName' (one experiment folder; an empty string means "use the latest").
with open('config.json', encoding='utf-8') as f:
    data = json.load(f)

if data['dirName'] != "":
    full_path = f"{data['dirPath']}/{data['dirName']}"
else:
    # get the most recently run experiment
    path = data['dirPath']
    # os.listdir returns entries in arbitrary order, so sort explicitly.
    # NOTE(review): assumes experiment folders are named with a sortable
    # timestamp (e.g. 2025_01_09_02:11:46.836674), so the lexicographically
    # largest name is the newest run - confirm.
    folder_names = sorted(
        entry for entry in os.listdir(path)
        if os.path.isdir(os.path.join(path, entry))
    )
    if not folder_names:
        raise FileNotFoundError(f"no experiment directories found in {path!r}")
    most_recent = folder_names[-1]
    print(f"defaulting to most recent experiment: {most_recent}")
    full_path = f"{path}/{most_recent}"

defaulting to most recent experiment: 2025_01_09_02:11:46.836674


In [3]:
# Both artefacts sit directly inside the selected experiment directory:
# the CSV is this notebook's input, the PNG is where the figure is saved.
csv_path, img_path = (
    f"{full_path}/{artefact}"
    for artefact in ("fusionLengths.csv", "fusionLengths.png")
)

In [4]:
# Load the fusion-length table. Later cells read the columns 'file',
# 'rule_title', 'user_defined_key', 'fusion_length' and 'count'.
df = pd.read_csv(filepath_or_buffer=csv_path)

In [5]:
# Sum 'count' over rows that share the same file, rule, user-defined key and
# fusion length; the result has a single 'total_count' column indexed by
# those four keys.
aggregated_data = (
    df.groupby(['file', 'rule_title', 'user_defined_key', 'fusion_length'])['count']
    .sum()
    .to_frame('total_count')
)

In [7]:
# Move the group keys out of the index and back into ordinary columns, then
# split the table into one group per (user_defined_key, file, rule_title);
# each group holds the fusion-length histogram for one plot.
aggregated_data = aggregated_data.reset_index(drop=False)
grouped = aggregated_data.groupby(
    by=['user_defined_key', 'file', 'rule_title'],
)

In [None]:
# Draw one bar chart per (user_defined_key, file, rule_title) group on a
# 4-column grid, save the figure to img_path, then display it.
num_groups = len(grouped)
if num_groups == 0:
    # Without this guard an empty table surfaces as an opaque matplotlib
    # error about a zero-row grid.
    raise ValueError("no fusion length groups to plot")

cols = 4
rows = math.ceil(num_groups / cols)
# squeeze=False keeps `axes` a 2-D array for every grid shape, so flatten()
# is always valid.
fig, axes = plt.subplots(rows, cols, figsize=(15, 5 * rows), squeeze=False)
axes = axes.flatten()

for ax, (group_name, group_data) in zip(axes, grouped):
    # group_name follows the groupby key order.
    user_defined_key, file_name, rule_title = group_name
    # Use the scalar key from the group name; interpolating the column
    # itself would embed a whole Series repr in the title.
    group_title = (
        f"{file_name}: {rule_title} "
        f"Max {user_defined_key}\n"
    )

    ax.bar(group_data['fusion_length'], group_data['total_count'])
    ax.set_title(group_title)
    ax.set_xlabel('Fusion Length')
    ax.set_ylabel('Dynamic Count')
    # Keep at least 5 units of x-range so sparse groups stay comparable.
    max_x = max(group_data['fusion_length'].max() + 1, 5)
    ax.set_xlim(left=0, right=max_x)
    ax.grid(axis='y')

# Remove the unused cells in the last row of the grid.
for unused_ax in axes[num_groups:]:
    fig.delaxes(unused_ax)

plt.tight_layout()
plt.savefig(img_path, dpi=150, bbox_inches='tight')
plt.show()