In [None]:
import re
import sys
from pathlib import Path

import pandas as pd
from IPython.core.display_functions import display
from matplotlib import pyplot as plt
from matplotlib.image import AxesImage

sys.path.append(str(Path.cwd().parent))
sys.path.append(str(Path.cwd()))
from data_analysis.CLI import ExperimentDir

In [None]:
# Select the experiment: default to the local one, optionally overridden by the
# first command-line argument.
exp = ExperimentDir.LOCAL
if len(sys.argv) > 1:
    try:
        exp = ExperimentDir.value_to_enum(sys.argv[1])
    except ValueError as e:
        # The argument was rejected with ValueError, so `exp` keeps its default.
        # NOTE(review): under a Jupyter kernel sys.argv[1] is presumably a kernel
        # flag rather than an experiment name, so this branch may be the norm.
        print(f"Experiment was set to {exp}", e)

exp_dir = Path(ExperimentDir.host_dir(exp)) / 'runs'
# Sorted so that the DataFrame row order (and every figure laid out from it)
# does not depend on filesystem enumeration order.
pngs = sorted(exp_dir.glob('run.*.parsed/*png'))

generic_config_regex = r".+?\.(.+?)-(.+?)(?:-(.+?))_(.+?)-(\d+)\.cfg.+"
local_config_regex = r".+?\.(.+?)-(.+?)-nc(\d+)_(.+?)-(\d+)\.cfg.+"


def extract_params_from_config(cfg):
    """Split a run directory name into its five captured fields.

    Args:
        cfg: Name of the directory that contains the image.

    Returns:
        The tuple of strings captured by ``local_config_regex``; the loop below
        unpacks it as ``(workflow, mode, ncores, node, trial)``.

    Raises:
        ValueError: If ``cfg`` does not match ``local_config_regex``.
    """
    match = re.match(local_config_regex, cfg)
    if match is None:
        raise ValueError(f"directory name {cfg!r} does not match {local_config_regex!r}")
    return match.groups()


def extract_cfg_name(cfg):
    """Return the text between the first and the second dot of ``cfg``.

    Raises:
        ValueError: If ``cfg`` does not contain such a dot-delimited segment.
    """
    match = re.match(r"^.+?\.(.+?)\..+$", cfg)
    if match is None:
        raise ValueError(f"cannot extract a config name from {cfg!r}")
    return match.group(1)


# One record per image; the DataFrame is built once from the full list.
configs = []
for png in pngs:
    run_dir_name = png.parent.name
    try:
        workflow, mode, ncores, node, trial = extract_params_from_config(run_dir_name)
        cfg_name = extract_cfg_name(run_dir_name)
    except ValueError as e:
        # A directory with an unexpected name should not abort the whole cell;
        # report it and carry on with the remaining images.
        print(f"Skipping {png}: {e}")
        continue
    configs.append({
        'cfg_name': cfg_name,
        'path': png,
        'workflow': workflow,
        "mode": mode,
        'ncores': int(ncores),
        'node': node,
        'trial': int(trial),
        'image_name': png.name,
    })

# Explicit columns keep the frame well-formed even when no image was found.
df = pd.DataFrame(configs, columns=['cfg_name', 'path', 'workflow', 'mode', 'ncores', 'node', 'trial', 'image_name'])
df

In [None]:
# Number of images per (image_name, mode, workflow, ncores) combination,
# written to info.csv and shown as the cell output.
images_per_config = df.groupby(['image_name', 'mode', 'workflow', 'ncores']).size()
data_gathered = images_per_config.rename('count').reset_index()
data_gathered.to_csv('info.csv', index=False)
data_gathered

In [None]:
# Same count as the previous cell, additionally broken down by node;
# written to info.by_node.csv and shown as the cell output.
images_per_config_and_node = df.groupby(['image_name', 'mode', 'workflow', 'ncores', 'node']).size()
data_gathered_by_node = images_per_config_and_node.rename('count').reset_index()
data_gathered_by_node.to_csv('info.by_node.csv', index=False)
data_gathered_by_node

In [None]:
# Pick a single workflow / image pair to inspect.
workflow = 'daa'
image_name = 'run_avg_cpu_util_all.png'
df_slice = df[df.workflow.eq(workflow) & df.image_name.eq(image_name)]
df_slice

In [None]:
# Rows available per (node, ncores) pair in the selected slice.
df_count = (
    df_slice
    .groupby(['node', 'ncores'])
    .size()
    .rename('count')
    .reset_index()
)
# For each ncores value, the largest per-node count.
df_max_count_per_ncores = (
    df_count
    .groupby('ncores')['count']
    .max()
    .rename('max_count')
    .reset_index()
)
# Sum of those per-ncores maxima.
total_count = df_max_count_per_ncores.max_count.sum()
display(df_count, df_max_count_per_ncores, total_count)

In [None]:
scale = 3


def adjust_style(axx):
    """Turn the axes back on so its labels render, but hide ticks and all spines."""
    axx.axis('on')
    axx.set_xticks([])
    axx.set_yticks([])
    axx.spines[:].set_visible(False)


def _plot_image_grid(df_slice, scale):
    """Lay out every image of ``df_slice`` in one figure and return that figure.

    Columns are the unique nodes of the slice. Rows are grouped by ``ncores``;
    each group is as tall as the largest number of images any single node has
    for that ``ncores`` value, so groups line up across columns. Cells that
    receive no image are blanked.

    Args:
        df_slice: Non-empty DataFrame with ``node``, ``ncores`` and ``path``
            columns.
        scale: Multiplier applied to the figure size.

    Returns:
        The created matplotlib figure (not yet saved or closed).
    """
    nodes = df_slice.node.unique()
    df_count = df_slice.groupby(['node', 'ncores']).size().reset_index(name='count')
    df_max_count_per_ncores = df_count.groupby('ncores')['count'].max().reset_index(name='max_count')

    x_len = len(nodes)
    y_len = int(df_max_count_per_ncores['max_count'].sum())
    # squeeze=False keeps `ax` two-dimensional even for a single row or column,
    # so ax[row, col] indexing is always valid.
    fig, ax = plt.subplots(y_len, x_len, figsize=(x_len * 5 * scale, y_len * 2 * scale), squeeze=False)

    for col, node in enumerate(nodes):
        row_offset = 0
        df_slice_node = df_slice[df_slice.node == node]
        for ncore, max_ncore_group_rows in zip(df_max_count_per_ncores.ncores,
                                               df_max_count_per_ncores.max_count):
            ncore_group = df_slice_node[df_slice_node.ncores == ncore]
            for ncore_group_row, (_, cfg) in enumerate(ncore_group.iterrows()):
                row = row_offset + ncore_group_row
                cell = ax[row, col]
                cell.imshow(plt.imread(cfg['path']))
                cell.axis('off')

                # Outer columns carry the ncores label (on the right for the last column).
                if col == 0 or col == x_len - 1:
                    adjust_style(cell)
                    cell.set_ylabel(f"{cfg['ncores']} ncores", fontsize=20)
                if col == x_len - 1:
                    cell.yaxis.set_label_position('right')

                # Outer rows carry the node label (on top for the first row).
                if row == 0 or row == y_len - 1:
                    adjust_style(cell)
                    cell.set_xlabel(f"{node}", fontsize=20)
                if row == 0:
                    cell.xaxis.set_label_position('top')

                # Mark the first row of every ncores group except the topmost one.
                if ncore_group_row == 0 and row != 0:
                    cell.spines['top'].set_visible(True)

            # Advance by the group height, not by this node's own image count,
            # so the next group starts on the same row in every column.
            row_offset += max_ncore_group_rows

    # Blank every cell that did not receive an image.
    for cell in ax.flat:
        if not any(isinstance(child, AxesImage) for child in cell.get_children()):
            cell.axis('off')
            cell.set_xticks([])
            cell.set_yticks([])

    fig.tight_layout()
    return fig


# for unique workflows
for workflow in df.workflow.unique():
    for image_name in df.image_name.unique():
        img_name = f"{workflow}_{image_name}"
        df_slice = df[(df.workflow == workflow) & (df.image_name == image_name)]
        if df_slice.empty:
            # Not every image exists for every workflow; a 0x0 grid cannot be created.
            print(f"Skipping {img_name}: no images")
            continue

        print(f"Generating {img_name}")
        fig = _plot_image_grid(df_slice, scale)
        fig.savefig(img_name)
        # Release the figure; otherwise every generated image stays in memory.
        plt.close(fig)

