In [None]:
from glob import glob

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import thicket as th

# Gather every Caliper profile under cali/ and read them into one Thicket object
cali_files = glob("cali/*.cali")
tk = th.Thicket.from_caliperreader(cali_files)


In [None]:
# Copy the run parameters from the Thicket metadata into the performance data
for metadata_column in ("input_type", "input_size", "num_procs"):
    tk.metadata_column_to_perfdata(metadata_column)

# Show the column names so input_type, input_size, and num_procs can be checked
print("Columns in dataframe:", tk.dataframe.columns)

# Re-key the dataframe by node and run parameters, sorted, so it is easy to slice for plotting
plot_index = ["node", "num_procs", "input_size", "input_type"]
tk.dataframe = (
    tk.dataframe
    .reset_index()
    .set_index(plot_index)
    .sort_index()
)

# Preview the first rows to verify the resulting structure
print(tk.dataframe.head())

In [None]:
# Function to plot strong scaling for each input size
def plot_strong_scaling(df, input_size, title):
    plt.figure(figsize=(15, 7))
    
    for input_type in df.index.get_level_values('input_type').unique():
        subset = df.xs((input_size, input_type), level=('input_size', 'input_type'), drop_level=False)

        # Filter out non-positive values for plotting
        subset = subset[subset['Avg time/rank'] > 0]
        
        if not subset.empty:
            plt.plot(subset.index.get_level_values('num_procs'), subset['Avg time/rank'], label=f'{input_type}', marker='o')
    
    if pd.notna(input_size):
        input_size_str = f"$2^{{{int(np.log2(input_size))}}}$"
    else:
        input_size_str = "Unknown Size"

    plt.title(f"{title}: Strong Scaling (Input Size: {input_size_str})")
    plt.xlabel('Number of Processes')
    plt.ylabel('Avg Time per Rank (seconds)')
    plt.xscale('log', base=2)
    plt.yscale('log')
    plt.legend()
    plt.grid(True)
    plt.show()

# Function to plot weak scaling for each input type
def plot_weak_scaling(df, input_type, title):
    plt.figure(figsize=(15, 7))
    
    subset = df.xs(input_type, level='input_type', drop_level=False)

    # Filter out non-positive values for plotting
    subset = subset[subset['Avg time/rank'] > 0]

    if not subset.empty:
        plt.plot(subset.index.get_level_values('num_procs'), subset['Avg time/rank'], label=f'{input_type}', marker='o')

    plt.title(f"{title}: Weak Scaling ({input_type})")
    plt.xlabel('Number of Processes')
    plt.ylabel('Avg Time per Rank (seconds)')
    plt.xscale('log', base=2)
    plt.yscale('log')
    plt.grid(True)
    plt.legend()
    plt.show()

# Function to plot strong scaling speedup
def plot_speedup(df, input_type, base_time, title):
    plt.figure(figsize=(15, 7))
    
    subset = df.xs(input_type, level='input_type', drop_level=False)

    # Filter out non-positive values for speedup calculation
    subset = subset[subset['Avg time/rank'] > 0]

    if not subset.empty:
        speedup = base_time / subset['Avg time/rank']
        plt.plot(subset.index.get_level_values('num_procs'), speedup, label=f'Speedup ({input_type})', marker='o')

        plt.title(f"{title}: Strong Scaling Speedup ({input_type})")
        plt.xlabel('Number of Processes')
        plt.ylabel('Speedup')
        plt.xscale('log', base=2)
        plt.grid(True)
        plt.legend()
        plt.show()

In [None]:
# Call-tree regions that get a set of scaling plots
REGIONS_TO_PLOT = {'comp_large', 'comm', 'main'}
# Process count whose timing is used as the speedup reference
BASELINE_NUM_PROCS = 2

for node in tk.graph.traverse():
    region = node.frame['name']
    if region not in REGIONS_TO_PLOT:
        continue

    # Slice the dataframe for this node once and reuse it for every plot
    node_df = tk.dataframe.loc[node]
    input_sizes = node_df.index.get_level_values("input_size").unique()
    input_types = node_df.index.get_level_values("input_type").unique()

    # Exclude 'NaN' and 'Random' from input_types
    input_types = [t for t in input_types if pd.notna(t) and t != 'Random']

    # Strong scaling plots for each input size
    for input_size in input_sizes:
        plot_strong_scaling(node_df, input_size, region)

    # Strong scaling speedup plots for each input type
    for input_type in input_types:
        # Take the baseline from this input type's own rows, so an excluded or
        # unrelated input type never supplies the reference time.
        type_rows = node_df.xs(input_type, level='input_type', drop_level=False)
        at_baseline = type_rows.index.get_level_values('num_procs') == BASELINE_NUM_PROCS
        baseline_times = type_rows[at_baseline]['Avg time/rank']
        baseline_times = baseline_times[baseline_times > 0]

        if baseline_times.empty:
            # No usable reference measurement: skip this plot rather than crash
            print(f"Skipping speedup plot for {region} ({input_type}): "
                  f"no positive timing at {BASELINE_NUM_PROCS} processes")
            continue

        plot_speedup(node_df, input_type, baseline_times.iloc[0], region)

    # Weak scaling plots for each input type
    for input_type in input_types:
        plot_weak_scaling(node_df, input_type, region)