In [None]:
import math
import os
import sys
from glob import glob

import matplotlib.pyplot as plt
import pandas as pd

import thicket as th

# Remove pandas' row and column display limits for the rest of the notebook.
for display_option in ("display.max_rows", "display.max_columns"):
    pd.set_option(display_option, None)

Read all files

In [None]:
# "cali_files" is the folder containing the .cali files; to read profiles from
# a different folder, change the folder name in the glob pattern below.
cali_files = sorted(glob("cali_files/*.cali"))  # sorted so the load order is deterministic
if not cali_files:
    # Stop here with a clear message rather than handing the reader an empty list.
    raise FileNotFoundError("No .cali files found matching 'cali_files/*.cali'")
tk = th.Thicket.from_caliperreader(cali_files)

In [None]:
# Print whatever `show_metric_columns()` reports for this Thicket.
metric_columns = tk.show_metric_columns()
print(metric_columns)

In [None]:
# Print the tree produced by `tk.tree` for the "Avg time/rank" metric column.
call_tree = tk.tree(metric_column="Avg time/rank")
print(call_tree)

Move the `num_procs`, `input_size`, and `input_type` columns from the Thicket metadata table into the performance data, then index the performance data by them.

In [12]:
# Bring each run parameter over from the metadata (via
# `metadata_column_to_perfdata`) so it is available as a column of `tk.dataframe`.
for metadata_column in ("num_procs", "input_size", "input_type"):
    tk.metadata_column_to_perfdata(metadata_column)

# Rebuild the index so rows are addressed by call-tree node plus the run
# parameters, and sort it so later `.xs(...)` slices come back in index order.
index_levels = ["node", "num_procs", "input_size", "input_type"]
flat_perfdata = tk.dataframe.reset_index()
tk.dataframe = flat_perfdata.set_index(index_levels).sort_index()


In [None]:
# Preview the first 10 rows of the performance table instead of the whole frame.
tk.dataframe.head(n=10)

In [None]:
# Sanity check: slice out the rows for a single input size (2^16) and preview them.
tk.dataframe.xs(1 << 16, level="input_size").head(n=10)

In [None]:
# Define common variables
processes = [2, 4, 8, 16, 32, 64, 128, 256, 512]
matrix_sizes = [2**16, 2**18, 2**20, 2**22, 2**24, 2**26, 2**28]
input_types = ["sorted", "random", "reverseSorted", "perturbed"]

# Change font size for all plots
plt.rcParams.update({"font.size": 20})

# Define the function names
function_names = [
    "data_init_runtime",
    "correctness_check",
    "comm_large",
    "comm_small",
    "comp_large",
    "comp_small"
]

# Per-rank metrics drawn on every figure: (dataframe column, legend label, marker).
rank_metrics = [
    ("Min time/rank", "Min Time/rank", "o"),
    ("Max time/rank", "Max Time/rank", "s"),
    ("Avg time/rank", "Avg Time/rank", "^"),
    ("Variance time/rank", "Variance Time/rank", "d"),
]
rank_metric_columns = [column for column, _, _ in rank_metrics]

# First, generate the graphs for the metrics: one figure per
# (input size, input type, function), saved to the working directory.
for matrix_size in matrix_sizes:
    exponent_N = int(math.log2(matrix_size))

    for input_type in input_types:
        try:
            df_matrix_input = tk.dataframe.xs((matrix_size, input_type), level=('input_size', 'input_type'))
        except KeyError:
            # This size/type combination is not present in the index.
            continue

        for func_name in function_names:
            df_func = df_matrix_input[df_matrix_input['name'] == func_name]

            if df_func.empty:
                continue

            # Collapse to exactly one row per process count, sorted ascending.
            # Plotting the raw rows against the unique process counts breaks
            # (length mismatch or mis-paired points) whenever several rows share
            # a process count; with one row per count the mean is that row.
            per_procs = df_func[rank_metric_columns].groupby(level='num_procs').mean()
            num_procs = per_procs.index.tolist()

            # Plot the data
            plt.figure(figsize=(12, 6))
            for column, label, marker in rank_metrics:
                plt.plot(num_procs, per_procs[column].values, label=label, marker=marker)

            plt.xlabel('Number of Processes')
            plt.ylabel('Time (s)')
            plt.title(f'{func_name} Times for Matrix Size 2^{exponent_N}, Input Type: {input_type}')
            plt.legend()
            plt.grid(True, which="both", ls="--", linewidth=0.5)
            plt.xscale('log', base=2)
            plt.xticks(num_procs, num_procs)
            plt.savefig(f'{func_name}_times_matrix_2^{exponent_N}_input_{input_type}.jpeg', format='jpeg')
            plt.close()

# Now, create the total time graphs separately: one figure per
# (function, input type) with one line per input size.
for func_name in function_names:
    for input_type in input_types:
        plt.figure(figsize=(12, 6))
        legend_entries = []
        for matrix_size in matrix_sizes:
            exponent_N = int(math.log2(matrix_size))

            try:
                df_matrix_input = tk.dataframe.xs((matrix_size, input_type), level=('input_size', 'input_type'))
            except KeyError:
                continue

            df_func = df_matrix_input[df_matrix_input['name'] == func_name]

            if df_func.empty:
                continue

            # One value per process count, in ascending order, so x and y
            # always have the same length and pairing.
            total_time = df_func['Total time'].groupby(level='num_procs').mean()

            plt.plot(total_time.index.values, total_time.values, label=f'2^{exponent_N}', marker='o')
            legend_entries.append(f'2^{exponent_N}')

        if not legend_entries:
            # Nothing was drawn for this function/type: discard the empty figure.
            plt.close()
            continue

        plt.xlabel('Number of Processes')
        plt.ylabel('Total Time (s)')
        plt.title(f'{func_name} Total Time for Input Type: {input_type}')
        plt.legend(title='Matrix Sizes')
        plt.grid(True, which="both", ls="--", linewidth=0.5)
        plt.xscale('log', base=2)
        plt.xticks(processes, processes)
        plt.savefig(f'{func_name}_total_time_input_{input_type}.jpeg', format='jpeg')
        plt.close()

print("Done")

In [None]:
# Define common variables
processes = [2, 4, 8, 16, 32, 64, 128, 256, 512]
matrix_sizes = [2**i for i in range(16, 29, 2)]  # 2^16 to 2^28
input_types = ["sorted", "random", "reverseSorted", "perturbed"]

# Change font size for all plots
plt.rcParams.update({"font.size": 20})

# Define the function names for the plots
function_names = ["comp_large", "comm", "main"]

# Figures from this cell are written here. Create the folder up front because
# savefig does not create missing directories.
output_dir = 'type1_pics'
os.makedirs(output_dir, exist_ok=True)

# This cell expects 'tk.dataframe' to carry the index levels
# 'input_size', 'input_type' and 'num_procs', and the columns
# 'name' and 'Avg time/rank'.

# Generate strong scaling plots for each input size with lines for input types
for function_name in function_names:
    for matrix_size in matrix_sizes:
        exponent_N = int(math.log2(matrix_size))
        plt.figure(figsize=(12, 6))
        legend_entries = []
        for input_type in input_types:
            try:
                df = tk.dataframe.xs(
                    (matrix_size, input_type), level=('input_size', 'input_type')
                )
            except KeyError:
                continue  # Skip if this combination doesn't exist

            df_func = df[df['name'] == function_name]
            if df_func.empty:
                continue  # Skip if no data for this function

            # One value per process count, sorted ascending. Plotting the raw
            # rows against the unique process counts breaks (length mismatch or
            # mis-paired points) whenever several rows share a process count;
            # with one row per count the mean is that row.
            avg_time = df_func['Avg time/rank'].groupby(level='num_procs').mean()

            plt.plot(avg_time.index.values, avg_time.values, label=input_type, marker='o')
            legend_entries.append(input_type)

        if not legend_entries:
            plt.close()
            continue  # Skip if no data was plotted

        plt.xlabel('Number of Processes')
        plt.ylabel('Avg Time (s)')
        plt.title(f'Strong Scaling - {function_name} for Input Size 2^{exponent_N}')
        plt.legend(title='Input Types')
        plt.grid(True, which="both", ls="--", linewidth=0.5)
        plt.xscale('log', base=2)
        plt.xticks(processes, processes)
        plt.savefig(
            os.path.join(output_dir, f'strong_scaling_{function_name}_size_2^{exponent_N}.jpeg'),
            format='jpeg',
        )
        plt.close()

print("Done")

In [None]:
# Figures from this cell are written here. Create the folder up front because
# savefig does not create missing directories.
output_dir = 'type2_pics'
os.makedirs(output_dir, exist_ok=True)

# Generate strong scaling speedup plots for each input type.
# Relies on input_types, function_names, matrix_sizes and processes
# defined in an earlier cell.
for input_type in input_types:
    for function_name in function_names:
        plt.figure(figsize=(12, 6))
        plotted_any = False
        for matrix_size in matrix_sizes:
            try:
                df = tk.dataframe.xs(
                    (matrix_size, input_type), level=('input_size', 'input_type')
                )
            except KeyError:
                continue

            df_func = df[df['name'] == function_name]
            if df_func.empty:
                continue

            # One value per process count, sorted ascending, so that x and y
            # line up and the first entry really is the smallest process count.
            total_time = df_func['Total time'].groupby(level='num_procs').mean()

            # Use time at smallest number of processes as baseline
            T1 = total_time.iloc[0]
            speedup = T1 / total_time.values

            plt.plot(
                total_time.index.values,
                speedup,
                label=f'Size 2^{int(math.log2(matrix_size))}',
                marker='o',
            )
            plotted_any = True

        if not plotted_any:
            # Nothing to show: skip the legend and do not save an empty figure.
            plt.close()
            continue

        plt.xlabel('Number of Processes')
        plt.ylabel('Speedup')
        plt.title(f'Strong Scaling Speedup for {function_name}, Input Type: {input_type}')
        plt.legend(title='Input Sizes')
        plt.grid(True, which="both", ls="--", linewidth=0.5)
        plt.xscale('log', base=2)
        plt.xticks(processes, processes)
        plt.savefig(
            os.path.join(output_dir, f'strong_scaling_speedup_{function_name}_input_{input_type}.jpeg'),
            format='jpeg',
        )
        plt.close()

print("Done")


In [None]:
# Prepare the specified weak scaling pairs: (num_procs, matrix_size)
weak_scaling_pairs = [
    (2, 2**20),
    (8, 2**22),
    (32, 2**24),
    (128, 2**26),
    (512, 2**28),
]

# Prepare mapping of markers and colors to matrix_sizes
matrix_sizes_list = [2**20, 2**22, 2**24, 2**26, 2**28]
marker_styles = ['o', 's', '^', 'd', 'x']  # Different markers for each matrix size
colors = ['blue', 'green', 'red', 'purple', 'orange']  # Different colors for each matrix size
matrix_size_marker_map = dict(zip(matrix_sizes_list, marker_styles))
matrix_size_color_map = dict(zip(matrix_sizes_list, colors))

# Figures from this cell are written here. Create the folder up front because
# savefig does not create missing directories.
output_dir = 'type3_pics'
os.makedirs(output_dir, exist_ok=True)

# Generate weak scaling plots for each input type.
# Relies on input_types and function_names defined in an earlier cell.
for input_type in input_types:
    for function_name in function_names:
        # Collected points for this figure: (num_procs, avg time per rank, matrix_size)
        weak_points = []
        for num_procs, matrix_size in weak_scaling_pairs:
            exponent_N = int(math.log2(matrix_size))
            try:
                df = tk.dataframe.xs(
                    (matrix_size, input_type, num_procs), level=('input_size', 'input_type', 'num_procs')
                )
            except KeyError:
                print(f"Data not found for matrix_size=2^{exponent_N}, num_procs={num_procs}, input_type={input_type}")
                continue  # Skip if this combination doesn't exist

            df_func = df[df['name'] == function_name]
            if df_func.empty:
                print(f"No data for function {function_name} at matrix_size=2^{exponent_N}, num_procs={num_procs}")
                continue  # Skip if no data for this function

            # Average over every matching row instead of silently taking the
            # first one; with a single matching row this is that row's value.
            avg_time_per_rank = df_func['Avg time/rank'].mean()
            weak_points.append((num_procs, avg_time_per_rank, matrix_size))

        if not weak_points:
            print(f"No data to plot for {function_name}, Input Type: {input_type}")
            continue

        num_procs_list = [procs for procs, _, _ in weak_points]
        avg_time_per_rank_list = [avg_time for _, avg_time, _ in weak_points]

        plt.figure(figsize=(12, 6))
        # Plot the (unlabelled) line connecting the data points
        plt.plot(num_procs_list, avg_time_per_rank_list, linestyle='-', color='gray', alpha=0.5)

        # Plot each data point with the marker and color of its matrix size.
        # The label on each point is what populates the legend below.
        for num_procs, avg_time, matrix_size in weak_points:
            plt.plot(
                num_procs,
                avg_time,
                marker=matrix_size_marker_map[matrix_size],
                markersize=10,
                linestyle='None',
                color=matrix_size_color_map[matrix_size],
                label=f'2^{int(math.log2(matrix_size))}',
            )

        plt.xlabel('Number of Processes')
        plt.ylabel('Avg Time per Rank (s)')
        plt.title(f'Weak Scaling for {function_name}, Input Type: {input_type}')
        plt.grid(True, which="both", ls="--", linewidth=0.5)
        plt.xscale('log', base=2)
        plt.xticks(num_procs_list, num_procs_list)
        plt.legend(title='Matrix Size', loc='best')
        plt.savefig(
            os.path.join(output_dir, f'weak_scaling_{function_name}_input_{input_type}.jpeg'),
            format='jpeg',
        )
        plt.close()

print("Done")
