In [7]:
import sys
sys.path.append("/scratch/group/csce-435-f25/python-3.10.8/lib/python3.10/site-packages")
from glob import glob

import matplotlib.pyplot as plt
import pandas as pd

import thicket as th

# Lift pandas' display limits so printed tables show every row and column.
for display_option in ("display.max_rows", "display.max_columns"):
    pd.set_option(display_option, None)

ImportError: failed to find libmagic.  Check your installation

Read all files

In [6]:
# Read every .cali profile in the current working directory into one Thicket.
# Sorting makes the load order deterministic (glob returns matches in arbitrary
# order), and the explicit check reports "no profiles found" clearly instead of
# passing an empty list to the reader.
cali_files = sorted(glob("*.cali"))
if not cali_files:
    raise FileNotFoundError("No .cali files found in the current directory")
tk = th.Thicket.from_caliperreader(cali_files)

NameError: name 'th' is not defined

View Calltree

In [None]:
# Render the call tree using the "Avg time/rank" metric, then print it.
calltree_text = tk.tree(metric_column="Avg time/rank")
print(calltree_text)

Copy `num_procs` and `matrix_size` from the Thicket metadata table into the performance data, then index the performance data by `node`, `num_procs`, and `matrix_size`.

In [None]:
# Bring the two run parameters from the metadata table into the performance data.
for metadata_key in ("num_procs", "matrix_size"):
    tk.metadata_column_to_perfdata(metadata_key)

# Flatten the existing index into columns, then re-key the performance data by
# (node, num_procs, matrix_size) and sort it so .loc lookups on that key work.
perf_flat = tk.dataframe.reset_index()
tk.dataframe = perf_flat.set_index(["node", "num_procs", "matrix_size"]).sort_index()

# Preview the first rows of the re-indexed table.
tk.dataframe.head()

In [None]:
# Shared settings used by every extraction and plotting cell below.
# Process counts to look up: the powers of two from 2 through 64.
processes = [2 ** exponent for exponent in range(1, 7)]
# Matrix dimension used in the lookups, plot titles and output file names.
matrix_size = 1024

In [None]:
# Use a 14-point base font for every figure created after this cell.
plt.rcParams["font.size"] = 14

## Worker Process Time Plots for Matrix Size 1024x1024

In [None]:
# Extract worker receive times: min/max/avg time per rank of the
# 'worker_recieve' region for each process count at the configured matrix size.
# NOTE(review): the lookup assumes the "node" index level can be matched by the
# region-name string and that each (node, num_procs, matrix_size) key selects a
# single row - confirm against the Thicket dataframe.
worker_receive_data = []
for p in processes:
    try:
        row = tk.dataframe.loc[('worker_recieve', p, matrix_size)]
        worker_receive_data.append({
            'procs': p,
            'min': row['Min time/rank'],
            'max': row['Max time/rank'],
            'avg': row['Avg time/rank']
        })
    except KeyError:
        print(f"Warning: No data for worker_recieve with {p} processes")

# Name the columns explicitly: if every lookup above failed, the record list is
# empty and a column-less DataFrame would make df_receive['procs'] raise KeyError.
df_receive = pd.DataFrame(worker_receive_data, columns=['procs', 'min', 'max', 'avg'])

# Plot Worker Receive Times (skipped when nothing was found, so that no blank
# image is written to disk).
if df_receive.empty:
    print("Warning: No worker_recieve data found; skipping plot")
else:
    fig, ax = plt.subplots(figsize=(12, 7))
    for column, marker, label in [('min', 'o', 'Min'), ('max', 's', 'Max'), ('avg', '^', 'Avg')]:
        ax.plot(df_receive['procs'], df_receive[column], marker=marker, linewidth=2, markersize=8, label=label)
    ax.set_xlabel('Number of Processes')
    ax.set_ylabel('Time (seconds)')
    ax.set_title(f'Worker Receive Time - Matrix Size {matrix_size}x{matrix_size}')
    ax.legend()
    ax.grid(True, alpha=0.3)
    ax.set_xticks(processes)
    plt.tight_layout()
    plt.savefig(f'worker_receive_{matrix_size}.png', dpi=300, bbox_inches='tight')
    plt.show()

In [None]:
# Extract worker calculation times: min/max/avg time per rank of the
# 'worker_calculation' region for each process count at the configured matrix size.
# NOTE(review): the lookup assumes the "node" index level can be matched by the
# region-name string and that each (node, num_procs, matrix_size) key selects a
# single row - confirm against the Thicket dataframe.
worker_calc_data = []
for p in processes:
    try:
        row = tk.dataframe.loc[('worker_calculation', p, matrix_size)]
        worker_calc_data.append({
            'procs': p,
            'min': row['Min time/rank'],
            'max': row['Max time/rank'],
            'avg': row['Avg time/rank']
        })
    except KeyError:
        print(f"Warning: No data for worker_calculation with {p} processes")

# Name the columns explicitly: if every lookup above failed, the record list is
# empty and a column-less DataFrame would make df_calc['procs'] raise KeyError.
df_calc = pd.DataFrame(worker_calc_data, columns=['procs', 'min', 'max', 'avg'])

# Plot Worker Calculation Times (skipped when nothing was found, so that no
# blank image is written to disk).
if df_calc.empty:
    print("Warning: No worker_calculation data found; skipping plot")
else:
    fig, ax = plt.subplots(figsize=(12, 7))
    for column, marker, label in [('min', 'o', 'Min'), ('max', 's', 'Max'), ('avg', '^', 'Avg')]:
        ax.plot(df_calc['procs'], df_calc[column], marker=marker, linewidth=2, markersize=8, label=label)
    ax.set_xlabel('Number of Processes')
    ax.set_ylabel('Time (seconds)')
    ax.set_title(f'Worker Calculation Time - Matrix Size {matrix_size}x{matrix_size}')
    ax.legend()
    ax.grid(True, alpha=0.3)
    ax.set_xticks(processes)
    plt.tight_layout()
    plt.savefig(f'worker_calculation_{matrix_size}.png', dpi=300, bbox_inches='tight')
    plt.show()

In [None]:
# Extract worker send times: min/max/avg time per rank of the 'worker_send'
# region for each process count at the configured matrix size.
# NOTE(review): the lookup assumes the "node" index level can be matched by the
# region-name string and that each (node, num_procs, matrix_size) key selects a
# single row - confirm against the Thicket dataframe.
worker_send_data = []
for p in processes:
    try:
        row = tk.dataframe.loc[('worker_send', p, matrix_size)]
        worker_send_data.append({
            'procs': p,
            'min': row['Min time/rank'],
            'max': row['Max time/rank'],
            'avg': row['Avg time/rank']
        })
    except KeyError:
        print(f"Warning: No data for worker_send with {p} processes")

# Name the columns explicitly: if every lookup above failed, the record list is
# empty and a column-less DataFrame would make df_send['procs'] raise KeyError.
df_send = pd.DataFrame(worker_send_data, columns=['procs', 'min', 'max', 'avg'])

# Plot Worker Send Times (skipped when nothing was found, so that no blank
# image is written to disk).
if df_send.empty:
    print("Warning: No worker_send data found; skipping plot")
else:
    fig, ax = plt.subplots(figsize=(12, 7))
    for column, marker, label in [('min', 'o', 'Min'), ('max', 's', 'Max'), ('avg', '^', 'Avg')]:
        ax.plot(df_send['procs'], df_send[column], marker=marker, linewidth=2, markersize=8, label=label)
    ax.set_xlabel('Number of Processes')
    ax.set_ylabel('Time (seconds)')
    ax.set_title(f'Worker Send Time - Matrix Size {matrix_size}x{matrix_size}')
    ax.legend()
    ax.grid(True, alpha=0.3)
    ax.set_xticks(processes)
    plt.tight_layout()
    plt.savefig(f'worker_send_{matrix_size}.png', dpi=300, bbox_inches='tight')
    plt.show()

## Master Process Time Plots for Matrix Size 1024x1024

In [None]:
# Extract master times (whole computation, initialization, send/receive):
# the "Avg time/rank" value of each region for every process count at the
# configured matrix size.
# NOTE(review): the lookup assumes the "node" index level can be matched by the
# region-name string - confirm against the Thicket dataframe.
whole_comp_data = []
master_init_data = []
master_sr_data = []

# Region name -> list that collects that region's records.
master_regions = {
    'whole_computation': whole_comp_data,
    'master_initialization': master_init_data,
    'master_send_recieve': master_sr_data,
}

for p in processes:
    for region, records in master_regions.items():
        # Each region is looked up in its own try block so that one missing
        # region does not discard the other regions for this process count.
        try:
            row = tk.dataframe.loc[(region, p, matrix_size)]
            records.append({
                'procs': p,
                'time': row['Avg time/rank']
            })
        except KeyError as e:
            print(f"Warning: Missing data for {region} with {p} processes: {e}")

# Name the columns explicitly so the frames keep 'procs' and 'time' columns
# even when no records were collected.
df_whole = pd.DataFrame(whole_comp_data, columns=['procs', 'time'])
df_init = pd.DataFrame(master_init_data, columns=['procs', 'time'])
df_sr = pd.DataFrame(master_sr_data, columns=['procs', 'time'])

In [None]:
# Draw the three master-side timings against process count on a single axes,
# save the figure as a PNG named after the matrix size, then display it.
fig, ax = plt.subplots(figsize=(12, 7))

# (data frame, marker shape, legend label) for each curve, in drawing order.
master_series = (
    (df_whole, 'o', 'Whole Computation'),
    (df_init, 's', 'Initialization'),
    (df_sr, '^', 'Send & Receive'),
)
for frame, marker_shape, series_label in master_series:
    ax.plot(
        frame['procs'],
        frame['time'],
        marker=marker_shape,
        linewidth=2,
        markersize=8,
        label=series_label,
    )

ax.set_xlabel('Number of Processes')
ax.set_ylabel('Time (seconds)')
ax.set_title(f'Master Process Times - Matrix Size {matrix_size}x{matrix_size}')
ax.legend()
ax.grid(True, alpha=0.3)
# One tick per configured process count.
ax.set_xticks(processes)

plt.tight_layout()
plt.savefig(f'master_times_{matrix_size}.png', dpi=300, bbox_inches='tight')
plt.show()

## View the raw data

In [None]:
# Print every extracted table under its heading so the plotted values can be
# checked by eye.
worker_tables = (
    ("Worker Receive Times:", df_receive),
    ("\nWorker Calculation Times:", df_calc),
    ("\nWorker Send Times:", df_send),
)
master_tables = (
    ("Whole Computation:", df_whole),
    ("\nInitialization:", df_init),
    ("\nSend & Receive:", df_sr),
)

for heading, frame in worker_tables:
    print(heading)
    print(frame)

print("\nMaster Times:")
for heading, frame in master_tables:
    print(heading)
    print(frame)