In [2]:
import pandas as pd
import numpy as np
from pathlib import Path
import matplotlib.pyplot as plt

import matplotlib as mpl
# Select the PGF backend so that figures are written as .pgf files.
mpl.use("pgf")

# LaTeX / PGF settings.
mpl.rcParams["pgf.texsystem"] = "pdflatex"
mpl.rcParams["text.usetex"] = True
mpl.rcParams["pgf.rcfonts"] = False

# Font family and the font sizes used for titles, labels, legend and ticks.
mpl.rcParams["font.family"] = "sans-serif"
mpl.rcParams["font.size"] = 9
mpl.rcParams["axes.titlesize"] = 10
mpl.rcParams["axes.labelsize"] = 9
mpl.rcParams["legend.fontsize"] = 8
mpl.rcParams["xtick.labelsize"] = 8
mpl.rcParams["ytick.labelsize"] = 8

# Extra LaTeX preamble: sans-serif math plus fallback definitions for
# \mathdefault and \textdefault.
# NOTE(review): the PGF backend appears to read its preamble from
# "pgf.preamble" rather than "text.latex.preamble" -- confirm this entry
# has the intended effect on the generated .pgf files.
mpl.rcParams["text.latex.preamble"] = r"""
\usepackage{sansmath}
\sansmath
\providecommand{\mathdefault}[1]{#1}
\providecommand{\textdefault}[1]{#1}
"""

# Directory holding the CSV measurement files, relative to this notebook.
DATA_DIR = Path('../data/')

In [3]:
def plot_scaling(df, baseline_nodes, output_path, title_prefix="Strong"):
    """
    Plot speedup and efficiency vs. number of nodes and save the figure.

    The figure has two side-by-side panels sharing a base-2 log x-axis:
    measured vs. ideal speedup (base-2 log y-axis) on the left, and measured
    vs. ideal efficiency on the right.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain columns: 'nodes', 'speedup', 'efficiency'.
        Rows are plotted in the order given, so pass a frame sorted by 'nodes'.
    baseline_nodes : int or float
        Node count of the reference run. The ideal speedup line is
        ``nodes / baseline_nodes``, i.e. 1 at the baseline.
    output_path : str
        Path to save the figure (e.g. '../slide/images/strong_speed_eff.pgf')
    title_prefix : str
        Prefix for subplot titles (e.g. 'Strong' or 'Weak')
    """
    nodes = df['nodes']
    # Ideal speedup relative to the baseline run (1 at baseline_nodes).
    ideal_speedup = nodes / baseline_nodes

    fig, axes = plt.subplots(1, 2, figsize=(6.5, 3.2), constrained_layout=True)

    # --- Subplot 1: Speedup ---
    ax = axes[0]
    ax.plot(nodes, df['speedup'], marker='o', linestyle='-', label='Measured')
    ax.plot(nodes, ideal_speedup, marker='x', linestyle='--', label='Ideal')
    ax.set_title(f'{title_prefix} Scaling: Speedup vs. Nodes')
    ax.set_xlabel('Number of Nodes')
    ax.set_ylabel('Speedup')
    ax.set_xscale('log', base=2)
    ax.set_yscale('log', base=2)
    ax.set_xticks(nodes)
    ax.set_xticklabels(nodes)
    # The y-axis shows speedup, not node count: put the ticks on the ideal
    # speedup values so they stay aligned with the data when the baseline
    # run uses more than one node. '{:g}' keeps integer values free of a
    # trailing '.0', so the labels are unchanged for a 1-node baseline.
    ax.set_yticks(ideal_speedup)
    ax.set_yticklabels([f'{value:g}' for value in ideal_speedup])
    ax.grid(True, which='both', linestyle=':')
    ax.legend()

    # --- Subplot 2: Efficiency ---
    ax = axes[1]
    ax.plot(nodes, df['efficiency'], marker='o', linestyle='-', label='Measured')
    ax.plot(nodes, np.ones_like(nodes), marker='x', linestyle='--', label='Ideal')
    ax.set_title(f'{title_prefix} Scaling: Efficiency vs. Nodes')
    ax.set_xlabel('Number of Nodes')
    ax.set_ylabel('Efficiency')
    ax.set_xscale('log', base=2)
    ax.set_xticks(nodes)
    ax.set_xticklabels(nodes)
    # Anchor the efficiency axis at 0, like the other efficiency panels in
    # this notebook, but widen the top if a measurement exceeds 1
    # (super-linear scaling) so that no point is clipped.
    ax.set_ylim(0, max(1.1, df['efficiency'].max() * 1.05))
    ax.grid(True, which='both', linestyle=':')
    ax.legend()

    # Save and close
    fig.savefig(output_path, bbox_inches='tight', pad_inches=0.02)
    plt.close(fig)
    print(f"Saved {title_prefix.lower()} scaling plot → {output_path}")


# Thread Scaling

In [9]:
# Read the OpenMP thread-scaling measurements and preview the first rows.
NAME = 'threads.csv'
threads_df = pd.read_csv(DATA_DIR.joinpath(NAME))
threads_df.head()

Unnamed: 0,size_x,size_y,Ntasks,Niterations,total_time,comm_time,comp_time,slurm_job_id,slurm_job_name,partition,nodes,cpus_per_task,tasks_per_node,ntasks,omp_threads
0,16384,16384,1,500,6.414015,0.000122,6.413418,21778350,omp_scaling_112t,,1,112,1,1,112
1,16384,16384,1,500,8.421359,4.5e-05,8.420848,21778349,omp_scaling_84t,,1,84,1,1,84
2,16384,16384,1,500,12.419889,9.4e-05,12.419446,21778348,omp_scaling_56t,,1,56,1,1,56
3,16384,16384,1,500,21.706531,5.6e-05,21.706215,21778347,omp_scaling_32t,,1,32,1,1,32
4,16384,16384,1,500,43.212054,4e-05,43.211781,21778346,omp_scaling_16t,,1,16,1,1,16


In [10]:
# Order the runs by increasing thread count.
threads_sorted = threads_df.sort_values(by='omp_threads')

# The single-thread run is the reference for speedup.
is_serial = threads_sorted['omp_threads'].eq(1)
serial_time = threads_sorted.loc[is_serial, 'total_time'].iloc[0]

# speedup = T(1) / T(p); efficiency = speedup / p
threads_sorted['speedup'] = serial_time / threads_sorted['total_time']
threads_sorted['efficiency'] = threads_sorted['speedup'].div(threads_sorted['omp_threads'])
threads_sorted[['omp_threads', 'total_time', 'speedup', 'efficiency']]


Unnamed: 0,omp_threads,total_time,speedup,efficiency
8,1,194.144426,1.0,1.0
7,2,112.609094,1.724056,0.862028
6,4,67.49175,2.876565,0.719141
5,8,53.84015,3.605941,0.450743
4,16,43.212054,4.49283,0.280802
3,32,21.706531,8.944056,0.279502
2,56,12.419889,15.631736,0.279138
1,84,8.421359,23.053812,0.27445
0,112,6.414015,30.268783,0.270257


In [11]:
# Thread counts are used for the x values, the ideal speedup line and the ticks.
thread_counts = threads_sorted['omp_threads']

fig, axes = plt.subplots(1, 2, figsize=(6.5, 3.2), constrained_layout=True)

# Left panel: measured speedup against the ideal line (speedup == threads).
ax = axes[0]
ax.plot(thread_counts, threads_sorted['speedup'], marker='o', linestyle='-', label='Measured')
ax.plot(thread_counts, thread_counts, marker='x', linestyle='--', label='Ideal')
ax.set(title='Speedup vs. OMP Threads', xlabel='OMP Threads', ylabel='Speedup')
ax.set_xscale('log', base=2)
ax.set_yscale('log', base=2)
ax.set_xticks(thread_counts, labels=thread_counts)
ax.set_yticks(thread_counts, labels=thread_counts)
ax.grid(True, which='both', linestyle=':')
ax.legend()

# Right panel: measured efficiency against the ideal value of 1.
ax = axes[1]
ax.plot(thread_counts, threads_sorted['efficiency'], marker='o', linestyle='-', label='Measured')
ax.plot(thread_counts, np.ones_like(thread_counts), marker='x', linestyle='--', label='Ideal')
ax.set(title='Efficiency vs. OMP Threads', xlabel='OMP Threads', ylabel='Efficiency')
ax.set_xscale('log', base=2)
ax.set_xticks(thread_counts, labels=thread_counts)
ax.set_ylim(0, 1.1)
ax.grid(True, which='both', linestyle=':')
ax.legend()

# Save as PGF with a tight bounding box, then release the figure.
fig.savefig("../slide/images/threads_speed_eff.pgf", bbox_inches="tight", pad_inches=0.02)
plt.close(fig)

# Strong Scaling

In [None]:
# Read the strong-scaling measurements and preview the first rows.
NAME = 'strong.csv'
strong_df = pd.read_csv(DATA_DIR.joinpath(NAME))
strong_df.head()

In [11]:
# Order the runs by node count; the first row is then the baseline run.
strong_df_sorted = strong_df.sort_values(by='nodes')

baseline_nodes = strong_df_sorted['nodes'].iat[0]
baseline_time = strong_df_sorted['total_time'].iat[0]

# speedup = T(baseline) / T(n); efficiency = speedup / (n / n_baseline)
strong_df_sorted['speedup'] = baseline_time / strong_df_sorted['total_time']
strong_df_sorted['efficiency'] = strong_df_sorted['speedup'].div(
    strong_df_sorted['nodes'] / baseline_nodes
)

plot_scaling(
    strong_df_sorted,
    baseline_nodes,
    "../slide/images/strong_speed_eff.pgf",
    title_prefix="Strong",
)

Saved strong scaling plot → ../slide/images/strong_speed_eff.pgf


# Weak Scaling

In [12]:
# Read the weak-scaling measurements and keep a copy ordered by node count.
NAME = 'weak.csv'
weak_df = pd.read_csv(DATA_DIR.joinpath(NAME))
weak_df_sorted = weak_df.sort_values('nodes')

In [13]:
# weak_df_sorted is ordered by node count, so its first row is the baseline run.
baseline_nodes = weak_df_sorted['nodes'].iloc[0]
baseline_time = weak_df_sorted['total_time'].iloc[0]

# Weak-scaling efficiency: T(baseline) / T(n); the ideal value is 1.
weak_df_sorted['efficiency'] = baseline_time / weak_df_sorted['total_time']

# Scaled speedup, normalised by the baseline node count so that it is on the
# same scale as the ideal line drawn by plot_scaling (nodes / baseline_nodes).
# Without the division the two curves disagree whenever the baseline run uses
# more than one node.
weak_df_sorted['speedup'] = weak_df_sorted['efficiency'] * (weak_df_sorted['nodes'] / baseline_nodes)

plot_scaling(
    df=weak_df_sorted,
    baseline_nodes=baseline_nodes,
    output_path="../slide/images/weak_speed_eff.pgf",
    title_prefix="Weak"
)

Saved weak scaling plot → ../slide/images/weak_speed_eff.pgf


In [14]:
def plot_weak_scaling(df, output_path):
    """
    Draw the weak-scaling timing breakdown next to the efficiency curve.

    The left panel shows total, communication and computation time per node
    count; the right panel shows measured efficiency against the ideal value
    of 1. Both panels use a base-2 log x-axis. The figure is saved to
    ``output_path`` and then closed.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain columns: 'nodes', 'total_time', 'comm_time', 'comp_time', 'efficiency'
    output_path : str
        Path to save the figure (e.g. '../slide/images/weak_times_eff.pgf')
    """
    # Work on a copy ordered by node count so the lines are drawn left to right.
    df = df.sort_values(by='nodes')
    nodes = df['nodes']

    fig, (ax_times, ax_eff) = plt.subplots(1, 2, figsize=(6.5, 3.2), constrained_layout=True)

    # --- Left subplot: one line per timing component ---
    components = (('total_time', 'Total'), ('comm_time', 'Comm'), ('comp_time', 'Comp'))
    for column, legend_label in components:
        ax_times.plot(nodes, df[column], marker='o', linestyle='-', label=legend_label)
    ax_times.set(
        title='Time Components (Weak Scaling)',
        xlabel='Number of Nodes',
        ylabel='Time (seconds)',
    )
    ax_times.set_xscale('log', base=2)
    ax_times.set_xticks(nodes)
    ax_times.set_xticklabels(nodes)
    ax_times.grid(True, which='both', linestyle=':')
    ax_times.legend()

    # --- Right subplot: measured efficiency vs. the ideal value of 1 ---
    ax_eff.plot(nodes, df['efficiency'], marker='o', linestyle='-', label='Measured')
    ax_eff.plot(nodes, np.ones_like(nodes), marker='x', linestyle='--', label='Ideal')
    ax_eff.set(
        title='Efficiency (Weak Scaling)',
        xlabel='Number of Nodes',
        ylabel='Efficiency',
    )
    ax_eff.set_xscale('log', base=2)
    ax_eff.set_xticks(nodes)
    ax_eff.set_xticklabels(nodes)
    ax_eff.set_ylim(0, 1.1)
    ax_eff.grid(True, which='both', linestyle=':')
    ax_eff.legend()

    # Write the figure to disk and release it.
    fig.savefig(output_path, bbox_inches='tight', pad_inches=0.02)
    plt.close(fig)
    print(f"Saved weak scaling plot → {output_path}")


In [None]:
# The baseline is the run on the fewest nodes. weak_df is in CSV order, which
# is not guaranteed to start with that run, so look the baseline up in a copy
# sorted by node count instead of taking row 0 of the unsorted frame.
baseline_time = weak_df.sort_values(by='nodes')['total_time'].iloc[0]

# Weak-scaling efficiency: T(baseline) / T(n); the ideal value is 1.
weak_df['efficiency'] = baseline_time / weak_df['total_time']

plot_weak_scaling(
    df=weak_df,
    output_path="../slide/images/weak_times_eff.pgf"
)

Saved weak scaling plot → ../slide/images/weak_times_eff.pgf
