In [2]:
import pandas as pd
import numpy as np

import matplotlib as mpl
# Select the PGF backend; the figures in this notebook are saved as .pgf files.
mpl.use("pgf")

# --- LaTeX / PGF engine settings ---
mpl.rcParams["pgf.texsystem"] = "pdflatex"
mpl.rcParams["text.usetex"] = True
mpl.rcParams["pgf.rcfonts"] = False

# --- Fonts: compact text sized for 12pt slides ---
mpl.rcParams["font.family"] = "sans-serif"
mpl.rcParams["font.size"] = 9
mpl.rcParams["axes.titlesize"] = 10
mpl.rcParams["axes.labelsize"] = 9
mpl.rcParams["legend.fontsize"] = 8
mpl.rcParams["xtick.labelsize"] = 8
mpl.rcParams["ytick.labelsize"] = 8

# --- LaTeX preamble: load sansmath and provide pass-through fallbacks for
# \mathdefault / \textdefault in case they are not already defined.
# NOTE(review): the pgf backend also has its own "pgf.preamble" setting —
# confirm that this preamble is picked up where it is intended to apply.
mpl.rcParams["text.latex.preamble"] = r"""
\usepackage{sansmath}
\sansmath
\providecommand{\mathdefault}[1]{#1}
\providecommand{\textdefault}[1]{#1}
"""

In [1]:
import numpy as np
import matplotlib.pyplot as plt

def plot_scaling(df, baseline_nodes, output_path, title_prefix="Strong"):
    """
    Plots speedup and efficiency vs. number of nodes (log-scaled x-axis).

    The left panel shows measured speedup against the ideal line
    ``nodes / baseline_nodes`` on log2/log2 axes; the right panel shows
    measured efficiency against the constant ideal value 1. The figure is
    saved to ``output_path`` and then closed.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain columns: 'nodes', 'speedup', 'efficiency'.
        Rows may be in any order; they are sorted by 'nodes' before plotting.
    baseline_nodes : int or float
        The baseline number of nodes used for ideal scaling line.
        Must be strictly positive.
    output_path : str
        Path to save the figure (e.g. '../slide/images/strong_speed_eff.pgf')
    title_prefix : str
        Prefix for subplot titles (e.g. 'Strong' or 'Weak')

    Raises
    ------
    ValueError
        If ``baseline_nodes`` is not strictly positive.
    """
    if baseline_nodes <= 0:
        raise ValueError(f"baseline_nodes must be positive, got {baseline_nodes!r}")

    # Sort so the lines connect points in increasing node order regardless of
    # the row order of the input (same convention as plot_weak_scaling).
    df = df.sort_values(by='nodes')
    nodes = df['nodes']
    ideal_speedup = nodes / baseline_nodes

    fig, axes = plt.subplots(1, 2, figsize=(6.5, 3.2), constrained_layout=True)

    # --- Subplot 1: Speedup ---
    ax = axes[0]
    ax.plot(nodes, df['speedup'], marker='o', linestyle='-', label='Measured')
    ax.plot(nodes, ideal_speedup, marker='x', linestyle='--', label='Ideal')
    ax.set_title(f'{title_prefix} Scaling: Speedup vs. Nodes')
    ax.set_xlabel('Number of Nodes')
    ax.set_ylabel('Speedup')
    ax.set_xscale('log', base=2)
    ax.set_yscale('log', base=2)
    ax.set_xticks(nodes)
    ax.set_xticklabels(nodes)
    # The y-axis is in speedup units, so ticks go at the ideal speedup values
    # (nodes / baseline_nodes), not at the raw node counts; the two only
    # coincide when baseline_nodes == 1.
    ax.set_yticks(ideal_speedup.tolist())
    ax.set_yticklabels([f'{value:g}' for value in ideal_speedup])
    ax.grid(True, which='both', linestyle=':')
    ax.legend()

    # --- Subplot 2: Efficiency ---
    ax = axes[1]
    ax.plot(nodes, df['efficiency'], marker='o', linestyle='-', label='Measured')
    ax.plot(nodes, np.ones(len(nodes)), marker='x', linestyle='--', label='Ideal')
    ax.set_title(f'{title_prefix} Scaling: Efficiency vs. Nodes')
    ax.set_xlabel('Number of Nodes')
    ax.set_ylabel('Efficiency')
    ax.set_xscale('log', base=2)
    ax.set_xticks(nodes)
    ax.set_xticklabels(nodes)
    ax.grid(True, which='both', linestyle=':')
    ax.legend()

    # Save and close (closing releases the figure held by pyplot)
    fig.savefig(output_path, bbox_inches='tight', pad_inches=0.02)
    plt.close(fig)
    print(f"Saved {title_prefix.lower()} scaling plot → {output_path}")


In [3]:
# Load the weak-scaling measurements and preview the first five rows.
# NOTE(review): the 'Unnamed: 0' column in the preview suggests the CSV was
# written together with its index — confirm whether it should be dropped.
weak_df = pd.read_csv("weak.csv")
weak_df.head(5)

Unnamed: 0,size_x,size_y,Ntasks,Niterations,total_time,comm_time,comp_time,slurm_job_id,slurm_job_name,partition,nodes,cpus_per_task,tasks_per_node,ntasks,omp_threads
0,16384,8192,8,500,3.102643,0.146427,2.992595,21731207,weak_1n_8mpi_14omp_16384x8192,,1,14,8,8,14
1,65536,32768,128,500,3.140334,0.17949,2.997634,21731212,weak_16n_128mpi_14omp_65536x32768,,16,14,8(x16),128,14
2,32768,32768,64,500,3.132983,0.175612,3.003354,21731211,weak_8n_64mpi_14omp_32768x32768,,8,14,8(x8),64,14
3,16384,16384,16,500,3.173391,0.232937,2.981805,21731208,weak_2n_16mpi_14omp_16384x16384,,2,14,8(x2),16,14
4,32768,16384,32,500,3.15609,0.202555,2.99512,21731210,weak_4n_32mpi_14omp_32768x16384,,4,14,8(x4),32,14


In [5]:
import matplotlib.pyplot as plt

# Order the rows by node count so that each line is drawn left to right
weak_df_sorted = weak_df.sort_values('nodes')

# One line per timing column, all sharing the node count on the x-axis
ax = weak_df_sorted.plot.line(
    x='nodes',
    y=['total_time', 'comm_time', 'comp_time'],
    marker='o',
    figsize=(10, 6),
    title='Time Components vs. Number of Nodes (Weak Scaling)',
)

# Axis labels, one tick per measured node count, and a background grid
ax.set(xlabel="Number of Nodes", ylabel="Time (seconds)")
ax.set_xticks(weak_df_sorted['nodes'])
ax.grid(True)

# Display the plot
# NOTE(review): the notebook selects the "pgf" backend earlier; confirm that
# this call actually displays anything under that backend.
plt.show()

  plt.show()


In [6]:
# Weak-scaling efficiency = baseline time / time on N nodes.
# weak_df_sorted is ordered by node count, so its first row is the baseline run.
baseline_time = weak_df_sorted['total_time'].iat[0]
weak_df_sorted['efficiency'] = weak_df_sorted['total_time'].rdiv(baseline_time)


In [None]:

# Efficiency against node count; the fixed y-range (0 to 1.2) keeps the
# ideal value of 1 visible with a little headroom above it
ax_eff = weak_df_sorted.plot.line(
    x='nodes',
    y='efficiency',
    marker='o',
    figsize=(10, 6),
    title='Weak Scaling Efficiency vs. Number of Nodes',
    ylim=(0, 1.2),
)

# Axis labels, one tick per measured node count, and a background grid
ax_eff.set(xlabel="Number of Nodes", ylabel="Efficiency")
ax_eff.set_xticks(weak_df_sorted['nodes'])
ax_eff.grid(True)

# Display the plot
plt.show()

In [7]:
# Baseline = the run with the fewest nodes, i.e. the first row of the
# node-sorted frame.
baseline_nodes = weak_df_sorted['nodes'].iloc[0]
baseline_time = weak_df_sorted['total_time'].iloc[0]

# Weak-scaling efficiency = baseline time / time on N nodes
weak_df_sorted['efficiency'] = baseline_time / weak_df_sorted['total_time']

# Scaled speedup (optional, for completeness). It is normalised by the baseline
# node count so that perfect efficiency lands exactly on the ideal line
# nodes / baseline_nodes drawn by plot_scaling, even when the baseline run
# does not use a single node.
weak_df_sorted['speedup'] = (
    weak_df_sorted['efficiency'] * weak_df_sorted['nodes'] / baseline_nodes
)

plot_scaling(
    df=weak_df_sorted,
    baseline_nodes=baseline_nodes,
    output_path="../slide/images/weak_speed_eff.pgf",
    title_prefix="Weak"
)


Saved weak scaling plot → ../slide/images/weak_speed_eff.pgf


In [8]:
import numpy as np
import matplotlib.pyplot as plt

def plot_weak_scaling(df, output_path):
    """
    Draw the weak-scaling timing breakdown next to the efficiency curve.

    The left panel plots total, communication and computation time against
    the node count; the right panel plots measured efficiency together with
    the constant ideal value 1. Both panels use a log2 x-axis with one tick
    per measured node count. The figure is saved to ``output_path`` and closed.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain columns: 'nodes', 'total_time', 'comm_time', 'comp_time', 'efficiency'.
        Rows may be in any order; a copy sorted by 'nodes' is plotted.
    output_path : str
        Path to save the figure (e.g. '../slide/images/weak_times_eff.pgf')
    """
    ordered = df.sort_values(by='nodes')
    node_counts = ordered['nodes']

    def _finish_axis(axis):
        # Shared finishing touches: log2 x-axis, explicit ticks, grid, legend.
        axis.set_xscale('log', base=2)
        axis.set_xticks(node_counts)
        axis.set_xticklabels(node_counts)
        axis.grid(True, which='both', linestyle=':')
        axis.legend()

    fig, (ax_time, ax_eff) = plt.subplots(
        1, 2, figsize=(6.5, 3.2), constrained_layout=True
    )

    # --- Left panel: one line per timing column ---
    time_series = (
        ('total_time', 'Total'),
        ('comm_time', 'Comm'),
        ('comp_time', 'Comp'),
    )
    for column, legend_label in time_series:
        ax_time.plot(node_counts, ordered[column],
                     marker='o', linestyle='-', label=legend_label)
    ax_time.set_title('Time Components (Weak Scaling)')
    ax_time.set_xlabel('Number of Nodes')
    ax_time.set_ylabel('Time (seconds)')
    _finish_axis(ax_time)

    # --- Right panel: measured efficiency against the ideal value of 1 ---
    ax_eff.plot(node_counts, ordered['efficiency'],
                marker='o', linestyle='-', label='Measured')
    ax_eff.plot(node_counts, np.ones_like(node_counts),
                marker='x', linestyle='--', label='Ideal')
    ax_eff.set_title('Efficiency (Weak Scaling)')
    ax_eff.set_xlabel('Number of Nodes')
    ax_eff.set_ylabel('Efficiency')
    ax_eff.set_ylim(0, 1.1)
    _finish_axis(ax_eff)

    # Write the figure to disk, then release it
    fig.savefig(output_path, bbox_inches='tight', pad_inches=0.02)
    plt.close(fig)
    print(f"Saved weak scaling plot → {output_path}")


In [9]:
# Compute weak scaling efficiency.
# weak_df is in CSV row order, not sorted by node count, so the baseline is
# looked up explicitly as the run with the fewest nodes instead of relying on
# whichever row happens to come first in the file.
baseline_time = weak_df.loc[weak_df['nodes'].idxmin(), 'total_time']
weak_df['efficiency'] = baseline_time / weak_df['total_time']

# Plot and save (plot_weak_scaling sorts by 'nodes' internally)
plot_weak_scaling(
    df=weak_df,
    output_path="../slide/images/weak_times_eff.pgf"
)


Saved weak scaling plot → ../slide/images/weak_times_eff.pgf
