In [20]:
import pandas as pd
import numpy as np

import matplotlib as mpl
# Select the PGF backend so that figures can be saved as .pgf files and
# typeset by LaTeX together with the document that includes them.
mpl.use("pgf")

# LaTeX preamble used while Matplotlib measures and renders text:
# sans-serif math via the sansmath package, plus fallback definitions for
# \mathdefault / \textdefault in case they are not already defined.
_LATEX_PREAMBLE = r"""
\usepackage{sansmath}
\sansmath
\providecommand{\mathdefault}[1]{#1}
\providecommand{\textdefault}[1]{#1}
"""

mpl.rcParams.update({
    "pgf.texsystem": "pdflatex",
    "text.usetex": True,
    "pgf.rcfonts": False,         # do not derive LaTeX fonts from rcParams
    "font.family": "sans-serif",
    "font.size": 9,               # compact text for 12pt slides
    "axes.titlesize": 10,
    "axes.labelsize": 9,
    "legend.fontsize": 8,
    "xtick.labelsize": 8,
    "ytick.labelsize": 8,
    # Preamble for the regular usetex pipeline (non-PGF backends).
    "text.latex.preamble": _LATEX_PREAMBLE,
    # The PGF backend builds its LaTeX header from "pgf.preamble" and does not
    # read "text.latex.preamble"; without this entry the sansmath setup above
    # would have no effect on the .pgf output produced by this notebook.
    "pgf.preamble": _LATEX_PREAMBLE,
})

In [21]:
# Load the strong-scaling measurements from the CSV next to the notebook.
strong_df = pd.read_csv(filepath_or_buffer='strong.csv')

# Preview the first five rows to check that the columns were parsed as expected.
strong_df.head(n=5)

Unnamed: 0,size_x,size_y,Ntasks,Niterations,total_time,comm_time,comp_time,slurm_job_id,slurm_job_name,partition,nodes,cpus_per_task,tasks_per_node,ntasks,omp_threads
0,65536,65536,128,500,6.425851,0.304926,6.205846,21731295,strong_scaling_16n_128mpi_14omp,,16,14,8(x16),128,14
1,65536,65536,64,500,12.918936,0.417404,12.658198,21731294,strong_scaling_8n_64mpi_14omp,,8,14,8(x8),64,14
2,65536,65536,32,500,29.042056,3.924667,28.178181,21731293,strong_scaling_4n_32mpi_14omp,,4,14,8(x4),32,14
3,65536,65536,16,500,51.90406,1.520818,50.962839,21731292,strong_scaling_2n_16mpi_14omp,,2,14,8(x2),16,14
4,65536,65536,8,500,103.545381,3.117491,101.733921,21731291,strong_scaling_1n_8mpi_14omp,,1,14,8,8,14


In [None]:
import matplotlib.pyplot as plt

# Order the runs by node count so each line connects its points left to right.
strong_df_sorted = strong_df.sort_values(by='nodes')
node_counts = strong_df_sorted['nodes']

# One line per timing column; pandas creates the figure and returns its Axes.
time_columns = ['total_time', 'comm_time', 'comp_time']
ax = strong_df_sorted.plot(
    x='nodes',
    y=time_columns,
    kind='line',
    marker='o',
    figsize=(5, 3.2),
    title='Time Components vs. Number of Nodes (Strong Scaling)',
)

# Axis cosmetics. The explicit ticks are set after the log-2 scale is applied
# so that one labelled tick sits on every measured node count.
ax.set(xlabel="Number of Nodes", ylabel="Time (seconds)")
ax.set_xscale('log', base=2)
ax.grid(True, linestyle=':')
ax.set_xticks(node_counts, node_counts)
ax.legend(["Total time", "Comm time", "Comp time"])

# Export the figure as PGF, then close it to release its resources.
fig = ax.get_figure()
fig.tight_layout()
fig.savefig('../slide/images/strong_times.pgf')
plt.close(fig)

In [23]:
# Strong-scaling metrics, both measured against the first row of the sorted
# frame (the run with the fewest nodes):
#   speedup(N)    = T(baseline) / T(N)
#   efficiency(N) = speedup(N) / (N / N_baseline)
baseline_time = strong_df_sorted['total_time'].iat[0]
baseline_nodes = strong_df_sorted['nodes'].iat[0]

# Baseline time divided by each run's total time.
strong_df_sorted['speedup'] = strong_df_sorted['total_time'].rdiv(baseline_time)

# Node-count ratio to the baseline, i.e. the speedup perfect scaling would give.
node_ratio = strong_df_sorted['nodes'].div(baseline_nodes)
strong_df_sorted['efficiency'] = strong_df_sorted['speedup'].div(node_ratio)


In [None]:
# Two side-by-side panels: measured vs. ideal speedup (left) and measured vs.
# ideal efficiency (right), both plotted against the node count on a log-2 x axis.
fig, axes = plt.subplots(1, 2, figsize=(6.5, 3.2), constrained_layout=True)

node_counts = strong_df_sorted['nodes']
# Perfect scaling: speedup equals the node-count ratio to the baseline run.
ideal_speedup = node_counts / baseline_nodes

# Subplot 1: Speedup
ax = axes[0]
ax.plot(node_counts, strong_df_sorted['speedup'],
        marker='o', linestyle='-', label='Measured')
ax.plot(node_counts, ideal_speedup,
        marker='x', linestyle='--', label='Ideal')
ax.set_title('Speedup vs. Number of Nodes')
ax.set_xlabel('Number of Nodes')
ax.set_ylabel('Speedup')
ax.set_xscale('log', base=2)
ax.set_yscale('log', base=2)
# The y axis is in speedup units, so its ticks go on the ideal speedup values
# rather than on raw node counts; the two only coincide when the baseline run
# uses a single node. ':g' keeps whole numbers free of a trailing '.0'.
ax.set_yticks(ideal_speedup, [f"{value:g}" for value in ideal_speedup])
ax.set_xticks(node_counts, node_counts)
ax.grid(True, which='both', linestyle=':')
ax.legend()

# Subplot 2: Efficiency
ax = axes[1]
ax.plot(node_counts, strong_df_sorted['efficiency'],
        marker='o', linestyle='-', label='Measured')
# Ideal efficiency is a constant 1 at every node count.
ax.plot(node_counts, np.ones_like(node_counts),
        marker='x', linestyle='--', label='Ideal')
ax.set_title('Efficiency vs. Number of Nodes')
ax.set_xlabel('Number of Nodes')
ax.set_ylabel('Efficiency')
ax.set_xscale('log', base=2)
ax.set_xticks(node_counts, node_counts)
ax.grid(True, which='both', linestyle=':')
ax.legend()

# Save as PGF with a tight bounding box, then close the figure.
fig.savefig("../slide/images/strong_speed_eff.pgf", bbox_inches="tight", pad_inches=0.02)
plt.close(fig)