In [18]:
import pandas as pd
import matplotlib.pyplot as plt
import os
import numpy as np
import matplotlib.cm as cm
import matplotlib.colors as mcolors

def _shade_for_cpu(cpu_index, cpu_total):
    """Return the colormap fraction for a CPU count.

    Lower positions in the sorted CPU-count list get lighter shades (0.3),
    the highest position gets the darkest shade used for curves (0.9).
    """
    return 0.3 + 0.6 * (cpu_index / max(cpu_total - 1, 1))


def _numeric_rows(*columns):
    """Coerce equally long columns to float arrays and drop non-numeric rows.

    Rows are matched by POSITION, not by index label, so columns taken from
    different rows of a DataFrame (e.g. two different perf counters) are
    filtered consistently. A row is kept only if every column is numeric there.

    Returns a list with one NumPy array per input column.
    """
    arrays = [
        pd.to_numeric(column, errors='coerce').to_numpy(dtype=float, na_value=np.nan)
        for column in columns
    ]
    keep = np.ones(len(arrays[0]), dtype=bool)
    for values in arrays:
        keep &= ~np.isnan(values)
    return [values[keep] for values in arrays]


def _raw_measurement(cpu_data):
    """Return (num_points, measurement) for one implementation/CPU slice."""
    num_points, measurement = _numeric_rows(cpu_data['num_points'], cpu_data['measurement'])
    return num_points, measurement


def _elapsed_seconds(cpu_data):
    """Return (num_points, measurement / metric / 1e3) for a task-clock slice.

    NOTE(review): presumably `measurement` is task-clock in msec and `metric`
    is "CPUs utilized" as printed by `perf stat` — confirm against the CSVs.
    """
    num_points, measurement, metric = _numeric_rows(
        cpu_data['num_points'], cpu_data['measurement'], cpu_data['metric'])
    return num_points, measurement / metric / 1e3


def _elapsed_per_point(cpu_data):
    """Return (num_points, measurement / metric / num_points) for a task-clock slice."""
    num_points, measurement, metric = _numeric_rows(
        cpu_data['num_points'], cpu_data['measurement'], cpu_data['metric'])
    return num_points, measurement / metric / num_points


def _single_task_curves(all_dfs, implementation_names, task, cpu_counts, derive):
    """Collect one curve per (implementation, CPU count) for a single task.

    `derive` maps the rows of one implementation/CPU slice (sorted by
    num_points) to an (x, y) pair of arrays. Each returned curve is a tuple
    (impl_index, impl_name, cpu_index, cpu, x, y); slices without any numeric
    row are omitted.
    """
    curves = []
    for impl_index, impl_name in enumerate(implementation_names):
        if impl_name not in all_dfs:
            continue
        impl_df = all_dfs[impl_name]
        impl_task_df = impl_df[impl_df['task_name'] == task]

        for cpu_index, cpu in enumerate(cpu_counts):
            cpu_data = impl_task_df[impl_task_df['cpu_count'] == cpu].sort_values('num_points')
            if cpu_data.empty:
                continue
            try:
                x_values, y_values = derive(cpu_data)
            except Exception as e:
                # Best effort: one unusable slice must not abort the whole report.
                print(f"Warning: Could not convert data for {impl_name}, task {task}, CPU {cpu}: {e}")
                continue
            if len(x_values) > 0:
                curves.append((impl_index, impl_name, cpu_index, cpu, x_values, y_values))
    return curves


def _gflops_curves(all_dfs, implementation_names, all_cpu_counts):
    """Collect GFLOPS curves from the 'fp_ret_sse_avx_ops.all' and 'task-clock' rows.

    A curve is produced only when both counters exist for the same
    implementation/CPU count and cover exactly the same num_points values.
    Curves have the same tuple layout as in `_single_task_curves`.
    """
    curves = []
    for impl_index, impl_name in enumerate(implementation_names):
        if impl_name not in all_dfs:
            continue
        impl_df = all_dfs[impl_name]

        for cpu in sorted(impl_df['cpu_count'].unique()):
            cpu_group = impl_df[impl_df['cpu_count'] == cpu]
            task_clock_data = cpu_group[cpu_group['task_name'] == 'task-clock'].sort_values('num_points')
            fp_ops_data = cpu_group[cpu_group['task_name'] == 'fp_ret_sse_avx_ops.all'].sort_values('num_points')

            if task_clock_data.empty or fp_ops_data.empty:
                continue
            if len(task_clock_data) != len(fp_ops_data):
                continue
            if not all(task_clock_data['num_points'].values == fp_ops_data['num_points'].values):
                continue

            try:
                # The two counters live on different DataFrame rows, so they
                # must be paired by position rather than by index label.
                fp_ops, task_clock, num_points = _numeric_rows(
                    fp_ops_data['measurement'],
                    task_clock_data['measurement'],
                    fp_ops_data['num_points'])
                gflops = fp_ops / (task_clock / 1000.0) / 1.0e9
            except Exception as e:
                print(f"Warning: Could not calculate GFLOPS for {impl_name}, CPU {cpu}: {e}")
                continue

            if len(fp_ops) > 0:
                curves.append((impl_index, impl_name, all_cpu_counts.index(cpu), cpu, num_points, gflops))
    return curves


def _save_comparison_figure(curves, cpu_total, implementation_cmaps, cpu_markers,
                            log_y, ylabel, title, cpu_legend_loc, output_filename):
    """Draw `curves` into a new figure, add both legends and save it as PNG.

    The colormap encodes the implementation, the shade and marker encode the
    CPU count. The x axis is always logarithmic; the y axis only if `log_y`.
    No figure is created when `curves` is empty.

    Returns True if a figure was written to `output_filename`, else False.
    """
    if not curves:
        return False

    fig, ax = plt.subplots(figsize=(12, 8))
    try:
        draw = ax.loglog if log_y else ax.semilogx
        cmap_by_impl = {}   # implementation name -> its colormap (first occurrence wins)
        style_by_cpu = {}   # cpu count -> (position in the CPU list, marker)

        for impl_index, impl_name, cpu_index, cpu, x_values, y_values in curves:
            # Cycle through the colormaps so extra implementations do not crash.
            cmap = implementation_cmaps[impl_index % len(implementation_cmaps)]
            marker = cpu_markers[cpu_index % len(cpu_markers)]
            draw(x_values, y_values, marker=marker, linestyle='-',
                 color=cmap(_shade_for_cpu(cpu_index, cpu_total)))
            cmap_by_impl.setdefault(impl_name, cmap)
            style_by_cpu[cpu] = (cpu_index, marker)

        ax.set_xlabel('Number of Particles (log scale)')
        ax.set_ylabel(ylabel)
        ax.set_title(title)
        ax.grid(True, which="both", ls="-")

        # Implementation legend: one entry per implementation in its darkest hue.
        impl_handles = [
            plt.Line2D([0], [0], color=cmap(0.9), marker='o', linestyle='-')
            for cmap in cmap_by_impl.values()
        ]
        impl_legend = ax.legend(impl_handles, list(cmap_by_impl),
                                title="Implementation", loc='upper left')
        # Keep the first legend alive; the second legend() call would replace it.
        ax.add_artist(impl_legend)

        # CPU legend: shades are shown using the first implementation's colormap.
        reference_cmap = implementation_cmaps[0]
        cpu_handles = []
        for cpu in sorted(style_by_cpu):
            cpu_index, marker = style_by_cpu[cpu]
            cpu_handles.append(plt.Line2D([0], [0],
                                          color=reference_cmap(_shade_for_cpu(cpu_index, cpu_total)),
                                          marker=marker, linestyle='-',
                                          label=f'CPU {cpu}'))
        ax.legend(handles=cpu_handles, title="CPU Count", loc=cpu_legend_loc)

        fig.savefig(output_filename, dpi=150)
    finally:
        # Always release the figure, even if drawing or saving failed.
        plt.close(fig)
    return True


def process_csv_files(csv_files, implementation_names):
    """Plot performance-counter comparisons for several implementations.

    Each CSV is read as a ';'-separated file without header and labelled with
    the implementation name at the same position in `implementation_names`.
    The following PNGs are written to 'performance_plots_comparison':

    * one log-log plot per task name (measurement vs. number of points),
    * a GFLOPS plot derived from 'fp_ret_sse_avx_ops.all' and 'task-clock',
    * an elapsed-time plot and an elapsed-time-per-point plot derived from
      the 'task-clock' rows.

    Parameters
    ----------
    csv_files : sequence of str
        Paths of the CSV files to read.
    implementation_names : sequence of str
        Display name for each CSV file, in the same order.

    Returns
    -------
    None. Progress is reported with print().
    """
    base_dir = "performance_plots_comparison"
    os.makedirs(base_dir, exist_ok=True)

    # One colormap per implementation; cycled if there are more implementations.
    implementation_cmaps = [plt.cm.Blues, plt.cm.Reds, plt.cm.Greens,
                            plt.cm.Purples, plt.cm.Oranges, plt.cm.Greys]
    cpu_markers = ['o', 's', '^', 'D', 'x', '*', 'p', 'h']

    # Dataframes keyed by implementation name
    all_dfs = {}
    for csv_file, impl_name in zip(csv_files, implementation_names):
        df = pd.read_csv(csv_file, sep=';', header=None)
        df.columns = ['file_name', 'cpu_count', 'num_points', 'measurement', 'measurement_unit',
                      'task_name', 'percentage', 'run_time', 'variance', 'metric', 'metric_unit']
        df['implementation'] = impl_name
        all_dfs[impl_name] = df

    if not all_dfs:
        # pd.concat raises on an empty sequence; there is nothing to plot anyway.
        print("No input files given; nothing to plot")
        return

    combined_df = pd.concat(all_dfs.values())
    all_cpu_counts = sorted(combined_df['cpu_count'].unique())

    # --- One plot per task, all CPU counts combined ---
    # Rows without a task name can never match a task filter, so skip them up front.
    for task in combined_df['task_name'].dropna().unique():
        task_df = combined_df[combined_df['task_name'] == task]
        cpu_counts = sorted(task_df['cpu_count'].unique())

        curves = _single_task_curves(all_dfs, implementation_names, task, cpu_counts, _raw_measurement)

        units = task_df['measurement_unit'].unique()
        if len(units) == 1 and pd.notna(units[0]) and units[0] != "":
            ylabel = f'Measurement ({units[0]}) (log scale)'
        else:
            ylabel = 'Measurement (log scale)'

        safe_task_name = str(task).replace(" ", "_").replace("/", "_").replace("\\", "_").replace(".", "_")
        output_filename = os.path.join(base_dir, f"{safe_task_name}_all_cpus.png")

        if _save_comparison_figure(curves, len(cpu_counts), implementation_cmaps, cpu_markers,
                                   True, ylabel,
                                   f'Performance Comparison: {task}\nAll CPU Counts',
                                   'lower right', output_filename):
            print(f"Combined plot saved to {output_filename}")
        else:
            print(f"No valid data for task {task}")

    # --- GFLOPS plot ---
    output_filename = os.path.join(base_dir, "GFLOPS_performance_all_cpus.png")
    curves = _gflops_curves(all_dfs, implementation_names, all_cpu_counts)
    if _save_comparison_figure(curves, len(all_cpu_counts), implementation_cmaps, cpu_markers,
                               False, 'GFLOPS',
                               'GFLOPS Performance Comparison\nAll CPU Counts',
                               'lower right', output_filename):
        print(f"Combined GFLOPS plot saved to {output_filename}")
    else:
        print(f"Warning: Not enough valid data to create combined GFLOPS plot")

    # --- Elapsed time plot ---
    # Uses the CPU counts of the whole data set rather than a leftover loop variable.
    output_filename = os.path.join(base_dir, f"elapsed_time_all_cpus.png")
    curves = _single_task_curves(all_dfs, implementation_names, 'task-clock', all_cpu_counts, _elapsed_seconds)
    if _save_comparison_figure(curves, len(all_cpu_counts), implementation_cmaps, cpu_markers,
                               True, 'Elapsed time (seconds)',
                               'Performance Comparison: Elapsed Time\nAll CPU Counts',
                               'upper right', output_filename):
        print(f"Combined plot saved to {output_filename}")
    else:
        print(f"No valid data to create combined elapsed time plot")

    # --- Elapsed time per point plot ---
    output_filename = os.path.join(base_dir, f"elapsed_time_per_point_all_cpus.png")
    curves = _single_task_curves(all_dfs, implementation_names, 'task-clock', all_cpu_counts, _elapsed_per_point)
    if _save_comparison_figure(curves, len(all_cpu_counts), implementation_cmaps, cpu_markers,
                               True, 'Elapsed time per point (msec)',
                               'Performance Comparison: Elapsed Time Per Point\nAll CPU Counts',
                               'upper right', output_filename):
        print(f"Combined plot saved to {output_filename}")
    else:
        print(f"No valid data to create combined elapsed time per point plot")

    print(f"All plots have been saved to '{base_dir}'")

# Each benchmark result file together with the label it gets in the plots.
benchmark_runs = [
    ("nbody_llm_opt_combined.csv", "LLM BH"),
    ("nbody_man_opt_combined.csv", "Manual BH"),
    ("perf_benchmark_example.csv", "Benchmark (Rebound)"),
]

# Split the pairs into the two parallel lists the plotting function expects.
csv_files = [csv_path for csv_path, _ in benchmark_runs]
implementation_names = [label for _, label in benchmark_runs]

# Generate every comparison plot for the configured runs.
process_csv_files(csv_files, implementation_names)

Combined plot saved to performance_plots_comparison/task-clock_all_cpus.png
Combined plot saved to performance_plots_comparison/context-switches_all_cpus.png
Combined plot saved to performance_plots_comparison/cpu-migrations_all_cpus.png
Combined plot saved to performance_plots_comparison/page-faults_all_cpus.png
Combined plot saved to performance_plots_comparison/cycles_all_cpus.png
Combined plot saved to performance_plots_comparison/stalled-cycles-frontend_all_cpus.png
Combined plot saved to performance_plots_comparison/stalled-cycles-backend_all_cpus.png
Combined plot saved to performance_plots_comparison/instructions_all_cpus.png
Combined plot saved to performance_plots_comparison/branches_all_cpus.png
Combined plot saved to performance_plots_comparison/branch-misses_all_cpus.png
Combined plot saved to performance_plots_comparison/L1-dcache-loads_all_cpus.png
Combined plot saved to performance_plots_comparison/L1-dcache-load-misses_all_cpus.png
No valid data for task LLC-loads
No v

  plt.savefig(output_filename, dpi=150)


Combined plot saved to performance_plots_comparison/fp_ret_sse_avx_ops_dp_mult_flops_all_cpus.png
Combined plot saved to performance_plots_comparison/fp_ret_sse_avx_ops_sp_add_sub_flops_all_cpus.png
Combined plot saved to performance_plots_comparison/fp_ret_sse_avx_ops_sp_div_flops_all_cpus.png
Combined plot saved to performance_plots_comparison/fp_ret_sse_avx_ops_sp_mult_add_flops_all_cpus.png
Combined plot saved to performance_plots_comparison/fp_ret_sse_avx_ops_sp_mult_flops_all_cpus.png
Combined GFLOPS plot saved to performance_plots_comparison/GFLOPS_performance_all_cpus.png
Combined plot saved to performance_plots_comparison/elapsed_time_all_cpus.png
Combined plot saved to performance_plots_comparison/elapsed_time_per_point_all_cpus.png
All plots have been saved to 'performance_plots_comparison'


<Figure size 1200x800 with 0 Axes>