# Map Folding Algorithm Performance Analysis

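This notebook analyzes benchmark timings recorded for the map folding algorithm implementation: summary statistics per dimension configuration, execution time versus the number of tasks, and scaling with problem size.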

In [1]:
import numpy
import pandas
from pathlib import Path
import matplotlib.pyplot as plt
import seaborn

# Plot appearance: seaborn white-grid theme and a wide default figure size.
seaborn.set_style('whitegrid')
plt.rcParams['figure.figsize'] = [12, 6]


def _countDimensions(dimensions):
    """Return how many entries `dimensions` has, or 0 when it is None."""
    if dimensions is None:
        return 0
    return len(dimensions)


def _multiplyDimensions(dimensions):
    """Return the product of the entries of `dimensions`, or 0 when it is None."""
    if dimensions is None:
        return 0
    return numpy.prod(dimensions)


def _labelDimensions(dimensions):
    """Return the entries joined with 'x' (e.g. '3x8'), or 'None' when missing."""
    if dimensions is None:
        return 'None'
    return 'x'.join(str(extent) for extent in dimensions)


# Read the stored benchmark records.
# NOTE(review): allow_pickle=True lets numpy unpickle object arrays, which can
# execute arbitrary code; only load benchmark files from a trusted source.
pathBenchmarkFile = Path('marks/benchmarks.npy')
arrayBenchmarks = numpy.load(pathBenchmarkFile, allow_pickle=True)

# Tabular view of the records, one row per benchmark entry.
dfBenchmarks = pandas.DataFrame(arrayBenchmarks)

# Columns derived from each row's 'dimensions' value.
columnDimensions = dfBenchmarks['dimensions']
dfBenchmarks['dimensionCount'] = columnDimensions.apply(_countDimensions)
dfBenchmarks['dimensionProduct'] = columnDimensions.apply(_multiplyDimensions)
dfBenchmarks['dimensionsStr'] = columnDimensions.apply(_labelDimensions)

## Summary Statistics

In [2]:
# Group the timings by dimension label once and reuse the grouping for both reports.
timeByDimensions = dfBenchmarks.groupby('dimensionsStr')['time']

# Full descriptive statistics, followed by the median on its own for quick reading.
print(timeByDimensions.describe())
print("Median (seconds):")
print(timeByDimensions.median())


               count      mean       std       min       25%       50%  \
dimensionsStr                                                            
2x11           100.0  0.245076  0.089574  0.233962  0.234656  0.235062   
2x2x2x2x2      100.0  0.045520  0.000581  0.045046  0.045196  0.045312   
3x3x3          100.0  2.442653  0.008753  2.430416  2.435601  2.440241   
3x8            100.0  1.254273  0.005344  1.248475  1.251006  1.253290   
5x5            100.0  1.466555  0.007084  1.457515  1.462099  1.465302   

                    75%       max  
dimensionsStr                      
2x11           0.236145  1.131336  
2x2x2x2x2      0.045554  0.048291  
3x3x3          2.447204  2.467829  
3x8            1.255986  1.288280  
5x5            1.468853  1.494854  
Median (seconds):
dimensionsStr
2x11         0.235062
2x2x2x2x2    0.045312
3x3x3        2.440241
3x8          1.253290
5x5          1.465302
Name: time, dtype: float64
{}


## Performance by Dimension Configuration

In [None]:
# One box per dimension configuration showing the spread of execution times.
plt.figure(figsize=(14, 6))
axesBoxplot = seaborn.boxplot(data=dfBenchmarks, x='dimensionsStr', y='time')

# Slant the category labels so they stay readable.
axesBoxplot.tick_params(axis='x', labelrotation=45)
axesBoxplot.set_title('Execution Time Distribution by Dimension Configuration')
axesBoxplot.set_xlabel('Dimensions')
axesBoxplot.set_ylabel('Time (seconds)')

plt.tight_layout()
plt.show()

## Task Division Analysis

In [None]:
# Scatter of execution time against task count; colour and marker both encode
# the dimension configuration.
plt.figure(figsize=(14, 6))
axesTasks = seaborn.scatterplot(
    data=dfBenchmarks,
    x='tasks',
    y='time',
    hue='dimensionsStr',
    style='dimensionsStr',
)
axesTasks.set(
    title='Execution Time vs Tasks by Dimension Configuration',
    xlabel='Number of Tasks',
    ylabel='Time (seconds)',
)

# Anchor the legend outside the plotting area, to the right of the axes.
axesTasks.legend(title='Dimensions', bbox_to_anchor=(1.05, 1), loc='upper left')

plt.tight_layout()
plt.show()

## Scaling Analysis

In [None]:
# Execution time against problem size (product of dimensions), coloured by task count.
plt.figure(figsize=(14, 6))
axesScaling = seaborn.scatterplot(
    data=dfBenchmarks,
    x='dimensionProduct',
    y='time',
    hue='tasks',
)
axesScaling.set_title('Execution Time vs Problem Size')
axesScaling.set_xlabel('Total Number of Positions (Product of Dimensions)')
axesScaling.set_ylabel('Time (seconds)')

# Both axes are logarithmic.
axesScaling.set_yscale('log')
axesScaling.set_xscale('log')

axesScaling.legend(title='Tasks')
plt.tight_layout()
plt.show()