# Map Folding Algorithm Performance Analysis

Analyze benchmark data from the map folding algorithm implementation.

In [1]:
import numpy
import pandas
from pathlib import Path
import matplotlib.pyplot as plt
import seaborn

# Plot appearance: seaborn white-grid theme and a default figure size of 12 x 6
seaborn.set_style('whitegrid')
plt.rcParams.update({'figure.figsize': [12, 6]})

# Read the saved benchmark records from disk
# NOTE(review): allow_pickle=True lets numpy unpickle arbitrary objects stored in
# the file — only load benchmark files that come from a trusted source.
pathBenchmarkFile = Path('marks') / 'benchmarks.npy'
arrayBenchmarks = numpy.load(pathBenchmarkFile, allow_pickle=True)

# Wrap the loaded array in a DataFrame
dfBenchmarks = pandas.DataFrame(data=arrayBenchmarks)

# Derived columns, each computed from one entry of the 'dimensions' column.
# A None entry falls back to 0 (numeric columns) or the string 'None' (label column).
derivedColumns = {
    # Number of dimensions in the configuration
    'dimensionCount': lambda dims: 0 if dims is None else len(dims),
    # Product of all dimension sizes
    'dimensionProduct': lambda dims: 0 if dims is None else numpy.prod(dims),
    # Label such as '2x11', built by joining the sizes with 'x'
    'dimensionsStr': lambda dims: 'None' if dims is None else 'x'.join(str(size) for size in dims),
}
for columnName, deriveValue in derivedColumns.items():
    dfBenchmarks[columnName] = dfBenchmarks['dimensions'].apply(deriveValue)

## Summary Statistics

In [2]:
# Group the 'time' column by dimension label once; both reports reuse this grouping
timeByDimensions = dfBenchmarks.groupby('dimensionsStr')['time']

# Descriptive statistics (count, mean, std, min, quartiles, max) per configuration
print(timeByDimensions.describe())

# Median execution time per configuration
print("Median (seconds):")
print(timeByDimensions.median())


               count      mean       std       min       25%       50%  \
dimensionsStr                                                            
2x11           100.0  0.235197  0.134748  0.217152  0.219576  0.220380   
2x12           100.0  1.057869  0.027285  1.036244  1.040272  1.045666   
2x2x2x2x2      100.0  0.049763  0.001299  0.048802  0.049063  0.049281   
3x3x3          100.0  2.546541  0.033016  2.514271  2.528437  2.536016   
3x8            100.0  1.333479  0.011206  1.321228  1.328139  1.330312   
5x5            100.0  1.583322  0.024486  1.560000  1.566933  1.572816   

                    75%       max  
dimensionsStr                      
2x11           0.222073  1.568561  
2x12           1.062377  1.161405  
2x2x2x2x2      0.049641  0.056244  
3x3x3          2.546568  2.725729  
3x8            1.334394  1.386403  
5x5            1.592023  1.666888  
Median (seconds):
dimensionsStr
2x11         0.220380
2x12         1.045666
2x2x2x2x2    0.049281
3x3x3        2.536016
3x8          1.330312
5x5          1.572816
Name: time, dtype: float64

## Performance by Dimension Configuration

In [None]:
# Box plot of the 'time' column, one box per 'dimensionsStr' value
figureBoxplot, axesBoxplot = plt.subplots(figsize=(14, 6))
seaborn.boxplot(data=dfBenchmarks, x='dimensionsStr', y='time', ax=axesBoxplot)

# Rotate the x tick labels by 45 degrees
for tickLabel in axesBoxplot.get_xticklabels():
    tickLabel.set_rotation(45)

axesBoxplot.set_title('Execution Time Distribution by Dimension Configuration')
axesBoxplot.set_xlabel('Dimensions')
axesBoxplot.set_ylabel('Time (seconds)')
figureBoxplot.tight_layout()
plt.show()

## Task Division Analysis

In [None]:
# Scatter of 'time' against 'tasks'; each dimension configuration gets its own
# colour (hue) and marker (style)
figureTasks, axesTasks = plt.subplots(figsize=(14, 6))
seaborn.scatterplot(
    data=dfBenchmarks,
    x='tasks',
    y='time',
    hue='dimensionsStr',
    style='dimensionsStr',
    ax=axesTasks,
)
axesTasks.set_title('Execution Time vs Tasks by Dimension Configuration')
axesTasks.set_xlabel('Number of Tasks')
axesTasks.set_ylabel('Time (seconds)')

# Anchor the legend's upper-left corner just past the right edge of the axes
axesTasks.legend(title='Dimensions', bbox_to_anchor=(1.05, 1), loc='upper left')
figureTasks.tight_layout()
plt.show()

## Scaling Analysis

In [None]:
# Scatter of 'time' against 'dimensionProduct', coloured by the 'tasks' column
figureScaling, axesScaling = plt.subplots(figsize=(14, 6))
seaborn.scatterplot(
    data=dfBenchmarks,
    x='dimensionProduct',
    y='time',
    hue='tasks',
    ax=axesScaling,
)
axesScaling.set_title('Execution Time vs Problem Size')
axesScaling.set_xlabel('Total Number of Positions (Product of Dimensions)')
axesScaling.set_ylabel('Time (seconds)')

# Both axes use a logarithmic scale
axesScaling.set_yscale('log')
axesScaling.set_xscale('log')

axesScaling.legend(title='Tasks')
figureScaling.tight_layout()
plt.show()