# Map Folding Algorithm Performance Analysis

Analyze benchmark data from the map folding algorithm implementation.

In [1]:
import numpy
import pandas
from pathlib import Path
import matplotlib.pyplot as plt
import seaborn

# Plot appearance: seaborn's white grid theme and a wide default figure size.
seaborn.set_style('whitegrid')
plt.rcParams['figure.figsize'] = [12, 6]


def countDimensions(dimensions):
    """Return the number of entries in `dimensions`, or 0 when it is None."""
    if dimensions is None:
        return 0
    return len(dimensions)


def multiplyDimensions(dimensions):
    """Return the product of the entries in `dimensions`, or 0 when it is None."""
    if dimensions is None:
        return 0
    return numpy.prod(dimensions)


def labelDimensions(dimensions):
    """Return the entries of `dimensions` joined by 'x' (e.g. '2x12'), or 'None' when it is None."""
    if dimensions is None:
        return 'None'
    return 'x'.join(map(str, dimensions))


# Read the recorded benchmark runs from disk.
# NOTE(review): allow_pickle=True lets numpy unpickle arbitrary objects stored in
# the file; only load benchmark files that come from a trusted source.
pathBenchmarkFile = Path('marks/benchmarks.npy')
arrayBenchmarks = numpy.load(pathBenchmarkFile, allow_pickle=True)

# One DataFrame row per record in the loaded array.
dfBenchmarks = pandas.DataFrame(arrayBenchmarks)

# Derived columns, all computed from the 'dimensions' column:
# how many dimensions, their product, and a compact text label for grouping.
columnDimensions = dfBenchmarks['dimensions']
dfBenchmarks['dimensionCount'] = columnDimensions.apply(countDimensions)
dfBenchmarks['dimensionProduct'] = columnDimensions.apply(multiplyDimensions)
dfBenchmarks['dimensionsStr'] = columnDimensions.apply(labelDimensions)

## Summary Statistics

In [2]:
# Group the timings by dimension label once and reuse the grouping for both reports.
timeByDimensions = dfBenchmarks.groupby('dimensionsStr')['time']

# Count, mean, spread and quartiles of the run time for each configuration.
print(timeByDimensions.describe())

# The median on its own, for quick reference.
print("Median (seconds):")
print(timeByDimensions.median())


               count      mean       std       min       25%       50%  \
dimensionsStr                                                            
2x12           100.0  1.112894  0.113472  1.093596  1.095571  1.097549   
2x13           100.0  5.515276  0.044003  5.473378  5.487635  5.498309   
2x2x2x2x2      100.0  0.046568  0.001637  0.045665  0.045790  0.046042   
3x3x3          100.0  2.455627  0.028821  2.430622  2.437876  2.442567   
3x8            100.0  1.250104  0.012729  1.242117  1.244246  1.247095   
5x5            100.0  1.479333  0.024573  1.450476  1.459897  1.469719   

                    75%       max  
dimensionsStr                      
2x12           1.100608  2.228861  
2x13           5.522102  5.680837  
2x2x2x2x2      0.046638  0.058066  
3x3x3          2.457623  2.581989  
3x8            1.249450  1.334568  
5x5            1.496669  1.554487  
Median (seconds):
dimensionsStr
2x12         1.097549
2x13         5.498309
2x2x2x2x2    0.046042
3x3x3        2.442567
3x8          1.247095
5x5          1.469719
Name: time, dtype: float64

## Performance by Dimension Configuration

In [None]:
# Box plot of run time, one box per dimension configuration.
figureBoxplot, axesBoxplot = plt.subplots(figsize=(14, 6))
seaborn.boxplot(data=dfBenchmarks, x='dimensionsStr', y='time', ax=axesBoxplot)

# Tilt the category labels so neighbouring labels do not run into each other.
plt.xticks(rotation=45)

axesBoxplot.set_title('Execution Time Distribution by Dimension Configuration')
axesBoxplot.set_xlabel('Dimensions')
axesBoxplot.set_ylabel('Time (seconds)')

figureBoxplot.tight_layout()
plt.show()

## Task Division Analysis

In [None]:
# Scatter of run time against task count; each dimension configuration gets
# its own colour and marker.
figureTasks, axesTasks = plt.subplots(figsize=(14, 6))
seaborn.scatterplot(
    data=dfBenchmarks,
    x='tasks',
    y='time',
    hue='dimensionsStr',
    style='dimensionsStr',
    ax=axesTasks,
)

axesTasks.set_title('Execution Time vs Tasks by Dimension Configuration')
axesTasks.set_xlabel('Number of Tasks')
axesTasks.set_ylabel('Time (seconds)')

# Anchor the legend just outside the right edge of the axes so it does not cover points.
axesTasks.legend(title='Dimensions', bbox_to_anchor=(1.05, 1), loc='upper left')

figureTasks.tight_layout()
plt.show()

## Scaling Analysis

In [None]:
# Scatter of run time against problem size (product of the dimensions),
# coloured by task count.
figureScaling, axesScaling = plt.subplots(figsize=(14, 6))
seaborn.scatterplot(data=dfBenchmarks, x='dimensionProduct', y='time', hue='tasks', ax=axesScaling)

axesScaling.set_title('Execution Time vs Problem Size')
axesScaling.set_xlabel('Total Number of Positions (Product of Dimensions)')
axesScaling.set_ylabel('Time (seconds)')

# Both axes are logarithmic.
axesScaling.set_yscale('log')
axesScaling.set_xscale('log')

axesScaling.legend(title='Tasks')
figureScaling.tight_layout()
plt.show()